Revision dc172ecc

View differences:

ffplay.c
1582 1582
    int perms = AV_PERM_WRITE;
1583 1583
    int i, w, h, stride[4];
1584 1584
    unsigned edge;
1585
    int pixel_size;
1585 1586

  
1586 1587
    av_assert0(codec->flags & CODEC_FLAG_EMU_EDGE);
1587 1588

  
......
1609 1610
    if(!(ref = avfilter_get_video_buffer(ctx->outputs[0], perms, w, h)))
1610 1611
        return -1;
1611 1612

  
1613
    pixel_size = av_pix_fmt_descriptors[ref->format].comp[0].step_minus1+1;
1612 1614
    ref->video->w = codec->width;
1613 1615
    ref->video->h = codec->height;
1614 1616
    for(i = 0; i < 4; i ++) {
......
1616 1618
        unsigned vshift = (i == 1 || i == 2) ? av_pix_fmt_descriptors[ref->format].log2_chroma_h : 0;
1617 1619

  
1618 1620
        if (ref->data[i]) {
1619
            ref->data[i]    += (edge >> hshift) + ((edge * ref->linesize[i]) >> vshift);
1621
            ref->data[i]    += ((edge * pixel_size) >> hshift) + ((edge * ref->linesize[i]) >> vshift);
1620 1622
        }
1621 1623
        pic->data[i]     = ref->data[i];
1622 1624
        pic->linesize[i] = ref->linesize[i];
libavcodec/h264.c
459 459
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
460 460
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
461 461
    const int luma_xy= (mx&3) + ((my&3)<<2);
462
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
462
    uint8_t * src_y = pic->data[0] + (mx>>2)*h->pixel_size + (my>>2)*h->mb_linesize;
463 463
    uint8_t * src_cb, * src_cr;
464 464
    int extra_width= h->emu_edge_width;
465 465
    int extra_height= h->emu_edge_height;
......
476 476
       || full_my < 0-extra_height
477 477
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
478 478
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
479
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
480
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
479
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2*h->pixel_size - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
480
            src_y= s->edge_emu_buffer + 2*h->pixel_size + 2*h->mb_linesize;
481 481
        emu=1;
482 482
    }
483 483

  
......
493 493
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
494 494
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
495 495
    }
496
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
497
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
496
    src_cb= pic->data[1] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
497
    src_cr= pic->data[2] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
498 498

  
499 499
    if(emu){
500 500
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
......
519 519
    qpel_mc_func *qpix_op=  qpix_put;
520 520
    h264_chroma_mc_func chroma_op= chroma_put;
521 521

  
522
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
523
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
524
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
522
    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
523
    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
524
    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
525 525
    x_offset += 8*s->mb_x;
526 526
    y_offset += 8*(s->mb_y >> MB_FIELD);
527 527

  
......
552 552
                           int list0, int list1){
553 553
    MpegEncContext * const s = &h->s;
554 554

  
555
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
556
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
557
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
555
    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
556
    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
557
    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
558 558
    x_offset += 8*s->mb_x;
559 559
    y_offset += 8*(s->mb_y >> MB_FIELD);
560 560

  
......
562 562
        /* don't optimize for luma-only case, since B-frames usually
563 563
         * use implicit weights => chroma too. */
564 564
        uint8_t *tmp_cb = s->obmc_scratchpad;
565
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
565
        uint8_t *tmp_cr = s->obmc_scratchpad + 8*h->pixel_size;
566 566
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
567 567
        int refn0 = h->ref_cache[0][ scan8[n] ];
568 568
        int refn1 = h->ref_cache[1][ scan8[n] ];
......
637 637
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
638 638
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
639 639
        uint8_t **src= h->ref_list[list][refn].data;
640
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
640
        int off= mx*h->pixel_size + (my + (s->mb_x&3)*4)*h->mb_linesize + 64*h->pixel_size;
641 641
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
642
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
642
        off= (mx>>1)*h->pixel_size + ((my>>1)*h->pixel_size + (s->mb_x&7))*s->uvlinesize + 64*h->pixel_size;
643 643
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
644 644
    }
645 645
}
......
664 664
                weight_op, weight_avg,
665 665
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
666 666
    }else if(IS_16X8(mb_type)){
667
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
667
        mc_part(h, 0, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 0,
668 668
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
669 669
                &weight_op[1], &weight_avg[1],
670 670
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
671
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
671
        mc_part(h, 8, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 4,
672 672
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
673 673
                &weight_op[1], &weight_avg[1],
674 674
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
......
698 698
                    &weight_op[3], &weight_avg[3],
699 699
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
700 700
            }else if(IS_SUB_8X4(sub_mb_type)){
701
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
701
                mc_part(h, n  , 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset,
702 702
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
703 703
                    &weight_op[4], &weight_avg[4],
704 704
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
705
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
705
                mc_part(h, n+2, 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
706 706
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
707 707
                    &weight_op[4], &weight_avg[4],
708 708
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
......
900 900
 * Allocate buffers which are not shared amongst multiple threads.
901 901
 */
902 902
static int context_init(H264Context *h){
903
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
904
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
903
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
904
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
905 905

  
906 906
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
907 907
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
......
1003 1003

  
1004 1004
    ff_h264_decode_init_vlc();
1005 1005

  
1006
    h->pixel_size = 1;
1007

  
1006 1008
    h->thread_context[0] = h;
1007 1009
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
1008 1010
    h->prev_poc_msb= 1<<16;
......
1165 1167
    assert(s->linesize && s->uvlinesize);
1166 1168

  
1167 1169
    for(i=0; i<16; i++){
1168
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
1169
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
1170
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->linesize*((scan8[i] - scan8[0])>>3);
1171
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->linesize*((scan8[i] - scan8[0])>>3);
1170 1172
    }
1171 1173
    for(i=0; i<4; i++){
1172 1174
        h->block_offset[16+i]=
1173
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1175
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1174 1176
        h->block_offset[24+16+i]=
1175
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1177
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1176 1178
    }
1177 1179

  
1178 1180
    /* can't be in alloc_tables because linesize isn't known there.
......
1372 1374
            if(!MB_MBAFF){
1373 1375
                top_border = h->top_borders[0][s->mb_x];
1374 1376
                AV_COPY128(top_border, src_y + 15*linesize);
1377
                if (h->pixel_size == 2)
1378
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
1375 1379
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1380
                    if (h->pixel_size == 2) {
1381
                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
1382
                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
1383
                    } else {
1376 1384
                    AV_COPY64(top_border+16, src_cb+7*uvlinesize);
1377 1385
                    AV_COPY64(top_border+24, src_cr+7*uvlinesize);
1386
                    }
1378 1387
                }
1379 1388
            }
1380 1389
        }else if(MB_MBAFF){
......
1387 1396
    // There are two lines saved, the line above the top macroblock of a pair,
1388 1397
    // and the line above the bottom macroblock
1389 1398
    AV_COPY128(top_border, src_y + 16*linesize);
1399
    if (h->pixel_size == 2)
1400
        AV_COPY128(top_border+16, src_y+16*linesize+16);
1390 1401

  
1391 1402
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1403
        if (h->pixel_size == 2) {
1404
            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
1405
            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
1406
        } else {
1392 1407
        AV_COPY64(top_border+16, src_cb+8*uvlinesize);
1393 1408
        AV_COPY64(top_border+24, src_cr+8*uvlinesize);
1409
        }
1394 1410
    }
1395 1411
}
1396 1412

  
......
1419 1435
        deblock_top =  (s->mb_y > !!MB_FIELD);
1420 1436
    }
1421 1437

  
1422
    src_y  -=   linesize + 1;
1423
    src_cb -= uvlinesize + 1;
1424
    src_cr -= uvlinesize + 1;
1438
    src_y  -=   linesize + h->pixel_size;
1439
    src_cb -= uvlinesize + h->pixel_size;
1440
    src_cr -= uvlinesize + h->pixel_size;
1425 1441

  
1426 1442
    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1427 1443
    top_border    = h->top_borders[top_idx][s->mb_x];
1428 1444

  
1429 1445
#define XCHG(a,b,xchg)\
1446
    if (h->pixel_size == 2) {\
1447
        if (xchg) {\
1448
            AV_SWAP64(b+0,a+0);\
1449
            AV_SWAP64(b+8,a+8);\
1450
        } else {\
1451
            AV_COPY128(b,a); \
1452
        }\
1453
    } else \
1430 1454
if (xchg) AV_SWAP64(b,a);\
1431 1455
else      AV_COPY64(b,a);
1432 1456

  
1433 1457
    if(deblock_top){
1434 1458
        if(deblock_left){
1435
            XCHG(top_border_m1+8, src_y -7, 1);
1459
            XCHG(top_border_m1+8*h->pixel_size, src_y -7*h->pixel_size, 1);
1436 1460
        }
1437
        XCHG(top_border+0, src_y +1, xchg);
1438
        XCHG(top_border+8, src_y +9, 1);
1461
        XCHG(top_border+0*h->pixel_size, src_y +1*h->pixel_size, xchg);
1462
        XCHG(top_border+8*h->pixel_size, src_y +9*h->pixel_size, 1);
1439 1463
        if(s->mb_x+1 < s->mb_width){
1440
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
1464
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17*h->pixel_size, 1);
1441 1465
        }
1442 1466
    }
1443

  
1444 1467
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1445 1468
        if(deblock_top){
1446 1469
            if(deblock_left){
1447
                XCHG(top_border_m1+16, src_cb -7, 1);
1448
                XCHG(top_border_m1+24, src_cr -7, 1);
1470
                XCHG(top_border_m1+16*h->pixel_size, src_cb -7*h->pixel_size, 1);
1471
                XCHG(top_border_m1+24*h->pixel_size, src_cr -7*h->pixel_size, 1);
1449 1472
            }
1450
            XCHG(top_border+16, src_cb+1, 1);
1451
            XCHG(top_border+24, src_cr+1, 1);
1473
            XCHG(top_border+16*h->pixel_size, src_cb+h->pixel_size, 1);
1474
            XCHG(top_border+24*h->pixel_size, src_cr+h->pixel_size, 1);
1452 1475
        }
1453 1476
    }
1454 1477
}
1455 1478

  
1479
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) {
1480
    if (h->pixel_size == 1)
1481
        return mb[index];
1482
    else
1483
        return ((int32_t*)mb)[index];
1484
}
1485

  
1486
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) {
1487
    if (h->pixel_size == 1)
1488
        mb[index] = value;
1489
    else
1490
        ((int32_t*)mb)[index] = value;
1491
}
1492

  
1456 1493
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
1457 1494
    MpegEncContext * const s = &h->s;
1458 1495
    const int mb_x= s->mb_x;
......
1469 1506
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1470 1507
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
1471 1508

  
1472
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
1473
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1474
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
1509
    dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
1510
    dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
1511
    dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
1475 1512

  
1476
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1477
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1513
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64*h->pixel_size, s->linesize, 4);
1514
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64*h->pixel_size, dest_cr - dest_cb, 2);
1478 1515

  
1479 1516
    h->list_counts[mb_xy]= h->list_count;
1480 1517

  
......
1511 1548
    }
1512 1549

  
1513 1550
    if (!simple && IS_INTRA_PCM(mb_type)) {
1551
        if (h->pixel_size == 2) {
1552
            const int bit_depth = h->sps.bit_depth_luma;
1553
            int j;
1554
            GetBitContext gb;
1555
            init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
1556

  
1557
            for (i = 0; i < 16; i++) {
1558
                uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
1559
                for (j = 0; j < 16; j++)
1560
                    tmp_y[j] = get_bits(&gb, bit_depth);
1561
            }
1562
            for (i = 0; i < 8; i++) {
1563
                uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
1564
                for (j = 0; j < 8; j++)
1565
                    tmp_cb[j] = get_bits(&gb, bit_depth);
1566
            }
1567
            for (i = 0; i < 8; i++) {
1568
                uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
1569
                for (j = 0; j < 8; j++)
1570
                    tmp_cr[j] = get_bits(&gb, bit_depth);
1571
            }
1572
        } else {
1514 1573
        for (i=0; i<16; i++) {
1515 1574
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
1516 1575
        }
......
1518 1577
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
1519 1578
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
1520 1579
        }
1580
        }
1521 1581
    } else {
1522 1582
        if(IS_INTRA(mb_type)){
1523 1583
            if(h->deblocking_filter)
......
1542 1602
                            uint8_t * const ptr= dest_y + block_offset[i];
1543 1603
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1544 1604
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1545
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1605
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
1546 1606
                            }else{
1547 1607
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
1548 1608
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1549 1609
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
1550 1610
                                if(nnz){
1551
                                    if(nnz == 1 && h->mb[i*16])
1552
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
1611
                                    if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1612
                                        idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
1553 1613
                                    else
1554
                                        idct_add   (ptr, h->mb + i*16, linesize);
1614
                                        idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
1555 1615
                                }
1556 1616
                            }
1557 1617
                        }
......
1568 1628
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1569 1629

  
1570 1630
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1571
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
1631
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
1572 1632
                            }else{
1573 1633
                                uint8_t *topright;
1574 1634
                                int nnz, tr;
1635
                                uint64_t tr_high;
1575 1636
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1576 1637
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1577 1638
                                    assert(mb_y || linesize <= block_offset[i]);
1578 1639
                                    if(!topright_avail){
1640
                                        if (h->pixel_size == 2) {
1641
                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
1642
                                            topright= (uint8_t*) &tr_high;
1643
                                        } else {
1579 1644
                                        tr= ptr[3 - linesize]*0x01010101;
1580 1645
                                        topright= (uint8_t*) &tr;
1646
                                        }
1581 1647
                                    }else
1582
                                        topright= ptr + 4 - linesize;
1648
                                        topright= ptr + 4*h->pixel_size - linesize;
1583 1649
                                }else
1584 1650
                                    topright= NULL;
1585 1651

  
......
1587 1653
                                nnz = h->non_zero_count_cache[ scan8[i] ];
1588 1654
                                if(nnz){
1589 1655
                                    if(is_h264){
1590
                                        if(nnz == 1 && h->mb[i*16])
1591
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
1656
                                        if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1657
                                            idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
1592 1658
                                        else
1593
                                            idct_add   (ptr, h->mb + i*16, linesize);
1659
                                            idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
1594 1660
                                    }
1595 1661
#if CONFIG_SVQ3_DECODER
1596 1662
                                    else
......
1611 1677
                            static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
1612 1678
                                                                    8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
1613 1679
                            for(i = 0; i < 16; i++)
1614
                                h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
1680
                                dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i));
1615 1681
                        }
1616 1682
                    }
1617 1683
                }
......
1638 1704
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1639 1705
                        }else{
1640 1706
                            for(i=0; i<16; i++){
1641
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1642
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
1707
                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1708
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
1643 1709
                            }
1644 1710
                        }
1645 1711
                    }else{
......
1651 1717
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
1652 1718
                        for(i=0; i<16; i+=di){
1653 1719
                            if(h->non_zero_count_cache[ scan8[i] ]){
1654
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
1720
                                idct_add(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
1655 1721
                            }
1656 1722
                        }
1657 1723
                    }else{
......
1679 1745
            uint8_t *dest[2] = {dest_cb, dest_cr};
1680 1746
            if(transform_bypass){
1681 1747
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1682
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1683
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1748
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16*h->pixel_size, uvlinesize);
1749
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16*h->pixel_size, uvlinesize);
1684 1750
                }else{
1685 1751
                    idct_add = s->dsp.add_pixels4;
1686 1752
                    for(i=16; i<16+8; i++){
1687
                        if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1688
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
1753
                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1754
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16*h->pixel_size, uvlinesize);
1689 1755
                    }
1690 1756
                }
1691 1757
            }else{
1692 1758
                if(is_h264){
1693 1759
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
1694
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16     , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1760
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*h->pixel_size       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1695 1761
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
1696
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1762
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16+4*16)*h->pixel_size, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1697 1763
                    h->h264dsp.h264_idct_add8(dest, block_offset,
1698 1764
                                              h->mb, uvlinesize,
1699 1765
                                              h->non_zero_count_cache);
......
2906 2972

  
2907 2973
                s->mb_x= mb_x;
2908 2974
                s->mb_y= mb_y;
2909
                dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2910
                dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2911
                dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2975
                dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
2976
                dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
2977
                dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
2912 2978
                    //FIXME simplify above
2913 2979

  
2914 2980
                if (MB_FIELD) {
libavcodec/h264.h
265 265
typedef struct H264Context{
266 266
    MpegEncContext s;
267 267
    H264DSPContext h264dsp;
268
    int pixel_size;
268 269
    int chroma_qp[2]; //QPc
269 270

  
270 271
    int qp_thresh;      ///< QP threshold to skip loopfilter
......
296 297
    unsigned int top_samples_available;
297 298
    unsigned int topright_samples_available;
298 299
    unsigned int left_samples_available;
299
    uint8_t (*top_borders[2])[16+2*8];
300
    uint8_t (*top_borders[2])[(16+2*8)*2];
300 301

  
301 302
    /**
302 303
     * non zero coeff count cache.
......
406 407
    GetBitContext *intra_gb_ptr;
407 408
    GetBitContext *inter_gb_ptr;
408 409

  
409
    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24];
410
    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16];
411
    DCTELEM mb_padding[256];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
410
    DECLARE_ALIGNED(16, DCTELEM, mb)[16*24*2]; ///< as a dct coefficient is int32_t in high depth, we need to reserve twice the space.
411
    DECLARE_ALIGNED(16, DCTELEM, mb_luma_dc)[16*2];
412
    DCTELEM mb_padding[256*2];        ///< as mb is addressed by scantable[i] and scantable is uint8_t we can either check that i is not too large or ensure that there is some unused stuff after mb
412 413

  
413 414
    /**
414 415
     * Cabac
libavcodec/h264_cabac.c
1105 1105

  
1106 1106
        int j= scantable[index[--coeff_count]];
1107 1107

  
1108
        if( get_cabac( CC, ctx ) == 0 ) {
1109
            node_ctx = coeff_abs_level_transition[0][node_ctx];
1110
            if( is_dc ) {
1111
                block[j] = get_cabac_bypass_sign( CC, -1);
1112
            }else{
1113
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
1114
            }
1115
        } else {
1116
            int coeff_abs = 2;
1117
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
1118
            node_ctx = coeff_abs_level_transition[1][node_ctx];
1119

  
1120
            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
1121
                coeff_abs++;
1122
            }
1123

  
1124
            if( coeff_abs >= 15 ) {
1125
                int j = 0;
1126
                while( get_cabac_bypass( CC ) ) {
1127
                    j++;
1128
                }
1129

  
1130
                coeff_abs=1;
1131
                while( j-- ) {
1132
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
1133
                }
1134
                coeff_abs+= 14;
1135
            }
1108
#define STORE_BLOCK(type) \
1109
        if( get_cabac( CC, ctx ) == 0 ) { \
1110
            node_ctx = coeff_abs_level_transition[0][node_ctx]; \
1111
            if( is_dc ) { \
1112
                ((type*)block)[j] = get_cabac_bypass_sign( CC, -1); \
1113
            }else{ \
1114
                ((type*)block)[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6; \
1115
            } \
1116
        } else { \
1117
            int coeff_abs = 2; \
1118
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \
1119
            node_ctx = coeff_abs_level_transition[1][node_ctx]; \
1120
\
1121
            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \
1122
                coeff_abs++; \
1123
            } \
1124
\
1125
            if( coeff_abs >= 15 ) { \
1126
                int j = 0; \
1127
                while( get_cabac_bypass( CC ) ) { \
1128
                    j++; \
1129
                } \
1130
\
1131
                coeff_abs=1; \
1132
                while( j-- ) { \
1133
                    coeff_abs += coeff_abs + get_cabac_bypass( CC ); \
1134
                } \
1135
                coeff_abs+= 14; \
1136
            } \
1137
\
1138
            if( is_dc ) { \
1139
                ((type*)block)[j] = get_cabac_bypass_sign( CC, -coeff_abs ); \
1140
            }else{ \
1141
                ((type*)block)[j] = ((int)(get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32)) >> 6; \
1142
            } \
1143
        }
1136 1144

  
1137
            if( is_dc ) {
1138
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
1139
            }else{
1140
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
1141
            }
1145
        if (h->pixel_size == 2) {
1146
            STORE_BLOCK(int32_t)
1147
        } else {
1148
            STORE_BLOCK(int16_t)
1142 1149
        }
1143 1150
    } while( coeff_count );
1144 1151
#ifdef CABAC_ON_STACK
......
1304 1311
    h->slice_table[ mb_xy ]= h->slice_num;
1305 1312

  
1306 1313
    if(IS_INTRA_PCM(mb_type)) {
1314
        const int mb_size = 384*h->sps.bit_depth_luma/8;
1307 1315
        const uint8_t *ptr;
1308 1316

  
1309 1317
        // We assume these blocks are very rare so we do not optimize it.
......
1316 1324
        }
1317 1325

  
1318 1326
        // The pixels are stored in the same order as levels in h->mb array.
1319
        memcpy(h->mb, ptr, 256); ptr+=256;
1327
        memcpy(h->mb, ptr, 2*mb_size/3); ptr+=2*mb_size/3;
1320 1328
        if(CHROMA){
1321
            memcpy(h->mb+128, ptr, 128); ptr+=128;
1329
            memcpy(h->mb+mb_size/3, ptr, mb_size/3); ptr+=mb_size/3;
1322 1330
        }
1323 1331

  
1324 1332
        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
......
1652 1660
            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
1653 1661
            AV_ZERO128(h->mb_luma_dc+0);
1654 1662
            AV_ZERO128(h->mb_luma_dc+8);
1663
            AV_ZERO128(h->mb_luma_dc+16);
1664
            AV_ZERO128(h->mb_luma_dc+24);
1655 1665
            decode_cabac_residual_dc( h, h->mb_luma_dc, 0, LUMA_DC_BLOCK_INDEX, scan, 16);
1656 1666

  
1657 1667
            if( cbp&15 ) {
1658 1668
                qmul = h->dequant4_coeff[0][s->qscale];
1659 1669
                for( i = 0; i < 16; i++ ) {
1660 1670
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
1661
                    decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
1671
                    decode_cabac_residual_nondc(h, h->mb + 16*i*h->pixel_size, 1, i, scan + 1, qmul, 15);
1662 1672
                }
1663 1673
            } else {
1664 1674
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
......
1668 1678
            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
1669 1679
                if( cbp & (1<<i8x8) ) {
1670 1680
                    if( IS_8x8DCT(mb_type) ) {
1671
                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
1681
                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8*h->pixel_size, 5, 4*i8x8,
1672 1682
                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
1673 1683
                    } else {
1674 1684
                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
......
1676 1686
                            const int index = 4*i8x8 + i4x4;
1677 1687
                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
1678 1688
//START_TIMER
1679
                            decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
1689
                            decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 2, index, scan, qmul, 16);
1680 1690
//STOP_TIMER("decode_residual")
1681 1691
                        }
1682 1692
                    }
......
1691 1701
            int c;
1692 1702
            for( c = 0; c < 2; c++ ) {
1693 1703
                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
1694
                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
1704
                decode_cabac_residual_dc(h, h->mb + (256 + 16*4*c)*h->pixel_size, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
1695 1705
            }
1696 1706
        }
1697 1707

  
......
1702 1712
                for( i = 0; i < 4; i++ ) {
1703 1713
                    const int index = 16 + 4 * c + i;
1704 1714
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
1705
                    decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
1715
                    decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 4, index, scan + 1, qmul, 15);
1706 1716
                }
1707 1717
            }
1708 1718
        } else {
libavcodec/h264_cavlc.c
488 488
            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
489 489
    }
490 490

  
491
    scantable += zeros_left + total_coeff - 1;
492
    if(n >= LUMA_DC_BLOCK_INDEX){
493
        block[*scantable] = level[0];
494
        for(i=1;i<total_coeff && zeros_left > 0;i++) {
495
            if(zeros_left < 7)
496
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
497
            else
498
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
499
            zeros_left -= run_before;
500
            scantable -= 1 + run_before;
501
            block[*scantable]= level[i];
502
        }
503
        for(;i<total_coeff;i++) {
504
            scantable--;
505
            block[*scantable]= level[i];
506
        }
507
    }else{
508
        block[*scantable] = (level[0] * qmul[*scantable] + 32)>>6;
509
        for(i=1;i<total_coeff && zeros_left > 0;i++) {
510
            if(zeros_left < 7)
511
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1);
512
            else
513
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
514
            zeros_left -= run_before;
515
            scantable -= 1 + run_before;
516
            block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
517
        }
518
        for(;i<total_coeff;i++) {
519
            scantable--;
520
            block[*scantable]= (level[i] * qmul[*scantable] + 32)>>6;
521
        }
491
#define STORE_BLOCK(type) \
492
    scantable += zeros_left + total_coeff - 1; \
493
    if(n >= LUMA_DC_BLOCK_INDEX){ \
494
        ((type*)block)[*scantable] = level[0]; \
495
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
496
            if(zeros_left < 7) \
497
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
498
            else \
499
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
500
            zeros_left -= run_before; \
501
            scantable -= 1 + run_before; \
502
            ((type*)block)[*scantable]= level[i]; \
503
        } \
504
        for(;i<total_coeff;i++) { \
505
            scantable--; \
506
            ((type*)block)[*scantable]= level[i]; \
507
        } \
508
    }else{ \
509
        ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
510
        for(i=1;i<total_coeff && zeros_left > 0;i++) { \
511
            if(zeros_left < 7) \
512
                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
513
            else \
514
                run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
515
            zeros_left -= run_before; \
516
            scantable -= 1 + run_before; \
517
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
518
        } \
519
        for(;i<total_coeff;i++) { \
520
            scantable--; \
521
            ((type*)block)[*scantable]= ((int)(level[i] * qmul[*scantable] + 32))>>6; \
522
        } \
523
    }
524

  
525
    if (h->pixel_size == 2) {
526
        STORE_BLOCK(int32_t)
527
    } else {
528
        STORE_BLOCK(int16_t)
522 529
    }
523 530

  
524 531
    if(zeros_left<0){
......
605 612
        align_get_bits(&s->gb);
606 613

  
607 614
        // The pixels are stored in the same order as levels in h->mb array.
608
        for(x=0; x < (CHROMA ? 384 : 256); x++){
615
        for(x=0; x < (CHROMA ? 384 : 256)*h->sps.bit_depth_luma/8; x++){
609 616
            ((uint8_t*)h->mb)[x]= get_bits(&s->gb, 8);
610 617
        }
611 618

  
......
941 948
        if(IS_INTRA16x16(mb_type)){
942 949
            AV_ZERO128(h->mb_luma_dc+0);
943 950
            AV_ZERO128(h->mb_luma_dc+8);
951
            AV_ZERO128(h->mb_luma_dc+16);
952
            AV_ZERO128(h->mb_luma_dc+24);
944 953
            if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
945 954
                return -1; //FIXME continue if partitioned and other return -1 too
946 955
            }
......
951 960
                for(i8x8=0; i8x8<4; i8x8++){
952 961
                    for(i4x4=0; i4x4<4; i4x4++){
953 962
                        const int index= i4x4 + 4*i8x8;
954
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
963
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index*h->pixel_size, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
955 964
                            return -1;
956 965
                        }
957 966
                    }
......
963 972
            for(i8x8=0; i8x8<4; i8x8++){
964 973
                if(cbp & (1<<i8x8)){
965 974
                    if(IS_8x8DCT(mb_type)){
966
                        DCTELEM *buf = &h->mb[64*i8x8];
975
                        DCTELEM *buf = &h->mb[64*i8x8*h->pixel_size];
967 976
                        uint8_t *nnz;
968 977
                        for(i4x4=0; i4x4<4; i4x4++){
969 978
                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
......
976 985
                        for(i4x4=0; i4x4<4; i4x4++){
977 986
                            const int index= i4x4 + 4*i8x8;
978 987

  
979
                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
988
                            if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
980 989
                                return -1;
981 990
                            }
982 991
                        }
......
990 999

  
991 1000
        if(cbp&0x30){
992 1001
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
993
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1002
                if( decode_residual(h, gb, h->mb + (256 + 16*4*chroma_idx)*h->pixel_size, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
994 1003
                    return -1;
995 1004
                }
996 1005
        }
......
1000 1009
                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1001 1010
                for(i4x4=0; i4x4<4; i4x4++){
1002 1011
                    const int index= 16 + 4*chroma_idx + i4x4;
1003
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
1012
                    if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan + 1, qmul, 15) < 0){
1004 1013
                        return -1;
1005 1014
                    }
1006 1015
                }
libavcodec/h264_loopfilter.c
650 650
        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
651 651
        //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
652 652
        if( dir == 0 ) {
653
            filter_mb_edgev( &img_y[4*edge], linesize, bS, qp, h );
653
            filter_mb_edgev( &img_y[4*edge*h->pixel_size], linesize, bS, qp, h );
654 654
            if( (edge&1) == 0 ) {
655
                filter_mb_edgecv( &img_cb[2*edge], uvlinesize, bS, h->chroma_qp[0], h);
656
                filter_mb_edgecv( &img_cr[2*edge], uvlinesize, bS, h->chroma_qp[1], h);
655
                filter_mb_edgecv( &img_cb[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[0], h);
656
                filter_mb_edgecv( &img_cr[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[1], h);
657 657
            }
658 658
        } else {
659 659
            filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );
libavcodec/utils.c
286 286
        int unaligned;
287 287
        AVPicture picture;
288 288
        int stride_align[4];
289
        const int pixel_size = av_pix_fmt_descriptors[s->pix_fmt].comp[0].step_minus1+1;
289 290

  
290 291
        avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
291 292

  
......
335 336
            if((s->flags&CODEC_FLAG_EMU_EDGE) || !size[2])
336 337
                buf->data[i] = buf->base[i];
337 338
            else
338
                buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), stride_align[i]);
339
                buf->data[i] = buf->base[i] + FFALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (pixel_size*EDGE_WIDTH>>h_shift), stride_align[i]);
339 340
        }
340 341
        if(size[1] && !size[2])
341 342
            ff_set_systematic_pal2((uint32_t*)buf->data[1], s->pix_fmt);

Also available in: Unified diff