Revision e7077f5e

View differences:

libavcodec/h264.c
459 459
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
460 460
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
461 461
    const int luma_xy= (mx&3) + ((my&3)<<2);
462
    uint8_t * src_y = pic->data[0] + (mx>>2)*h->pixel_size + (my>>2)*h->mb_linesize;
462
    uint8_t * src_y = pic->data[0] + ((mx>>2)<<h->pixel_shift) + (my>>2)*h->mb_linesize;
463 463
    uint8_t * src_cb, * src_cr;
464 464
    int extra_width= h->emu_edge_width;
465 465
    int extra_height= h->emu_edge_height;
......
476 476
       || full_my < 0-extra_height
477 477
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
478 478
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
479
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2*h->pixel_size - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
480
            src_y= s->edge_emu_buffer + 2*h->pixel_size + 2*h->mb_linesize;
479
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<h->pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
480
            src_y= s->edge_emu_buffer + (2<<h->pixel_shift) + 2*h->mb_linesize;
481 481
        emu=1;
482 482
    }
483 483

  
......
493 493
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
494 494
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
495 495
    }
496
    src_cb= pic->data[1] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
497
    src_cr= pic->data[2] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
496
    src_cb= pic->data[1] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
497
    src_cr= pic->data[2] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
498 498

  
499 499
    if(emu){
500 500
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
......
519 519
    qpel_mc_func *qpix_op=  qpix_put;
520 520
    h264_chroma_mc_func chroma_op= chroma_put;
521 521

  
522
    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
523
    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
524
    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
522
    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
523
    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
524
    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
525 525
    x_offset += 8*s->mb_x;
526 526
    y_offset += 8*(s->mb_y >> MB_FIELD);
527 527

  
......
552 552
                           int list0, int list1){
553 553
    MpegEncContext * const s = &h->s;
554 554

  
555
    dest_y  += 2*x_offset*h->pixel_size + 2*y_offset*h->  mb_linesize;
556
    dest_cb +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
557
    dest_cr +=   x_offset*h->pixel_size +   y_offset*h->mb_uvlinesize;
555
    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
556
    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
557
    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
558 558
    x_offset += 8*s->mb_x;
559 559
    y_offset += 8*(s->mb_y >> MB_FIELD);
560 560

  
......
562 562
        /* don't optimize for luma-only case, since B-frames usually
563 563
         * use implicit weights => chroma too. */
564 564
        uint8_t *tmp_cb = s->obmc_scratchpad;
565
        uint8_t *tmp_cr = s->obmc_scratchpad + 8*h->pixel_size;
565
        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<h->pixel_shift);
566 566
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
567 567
        int refn0 = h->ref_cache[0][ scan8[n] ];
568 568
        int refn1 = h->ref_cache[1][ scan8[n] ];
......
637 637
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
638 638
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
639 639
        uint8_t **src= h->ref_list[list][refn].data;
640
        int off= mx*h->pixel_size + (my + (s->mb_x&3)*4)*h->mb_linesize + 64*h->pixel_size;
640
        int off= ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
641 641
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
642
        off= (mx>>1)*h->pixel_size + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64*h->pixel_size;
642
        off= (((mx>>1)+64)<<h->pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
643 643
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
644 644
    }
645 645
}
......
664 664
                weight_op, weight_avg,
665 665
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
666 666
    }else if(IS_16X8(mb_type)){
667
        mc_part(h, 0, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 0,
667
        mc_part(h, 0, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 0,
668 668
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
669 669
                &weight_op[1], &weight_avg[1],
670 670
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
671
        mc_part(h, 8, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 4,
671
        mc_part(h, 8, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 4,
672 672
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
673 673
                &weight_op[1], &weight_avg[1],
674 674
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
......
698 698
                    &weight_op[3], &weight_avg[3],
699 699
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
700 700
            }else if(IS_SUB_8X4(sub_mb_type)){
701
                mc_part(h, n  , 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset,
701
                mc_part(h, n  , 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset,
702 702
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
703 703
                    &weight_op[4], &weight_avg[4],
704 704
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
705
                mc_part(h, n+2, 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
705
                mc_part(h, n+2, 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
706 706
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
707 707
                    &weight_op[4], &weight_avg[4],
708 708
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
......
1006 1006
    ff_h264_decode_init_vlc();
1007 1007

  
1008 1008
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
1009
    h->pixel_size = 1;
1009
    h->pixel_shift = 0;
1010 1010

  
1011 1011
    h->thread_context[0] = h;
1012 1012
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
......
1170 1170
    assert(s->linesize && s->uvlinesize);
1171 1171

  
1172 1172
    for(i=0; i<16; i++){
1173
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->linesize*((scan8[i] - scan8[0])>>3);
1174
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->linesize*((scan8[i] - scan8[0])>>3);
1173
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
1174
        h->block_offset[24+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
1175 1175
    }
1176 1176
    for(i=0; i<4; i++){
1177 1177
        h->block_offset[16+i]=
1178
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1178
        h->block_offset[20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1179 1179
        h->block_offset[24+16+i]=
1180
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1180
        h->block_offset[24+20+i]= (4*((scan8[i] - scan8[0])&7)<<h->pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
1181 1181
    }
1182 1182

  
1183 1183
    /* can't be in alloc_tables because linesize isn't known there.
......
1377 1377
            if(!MB_MBAFF){
1378 1378
                top_border = h->top_borders[0][s->mb_x];
1379 1379
                AV_COPY128(top_border, src_y + 15*linesize);
1380
                if (h->pixel_size == 2)
1380
                if (h->pixel_shift)
1381 1381
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
1382 1382
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1383
                    if (h->pixel_size == 2) {
1383
                    if (h->pixel_shift) {
1384 1384
                        AV_COPY128(top_border+32, src_cb+7*uvlinesize);
1385 1385
                        AV_COPY128(top_border+48, src_cr+7*uvlinesize);
1386 1386
                    } else {
......
1399 1399
    // There are two lines saved, the line above the top macroblock of a pair,
1400 1400
    // and the line above the bottom macroblock
1401 1401
    AV_COPY128(top_border, src_y + 16*linesize);
1402
    if (h->pixel_size == 2)
1402
    if (h->pixel_shift)
1403 1403
        AV_COPY128(top_border+16, src_y+16*linesize+16);
1404 1404

  
1405 1405
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1406
        if (h->pixel_size == 2) {
1406
        if (h->pixel_shift) {
1407 1407
            AV_COPY128(top_border+32, src_cb+8*uvlinesize);
1408 1408
            AV_COPY128(top_border+48, src_cr+8*uvlinesize);
1409 1409
        } else {
......
1438 1438
        deblock_top =  (s->mb_y > !!MB_FIELD);
1439 1439
    }
1440 1440

  
1441
    src_y  -=   linesize + h->pixel_size;
1442
    src_cb -= uvlinesize + h->pixel_size;
1443
    src_cr -= uvlinesize + h->pixel_size;
1441
    src_y  -=   linesize + 1 + h->pixel_shift;
1442
    src_cb -= uvlinesize + 1 + h->pixel_shift;
1443
    src_cr -= uvlinesize + 1 + h->pixel_shift;
1444 1444

  
1445 1445
    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1446 1446
    top_border    = h->top_borders[top_idx][s->mb_x];
1447 1447

  
1448 1448
#define XCHG(a,b,xchg)\
1449
    if (h->pixel_size == 2) {\
1449
    if (h->pixel_shift) {\
1450 1450
        if (xchg) {\
1451 1451
            AV_SWAP64(b+0,a+0);\
1452 1452
            AV_SWAP64(b+8,a+8);\
......
1459 1459

  
1460 1460
    if(deblock_top){
1461 1461
        if(deblock_left){
1462
            XCHG(top_border_m1+8*h->pixel_size, src_y -7*h->pixel_size, 1);
1462
            XCHG(top_border_m1+(8<<h->pixel_shift), src_y -(7<<h->pixel_shift), 1);
1463 1463
        }
1464
        XCHG(top_border+0*h->pixel_size, src_y +1*h->pixel_size, xchg);
1465
        XCHG(top_border+8*h->pixel_size, src_y +9*h->pixel_size, 1);
1464
        XCHG(top_border+(0<<h->pixel_shift), src_y +(1<<h->pixel_shift), xchg);
1465
        XCHG(top_border+(8<<h->pixel_shift), src_y +(9<<h->pixel_shift), 1);
1466 1466
        if(s->mb_x+1 < s->mb_width){
1467
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17*h->pixel_size, 1);
1467
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<h->pixel_shift), 1);
1468 1468
        }
1469 1469
    }
1470 1470
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1471 1471
        if(deblock_top){
1472 1472
            if(deblock_left){
1473
                XCHG(top_border_m1+16*h->pixel_size, src_cb -7*h->pixel_size, 1);
1474
                XCHG(top_border_m1+24*h->pixel_size, src_cr -7*h->pixel_size, 1);
1473
                XCHG(top_border_m1+(16<<h->pixel_shift), src_cb -(7<<h->pixel_shift), 1);
1474
                XCHG(top_border_m1+(24<<h->pixel_shift), src_cr -(7<<h->pixel_shift), 1);
1475 1475
            }
1476
            XCHG(top_border+16*h->pixel_size, src_cb+h->pixel_size, 1);
1477
            XCHG(top_border+24*h->pixel_size, src_cr+h->pixel_size, 1);
1476
            XCHG(top_border+(16<<h->pixel_shift), src_cb+1+h->pixel_shift, 1);
1477
            XCHG(top_border+(24<<h->pixel_shift), src_cr+1+h->pixel_shift, 1);
1478 1478
        }
1479 1479
    }
1480 1480
}
1481 1481

  
1482 1482
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) {
1483
    if (h->pixel_size == 1)
1483
    if (!h->pixel_shift)
1484 1484
        return mb[index];
1485 1485
    else
1486 1486
        return ((int32_t*)mb)[index];
1487 1487
}
1488 1488

  
1489 1489
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) {
1490
    if (h->pixel_size == 1)
1490
    if (!h->pixel_shift)
1491 1491
        mb[index] = value;
1492 1492
    else
1493 1493
        ((int32_t*)mb)[index] = value;
......
1509 1509
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1510 1510
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
1511 1511

  
1512
    dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
1513
    dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
1514
    dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
1512
    dest_y  = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize  ) * 16;
1513
    dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
1514
    dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
1515 1515

  
1516
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64*h->pixel_size, s->linesize, 4);
1517
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64*h->pixel_size, dest_cr - dest_cb, 2);
1516
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<h->pixel_shift), s->linesize, 4);
1517
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<h->pixel_shift), dest_cr - dest_cb, 2);
1518 1518

  
1519 1519
    h->list_counts[mb_xy]= h->list_count;
1520 1520

  
......
1551 1551
    }
1552 1552

  
1553 1553
    if (!simple && IS_INTRA_PCM(mb_type)) {
1554
        if (h->pixel_size == 2) {
1554
        if (h->pixel_shift) {
1555 1555
            const int bit_depth = h->sps.bit_depth_luma;
1556 1556
            int j;
1557 1557
            GetBitContext gb;
......
1605 1605
                            uint8_t * const ptr= dest_y + block_offset[i];
1606 1606
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1607 1607
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1608
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
1608
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1609 1609
                            }else{
1610 1610
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
1611 1611
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1612 1612
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
1613 1613
                                if(nnz){
1614 1614
                                    if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1615
                                        idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
1615
                                        idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1616 1616
                                    else
1617
                                        idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
1617
                                        idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1618 1618
                                }
1619 1619
                            }
1620 1620
                        }
......
1631 1631
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1632 1632

  
1633 1633
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1634
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
1634
                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1635 1635
                            }else{
1636 1636
                                uint8_t *topright;
1637 1637
                                int nnz, tr;
......
1640 1640
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1641 1641
                                    assert(mb_y || linesize <= block_offset[i]);
1642 1642
                                    if(!topright_avail){
1643
                                        if (h->pixel_size == 2) {
1643
                                        if (h->pixel_shift) {
1644 1644
                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
1645 1645
                                            topright= (uint8_t*) &tr_high;
1646 1646
                                        } else {
......
1648 1648
                                        topright= (uint8_t*) &tr;
1649 1649
                                        }
1650 1650
                                    }else
1651
                                        topright= ptr + 4*h->pixel_size - linesize;
1651
                                        topright= ptr + (4<<h->pixel_shift) - linesize;
1652 1652
                                }else
1653 1653
                                    topright= NULL;
1654 1654

  
......
1657 1657
                                if(nnz){
1658 1658
                                    if(is_h264){
1659 1659
                                        if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1660
                                            idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
1660
                                            idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1661 1661
                                        else
1662
                                            idct_add   (ptr, h->mb + i*16*h->pixel_size, linesize);
1662
                                            idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1663 1663
                                    }
1664 1664
#if CONFIG_SVQ3_DECODER
1665 1665
                                    else
......
1708 1708
                        }else{
1709 1709
                            for(i=0; i<16; i++){
1710 1710
                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1711
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
1711
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
1712 1712
                            }
1713 1713
                        }
1714 1714
                    }else{
......
1720 1720
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
1721 1721
                        for(i=0; i<16; i+=di){
1722 1722
                            if(h->non_zero_count_cache[ scan8[i] ]){
1723
                                idct_add(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
1723
                                idct_add(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
1724 1724
                            }
1725 1725
                        }
1726 1726
                    }else{
......
1748 1748
            uint8_t *dest[2] = {dest_cb, dest_cr};
1749 1749
            if(transform_bypass){
1750 1750
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1751
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16*h->pixel_size, uvlinesize);
1752
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16*h->pixel_size, uvlinesize);
1751
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<h->pixel_shift), uvlinesize);
1752
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<h->pixel_shift), uvlinesize);
1753 1753
                }else{
1754 1754
                    idct_add = s->dsp.add_pixels4;
1755 1755
                    for(i=16; i<16+8; i++){
1756 1756
                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1757
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + i*16*h->pixel_size, uvlinesize);
1757
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<h->pixel_shift), uvlinesize);
1758 1758
                    }
1759 1759
                }
1760 1760
            }else{
1761 1761
                if(is_h264){
1762 1762
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
1763
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*h->pixel_size       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1763
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<h->pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1764 1764
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
1765
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16+4*16)*h->pixel_size, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1765
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<h->pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1766 1766
                    h->h264dsp.h264_idct_add8(dest, block_offset,
1767 1767
                                              h->mb, uvlinesize,
1768 1768
                                              h->non_zero_count_cache);
......
2986 2986

  
2987 2987
                s->mb_x= mb_x;
2988 2988
                s->mb_y= mb_y;
2989
                dest_y  = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize  ) * 16;
2990
                dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
2991
                dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
2989
                dest_y  = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize  ) * 16;
2990
                dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
2991
                dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
2992 2992
                    //FIXME simplify above
2993 2993

  
2994 2994
                if (MB_FIELD) {
......
3482 3482
            if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
3483 3483
                if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
3484 3484
                    avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
3485
                    h->pixel_size = (h->sps.bit_depth_luma+7)/8;
3485
                    h->pixel_shift = h->sps.bit_depth_luma/9;
3486 3486

  
3487 3487
                    ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
3488 3488
                    ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
libavcodec/h264.h
266 266
typedef struct H264Context{
267 267
    MpegEncContext s;
268 268
    H264DSPContext h264dsp;
269
    int pixel_size;
269
    int pixel_shift;
270 270
    int chroma_qp[2]; //QPc
271 271

  
272 272
    int qp_thresh;      ///< QP threshold to skip loopfilter
libavcodec/h264_cabac.c
1143 1143
            } \
1144 1144
        }
1145 1145

  
1146
        if (h->pixel_size == 2) {
1146
        if (h->pixel_shift) {
1147 1147
            STORE_BLOCK(int32_t)
1148 1148
        } else {
1149 1149
            STORE_BLOCK(int16_t)
......
1670 1670
                qmul = h->dequant4_coeff[0][s->qscale];
1671 1671
                for( i = 0; i < 16; i++ ) {
1672 1672
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
1673
                    decode_cabac_residual_nondc(h, h->mb + 16*i*h->pixel_size, 1, i, scan + 1, qmul, 15);
1673
                    decode_cabac_residual_nondc(h, h->mb + (16*i<<h->pixel_shift), 1, i, scan + 1, qmul, 15);
1674 1674
                }
1675 1675
            } else {
1676 1676
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
......
1680 1680
            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
1681 1681
                if( cbp & (1<<i8x8) ) {
1682 1682
                    if( IS_8x8DCT(mb_type) ) {
1683
                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8*h->pixel_size, 5, 4*i8x8,
1683
                        decode_cabac_residual_nondc(h, h->mb + (64*i8x8<<h->pixel_shift), 5, 4*i8x8,
1684 1684
                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
1685 1685
                    } else {
1686 1686
                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
......
1688 1688
                            const int index = 4*i8x8 + i4x4;
1689 1689
                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
1690 1690
//START_TIMER
1691
                            decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 2, index, scan, qmul, 16);
1691
                            decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 2, index, scan, qmul, 16);
1692 1692
//STOP_TIMER("decode_residual")
1693 1693
                        }
1694 1694
                    }
......
1703 1703
            int c;
1704 1704
            for( c = 0; c < 2; c++ ) {
1705 1705
                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
1706
                decode_cabac_residual_dc(h, h->mb + (256 + 16*4*c)*h->pixel_size, 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
1706
                decode_cabac_residual_dc(h, h->mb + ((256 + 16*4*c)<<h->pixel_shift), 3, CHROMA_DC_BLOCK_INDEX+c, chroma_dc_scan, 4);
1707 1707
            }
1708 1708
        }
1709 1709

  
......
1714 1714
                for( i = 0; i < 4; i++ ) {
1715 1715
                    const int index = 16 + 4 * c + i;
1716 1716
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
1717
                    decode_cabac_residual_nondc(h, h->mb + 16*index*h->pixel_size, 4, index, scan + 1, qmul, 15);
1717
                    decode_cabac_residual_nondc(h, h->mb + (16*index<<h->pixel_shift), 4, index, scan + 1, qmul, 15);
1718 1718
                }
1719 1719
            }
1720 1720
        } else {
libavcodec/h264_cavlc.c
522 522
        } \
523 523
    }
524 524

  
525
    if (h->pixel_size == 2) {
525
    if (h->pixel_shift) {
526 526
        STORE_BLOCK(int32_t)
527 527
    } else {
528 528
        STORE_BLOCK(int16_t)
......
961 961
                for(i8x8=0; i8x8<4; i8x8++){
962 962
                    for(i4x4=0; i4x4<4; i4x4++){
963 963
                        const int index= i4x4 + 4*i8x8;
964
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index*h->pixel_size, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
964
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + (16*index<<h->pixel_shift), index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
965 965
                            return -1;
966 966
                        }
967 967
                    }
......
973 973
            for(i8x8=0; i8x8<4; i8x8++){
974 974
                if(cbp & (1<<i8x8)){
975 975
                    if(IS_8x8DCT(mb_type)){
976
                        DCTELEM *buf = &h->mb[64*i8x8*h->pixel_size];
976
                        DCTELEM *buf = &h->mb[64*i8x8<<h->pixel_shift];
977 977
                        uint8_t *nnz;
978 978
                        for(i4x4=0; i4x4<4; i4x4++){
979 979
                            if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
......
986 986
                        for(i4x4=0; i4x4<4; i4x4++){
987 987
                            const int index= i4x4 + 4*i8x8;
988 988

  
989
                            if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
989
                            if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
990 990
                                return -1;
991 991
                            }
992 992
                        }
......
1000 1000

  
1001 1001
        if(cbp&0x30){
1002 1002
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
1003
                if( decode_residual(h, gb, h->mb + (256 + 16*4*chroma_idx)*h->pixel_size, CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1003
                if( decode_residual(h, gb, h->mb + ((256 + 16*4*chroma_idx)<<h->pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
1004 1004
                    return -1;
1005 1005
                }
1006 1006
        }
......
1010 1010
                const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
1011 1011
                for(i4x4=0; i4x4<4; i4x4++){
1012 1012
                    const int index= 16 + 4*chroma_idx + i4x4;
1013
                    if( decode_residual(h, gb, h->mb + 16*index*h->pixel_size, index, scan + 1, qmul, 15) < 0){
1013
                    if( decode_residual(h, gb, h->mb + (16*index<<h->pixel_shift), index, scan + 1, qmul, 15) < 0){
1014 1014
                        return -1;
1015 1015
                    }
1016 1016
                }
libavcodec/h264_loopfilter.c
650 650
        tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
651 651
        //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
652 652
        if( dir == 0 ) {
653
            filter_mb_edgev( &img_y[4*edge*h->pixel_size], linesize, bS, qp, h );
653
            filter_mb_edgev( &img_y[4*edge<<h->pixel_shift], linesize, bS, qp, h );
654 654
            if( (edge&1) == 0 ) {
655
                filter_mb_edgecv( &img_cb[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[0], h);
656
                filter_mb_edgecv( &img_cr[2*edge*h->pixel_size], uvlinesize, bS, h->chroma_qp[1], h);
655
                filter_mb_edgecv( &img_cb[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[0], h);
656
                filter_mb_edgecv( &img_cr[2*edge<<h->pixel_shift], uvlinesize, bS, h->chroma_qp[1], h);
657 657
            }
658 658
        } else {
659 659
            filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, h );

Also available in: Unified diff