Revision b6303e6d

View differences:

libavcodec/h264.h
300 300
     * is 64 if not available.
301 301
     */
302 302
    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
303

  
304
    /*
305
    .UU.YYYY
306
    .UU.YYYY
307
    .vv.YYYY
308
    .VV.YYYY
309
    */
303 310
    uint8_t (*non_zero_count)[32];
304 311

  
305 312
    /**
......
727 734
    const uint8_t * left_block;
728 735
    int topleft_partition= -1;
729 736
    int i;
730
    static const uint8_t left_block_options[4][8]={
731
        {0,1,2,3,7,10,8,11},
732
        {2,2,3,3,8,11,8,11},
733
        {0,0,1,1,7,10,7,10},
734
        {0,2,0,2,7,10,7,10}
737
    static const uint8_t left_block_options[4][16]={
738
        {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
739
        {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
740
        {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
741
        {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
735 742
    };
736 743

  
737 744
    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
......
788 795
    h->left_mb_xy[0] = left_xy[0];
789 796
    h->left_mb_xy[1] = left_xy[1];
790 797
    if(for_deblock){
798
        *((uint64_t*)&h->non_zero_count_cache[0+8*1])= *((uint64_t*)&h->non_zero_count[mb_xy][ 0]);
799
        *((uint64_t*)&h->non_zero_count_cache[0+8*2])= *((uint64_t*)&h->non_zero_count[mb_xy][ 8]);
800
        *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
801
        *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
802
        *((uint64_t*)&h->non_zero_count_cache[0+8*4])= *((uint64_t*)&h->non_zero_count[mb_xy][24]);
803

  
791 804
        topleft_type = 0;
792 805
        topright_type = 0;
793 806
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
......
922 935
*/
923 936
//FIXME constrained_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
924 937
    if(top_type){
925
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
926
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
927
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
928
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
938
        *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
929 939

  
930
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
931
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
940
        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
941
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
932 942

  
933
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
934
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
943
        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
944
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
935 945

  
936 946
    }else{
937 947
        h->non_zero_count_cache[4+8*0]=
......
949 959

  
950 960
    for (i=0; i<2; i++) {
951 961
        if(left_type[i]){
952
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
953
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
954
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
955
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
962
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
963
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
964
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
965
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
956 966
        }else{
957 967
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
958 968
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
......
1204 1214
static inline void write_back_non_zero_count(H264Context *h){
1205 1215
    const int mb_xy= h->mb_xy;
1206 1216

  
1207
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1208
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1209
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1210
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1211
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1212
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1213
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
1214

  
1215
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1216
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1217
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1218

  
1219
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1220
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1221
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1222

  
1223
    //FIXME sort better how things are stored in non_zero_count
1224

  
1225

  
1226
    h->non_zero_count[mb_xy][13]= h->non_zero_count_cache[6+8*1];
1227
    h->non_zero_count[mb_xy][14]= h->non_zero_count_cache[6+8*2];
1228
    h->non_zero_count[mb_xy][15]= h->non_zero_count_cache[6+8*3];
1229
    h->non_zero_count[mb_xy][16]= h->non_zero_count_cache[5+8*1];
1230
    h->non_zero_count[mb_xy][17]= h->non_zero_count_cache[5+8*2];
1231
    h->non_zero_count[mb_xy][18]= h->non_zero_count_cache[5+8*3];
1232
    h->non_zero_count[mb_xy][19]= h->non_zero_count_cache[4+8*1];
1233
    h->non_zero_count[mb_xy][20]= h->non_zero_count_cache[4+8*2];
1234
    h->non_zero_count[mb_xy][21]= h->non_zero_count_cache[4+8*3];
1235

  
1236
    h->non_zero_count[mb_xy][22]= h->non_zero_count_cache[1+8*1];
1237
    h->non_zero_count[mb_xy][23]= h->non_zero_count_cache[1+8*4];
1238

  
1217
    *((uint64_t*)&h->non_zero_count[mb_xy][ 0]) = *((uint64_t*)&h->non_zero_count_cache[0+8*1]);
1218
    *((uint64_t*)&h->non_zero_count[mb_xy][ 8]) = *((uint64_t*)&h->non_zero_count_cache[0+8*2]);
1219
    *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]);
1220
    *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]);
1221
    *((uint64_t*)&h->non_zero_count[mb_xy][24]) = *((uint64_t*)&h->non_zero_count_cache[0+8*4]);
1239 1222
}
1240 1223

  
1241 1224
static inline void write_back_motion(H264Context *h, int mb_type){
libavcodec/h264_loopfilter.c
472 472
        // be done twice (once for each field) even if we are in a
473 473
        // frame macroblock.
474 474
        //
475
        static const int nnz_idx[4] = {4,5,6,3};
476 475
        unsigned int tmp_linesize   = 2 *   linesize;
477 476
        unsigned int tmp_uvlinesize = 2 * uvlinesize;
478 477
        int mbn_xy = mb_xy - 2 * s->mb_stride;
......
488 487
                const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
489 488
                for( i = 0; i < 4; i++ ) {
490 489
                    if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
491
                        mbn_nnz[nnz_idx[i]] != 0 )
490
                        mbn_nnz[i+4+3*8] != 0 )
492 491
                        bS[i] = 2;
493 492
                    else
494 493
                        bS[i] = 1;
......
663 662
            return;
664 663
        }
665 664
    }
666

  
667
    h->non_zero_count_cache[7+8*1]=h->non_zero_count[mb_xy][0];
668
    h->non_zero_count_cache[7+8*2]=h->non_zero_count[mb_xy][1];
669
    h->non_zero_count_cache[7+8*3]=h->non_zero_count[mb_xy][2];
670
    h->non_zero_count_cache[7+8*4]=h->non_zero_count[mb_xy][3];
671
    h->non_zero_count_cache[4+8*4]=h->non_zero_count[mb_xy][4];
672
    h->non_zero_count_cache[5+8*4]=h->non_zero_count[mb_xy][5];
673
    h->non_zero_count_cache[6+8*4]=h->non_zero_count[mb_xy][6];
674

  
675
    h->non_zero_count_cache[1+8*2]=h->non_zero_count[mb_xy][9];
676
    h->non_zero_count_cache[2+8*2]=h->non_zero_count[mb_xy][8];
677
    h->non_zero_count_cache[2+8*1]=h->non_zero_count[mb_xy][7];
678

  
679
    h->non_zero_count_cache[1+8*5]=h->non_zero_count[mb_xy][12];
680
    h->non_zero_count_cache[2+8*5]=h->non_zero_count[mb_xy][11];
681
    h->non_zero_count_cache[2+8*4]=h->non_zero_count[mb_xy][10];
682

  
683
    h->non_zero_count_cache[6+8*1]=h->non_zero_count[mb_xy][13];
684
    h->non_zero_count_cache[6+8*2]=h->non_zero_count[mb_xy][14];
685
    h->non_zero_count_cache[6+8*3]=h->non_zero_count[mb_xy][15];
686
    h->non_zero_count_cache[5+8*1]=h->non_zero_count[mb_xy][16];
687
    h->non_zero_count_cache[5+8*2]=h->non_zero_count[mb_xy][17];
688
    h->non_zero_count_cache[5+8*3]=h->non_zero_count[mb_xy][18];
689
    h->non_zero_count_cache[4+8*1]=h->non_zero_count[mb_xy][19];
690
    h->non_zero_count_cache[4+8*2]=h->non_zero_count[mb_xy][20];
691
    h->non_zero_count_cache[4+8*3]=h->non_zero_count[mb_xy][21];
692

  
693
    h->non_zero_count_cache[1+8*1]=h->non_zero_count[mb_xy][22];
694
    h->non_zero_count_cache[1+8*4]=h->non_zero_count[mb_xy][23];
695

  
696 665
    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
697 666
    if(!h->pps.cabac && h->pps.transform_8x8_mode){
698 667
        int top_type, left_type[2];
......
762 731
                         ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
763 732
                            (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
764 733
                                                                       :
765
                            h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2]))
734
                            h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8]))
766 735
                    bS[i] = 2;
767 736
                else
768 737
                    bS[i] = 1;

Also available in: Unified diff