Revision 599fe45b libavcodec/h264.h

libavcodec/h264.h

 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);

-static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deblock){
+static void fill_decode_caches(H264Context *h, int mb_type){
     MpegEncContext * const s = &h->s;
     const int mb_xy= h->mb_xy;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
......

     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);

-    //FIXME deblocking could skip the intra and nnz parts.
-//     if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
-//         return;
-
     /* Wow, what a mess, why didn't they simplify the interlacing & intra
      * stuff, I can't imagine that these complex rules are worth it. */

......
         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
         if(s->mb_y&1){
             if (left_mb_field_flag != curr_mb_field_flag) {
-                if(for_deblock){
-                    left_xy[0] = mb_xy - s->mb_stride - 1;
-                    left_xy[1] = mb_xy                - 1;
-                }else{
                 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
                 if (curr_mb_field_flag) {
                     left_xy[1] += s->mb_stride;
......
                     topleft_partition = 0;
                     left_block = left_block_options[1];
                 }
-                }
             }
         }else{
             if(curr_mb_field_flag){
......
                 top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
             }
             if (left_mb_field_flag != curr_mb_field_flag) {
-                if(for_deblock){
-                    left_xy[0] = mb_xy                - 1;
-                    left_xy[1] = mb_xy + s->mb_stride - 1;
-                }else{
                 left_xy[1] = left_xy[0] = mb_xy - 1;
                 if (curr_mb_field_flag) {
                     left_xy[1] += s->mb_stride;
......
                 } else {
                     left_block = left_block_options[2];
                 }
-                }
             }
         }
     }
......
     h->top_mb_xy = top_xy;
     h->left_mb_xy[0] = left_xy[0];
     h->left_mb_xy[1] = left_xy[1];
-    if(for_deblock){
-
-        //for sufficiently low qp, filtering wouldn't do anything
-        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
-        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
-        int qp = s->current_picture.qscale_table[mb_xy];
-        if(qp <= qp_thresh
-           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
-           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
-            if(!FRAME_MBAFF)
-                return 1;
-            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
-               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
-                return 1;
-        }
-
-        if(h->deblocking_filter == 2){
-            h->top_type    = top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
-            h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
-            h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
-        }else{
-            h->top_type    = top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
-            h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
-            h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
-        }
-        if(IS_INTRA(mb_type))
-            return 0;
-
-        AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
-        AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
-        *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
-        *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
-        AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
-
-        h->cbp= h->cbp_table[mb_xy];
-
-        {
-            int list;
-            for(list=0; list<h->list_count; list++){
-                int8_t *ref;
-                int y, b_stride;
-                int16_t (*mv_dst)[2];
-                int16_t (*mv_src)[2];
-
-                if(!USES_LIST(mb_type, list)){
-                    fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
-                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
-                    continue;
-                }
-
-                ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
-                {
-                    int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
-                    ref += h->b8_stride;
-                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
-                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
-                }
-
-                b_stride = h->b_stride;
-                mv_dst   = &h->mv_cache[list][scan8[0]];
-                mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
-                for(y=0; y<4; y++){
-                    AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
-                }
-
-            }
-        }
-    }else{
         topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
         top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
         topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
......
                 }
             }
         }
-    }


 /*
......
 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
     if(top_type){
         *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
-        if(!for_deblock){
             h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
             h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];

             h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
             h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
-        }
-    }else if(!for_deblock){
+    }else {
             h->non_zero_count_cache[1+8*0]=
             h->non_zero_count_cache[2+8*0]=

......
         if(left_type[i]){
             h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
             h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
-            if(!for_deblock){
                 h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
                 h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
-            }
-        }else if(!for_deblock){
+        }else{
                 h->non_zero_count_cache[3+8*1 + 2*8*i]=
                 h->non_zero_count_cache[3+8*2 + 2*8*i]=
                 h->non_zero_count_cache[0+8*1 +   8*i]=
......
         }
     }

-    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
-    if(for_deblock && !CABAC && h->pps.transform_8x8_mode){
-        if(IS_8x8DCT(top_type)){
-            h->non_zero_count_cache[4+8*0]=
-            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
-            h->non_zero_count_cache[6+8*0]=
-            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
-        }
-        if(IS_8x8DCT(left_type[0])){
-            h->non_zero_count_cache[3+8*1]=
-            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
-        }
-        if(IS_8x8DCT(left_type[1])){
-            h->non_zero_count_cache[3+8*3]=
-            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
-        }
-
-        if(IS_8x8DCT(mb_type)){
-            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
-            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
-
-            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
-            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
-
-            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
-            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
-
-            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
-            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
-        }
-    }
-
-    if( CABAC && !for_deblock) {
+    if( CABAC ) {
         // top_cbp
         if(top_type) {
             h->top_cbp = h->cbp_table[top_xy];
......
     if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
         int list;
         for(list=0; list<h->list_count; list++){
-            if(!for_deblock && !USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
+            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
                 /*if(!h->mv_cache_clean[list]){
                     memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                     memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
......
                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
-                if(for_deblock){
-                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                    h->ref_cache[list][scan8[0] + 0 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
-                    h->ref_cache[list][scan8[0] + 2 - 1*8]=
-                    h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
-                }else{
                     h->ref_cache[list][scan8[0] + 0 - 1*8]=
                     h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                     h->ref_cache[list][scan8[0] + 2 - 1*8]=
                     h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
-                }
             }else{
                 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
-                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
             }

-            if(for_deblock){
-                if(!IS_INTERLACED(mb_type^left_type[0])){
-                    if(USES_LIST(left_type[0], list)){
-                        const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
-                        const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
-                        int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
-                        *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
-                        *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
-                        *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
-                        *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
-                        h->ref_cache[list][scan8[0] - 1 + 0 ]=
-                        h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
-                        h->ref_cache[list][scan8[0] - 1 +16 ]=
-                        h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
-                    }else{
-                        *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
-                        *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
-                        *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
-                        *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
-                        h->ref_cache[list][scan8[0] - 1 + 0  ]=
-                        h->ref_cache[list][scan8[0] - 1 + 8  ]=
-                        h->ref_cache[list][scan8[0] - 1 + 16 ]=
-                        h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
-                    }
-                }
-                continue;
-            }else{
             for(i=0; i<2; i++){
                 int cache_idx = scan8[0] - 1 + i*2*8;
                 if(USES_LIST(left_type[i], list)){
......
                     h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                 }
             }
-            }

             if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)
                 continue;
......
     }
 #endif

-    if(!for_deblock)
         h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
-    return 0;
-}
-
-static void fill_decode_caches(H264Context *h, int mb_type){
-    fill_caches(h, mb_type, 0);
 }

 /**
......
  * @returns non zero if the loop filter can be skiped
  */
 static int fill_filter_caches(H264Context *h, int mb_type){
-    return fill_caches(h, mb_type, 1);
+    MpegEncContext * const s = &h->s;
+    const int mb_xy= h->mb_xy;
+    int top_xy, left_xy[2];
+    int top_type, left_type[2];
+    int i;
+
+    top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
+
+    //FIXME deblocking could skip the intra and nnz parts.
+
+    /* Wow, what a mess, why didn't they simplify the interlacing & intra
+     * stuff, I can't imagine that these complex rules are worth it. */
+
+    left_xy[1] = left_xy[0] = mb_xy-1;
+    if(FRAME_MBAFF){
+        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
+        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
+        if(s->mb_y&1){
+            if (left_mb_field_flag != curr_mb_field_flag) {
+                left_xy[0] = mb_xy - s->mb_stride - 1;
+                left_xy[1] = mb_xy                - 1;
+            }
+        }else{
+            if(curr_mb_field_flag){
+                top_xy      += s->mb_stride & (((s->current_picture.mb_type[top_xy    ]>>7)&1)-1);
+            }
+            if (left_mb_field_flag != curr_mb_field_flag) {
+                left_xy[0] = mb_xy                - 1;
+                left_xy[1] = mb_xy + s->mb_stride - 1;
+            }
+        }
+    }
+
+    h->top_mb_xy = top_xy;
+    h->left_mb_xy[0] = left_xy[0];
+    h->left_mb_xy[1] = left_xy[1];
+    {
+        //for sufficiently low qp, filtering wouldn't do anything
+        //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
+        int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
+        int qp = s->current_picture.qscale_table[mb_xy];
+        if(qp <= qp_thresh
+           && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
+           && (top_xy   < 0 || ((qp + s->current_picture.qscale_table[top_xy    ] + 1)>>1) <= qp_thresh)){
+            if(!FRAME_MBAFF)
+                return 1;
+            if(   (left_xy[0]< 0            || ((qp + s->current_picture.qscale_table[left_xy[1]             ] + 1)>>1) <= qp_thresh)
+               && (top_xy    < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy    -s->mb_stride] + 1)>>1) <= qp_thresh))
+                return 1;
+        }
+    }
+
+    if(h->deblocking_filter == 2){
+        h->top_type    = top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
+        h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
+        h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
+    }else{
+        h->top_type    = top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
+        h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
+        h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
+    }
+    if(IS_INTRA(mb_type))
+        return 0;
+
+    AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
+    *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
+    *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
+    AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+
+    h->cbp= h->cbp_table[mb_xy];
+
+    {
+        int list;
+        for(list=0; list<h->list_count; list++){
+            int8_t *ref;
+            int y, b_stride;
+            int16_t (*mv_dst)[2];
+            int16_t (*mv_src)[2];
+
+            if(!USES_LIST(mb_type, list)){
+                fill_rectangle(  h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
+                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
+                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+                *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
+                continue;
+            }
+
+            ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+            {
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+                *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+                ref += h->b8_stride;
+                *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+                *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+            }
+
+            b_stride = h->b_stride;
+            mv_dst   = &h->mv_cache[list][scan8[0]];
+            mv_src   = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
+            for(y=0; y<4; y++){
+                AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
+            }
+
+        }
+    }
+
+
+/*
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
+*/
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+    if(top_type){
+        *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
+    }
+
+    if(left_type[0]){
+        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
+        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
+        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
+        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
+    }
+
+    // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+    if(!CABAC && h->pps.transform_8x8_mode){
+        if(IS_8x8DCT(top_type)){
+            h->non_zero_count_cache[4+8*0]=
+            h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
+            h->non_zero_count_cache[6+8*0]=
+            h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
+        }
+        if(IS_8x8DCT(left_type[0])){
+            h->non_zero_count_cache[3+8*1]=
+            h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
+        }
+        if(IS_8x8DCT(left_type[1])){
+            h->non_zero_count_cache[3+8*3]=
+            h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
+        }
+
+        if(IS_8x8DCT(mb_type)){
+            h->non_zero_count_cache[scan8[0   ]]= h->non_zero_count_cache[scan8[1   ]]=
+            h->non_zero_count_cache[scan8[2   ]]= h->non_zero_count_cache[scan8[3   ]]= h->cbp & 1;
+
+            h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+            h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+
+            h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+            h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+
+            h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+            h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+        }
+    }
+
+    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
+        int list;
+        for(list=0; list<h->list_count; list++){
+            if(USES_LIST(top_type, list)){
+                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
+                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
+                h->ref_cache[list][scan8[0] + 0 - 1*8]=
+                h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+                h->ref_cache[list][scan8[0] + 2 - 1*8]=
+                h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+            }else{
+                AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
+                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101;
+            }
+
+            if(!IS_INTERLACED(mb_type^left_type[0])){
+                if(USES_LIST(left_type[0], list)){
+                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+                    const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
+                    int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
+                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
+                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
+                    *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
+                    h->ref_cache[list][scan8[0] - 1 + 0 ]=
+                    h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
+                    h->ref_cache[list][scan8[0] - 1 +16 ]=
+                    h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
+                }else{
+                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
+                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
+                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
+                    *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
+                    h->ref_cache[list][scan8[0] - 1 + 0  ]=
+                    h->ref_cache[list][scan8[0] - 1 + 8  ]=
+                    h->ref_cache[list][scan8[0] - 1 + 16 ]=
+                    h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
+                }
+            }
+        }
+    }
+
+    return 0;
 }

 /**
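The qp_thresh shortcut that fill_filter_caches now runs up front (the old fill_caches only ran it when for_deblock was set) exploits the fact that at sufficiently low QP the loop filter's alpha/beta thresholds collapse to zero, so filtering is a no-op. The test compares the current macroblock's QP, and the rounded average of it with each available neighbour's QP, against h->qp_thresh, exactly the average the edge filter itself uses. A minimal stand-alone sketch of that decision, with hypothetical names and without the extra MBAFF neighbour checks:

#include <stdbool.h>

/* Hedged sketch of the skip test in fill_filter_caches.
 * qp_cur/qp_left/qp_top are macroblock QPs; a negative neighbour QP
 * means "no neighbour". qp_thresh plays the role of h->qp_thresh. */
static bool can_skip_loop_filter(int qp_cur, int qp_left, int qp_top,
                                 int qp_thresh)
{
    if (qp_cur > qp_thresh)
        return false;
    /* edges use the rounded average QP of the two adjacent blocks */
    if (qp_left >= 0 && ((qp_cur + qp_left + 1) >> 1) > qp_thresh)
        return false;
    if (qp_top >= 0 && ((qp_cur + qp_top + 1) >> 1) > qp_thresh)
        return false;
    return true; /* every edge would get zero-strength filtering */
}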

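The block guarded by "!CABAC && h->pps.transform_8x8_mode" reflects the comment above it: with CAVLC the stored per-4x4 non_zero_count values are what residual decoding needs, but for 8x8-transform macroblocks the loop filter instead wants a coded/not-coded flag per 8x8 block, derived from the luma CBP bits. A rough, hypothetical illustration of that CBP-to-NNZ mapping, using a simplified flat 4x4-block array rather than the real scan8[] cache layout:

#include <stdint.h>

/* Hedged sketch: mark all four 4x4 positions of each 8x8 luma block as
 * coded (non-zero) or not, based on its CBP bit (cbp & 1, 2, 4, 8).
 * Any non-zero value is enough for the loop filter's purposes. */
static void fill_nnz_from_cbp_8x8(uint8_t nnz4x4[16], int cbp)
{
    for (int blk8x8 = 0; blk8x8 < 4; blk8x8++) {
        uint8_t coded = (cbp >> blk8x8) & 1;   /* one CBP bit per 8x8 block */
        for (int i = 0; i < 4; i++)
            nnz4x4[4 * blk8x8 + i] = coded;    /* raster order, illustration only */
    }
}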