Revision 5d18eaad

View differences:

libavcodec/h264.c
54 54

  
55 55
#define MAX_MMCO_COUNT 66
56 56

  
57
/* Compiling in interlaced support reduces the speed
58
 * of progressive decoding by about 2%. */
59
#define ALLOW_INTERLACE
60

  
61
#ifdef ALLOW_INTERLACE /* interlaced build: the macros below read members through h, which must be in scope at the point of use */
62
#define MB_MBAFF h->mb_mbaff
63
#define MB_FIELD h->mb_field_decoding_flag
64
#define FRAME_MBAFF h->mb_aff_frame
65
#else /* ALLOW_INTERLACE not defined: the three macros collapse to the constant 0 */
66
#define MB_MBAFF 0
67
#define MB_FIELD 0
68
#define FRAME_MBAFF 0
69
#undef  IS_INTERLACED /* remove any earlier definition so it can be replaced on the next line */
70
#define IS_INTERLACED(mb_type) 0 /* evaluates to 0 regardless of mb_type */
71
#endif /* ALLOW_INTERLACE */
72

  
57 73
/**
58 74
 * Sequence parameter set
59 75
 */
......
173 189

  
174 190
    int chroma_qp; //QPc
175 191

  
176
    int prev_mb_skipped; //FIXME remove (IMHO not used)
192
    int prev_mb_skipped;
193
    int next_mb_skipped;
177 194

  
178 195
    //prediction stuff
179 196
    int chroma_pred_mode;
......
231 248
    int b_stride; //FIXME use s->b4_stride
232 249
    int b8_stride;
233 250

  
251
    int mb_linesize;   ///< may be equal to s->linesize or s->linesize*2, for mbaff
252
    int mb_uvlinesize;
253

  
254
    int emu_edge_width;
255
    int emu_edge_height;
256

  
234 257
    int halfpel_flag;
235 258
    int thirdpel_flag;
236 259

  
......
254 277

  
255 278
    int slice_num;
256 279
    uint8_t *slice_table_base;
257
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
280
    uint8_t *slice_table;      ///< slice_table_base + 2*mb_stride + 1
258 281
    int slice_type;
259 282
    int slice_type_fixed;
260 283

  
261 284
    //interlacing specific flags
262 285
    int mb_aff_frame;
263 286
    int mb_field_decoding_flag;
287
    int mb_mbaff;              ///< mb_aff_frame && mb_field_decoding_flag
264 288

  
265 289
    int sub_mb_type[4];
266 290

  
......
291 315
    int use_weight_chroma;
292 316
    int luma_log2_weight_denom;
293 317
    int chroma_log2_weight_denom;
294
    int luma_weight[2][16];
295
    int luma_offset[2][16];
296
    int chroma_weight[2][16][2];
297
    int chroma_offset[2][16][2];
298
    int implicit_weight[16][16];
318
    int luma_weight[2][48];
319
    int luma_offset[2][48];
320
    int chroma_weight[2][48][2];
321
    int chroma_offset[2][48][2];
322
    int implicit_weight[48][48];
299 323

  
300 324
    //deblock
301 325
    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
......
306 330

  
307 331
    int direct_spatial_mv_pred;
308 332
    int dist_scale_factor[16];
333
    int dist_scale_factor_field[32];
309 334
    int map_col_to_list0[2][16];
335
    int map_col_to_list0_field[2][32];
310 336

  
311 337
    /**
312 338
     * num_ref_idx_l0/1_active_minus1 + 1
313 339
     */
314
    int ref_count[2];// FIXME split for AFF
340
    int ref_count[2];            ///< counts frames or fields, depending on current mb mode
315 341
    Picture *short_ref[32];
316 342
    Picture *long_ref[32];
317 343
    Picture default_ref_list[2][32];
318
    Picture ref_list[2][32]; //FIXME size?
319
    Picture field_ref_list[2][32]; //FIXME size?
344
    Picture ref_list[2][48];     ///< 0..15: frame refs, 16..47: mbaff field refs
320 345
    Picture *delayed_pic[16]; //FIXME size?
321 346
    Picture *delayed_output_pic;
322 347

  
......
357 382
    uint8_t     direct_cache[5*8];
358 383

  
359 384
    uint8_t zigzag_scan[16];
360
    uint8_t field_scan[16];
361 385
    uint8_t zigzag_scan8x8[64];
362 386
    uint8_t zigzag_scan8x8_cavlc[64];
387
    uint8_t field_scan[16];
388
    uint8_t field_scan8x8[64];
389
    uint8_t field_scan8x8_cavlc[64];
363 390
    const uint8_t *zigzag_scan_q0;
364
    const uint8_t *field_scan_q0;
365 391
    const uint8_t *zigzag_scan8x8_q0;
366 392
    const uint8_t *zigzag_scan8x8_cavlc_q0;
393
    const uint8_t *field_scan_q0;
394
    const uint8_t *field_scan8x8_q0;
395
    const uint8_t *field_scan8x8_cavlc_q0;
367 396

  
368 397
    int x264_build;
369 398
}H264Context;
......
488 517
    //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
489 518
    // the actual condition is whether we're on the edge of a slice,
490 519
    // and even then the intra and nnz parts are unnecessary.
491
    if(for_deblock && h->slice_num == 1)
520
    if(for_deblock && h->slice_num == 1 && !FRAME_MBAFF)
492 521
        return;
493 522

  
494 523
    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
......
505 534
    left_block[5]= 10;
506 535
    left_block[6]= 8;
507 536
    left_block[7]= 11;
508
    if(h->mb_aff_frame){
537
    if(FRAME_MBAFF){
509 538
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
510 539
        const int top_pair_xy      = pair_xy     - s->mb_stride;
511 540
        const int topleft_pair_xy  = top_pair_xy - 1;
......
580 609
        topright_type= h->slice_table[topright_xy] < 255 ? s->current_picture.mb_type[topright_xy]: 0;
581 610
        left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
582 611
        left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
612

  
613
        if(FRAME_MBAFF && !IS_INTRA(mb_type)){
614
            int list;
615
            int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
616
            for(i=0; i<16; i++)
617
                h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
618
            for(list=0; list<1+(h->slice_type==B_TYPE); list++){
619
                if(USES_LIST(mb_type,list)){
620
                    uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
621
                    uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
622
                    uint8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
623
                    for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
624
                        dst[0] = src[0];
625
                        dst[1] = src[1];
626
                        dst[2] = src[2];
627
                        dst[3] = src[3];
628
                    }
629
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
630
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
631
                    ref += h->b8_stride;
632
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
633
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
634
                }else{
635
                    fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
636
                    fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
637
                }
638
            }
639
        }
583 640
    }else{
584 641
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
585 642
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
......
763 820
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
764 821
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
765 822
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
766
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
767
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
823
                h->ref_cache[list][scan8[0] - 1 + 0*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
824
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1]>>1)];
768 825
            }else{
769 826
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
770 827
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
......
777 834
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
778 835
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
779 836
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
780
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
781
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
837
                h->ref_cache[list][scan8[0] - 1 + 2*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
838
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[3]>>1)];
782 839
            }else{
783 840
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
784 841
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
......
824 881

  
825 882
            if( h->pps.cabac ) {
826 883
                /* XXX beurk, Load mvd */
827
                if(USES_LIST(topleft_type, list)){
828
                    const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
829
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
830
                }else{
831
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
832
                }
833

  
834 884
                if(USES_LIST(top_type, list)){
835 885
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
836 886
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
......
878 928
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
879 929
                    }
880 930

  
881
                    //FIXME interlacing
882
                    if(IS_DIRECT(left_type[0])){
883
                        h->direct_cache[scan8[0] - 1 + 0*8]=
931
                    if(IS_DIRECT(left_type[0]))
932
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
933
                    else if(IS_8X8(left_type[0]))
934
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
935
                    else
936
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;
937

  
938
                    if(IS_DIRECT(left_type[1]))
884 939
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
885
                    }else if(IS_8X8(left_type[0])){
886
                        int b8_xy = h->mb2b8_xy[left_xy[0]] + 1;
887
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[b8_xy];
888
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[b8_xy + h->b8_stride];
889
                    }else{
890
                        h->direct_cache[scan8[0] - 1 + 0*8]=
940
                    else if(IS_8X8(left_type[1]))
941
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
942
                    else
891 943
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
944
                }
945
            }
946

  
947
            if(FRAME_MBAFF){
948
#define MAP_MVS\
949
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
950
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
951
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
952
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
953
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
954
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
955
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
956
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
957
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
958
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
959
                if(MB_FIELD){
960
#define MAP_F2F(idx, mb_type)\
961
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
962
                        h->ref_cache[list][idx] <<= 1;\
963
                        h->mv_cache[list][idx][1] /= 2;\
964
                        h->mvd_cache[list][idx][1] /= 2;\
965
                    }
966
                    MAP_MVS
967
#undef MAP_F2F
968
                }else{
969
#define MAP_F2F(idx, mb_type)\
970
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
971
                        h->ref_cache[list][idx] >>= 1;\
972
                        h->mv_cache[list][idx][1] <<= 1;\
973
                        h->mvd_cache[list][idx][1] <<= 1;\
892 974
                    }
975
                    MAP_MVS
976
#undef MAP_F2F
893 977
                }
894 978
            }
895 979
        }
......
1014 1098
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1015 1099
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1016 1100
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1101

  
1102
    if(FRAME_MBAFF){
1103
        // store all luma nnzs, for deblocking
1104
        int v = 0, i;
1105
        for(i=0; i<16; i++)
1106
            v += (!!h->non_zero_count_cache[scan8[i]]) << i;
1107
        *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
1108
    }
1017 1109
}
1018 1110

  
1019 1111
/**
......
1036 1128
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
1037 1129
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
1038 1130

  
1131
    /* there is no consistent mapping of mvs to neighboring locations that will
1132
     * make mbaff happy, so we can't move all this logic to fill_caches */
1133
    if(FRAME_MBAFF){
1134
        MpegEncContext *s = &h->s;
1135
        const int *mb_types = s->current_picture_ptr->mb_type;
1136
        const int16_t *mv;
1137
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
1138
        *C = h->mv_cache[list][scan8[0]-2];
1139

  
1140
        if(!MB_FIELD
1141
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
1142
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
1143
            if(IS_INTERLACED(mb_types[topright_xy])){
1144
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
1145
                const int x4 = X4, y4 = Y4;\
1146
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
1147
                if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
1148
                    return LIST_NOT_USED;\
1149
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
1150
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
1151
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
1152
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
1153

  
1154
                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
1155
            }
1156
        }
1157
        if(topright_ref == PART_NOT_AVAILABLE
1158
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
1159
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
1160
            if(!MB_FIELD
1161
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
1162
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
1163
            }
1164
            if(MB_FIELD
1165
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
1166
               && i >= scan8[0]+8){
1167
                // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
1168
                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
1169
            }
1170
        }
1171
#undef SET_DIAG_MV
1172
    }
1173

  
1039 1174
    if(topright_ref != PART_NOT_AVAILABLE){
1040 1175
        *C= h->mv_cache[list][ i - 8 + part_width ];
1041 1176
        return topright_ref;
......
1209 1344
            h->dist_scale_factor[i] = clip((tb*tx + 32) >> 6, -1024, 1023);
1210 1345
        }
1211 1346
    }
1347
    if(FRAME_MBAFF){
1348
        for(i=0; i<h->ref_count[0]; i++){
1349
            h->dist_scale_factor_field[2*i] =
1350
            h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
1351
        }
1352
    }
1212 1353
}
1213 1354
static inline void direct_ref_list_init(H264Context * const h){
1214 1355
    MpegEncContext * const s = &h->s;
......
1237 1378
                }
1238 1379
        }
1239 1380
    }
1381
    if(FRAME_MBAFF){
1382
        for(list=0; list<2; list++){
1383
            for(i=0; i<ref1->ref_count[list]; i++){
1384
                j = h->map_col_to_list0[list][i];
1385
                h->map_col_to_list0_field[list][2*i] = 2*j;
1386
                h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
1387
            }
1388
        }
1389
    }
1240 1390
}
1241 1391

  
1242 1392
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
......
1253 1403
    int sub_mb_type;
1254 1404
    int i8, i4;
1255 1405

  
1406
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
1256 1407
    if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
1257 1408
        /* FIXME save sub mb types from previous frames (or derive from MVs)
1258 1409
         * so we know exactly what block size to use */
1259 1410
        sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1260 1411
        *mb_type =    MB_TYPE_8x8|MB_TYPE_L0L1;
1261
    }else if(!is_b8x8 && (IS_16X16(mb_type_col) || IS_INTRA(mb_type_col))){
1412
    }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
1262 1413
        sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1263 1414
        *mb_type =    MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1264 1415
    }else{
......
1267 1418
    }
1268 1419
    if(!is_b8x8)
1269 1420
        *mb_type |= MB_TYPE_DIRECT2;
1421
    if(MB_FIELD)
1422
        *mb_type |= MB_TYPE_INTERLACED;
1270 1423

  
1271 1424
    tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
1272 1425

  
......
1275 1428
        int mv[2][2];
1276 1429
        int list;
1277 1430

  
1431
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1432

  
1278 1433
        /* ref = min(neighbors) */
1279 1434
        for(list=0; list<2; list++){
1280 1435
            int refa = h->ref_cache[list][scan8[0] - 1];
......
1372 1527
            }
1373 1528
        }
1374 1529
    }else{ /* direct temporal mv pred */
1530
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1531
        const int *dist_scale_factor = h->dist_scale_factor;
1532

  
1533
        if(FRAME_MBAFF){
1534
            if(IS_INTERLACED(*mb_type)){
1535
                map_col_to_list0[0] = h->map_col_to_list0_field[0];
1536
                map_col_to_list0[1] = h->map_col_to_list0_field[1];
1537
                dist_scale_factor = h->dist_scale_factor_field;
1538
            }
1539
            if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1540
                /* FIXME assumes direct_8x8_inference == 1 */
1541
                const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1542
                int mb_types_col[2];
1543
                int y_shift;
1544

  
1545
                *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1546
                         | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1547
                         | (*mb_type & MB_TYPE_INTERLACED);
1548
                sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1549

  
1550
                if(IS_INTERLACED(*mb_type)){
1551
                    /* frame to field scaling */
1552
                    mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1553
                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1554
                    if(s->mb_y&1){
1555
                        l1ref0 -= 2*h->b8_stride;
1556
                        l1ref1 -= 2*h->b8_stride;
1557
                        l1mv0 -= 4*h->b_stride;
1558
                        l1mv1 -= 4*h->b_stride;
1559
                    }
1560
                    y_shift = 0;
1561

  
1562
                    if(   (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1563
                       && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1564
                       && !is_b8x8)
1565
                        *mb_type |= MB_TYPE_16x8;
1566
                    else
1567
                        *mb_type |= MB_TYPE_8x8;
1568
                }else{
1569
                    /* field to frame scaling */
1570
                    /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1571
                     * but in MBAFF, top and bottom POC are equal */
1572
                    int dy = (s->mb_y&1) ? 1 : 2;
1573
                    mb_types_col[0] =
1574
                    mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1575
                    l1ref0 += dy*h->b8_stride;
1576
                    l1ref1 += dy*h->b8_stride;
1577
                    l1mv0 += 2*dy*h->b_stride;
1578
                    l1mv1 += 2*dy*h->b_stride;
1579
                    y_shift = 2;
1580

  
1581
                    if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1582
                       && !is_b8x8)
1583
                        *mb_type |= MB_TYPE_16x16;
1584
                    else
1585
                        *mb_type |= MB_TYPE_8x8;
1586
                }
1587

  
1588
                for(i8=0; i8<4; i8++){
1589
                    const int x8 = i8&1;
1590
                    const int y8 = i8>>1;
1591
                    int ref0, scale;
1592
                    const int16_t (*l1mv)[2]= l1mv0;
1593

  
1594
                    if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1595
                        continue;
1596
                    h->sub_mb_type[i8] = sub_mb_type;
1597

  
1598
                    fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1599
                    if(IS_INTRA(mb_types_col[y8])){
1600
                        fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1601
                        fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1602
                        fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1603
                        continue;
1604
                    }
1605

  
1606
                    ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1607
                    if(ref0 >= 0)
1608
                        ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1609
                    else{
1610
                        ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1611
                        l1mv= l1mv1;
1612
                    }
1613
                    scale = dist_scale_factor[ref0];
1614
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1615

  
1616
                    {
1617
                        const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1618
                        int my_col = (mv_col[1]<<y_shift)/2;
1619
                        int mx = (scale * mv_col[0] + 128) >> 8;
1620
                        int my = (scale * my_col + 128) >> 8;
1621
                        fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1622
                        fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1623
                    }
1624
                }
1625
                return;
1626
            }
1627
        }
1628

  
1629
        /* one-to-one mv scaling */
1630

  
1375 1631
        if(IS_16X16(*mb_type)){
1376 1632
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1377 1633
            if(IS_INTRA(mb_type_col)){
......
1379 1635
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
1380 1636
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
1381 1637
            }else{
1382
                const int ref0 = l1ref0[0] >= 0 ? h->map_col_to_list0[0][l1ref0[0]]
1383
                                                : h->map_col_to_list0[1][l1ref1[0]];
1384
                const int dist_scale_factor = h->dist_scale_factor[ref0];
1638
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1639
                                                : map_col_to_list0[1][l1ref1[0]];
1640
                const int scale = dist_scale_factor[ref0];
1385 1641
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1386 1642
                int mv_l0[2];
1387
                mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1388
                mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1643
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1644
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1389 1645
                fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref0, 1);
1390 1646
                fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0],mv_l0[1]), 4);
1391 1647
                fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]), 4);
......
1394 1650
            for(i8=0; i8<4; i8++){
1395 1651
                const int x8 = i8&1;
1396 1652
                const int y8 = i8>>1;
1397
                int ref0, dist_scale_factor;
1653
                int ref0, scale;
1398 1654
                const int16_t (*l1mv)[2]= l1mv0;
1399 1655

  
1400 1656
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1401 1657
                    continue;
1402 1658
                h->sub_mb_type[i8] = sub_mb_type;
1659
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1403 1660
                if(IS_INTRA(mb_type_col)){
1404 1661
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1405
                    fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1406 1662
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1407 1663
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1408 1664
                    continue;
......
1410 1666

  
1411 1667
                ref0 = l1ref0[x8 + y8*h->b8_stride];
1412 1668
                if(ref0 >= 0)
1413
                    ref0 = h->map_col_to_list0[0][ref0];
1669
                    ref0 = map_col_to_list0[0][ref0];
1414 1670
                else{
1415
                    ref0 = h->map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1671
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1416 1672
                    l1mv= l1mv1;
1417 1673
                }
1418
                dist_scale_factor = h->dist_scale_factor[ref0];
1674
                scale = dist_scale_factor[ref0];
1419 1675

  
1420 1676
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1421
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1422 1677
                if(IS_SUB_8X8(sub_mb_type)){
1423 1678
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1424
                    int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
1425
                    int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
1679
                    int mx = (scale * mv_col[0] + 128) >> 8;
1680
                    int my = (scale * mv_col[1] + 128) >> 8;
1426 1681
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1427 1682
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1428 1683
                }else
1429 1684
                for(i4=0; i4<4; i4++){
1430 1685
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1431 1686
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1432
                    mv_l0[0] = (dist_scale_factor * mv_col[0] + 128) >> 8;
1433
                    mv_l0[1] = (dist_scale_factor * mv_col[1] + 128) >> 8;
1687
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1688
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1434 1689
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1435 1690
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1436 1691
                }
......
2611 2866
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
2612 2867
    MpegEncContext * const s = &h->s;
2613 2868
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
2614
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2869
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
2615 2870
    const int luma_xy= (mx&3) + ((my&3)<<2);
2616
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
2617
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
2618
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
2619
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2620
    int extra_height= extra_width;
2871
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
2872
    uint8_t * src_cb, * src_cr;
2873
    int extra_width= h->emu_edge_width;
2874
    int extra_height= h->emu_edge_height;
2621 2875
    int emu=0;
2622 2876
    const int full_mx= mx>>2;
2623 2877
    const int full_my= my>>2;
2624 2878
    const int pic_width  = 16*s->mb_width;
2625
    const int pic_height = 16*s->mb_height;
2879
    const int pic_height = 16*s->mb_height >> MB_MBAFF;
2626 2880

  
2627 2881
    if(!pic->data[0])
2628 2882
        return;
......
2634 2888
       || full_my < 0-extra_height
2635 2889
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
2636 2890
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
2637
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2638
            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
2891
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
2892
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
2639 2893
        emu=1;
2640 2894
    }
2641 2895

  
2642
    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
2896
    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
2643 2897
    if(!square){
2644
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
2898
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
2645 2899
    }
2646 2900

  
2647 2901
    if(s->flags&CODEC_FLAG_GRAY) return;
2648 2902

  
2903
    if(MB_MBAFF){
2904
        // chroma offset when predicting from a field of opposite parity
2905
        my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
2906
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
2907
    }
2908
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2909
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
2910

  
2649 2911
    if(emu){
2650
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2912
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2651 2913
            src_cb= s->edge_emu_buffer;
2652 2914
    }
2653
    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
2915
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2654 2916

  
2655 2917
    if(emu){
2656
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2918
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
2657 2919
            src_cr= s->edge_emu_buffer;
2658 2920
    }
2659
    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
2921
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
2660 2922
}
2661 2923

  
2662 2924
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
......
2669 2931
    qpel_mc_func *qpix_op=  qpix_put;
2670 2932
    h264_chroma_mc_func chroma_op= chroma_put;
2671 2933

  
2672
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
2673
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
2674
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
2934
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
2935
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
2936
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
2675 2937
    x_offset += 8*s->mb_x;
2676
    y_offset += 8*s->mb_y;
2938
    y_offset += 8*(s->mb_y >> MB_MBAFF);
2677 2939

  
2678 2940
    if(list0){
2679 2941
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
......
2702 2964
                           int list0, int list1){
2703 2965
    MpegEncContext * const s = &h->s;
2704 2966

  
2705
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
2706
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
2707
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
2967
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
2968
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
2969
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
2708 2970
    x_offset += 8*s->mb_x;
2709
    y_offset += 8*s->mb_y;
2971
    y_offset += 8*(s->mb_y >> MB_MBAFF);
2710 2972

  
2711 2973
    if(list0 && list1){
2712 2974
        /* don't optimize for luma-only case, since B-frames usually
2713 2975
         * use implicit weights => chroma too. */
2714 2976
        uint8_t *tmp_cb = s->obmc_scratchpad;
2715
        uint8_t *tmp_cr = tmp_cb + 8*s->uvlinesize;
2716
        uint8_t *tmp_y  = tmp_cr + 8*s->uvlinesize;
2977
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
2978
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
2717 2979
        int refn0 = h->ref_cache[0][ scan8[n] ];
2718 2980
        int refn1 = h->ref_cache[1][ scan8[n] ];
2719 2981

  
......
2727 2989
        if(h->use_weight == 2){
2728 2990
            int weight0 = h->implicit_weight[refn0][refn1];
2729 2991
            int weight1 = 64 - weight0;
2730
            luma_weight_avg(  dest_y,  tmp_y,  s->  linesize, 5, weight0, weight1, 0);
2731
            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
2732
            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
2992
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
2993
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
2994
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
2733 2995
        }else{
2734
            luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
2996
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
2735 2997
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
2736 2998
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
2737
            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
2999
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2738 3000
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
2739 3001
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
2740
            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
3002
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2741 3003
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
2742 3004
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
2743 3005
        }
......
2749 3011
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
2750 3012
                    qpix_put, chroma_put);
2751 3013

  
2752
        luma_weight_op(dest_y, s->linesize, h->luma_log2_weight_denom,
3014
        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
2753 3015
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
2754 3016
        if(h->use_weight_chroma){
2755
            chroma_weight_op(dest_cb, s->uvlinesize, h->chroma_log2_weight_denom,
3017
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2756 3018
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
2757
            chroma_weight_op(dest_cr, s->uvlinesize, h->chroma_log2_weight_denom,
3019
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
2758 3020
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
2759 3021
        }
2760 3022
    }
......
2787 3049
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2788 3050
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2789 3051
        uint8_t **src= h->ref_list[list][refn].data;
2790
        int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
3052
        int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
2791 3053
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
2792 3054
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2793 3055
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
......
2821 3083
                &weight_op[1], &weight_avg[1],
2822 3084
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
2823 3085
    }else if(IS_8X16(mb_type)){
2824
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
3086
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
2825 3087
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2826 3088
                &weight_op[2], &weight_avg[2],
2827 3089
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
2828
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
3090
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
2829 3091
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2830 3092
                &weight_op[2], &weight_avg[2],
2831 3093
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
......
2855 3117
                    &weight_op[4], &weight_avg[4],
2856 3118
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2857 3119
            }else if(IS_SUB_4X8(sub_mb_type)){
2858
                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
3120
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2859 3121
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2860 3122
                    &weight_op[5], &weight_avg[5],
2861 3123
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2862
                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
3124
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2863 3125
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2864 3126
                    &weight_op[5], &weight_avg[5],
2865 3127
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
......
3065 3327
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
3066 3328

  
3067 3329
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
3068
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
3330
    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
3069 3331
    CHECKED_ALLOCZ(h->top_borders[0]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3070 3332
    CHECKED_ALLOCZ(h->top_borders[1]    , s->mb_width * (16+8+8) * sizeof(uint8_t))
3071 3333
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
......
3077 3339
        CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
3078 3340
    }
3079 3341

  
3080
    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
3081
    h->slice_table= h->slice_table_base + s->mb_stride + 1;
3342
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
3343
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
3082 3344

  
3083 3345
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
3084 3346
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
......
3175 3437
    /* can't be in alloc_tables because linesize isn't known there.
3176 3438
     * FIXME: redo bipred weight to not require extra buffer? */
3177 3439
    if(!s->obmc_scratchpad)
3178
        s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
3440
        s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
3441

  
3442
    /* some macroblocks will be accessed before they're available */
3443
    if(FRAME_MBAFF)
3444
        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
3179 3445

  
3180 3446
//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3181 3447
    return 0;
......
3298 3564
    int temp8, i;
3299 3565
    uint64_t temp64;
3300 3566
    int deblock_left = (s->mb_x > 0);
3301
    int deblock_top  = (s->mb_y > 0);
3567
    int deblock_top  = (s->mb_y > 1);
3302 3568

  
3303 3569
    tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
3304 3570

  
......
3323 3589
        XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
3324 3590
        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
3325 3591
        XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
3592
        if(s->mb_x+1 < s->mb_width){
3593
            XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
3594
            XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
3595
        }
3326 3596
    }
3327 3597

  
3328 3598
    if(!(s->flags&CODEC_FLAG_GRAY)){
......
3363 3633
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3364 3634
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3365 3635

  
3366
    if (h->mb_field_decoding_flag) {
3367
        linesize = s->linesize * 2;
3368
        uvlinesize = s->uvlinesize * 2;
3636
    if (MB_FIELD) {
3637
        linesize   = h->mb_linesize   = s->linesize * 2;
3638
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
3369 3639
        block_offset = &h->block_offset[24];
3370 3640
        if(mb_y&1){ //FIXME move out of this func?
3371 3641
            dest_y -= s->linesize*15;
3372 3642
            dest_cb-= s->uvlinesize*7;
3373 3643
            dest_cr-= s->uvlinesize*7;
3374 3644
        }
3645
        if(FRAME_MBAFF) {
3646
            int list;
3647
            for(list=0; list<2; list++){
3648
                if(!USES_LIST(mb_type, list))
3649
                    continue;
3650
                if(IS_16X16(mb_type)){
3651
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
3652
                    fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
3653
                }else{
3654
                    for(i=0; i<16; i+=4){
3655
                        //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
3656
                        int ref = h->ref_cache[list][scan8[i]];
3657
                        if(ref >= 0)
3658
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
3659
                    }
3660
                }
3661
            }
3662
        }
3375 3663
    } else {
3376
        linesize = s->linesize;
3377
        uvlinesize = s->uvlinesize;
3664
        linesize   = h->mb_linesize   = s->linesize;
3665
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
3378 3666
//        dct_offset = s->linesize * 16;
3379 3667
    }
3380 3668

  
......
3389 3677
        idct_add = s->dsp.h264_idct_add;
3390 3678
    }
3391 3679

  
3680
    if(FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
3681
       && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
3682
        int mbt_y = mb_y&~1;
3683
        uint8_t *top_y  = s->current_picture.data[0] + (mbt_y * 16* s->linesize  ) + mb_x * 16;
3684
        uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3685
        uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
3686
        xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
3687
    }
3688

  
3392 3689
    if (IS_INTRA_PCM(mb_type)) {
3393 3690
        unsigned int x, y;
3394 3691

  
......
3417 3714
        }
3418 3715
    } else {
3419 3716
        if(IS_INTRA(mb_type)){
3420
            if(h->deblocking_filter) {
3421
                if (h->mb_aff_frame) {
3422
                    if (!bottom)
3423
                        xchg_pair_border(h, dest_y, dest_cb, dest_cr, s->linesize, s->uvlinesize, 1);
3424
                } else {
3425
                    xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3426
                }
3427
            }
3717
            if(h->deblocking_filter && !FRAME_MBAFF)
3718
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);
3428 3719

  
3429 3720
            if(!(s->flags&CODEC_FLAG_GRAY)){
3430 3721
                h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
......
3486 3777
                }else
3487 3778
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
3488 3779
            }
3489
            if(h->deblocking_filter) {
3490
                if (h->mb_aff_frame) {
3491
                    if (bottom) {
3492
                        uint8_t *pair_dest_y  = s->current_picture.data[0] + ((mb_y-1) * 16* s->linesize  ) + mb_x * 16;
3493
                        uint8_t *pair_dest_cb = s->current_picture.data[1] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3494
                        uint8_t *pair_dest_cr = s->current_picture.data[2] + ((mb_y-1) * 8 * s->uvlinesize) + mb_x * 8;
3495
                        s->mb_y--;
3496
                        xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3497
                        s->mb_y++;
3498
                    }
3499
                } else {
3500
                    xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3501
                }
3502
            }
3780
            if(h->deblocking_filter && !FRAME_MBAFF)
3781
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
3503 3782
        }else if(s->codec_id == CODEC_ID_H264){
3504 3783
            hl_motion(h, dest_y, dest_cb, dest_cr,
3505 3784
                      s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
......
3567 3846
        }
3568 3847
    }
3569 3848
    if(h->deblocking_filter) {
3570
        if (h->mb_aff_frame) {
3849
        if (FRAME_MBAFF) {
3850
            //FIXME try deblocking one mb at a time?
3851
            // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
3571 3852
            const int mb_y = s->mb_y - 1;
3572 3853
            uint8_t  *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
3573 3854
            const int mb_xy= mb_x + mb_y*s->mb_stride;
3574 3855
            const int mb_type_top   = s->current_picture.mb_type[mb_xy];
3575 3856
            const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
3576
            uint8_t tmp = s->current_picture.data[1][384];
3577 3857
            if (!bottom) return;
3578 3858
            pair_dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
3579 3859
            pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3580 3860
            pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
3581 3861

  
3862
            if(IS_INTRA(mb_type_top | mb_type_bottom))
3863
                xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
3864

  
3582 3865
            backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
3583
            // TODO deblock a pair
3866
            // deblock a pair
3584 3867
            // top
3585 3868
            s->mb_y--;
3586 3869
            tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
3587 3870
            fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
3871
            h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy]);
3588 3872
            filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
3589
            if (tmp != s->current_picture.data[1][384]) {
3590
                tprintf("modified pixel 8,1 (1)\n");
3591
            }
3592 3873
            // bottom
3593 3874
            s->mb_y++;
3594 3875
            tprintf("call mbaff filter_mb\n");
3595 3876
            fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
3877
            h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
3596 3878
            filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
3597
            if (tmp != s->current_picture.data[1][384]) {
3598
                tprintf("modified pixel 8,1 (2)\n");
3599
            }
3600 3879
        } else {
3601 3880
            tprintf("call filter_mb\n");
3602 3881
            backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
......
3820 4099
    return 0;
3821 4100
}
3822 4101

  
4102
/**
 * Derives the extra reference-list entries used by MBAFF field macroblocks.
 *
 * For every frame reference i of each list, two additional entries are written
 * at indices 16+2*i and 16+2*i+1:
 *  - entry 16+2*i   is a copy of the frame with all three linesizes doubled,
 *  - entry 16+2*i+1 is the same, with each data pointer advanced by one frame
 *    line (presumably the opposite-parity field -- confirm against callers).
 * The explicit luma/chroma weights and offsets and the implicit weight table
 * are replicated to the same derived indices.
 *
 * @param h decoder context; ref_list, ref_count and the weight tables are read
 *          and extended in place
 * @return 0 (always)
 */
static int fill_mbaff_ref_list(H264Context *h){
    int list, i, j;
    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            Picture *frame = &h->ref_list[list][i];
            Picture *field = &h->ref_list[list][16+2*i];
            field[0] = *frame;
            // doubled stride: stepping one row in the copy skips every other frame line
            for(j=0; j<3; j++)
                field[0].linesize[j] <<= 1;
            field[1] = field[0];
            // second copy starts one (original) frame line further down
            for(j=0; j<3; j++)
                field[1].data[j] += frame->linesize[j];

            h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
            h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
            for(j=0; j<2; j++){
                h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
                h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
            }
        }
    }
    // NOTE(review): the first index is bounded by ref_count[1] and the second by
    // ref_count[0], while the bipred code reads implicit_weight[refn0][refn1];
    // confirm the bounds are intended when the two lists differ in length.
    for(j=0; j<h->ref_count[1]; j++){
        for(i=0; i<h->ref_count[0]; i++)
            h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
        memcpy(h->implicit_weight[16+2*j],   h->implicit_weight[j], sizeof(*h->implicit_weight));
        memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
    }
    // the function is declared int; previously control fell off the end
    // without returning a value
    return 0;
}
4130

  
3823 4131
static int pred_weight_table(H264Context *h){
3824 4132
    MpegEncContext * const s = &h->s;
3825 4133
    int list, i;
......
3889 4197
    h->luma_log2_weight_denom= 5;
3890 4198
    h->chroma_log2_weight_denom= 5;
3891 4199

  
3892
    /* FIXME: MBAFF */
3893 4200
    for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3894 4201
        int poc0 = h->ref_list[0][ref0].poc;
3895 4202
        for(ref1=0; ref1 < h->ref_count[1]; ref1++){
......
4360 4667
            }
4361 4668
        }
4362 4669
        if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
4363
            memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
4670
            memcpy(h->zigzag_scan8x8,       zigzag_scan8x8,       64*sizeof(uint8_t));
4364 4671
            memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
4672
            memcpy(h->field_scan8x8,        field_scan8x8,        64*sizeof(uint8_t));
4673
            memcpy(h->field_scan8x8_cavlc,  field_scan8x8_cavlc,  64*sizeof(uint8_t));
4365 4674
        }else{
4366 4675
            int i;
4367 4676
            for(i=0; i<64; i++){
4368 4677
#define T(x) (x>>3) | ((x&7)<<3)
4369
                h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
4678
                h->zigzag_scan8x8[i]       = T(zigzag_scan8x8[i]);
4370 4679
                h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
4680
                h->field_scan8x8[i]        = T(field_scan8x8[i]);
4681
                h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
4371 4682
#undef T
4372 4683
            }
4373 4684
        }
4374 4685
        if(h->sps.transform_bypass){ //FIXME same ugly
4375
            h->zigzag_scan_q0 = zigzag_scan;
4376
            h->field_scan_q0 = field_scan;
4377
            h->zigzag_scan8x8_q0 = zigzag_scan8x8;
4686
            h->zigzag_scan_q0          = zigzag_scan;
4687
            h->zigzag_scan8x8_q0       = zigzag_scan8x8;
4378 4688
            h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
4689
            h->field_scan_q0           = field_scan;
4690
            h->field_scan8x8_q0        = field_scan8x8;
4691
            h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
4379 4692
        }else{
4380
            h->zigzag_scan_q0 = h->zigzag_scan;
4381
            h->field_scan_q0 = h->field_scan;
4382
            h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
4693
            h->zigzag_scan_q0          = h->zigzag_scan;
4694
            h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
4383 4695
            h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
4696
            h->field_scan_q0           = h->field_scan;
4697
            h->field_scan8x8_q0        = h->field_scan8x8;
4698
            h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
4384 4699
        }
4385 4700

  
4386 4701
        alloc_tables(h);
......
4408 4723
    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
4409 4724
    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
4410 4725

  
4726
    h->mb_mbaff = 0;
4411 4727
    h->mb_aff_frame = 0;
4412 4728
    if(h->sps.frame_mbs_only_flag){
4413 4729
        s->picture_structure= PICT_FRAME;
4414 4730
    }else{
4415 4731
        if(get_bits1(&s->gb)) { //field_pic_flag
4416 4732
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4733
            av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
4417 4734
        } else {
4418 4735
            s->picture_structure= PICT_FRAME;
4419
            first_mb_in_slice <<= h->sps.mb_aff;
4420 4736
            h->mb_aff_frame = h->sps.mb_aff;
4421 4737
        }
4422 4738
    }
4423 4739

  
4424 4740
    s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4425
    s->resync_mb_y = s->mb_y = first_mb_in_slice / s->mb_width;
4741
    s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
4426 4742
    if(s->mb_y >= s->mb_height){
4427 4743
        return -1;
4428 4744
    }
......
4467 4783
    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
4468 4784
        if(h->slice_type == B_TYPE){
4469 4785
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
4786
            if(h->sps.mb_aff && h->direct_spatial_mv_pred)
4787
                av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
4470 4788
        }
4471 4789
        num_ref_idx_active_override_flag= get_bits1(&s->gb);
4472 4790

  
......
4500 4818
    if(s->current_picture.reference)
4501 4819
        decode_ref_pic_marking(h);
4502 4820

  
4821
    if(FRAME_MBAFF)
4822
        fill_mbaff_ref_list(h);
4823

  
4503 4824
    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
4504 4825
        h->cabac_init_idc = get_ue_golomb(&s->gb);
4505 4826

  
......
4544 4865

  
4545 4866
    h->slice_num++;
4546 4867

  
4868
    h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4869
    h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
4870

  
4547 4871
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4548 4872
        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4549 4873
               h->slice_num,
......
4745 5069
    return 0;
4746 5070
}
4747 5071

  
5072
/**
 * Infers the field decoding flag of the current macroblock from a neighbour.
 *
 * The macroblock to the left is consulted first; if it does not belong to the
 * current slice, the macroblock one row above is used; if neither belongs to
 * the current slice the flag is cleared. Both mb_field_decoding_flag and
 * mb_mbaff are set to the result.
 */
static void predict_field_decoding_flag(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy  = s->mb_x + s->mb_y*s->mb_stride;
    const int left_xy = cur_xy - 1;
    const int top_xy  = cur_xy - s->mb_stride;
    int neighbour_type = 0; // no usable neighbour => treated as non-interlaced

    if(h->slice_table[left_xy] == h->slice_num)
        neighbour_type = s->current_picture.mb_type[left_xy];
    else if(h->slice_table[top_xy] == h->slice_num)
        neighbour_type = s->current_picture.mb_type[top_xy];

    h->mb_field_decoding_flag = IS_INTERLACED(neighbour_type) ? 1 : 0;
    h->mb_mbaff               = h->mb_field_decoding_flag;
}
5082

  
4748 5083
/**
4749 5084
 * decodes a P_SKIP or B_SKIP macroblock
4750 5085
 */
......
4756 5091
    memset(h->non_zero_count[mb_xy], 0, 16);
4757 5092
    memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4758 5093

  
4759
    if(h->mb_aff_frame && s->mb_skip_run==0 && (s->mb_y&1)==0){
4760
        h->mb_field_decoding_flag= get_bits1(&s->gb);
4761
    }
4762
    if(h->mb_field_decoding_flag)
5094
    if(MB_FIELD)
4763 5095
        mb_type|= MB_TYPE_INTERLACED;
4764 5096

  
4765 5097
    if( h->slice_type == B_TYPE )
......
4814 5146
            s->mb_skip_run= get_ue_golomb(&s->gb);
4815 5147

  
4816 5148
        if (s->mb_skip_run--) {
5149
            if(FRAME_MBAFF && (s->mb_y&1) == 0){
5150
                if(s->mb_skip_run==0)
5151
                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
5152
                else
5153
                    predict_field_decoding_flag(h);
5154
            }
4817 5155
            decode_mb_skip(h);
4818 5156
            return 0;
4819 5157
        }
4820 5158
    }
4821
    if(h->mb_aff_frame){
4822
        if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
4823
            h->mb_field_decoding_flag = get_bits1(&s->gb);
5159
    if(FRAME_MBAFF){
5160
        if( (s->mb_y&1) == 0 )
5161
            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4824 5162
    }else
4825 5163
        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4826 5164

  
......
4856 5194
        mb_type= i_mb_type_info[mb_type].type;
4857 5195
    }
4858 5196

  
4859
    if(h->mb_field_decoding_flag)
5197
    if(MB_FIELD)
4860 5198
        mb_type |= MB_TYPE_INTERLACED;
4861 5199

  
4862 5200
    h->slice_table[ mb_xy ]= h->slice_num;
......
4900 5238
        return 0;
4901 5239
    }
4902 5240

  
5241
    if(MB_MBAFF){
5242
        h->ref_count[0] <<= 1;
5243
        h->ref_count[1] <<= 1;
5244
    }
5245

  
4903 5246
    fill_caches(h, mb_type, 0);
4904 5247

  
4905 5248
    //mb_pred
......
4984 5327
        for(list=0; list<2; list++){
4985 5328
            int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4986 5329
            if(ref_count == 0) continue;
4987
            if (h->mb_aff_frame && h->mb_field_decoding_flag) {
4988
                ref_count <<= 1;
4989
            }
4990 5330
            for(i=0; i<4; i++){
4991 5331
                if(IS_DIRECT(h->sub_mb_type[i])) continue;
4992 5332
                if(IS_DIR(h->sub_mb_type[i], 0, list)){
......
5162 5502
//        fill_non_zero_count_cache(h);
5163 5503

  
5164 5504
        if(IS_INTERLACED(mb_type)){
5505
            scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
5165 5506
            scan= s->qscale ? h->field_scan : h->field_scan_q0;
5166 5507
            dc_scan= luma_dc_field_scan;
5167 5508
        }else{
5509
            scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5168 5510
            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5169 5511
            dc_scan= luma_dc_zigzag_scan;
5170 5512
        }
5171
        scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
5172 5513

  
5173 5514
        dquant= get_se_golomb(&s->gb);
5174 5515

  
......
5262 5603
    s->current_picture.qscale_table[mb_xy]= s->qscale;
5263 5604
    write_back_non_zero_count(h);
5264 5605

  
5606
    if(MB_MBAFF){
5607
        h->ref_count[0] >>= 1;
5608
        h->ref_count[1] >>= 1;
5609
    }
5610

  
5265 5611
    return 0;
5266 5612
}
5267 5613

  
......
5374 5720
    }
5375 5721
}
5376 5722

  
5377
static int decode_cabac_mb_skip( H264Context *h) {
5723
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5378 5724
    MpegEncContext * const s = &h->s;
5379
    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5380
    const int mba_xy = mb_xy - 1;
5381
    const int mbb_xy = mb_xy - s->mb_stride;
5725
    int mba_xy, mbb_xy;
5382 5726
    int ctx = 0;
5383 5727

  
5728
    if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5729
        int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5730
        mba_xy = mb_xy - 1;
5731
        if( (mb_y&1)
5732
            && h->slice_table[mba_xy] == h->slice_num
5733
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5734
            mba_xy += s->mb_stride;
5735
        if( MB_FIELD ){
5736
            mbb_xy = mb_xy - s->mb_stride;
5737
            if( !(mb_y&1)
5738
                && h->slice_table[mbb_xy] == h->slice_num
5739
                && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5740
                mbb_xy -= s->mb_stride;
5741
        }else
5742
            mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5743
    }else{
5744
        int mb_xy = mb_x + mb_y*s->mb_stride;
5745
        mba_xy = mb_xy - 1;
5746
        mbb_xy = mb_xy - s->mb_stride;
5747
    }
5748

  
5384 5749
    if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5385 5750
        ctx++;
5386 5751
    if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
......
5521 5886
    else
5522 5887
        mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5523 5888

  
5524
    if( h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[mbn_xy] ) || (h->cbp_table[mbn_xy]&0x3f) ) )
5889
    if( h->last_qscale_diff != 0 )
5525 5890
        ctx++;
5526 5891

  
5527 5892
    while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
......
5666 6031

  
5667 6032
static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
5668 6033
    const int mb_xy  = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
5669
    static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
5670
    static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
5671
    static const int significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 297 };
5672
    static const int last_significant_coeff_flag_offset[6] = { 0, 15, 29, 44, 47, 251 };
5673
    static const int coeff_abs_level_m1_offset[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
5674
    static const int significant_coeff_flag_offset_8x8[63] = {
5675
        0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
6034
    static const int significant_coeff_flag_offset[2][6] = {
6035
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
6036
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
6037
    };
6038
    static const int last_coeff_flag_offset[2][6] = {
6039
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
6040
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
6041
    };
6042
    static const int coeff_abs_level_m1_offset[6] = {
6043
        227+0, 227+10, 227+20, 227+30, 227+39, 426
6044
    };
6045
    static const int significant_coeff_flag_offset_8x8[2][63] = {
6046
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5676 6047
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5677 6048
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5678
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
6049
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
6050
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
6051
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
6052
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
6053
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5679 6054
    };
5680 6055
    static const int last_coeff_flag_offset_8x8[63] = {
5681 6056
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
......
5717 6092
    }
5718 6093

  
5719 6094
    significant_coeff_ctx_base = h->cabac_state
5720
        + significant_coeff_flag_offset[cat]
5721
        + significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
6095
        + significant_coeff_flag_offset[MB_FIELD][cat];
5722 6096
    last_coeff_ctx_base = h->cabac_state
5723
        + last_significant_coeff_flag_offset[cat]
5724
        + last_significant_coeff_flag_field_offset[h->mb_field_decoding_flag];
6097
        + last_coeff_flag_offset[MB_FIELD][cat];
5725 6098
    abs_level_m1_ctx_base = h->cabac_state
5726 6099
        + coeff_abs_level_m1_offset[cat];
5727 6100

  
......
5738 6111
                } \
5739 6112
            } \
5740 6113
        }
5741
        DECODE_SIGNIFICANCE( 63, significant_coeff_flag_offset_8x8[last],
5742
                                 last_coeff_flag_offset_8x8[last] );
6114
        const int *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
6115
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5743 6116
    } else {
5744 6117
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5745 6118
    }
......
5815 6188
    const int mb_xy  = s->mb_x + s->mb_y*s->mb_stride;
5816 6189
    h->top_mb_xy     = mb_xy - s->mb_stride;
5817 6190
    h->left_mb_xy[0] = mb_xy - 1;
5818
    if(h->mb_aff_frame){
6191
    if(FRAME_MBAFF){
5819 6192
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
5820 6193
        const int top_pair_xy      = pair_xy     - s->mb_stride;
5821 6194
        const int top_mb_frame_flag      = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5822 6195
        const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5823
        const int curr_mb_frame_flag = !h->mb_field_decoding_flag;
6196
        const int curr_mb_frame_flag = !MB_FIELD;
5824 6197
        const int bottom = (s->mb_y & 1);
5825 6198
        if (bottom
5826 6199
                ? !curr_mb_frame_flag // bottom macroblock
......
5849 6222

  
5850 6223
    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5851 6224
    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
6225
        int skip;
6226
        /* a skipped mb needs the aff flag from the following mb */
6227
        if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
6228
            predict_field_decoding_flag(h);
6229
        if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
6230
            skip = h->next_mb_skipped;
6231
        else
6232
            skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5852 6233
        /* read skip flags */
5853
        if( decode_cabac_mb_skip( h ) ) {
6234
        if( skip ) {
6235
            if( FRAME_MBAFF && (s->mb_y&1)==0 ){
6236
                s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
6237
                h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
6238
                if(h->next_mb_skipped)
6239
                    predict_field_decoding_flag(h);
6240
                else
6241
                    h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
6242
            }
6243

  
5854 6244
            decode_mb_skip(h);
5855 6245

  
5856 6246
            h->cbp_table[mb_xy] = 0;
......
5861 6251

  
5862 6252
        }
5863 6253
    }
5864
    if(h->mb_aff_frame){
5865
        if ( ((s->mb_y&1) == 0) || h->prev_mb_skipped)
6254
    if(FRAME_MBAFF){
6255
        if( (s->mb_y&1) == 0 )
6256
            h->mb_mbaff =
5866 6257
            h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5867 6258
    }else
5868 6259
        h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
......
5899 6290
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5900 6291
        mb_type= i_mb_type_info[mb_type].type;
5901 6292
    }
5902
    if(h->mb_field_decoding_flag)
6293
    if(MB_FIELD)
5903 6294
        mb_type |= MB_TYPE_INTERLACED;
5904 6295

  
5905 6296
    h->slice_table[ mb_xy ]= h->slice_num;
......
5951 6342
        return 0;
5952 6343
    }
5953 6344

  
6345
    if(MB_MBAFF){
6346
        h->ref_count[0] <<= 1;
6347
        h->ref_count[1] <<= 1;
6348
    }
6349

  
5954 6350
    fill_caches(h, mb_type, 0);
5955 6351

  
5956 6352
    if( IS_INTRA( mb_type ) ) {
......
6205 6601
        int dqp;
6206 6602

  
6207 6603
        if(IS_INTERLACED(mb_type)){
6604
            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
6208 6605
            scan= s->qscale ? h->field_scan : h->field_scan_q0;
6209 6606
            dc_scan= luma_dc_field_scan;
6210 6607
        }else{
6608
            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6211 6609
            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
6212 6610
            dc_scan= luma_dc_zigzag_scan;
6213 6611
        }
6214
        scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
6215 6612

  
6216 6613
        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
6217 6614
        if( dqp == INT_MIN ){
......
6290 6687
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6291 6688
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6292 6689
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6690
        h->last_qscale_diff = 0;
6293 6691
    }
6294 6692

  
6295 6693
    s->current_picture.qscale_table[mb_xy]= s->qscale;
6296 6694
    write_back_non_zero_count(h);
6297 6695

  
6696
    if(MB_MBAFF){
6697
        h->ref_count[0] >>= 1;
6698
        h->ref_count[1] >>= 1;
6699
    }
6700

  
6298 6701
    return 0;
6299 6702
}
6300 6703

  
......
6385 6788

  
6386 6789
        int qp_index;
6387 6790
        int bS_index = (i >> 1);
6388
        if (h->mb_field_decoding_flag) {
6791
        if (!MB_FIELD) {
6389 6792
            bS_index &= ~1;
6390 6793
            bS_index |= (i & 1);
6391 6794
        }
......
6394 6797
            continue;
6395 6798
        }
6396 6799

  
6397
        qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6800
        qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6398 6801
        index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6399 6802
        alpha = alpha_table[index_a];
6400 6803
        beta  = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6401 6804

  
6402

  
6403 6805
        if( bS[bS_index] < 4 ) {
6404 6806
            const int tc0 = tc0_table[index_a][bS[bS_index] - 1];
6405
            /* 4px edge length */
6406 6807
            const int p0 = pix[-1];
6407 6808
            const int p1 = pix[-2];
6408 6809
            const int p2 = pix[-3];
......
6431 6832
                tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6432 6833
            }
6433 6834
        }else{
6434
            /* 4px edge length */
6435 6835
            const int p0 = pix[-1];
6436 6836
            const int p1 = pix[-2];
6437 6837
            const int p2 = pix[-3];
......
6477 6877
        }
6478 6878
    }
6479 6879
}
6480
static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp[2] ) {
6880
static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) {
6481 6881
    int i;
6482 6882
    for( i = 0; i < 8; i++, pix += stride) {
6483 6883
        int index_a;
......
6491 6891
            continue;
6492 6892
        }
6493 6893

  
6494
        qp_index = h->mb_field_decoding_flag ? (i & 1) : (i >> 3);
6894
        qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6495 6895
        index_a = clip( qp[qp_index] + h->slice_alpha_c0_offset, 0, 51 );
6496 6896
        alpha = alpha_table[index_a];
6497 6897
        beta  = beta_table[clip( qp[qp_index] + h->slice_beta_offset, 0, 51 )];
6898

  
6498 6899
        if( bS[bS_index] < 4 ) {
6499 6900
            const int tc = tc0_table[index_a][bS[bS_index] - 1] + 1;
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff