Revision a50f0bea

View differences:

libavcodec/Makefile
168 168
                                          ratecontrol.o h263.o ituh263enc.o \
169 169
                                          flvenc.o mpeg12data.o             \
170 170
                                          mpegvideo.o error_resilience.o
171
OBJS-$(CONFIG_H264_DECODER)            += h264.o                               \
171
OBJS-$(CONFIG_H264_DECODER)            += h264.o h264_hl_motion.o              \
172 172
                                          h264_loopfilter.o h264_direct.o      \
173 173
                                          cabac.o h264_sei.o h264_ps.o         \
174 174
                                          h264_refs.o h264_cavlc.o h264_cabac.o\
......
356 356
                                          mpegvideo.o error_resilience.o \
357 357
                                          ituh263enc.o mpegvideo_enc.o   \
358 358
                                          ratecontrol.o mpeg12data.o
359
OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o                       \
359
OBJS-$(CONFIG_SVQ3_DECODER)            += h264.o svq3.o h264_hl_motion.o      \
360 360
                                          h264_loopfilter.o h264_direct.o     \
361 361
                                          h264_sei.o h264_ps.o h264_refs.o    \
362 362
                                          h264_cavlc.o h264_cabac.o cabac.o   \
......
594 594
OBJS-$(CONFIG_FLAC_PARSER)             += flac_parser.o flacdata.o flac.o
595 595
OBJS-$(CONFIG_H261_PARSER)             += h261_parser.o
596 596
OBJS-$(CONFIG_H263_PARSER)             += h263_parser.o
597
OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264.o            \
597
OBJS-$(CONFIG_H264_PARSER)             += h264_parser.o h264.o h264_hl_motion.o \
598 598
                                          cabac.o                         \
599 599
                                          h264_refs.o h264_sei.o h264_direct.o \
600 600
                                          h264_loopfilter.o h264_cabac.o \
libavcodec/h264.c
250 250
    return 0;
251 251
}
252 252

  
253
/**
 * Compute a bound on the reference rows touched when predicting one
 * partition from one reference list.
 *
 * The vertical motion vector of block n is split into an integer part
 * (mv >> 2) and a fractional part (mv & 3); when the fractional part is
 * non-zero the range is widened by 2 rows on both sides.
 *
 * @param h        the H264 context (mv_cache is read)
 * @param pic      reference picture (not used by this function)
 * @param n        block index, mapped through scan8[]
 * @param height   height of the partition in rows
 * @param y_offset row offset added to the integer part of the vector
 * @param list     reference list whose motion vector is used
 * @return the larger of |first row| and the bottom row bound
 */
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
                                 int y_offset, int list){
    const int mv_y      = h->mv_cache[list][ scan8[n] ][1];
    const int margin    = (mv_y & 3) ? 2 : 0;
    const int base_row  = (mv_y >> 2) + y_offset;
    const int first_row = base_row - margin;
    const int last_row  = base_row + height + margin;

    return FFMAX(abs(first_row), last_row);
}
262

  
263
/**
 * Record, for one partition, the lowest row needed from each reference
 * picture it predicts from.
 *
 * @param h        the H264 context
 * @param refs     per list / reference index row bound; entries < 0 are unset
 * @param n        block index, mapped through scan8[]
 * @param height   height of the partition in rows
 * @param y_offset row offset of the partition inside the macroblock
 * @param list0    non-zero if the partition predicts from list 0
 * @param list1    non-zero if the partition predicts from list 1
 * @param nrefs    per list count of refs[] entries that have been set
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                               int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    const int use_list[2] = { list0, list1 };
    int list;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    for(list=0; list<2; list++){
        int ref_n, my;
        Picture *ref;

        if(!use_list[list])
            continue;

        ref_n = h->ref_cache[list][ scan8[n] ];
        ref   = &h->ref_list[list][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if(ref->thread_opaque == s->current_picture.thread_opaque &&
           (ref->reference&3) == s->picture_structure)
            continue;

        my = get_lowest_part_list_y(h, ref, n, height, y_offset, list);
        if(refs[list][ref_n] < 0)
            nrefs[list] += 1;
        refs[list][ref_n] = FFMAX(refs[list][ref_n], my);
    }
}
297

  
298
/**
299
 * Wait until all reference frames are available for MC operations.
300
 *
301
 * @param h the H264 context
302
 */
303
static void await_references(H264Context *h){
304
    MpegEncContext * const s = &h->s;
305
    const int mb_xy= h->mb_xy;
306
    const int mb_type= s->current_picture.mb_type[mb_xy];
307
    int refs[2][48];
308
    int nrefs[2] = {0};
309
    int ref, list;
310

  
311
    memset(refs, -1, sizeof(refs));
312

  
313
    if(IS_16X16(mb_type)){
314
        get_lowest_part_y(h, refs, 0, 16, 0,
315
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
316
    }else if(IS_16X8(mb_type)){
317
        get_lowest_part_y(h, refs, 0, 8, 0,
318
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
319
        get_lowest_part_y(h, refs, 8, 8, 8,
320
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
321
    }else if(IS_8X16(mb_type)){
322
        get_lowest_part_y(h, refs, 0, 16, 0,
323
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
324
        get_lowest_part_y(h, refs, 4, 16, 0,
325
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
326
    }else{
327
        int i;
328

  
329
        assert(IS_8X8(mb_type));
330

  
331
        for(i=0; i<4; i++){
332
            const int sub_mb_type= h->sub_mb_type[i];
333
            const int n= 4*i;
334
            int y_offset= (i&2)<<2;
335

  
336
            if(IS_SUB_8X8(sub_mb_type)){
337
                get_lowest_part_y(h, refs, n  , 8, y_offset,
338
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
339
            }else if(IS_SUB_8X4(sub_mb_type)){
340
                get_lowest_part_y(h, refs, n  , 4, y_offset,
341
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
342
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
343
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
344
            }else if(IS_SUB_4X8(sub_mb_type)){
345
                get_lowest_part_y(h, refs, n  , 8, y_offset,
346
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
347
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
348
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
349
            }else{
350
                int j;
351
                assert(IS_SUB_4X4(sub_mb_type));
352
                for(j=0; j<4; j++){
353
                    int sub_y_offset= y_offset + 2*(j&2);
354
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
355
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
356
                }
357
            }
358
        }
359
    }
360

  
361
    for(list=h->list_count-1; list>=0; list--){
362
        for(ref=0; ref<48 && nrefs[list]; ref++){
363
            int row = refs[list][ref];
364
            if(row >= 0){
365
                Picture *ref_pic = &h->ref_list[list][ref];
366
                int ref_field = ref_pic->reference - 1;
367
                int ref_field_picture = ref_pic->field_picture;
368
                int pic_height = 16*s->mb_height >> ref_field_picture;
369

  
370
                row <<= MB_MBAFF;
371
                nrefs[list]--;
372

  
373
                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
374
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
375
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
376
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
377
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
378
                }else if(FIELD_PICTURE){
379
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
380
                }else{
381
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
382
                }
383
            }
384
        }
385
    }
386
}
387

  
388 253
#if 0
389 254
/**
390 255
 * DCT transforms the 16 dc values.
......
451 316
}
452 317
#endif
453 318

  
454
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
455
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
456
                           int src_x_offset, int src_y_offset,
457
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
458
    MpegEncContext * const s = &h->s;
459
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
460
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
461
    const int luma_xy= (mx&3) + ((my&3)<<2);
462
    uint8_t * src_y = pic->data[0] + ((mx>>2)<<h->pixel_shift) + (my>>2)*h->mb_linesize;
463
    uint8_t * src_cb, * src_cr;
464
    int extra_width= h->emu_edge_width;
465
    int extra_height= h->emu_edge_height;
466
    int emu=0;
467
    const int full_mx= mx>>2;
468
    const int full_my= my>>2;
469
    const int pic_width  = 16*s->mb_width;
470
    const int pic_height = 16*s->mb_height >> MB_FIELD;
471

  
472
    if(mx&7) extra_width -= 3;
473
    if(my&7) extra_height -= 3;
474

  
475
    if(   full_mx < 0-extra_width
476
       || full_my < 0-extra_height
477
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
478
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
479
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<h->pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
480
            src_y= s->edge_emu_buffer + (2<<h->pixel_shift) + 2*h->mb_linesize;
481
        emu=1;
482
    }
483

  
484
    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
485
    if(!square){
486
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
487
    }
488

  
489
    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
490

  
491
    if(MB_FIELD){
492
        // chroma offset when predicting from a field of opposite parity
493
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
494
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
495
    }
496
    src_cb= pic->data[1] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
497
    src_cr= pic->data[2] + ((mx>>3)<<h->pixel_shift) + (my>>3)*h->mb_uvlinesize;
498

  
499
    if(emu){
500
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
501
            src_cb= s->edge_emu_buffer;
502
    }
503
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
504

  
505
    if(emu){
506
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
507
            src_cr= s->edge_emu_buffer;
508
    }
509
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
510
}
511

  
512
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
513
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
514
                           int x_offset, int y_offset,
515
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
516
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
517
                           int list0, int list1){
518
    MpegEncContext * const s = &h->s;
519
    qpel_mc_func *qpix_op=  qpix_put;
520
    h264_chroma_mc_func chroma_op= chroma_put;
521

  
522
    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
523
    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
524
    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
525
    x_offset += 8*s->mb_x;
526
    y_offset += 8*(s->mb_y >> MB_FIELD);
527

  
528
    if(list0){
529
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
530
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
531
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
532
                           qpix_op, chroma_op);
533

  
534
        qpix_op=  qpix_avg;
535
        chroma_op= chroma_avg;
536
    }
537

  
538
    if(list1){
539
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
540
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
541
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
542
                           qpix_op, chroma_op);
543
    }
544
}
545

  
546
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
547
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
548
                           int x_offset, int y_offset,
549
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
550
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
551
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
552
                           int list0, int list1){
553
    MpegEncContext * const s = &h->s;
554

  
555
    dest_y  += (2*x_offset<<h->pixel_shift) + 2*y_offset*h->  mb_linesize;
556
    dest_cb += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
557
    dest_cr += (  x_offset<<h->pixel_shift) +   y_offset*h->mb_uvlinesize;
558
    x_offset += 8*s->mb_x;
559
    y_offset += 8*(s->mb_y >> MB_FIELD);
560

  
561
    if(list0 && list1){
562
        /* don't optimize for luma-only case, since B-frames usually
563
         * use implicit weights => chroma too. */
564
        uint8_t *tmp_cb = s->obmc_scratchpad;
565
        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<h->pixel_shift);
566
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
567
        int refn0 = h->ref_cache[0][ scan8[n] ];
568
        int refn1 = h->ref_cache[1][ scan8[n] ];
569

  
570
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
571
                    dest_y, dest_cb, dest_cr,
572
                    x_offset, y_offset, qpix_put, chroma_put);
573
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
574
                    tmp_y, tmp_cb, tmp_cr,
575
                    x_offset, y_offset, qpix_put, chroma_put);
576

  
577
        if(h->use_weight == 2){
578
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
579
            int weight1 = 64 - weight0;
580
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
581
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
582
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
583
        }else{
584
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
585
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
586
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
587
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
588
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
589
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
590
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
591
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
592
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
593
        }
594
    }else{
595
        int list = list1 ? 1 : 0;
596
        int refn = h->ref_cache[list][ scan8[n] ];
597
        Picture *ref= &h->ref_list[list][refn];
598
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
599
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
600
                    qpix_put, chroma_put);
601

  
602
        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
603
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
604
        if(h->use_weight_chroma){
605
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
606
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
607
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
608
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
609
        }
610
    }
611
}
612

  
613
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
614
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
615
                           int x_offset, int y_offset,
616
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
617
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
618
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
619
                           int list0, int list1){
620
    if((h->use_weight==2 && list0 && list1
621
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
622
       || h->use_weight==1)
623
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
624
                         x_offset, y_offset, qpix_put, chroma_put,
625
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
626
    else
627
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
628
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
629
}
630

  
631
/**
 * Prefetch reference pixels for an estimated motion vector.
 *
 * Fetches pixels for the estimated mv 4 macroblocks ahead, optimized for
 * 64byte cache lines. Nothing is done when the cached reference index of
 * block 0 is negative.
 *
 * @param h    the H264 context
 * @param list reference list to prefetch from
 */
static inline void prefetch_motion(H264Context *h, int list){
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    uint8_t **src;
    int mx, my, off;

    if(refn < 0)
        return;

    mx  = (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
    my  = (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
    src = h->ref_list[list][refn].data;

    /* luma */
    off = ((mx+64)<<h->pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize;
    s->dsp.prefetch(src[0]+off, s->linesize, 4);

    /* chroma: the stride passed is the distance between the two planes */
    off = (((mx>>1)+64)<<h->pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
    s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
}
646

  
647
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
648
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
649
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
650
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
651
    MpegEncContext * const s = &h->s;
652
    const int mb_xy= h->mb_xy;
653
    const int mb_type= s->current_picture.mb_type[mb_xy];
654

  
655
    assert(IS_INTER(mb_type));
656

  
657
    if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
658
        await_references(h);
659
    prefetch_motion(h, 0);
660

  
661
    if(IS_16X16(mb_type)){
662
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
663
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
664
                weight_op, weight_avg,
665
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
666
    }else if(IS_16X8(mb_type)){
667
        mc_part(h, 0, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 0,
668
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
669
                &weight_op[1], &weight_avg[1],
670
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
671
        mc_part(h, 8, 0, 4, (8<<h->pixel_shift), dest_y, dest_cb, dest_cr, 0, 4,
672
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
673
                &weight_op[1], &weight_avg[1],
674
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
675
    }else if(IS_8X16(mb_type)){
676
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
677
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
678
                &weight_op[2], &weight_avg[2],
679
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
680
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
681
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
682
                &weight_op[2], &weight_avg[2],
683
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
684
    }else{
685
        int i;
686

  
687
        assert(IS_8X8(mb_type));
688

  
689
        for(i=0; i<4; i++){
690
            const int sub_mb_type= h->sub_mb_type[i];
691
            const int n= 4*i;
692
            int x_offset= (i&1)<<2;
693
            int y_offset= (i&2)<<1;
694

  
695
            if(IS_SUB_8X8(sub_mb_type)){
696
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
697
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
698
                    &weight_op[3], &weight_avg[3],
699
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
700
            }else if(IS_SUB_8X4(sub_mb_type)){
701
                mc_part(h, n  , 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset,
702
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
703
                    &weight_op[4], &weight_avg[4],
704
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
705
                mc_part(h, n+2, 0, 2, (4<<h->pixel_shift), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
706
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
707
                    &weight_op[4], &weight_avg[4],
708
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
709
            }else if(IS_SUB_4X8(sub_mb_type)){
710
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
711
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
712
                    &weight_op[5], &weight_avg[5],
713
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
714
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
715
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
716
                    &weight_op[5], &weight_avg[5],
717
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
718
            }else{
719
                int j;
720
                assert(IS_SUB_4X4(sub_mb_type));
721
                for(j=0; j<4; j++){
722
                    int sub_x_offset= x_offset + 2*(j&1);
723
                    int sub_y_offset= y_offset +   (j&2);
724
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
725
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
726
                        &weight_op[6], &weight_avg[6],
727
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
728
                }
729
            }
730
        }
731
    }
732

  
733
    prefetch_motion(h, 1);
734
}
735

  
736 319

  
737 320
static void free_tables(H264Context *h, int free_rbsp){
738 321
    int i;
......
1692 1275
            if(h->deblocking_filter)
1693 1276
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
1694 1277
        }else if(is_h264){
1695
            hl_motion(h, dest_y, dest_cb, dest_cr,
1278
            ff_hl_motion(h, dest_y, dest_cb, dest_cr,
1696 1279
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1697 1280
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
1698 1281
                      h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
libavcodec/h264.h
717 717
void ff_h264_reset_sei(H264Context *h);
718 718

  
719 719

  
720
/**
 * Inter-prediction entry point for one macroblock.
 *
 * The definition in h264_hl_motion.c forwards all arguments to
 * hl_motion_16() when h->pixel_shift is non-zero and to hl_motion_8()
 * otherwise.
 *
 * @param h          the H264 context
 * @param dest_y     luma destination
 * @param dest_cb    Cb destination
 * @param dest_cr    Cr destination
 * @param qpix_put   luma put function tables
 * @param chroma_put chroma put functions
 * @param qpix_avg   luma avg function tables
 * @param chroma_avg chroma avg functions
 * @param weight_op  single-list weight function table
 * @param weight_avg two-list weight function table
 */
void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
721
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
722
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
723
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg);
724

  
725

  
720 726
/*
721 727
o-o o-o
722 728
 / / /
libavcodec/h264_hl_motion.c
1

  
2
#include "h264.h"
3
#include "thread.h"
4

  
5
/**
 * Compute a bound on the reference rows touched when predicting one
 * partition from one reference list.
 *
 * The vertical motion vector of block n is split into an integer part
 * (mv >> 2) and a fractional part (mv & 3); when the fractional part is
 * non-zero the range is widened by 2 rows on both sides.
 *
 * @param h        the H264 context (mv_cache is read)
 * @param pic      reference picture (not used by this function)
 * @param n        block index, mapped through scan8[]
 * @param height   height of the partition in rows
 * @param y_offset row offset added to the integer part of the vector
 * @param list     reference list whose motion vector is used
 * @return the larger of |first row| and the bottom row bound
 */
static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
                                 int y_offset, int list){
    const int mv_y      = h->mv_cache[list][ scan8[n] ][1];
    const int margin    = (mv_y & 3) ? 2 : 0;
    const int base_row  = (mv_y >> 2) + y_offset;
    const int first_row = base_row - margin;
    const int last_row  = base_row + height + margin;

    return FFMAX(abs(first_row), last_row);
}
14

  
15
/**
 * Record, for one partition, the lowest row needed from each reference
 * picture it predicts from.
 *
 * @param h        the H264 context
 * @param refs     per list / reference index row bound; entries < 0 are unset
 * @param n        block index, mapped through scan8[]
 * @param height   height of the partition in rows
 * @param y_offset row offset of the partition inside the macroblock
 * @param list0    non-zero if the partition predicts from list 0
 * @param list1    non-zero if the partition predicts from list 1
 * @param nrefs    per list count of refs[] entries that have been set
 */
static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
                               int y_offset, int list0, int list1, int *nrefs){
    MpegEncContext * const s = &h->s;
    const int use_list[2] = { list0, list1 };
    int list;

    y_offset += 16*(s->mb_y >> MB_FIELD);

    for(list=0; list<2; list++){
        int ref_n, my;
        Picture *ref;

        if(!use_list[list])
            continue;

        ref_n = h->ref_cache[list][ scan8[n] ];
        ref   = &h->ref_list[list][ref_n];

        // Error resilience puts the current picture in the ref list.
        // Don't try to wait on these as it will cause a deadlock.
        // Fields can wait on each other, though.
        if(ref->thread_opaque == s->current_picture.thread_opaque &&
           (ref->reference&3) == s->picture_structure)
            continue;

        my = get_lowest_part_list_y(h, ref, n, height, y_offset, list);
        if(refs[list][ref_n] < 0)
            nrefs[list] += 1;
        refs[list][ref_n] = FFMAX(refs[list][ref_n], my);
    }
}
49

  
50
/**
51
 * Wait until all reference frames are available for MC operations.
52
 *
53
 * @param h the H264 context
54
 */
55
static void await_references(H264Context *h){
56
    MpegEncContext * const s = &h->s;
57
    const int mb_xy= h->mb_xy;
58
    const int mb_type= s->current_picture.mb_type[mb_xy];
59
    int refs[2][48];
60
    int nrefs[2] = {0};
61
    int ref, list;
62

  
63
    memset(refs, -1, sizeof(refs));
64

  
65
    if(IS_16X16(mb_type)){
66
        get_lowest_part_y(h, refs, 0, 16, 0,
67
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
68
    }else if(IS_16X8(mb_type)){
69
        get_lowest_part_y(h, refs, 0, 8, 0,
70
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
71
        get_lowest_part_y(h, refs, 8, 8, 8,
72
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
73
    }else if(IS_8X16(mb_type)){
74
        get_lowest_part_y(h, refs, 0, 16, 0,
75
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
76
        get_lowest_part_y(h, refs, 4, 16, 0,
77
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
78
    }else{
79
        int i;
80

  
81
        assert(IS_8X8(mb_type));
82

  
83
        for(i=0; i<4; i++){
84
            const int sub_mb_type= h->sub_mb_type[i];
85
            const int n= 4*i;
86
            int y_offset= (i&2)<<2;
87

  
88
            if(IS_SUB_8X8(sub_mb_type)){
89
                get_lowest_part_y(h, refs, n  , 8, y_offset,
90
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
91
            }else if(IS_SUB_8X4(sub_mb_type)){
92
                get_lowest_part_y(h, refs, n  , 4, y_offset,
93
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
94
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
95
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
96
            }else if(IS_SUB_4X8(sub_mb_type)){
97
                get_lowest_part_y(h, refs, n  , 8, y_offset,
98
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
99
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
100
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
101
            }else{
102
                int j;
103
                assert(IS_SUB_4X4(sub_mb_type));
104
                for(j=0; j<4; j++){
105
                    int sub_y_offset= y_offset + 2*(j&2);
106
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
107
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
108
                }
109
            }
110
        }
111
    }
112

  
113
    for(list=h->list_count-1; list>=0; list--){
114
        for(ref=0; ref<48 && nrefs[list]; ref++){
115
            int row = refs[list][ref];
116
            if(row >= 0){
117
                Picture *ref_pic = &h->ref_list[list][ref];
118
                int ref_field = ref_pic->reference - 1;
119
                int ref_field_picture = ref_pic->field_picture;
120
                int pic_height = 16*s->mb_height >> ref_field_picture;
121

  
122
                row <<= MB_MBAFF;
123
                nrefs[list]--;
124

  
125
                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
126
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
127
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
128
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
129
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
130
                }else if(FIELD_PICTURE){
131
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
132
                }else{
133
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
134
                }
135
            }
136
        }
137
    }
138
}
139

  
140
/* First inclusion of the template header: FUNC() appends the suffix _8 to
 * every name it wraps and PIXEL_SHIFT is the constant 0. */
#define FUNC(a) a ## _8
141
#define PIXEL_SHIFT 0
142
#include "h264_hl_motion.h"
143

  
144
/* Second inclusion of the same header: names now get the suffix _16 and
 * PIXEL_SHIFT is the constant 1. */
#undef PIXEL_SHIFT
145
#undef FUNC
146
#define FUNC(a) a ## _16
147
#define PIXEL_SHIFT 1
148
#include "h264_hl_motion.h"
149

  
150
void ff_hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
151
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
152
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
153
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
154
    if(h->pixel_shift){
155
        hl_motion_16(h, dest_y, dest_cb, dest_cr,
156
                      qpix_put, chroma_put,
157
                      qpix_avg, chroma_avg,
158
                      weight_op, weight_avg);
159
    }else
160
        hl_motion_8(h, dest_y, dest_cb, dest_cr,
161
                      qpix_put, chroma_put,
162
                      qpix_avg, chroma_avg,
163
                      weight_op, weight_avg);
164
}
libavcodec/h264_hl_motion.h
1

  
2
static inline void FUNC(mc_dir_part)(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
3
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
4
                           int src_x_offset, int src_y_offset,
5
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
6
    MpegEncContext * const s = &h->s;
7
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
8
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
9
    const int luma_xy= (mx&3) + ((my&3)<<2);
10
    uint8_t * src_y = pic->data[0] + ((mx>>2)<<PIXEL_SHIFT) + (my>>2)*h->mb_linesize;
11
    uint8_t * src_cb, * src_cr;
12
    int extra_width= h->emu_edge_width;
13
    int extra_height= h->emu_edge_height;
14
    int emu=0;
15
    const int full_mx= mx>>2;
16
    const int full_my= my>>2;
17
    const int pic_width  = 16*s->mb_width;
18
    const int pic_height = 16*s->mb_height >> MB_FIELD;
19

  
20
    if(mx&7) extra_width -= 3;
21
    if(my&7) extra_height -= 3;
22

  
23
    if(   full_mx < 0-extra_width
24
       || full_my < 0-extra_height
25
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
26
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
27
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2<<PIXEL_SHIFT) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
28
            src_y= s->edge_emu_buffer + (2<<PIXEL_SHIFT) + 2*h->mb_linesize;
29
        emu=1;
30
    }
31

  
32
    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
33
    if(!square){
34
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
35
    }
36

  
37
    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
38

  
39
    if(MB_FIELD){
40
        // chroma offset when predicting from a field of opposite parity
41
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
42
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
43
    }
44
    src_cb= pic->data[1] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
45
    src_cr= pic->data[2] + ((mx>>3)<<PIXEL_SHIFT) + (my>>3)*h->mb_uvlinesize;
46

  
47
    if(emu){
48
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
49
            src_cb= s->edge_emu_buffer;
50
    }
51
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
52

  
53
    if(emu){
54
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
55
            src_cr= s->edge_emu_buffer;
56
    }
57
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
58
}
59

  
60
static inline void FUNC(mc_part_std)(H264Context *h, int n, int square, int chroma_height, int delta,
61
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
62
                           int x_offset, int y_offset,
63
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
64
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
65
                           int list0, int list1){
66
    MpegEncContext * const s = &h->s;
67
    qpel_mc_func *qpix_op=  qpix_put;
68
    h264_chroma_mc_func chroma_op= chroma_put;
69

  
70
    dest_y  += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h->  mb_linesize;
71
    dest_cb += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
72
    dest_cr += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
73
    x_offset += 8*s->mb_x;
74
    y_offset += 8*(s->mb_y >> MB_FIELD);
75

  
76
    if(list0){
77
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
78
        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 0,
79
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
80
                           qpix_op, chroma_op);
81

  
82
        qpix_op=  qpix_avg;
83
        chroma_op= chroma_avg;
84
    }
85

  
86
    if(list1){
87
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
88
        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, 1,
89
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
90
                           qpix_op, chroma_op);
91
    }
92
}
93

  
94
static inline void FUNC(mc_part_weighted)(H264Context *h, int n, int square, int chroma_height, int delta,
95
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
96
                           int x_offset, int y_offset,
97
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
98
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
99
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
100
                           int list0, int list1){
101
    MpegEncContext * const s = &h->s;
102

  
103
    dest_y  += (2*x_offset<<PIXEL_SHIFT) + 2*y_offset*h->  mb_linesize;
104
    dest_cb += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
105
    dest_cr += (  x_offset<<PIXEL_SHIFT) +   y_offset*h->mb_uvlinesize;
106
    x_offset += 8*s->mb_x;
107
    y_offset += 8*(s->mb_y >> MB_FIELD);
108

  
109
    if(list0 && list1){
110
        /* don't optimize for luma-only case, since B-frames usually
111
         * use implicit weights => chroma too. */
112
        uint8_t *tmp_cb = s->obmc_scratchpad;
113
        uint8_t *tmp_cr = s->obmc_scratchpad + (8<<PIXEL_SHIFT);
114
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
115
        int refn0 = h->ref_cache[0][ scan8[n] ];
116
        int refn1 = h->ref_cache[1][ scan8[n] ];
117

  
118
        FUNC(mc_dir_part)(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
119
                    dest_y, dest_cb, dest_cr,
120
                    x_offset, y_offset, qpix_put, chroma_put);
121
        FUNC(mc_dir_part)(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
122
                    tmp_y, tmp_cb, tmp_cr,
123
                    x_offset, y_offset, qpix_put, chroma_put);
124

  
125
        if(h->use_weight == 2){
126
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
127
            int weight1 = 64 - weight0;
128
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
129
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
130
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
131
        }else{
132
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
133
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
134
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
135
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
136
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
137
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
138
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
139
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
140
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
141
        }
142
    }else{
143
        int list = list1 ? 1 : 0;
144
        int refn = h->ref_cache[list][ scan8[n] ];
145
        Picture *ref= &h->ref_list[list][refn];
146
        FUNC(mc_dir_part)(h, ref, n, square, chroma_height, delta, list,
147
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
148
                    qpix_put, chroma_put);
149

  
150
        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
151
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
152
        if(h->use_weight_chroma){
153
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
154
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
155
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
156
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
157
        }
158
    }
159
}
160

  
161
static inline void FUNC(mc_part)(H264Context *h, int n, int square, int chroma_height, int delta,
162
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
163
                           int x_offset, int y_offset,
164
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
165
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
166
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
167
                           int list0, int list1){
168
    if((h->use_weight==2 && list0 && list1
169
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
170
       || h->use_weight==1)
171
        FUNC(mc_part_weighted)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
172
                         x_offset, y_offset, qpix_put, chroma_put,
173
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
174
    else
175
        FUNC(mc_part_std)(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
176
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
177
}
178

  
179
/**
 * Issue prefetch hints for the reference area of a later macroblock.
 *
 * Uses the motion vector and reference index cached at scan8[0] for the given
 * list; nothing is done when that reference index is negative.
 *
 * @param h    decoder context
 * @param list reference list whose cached vector is used
 */
static inline void FUNC(prefetch_motion)(H264Context *h, int list){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    uint8_t **planes;
    int mx, my, offset;

    if(refn < 0)
        return;

    mx     = (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
    my     = (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
    planes = h->ref_list[list][refn].data;

    /* Luma: the row is staggered by 4 lines per macroblock column (mod 4). */
    offset = ((mx+64)<<PIXEL_SHIFT) + (my + (s->mb_x&3)*4)*h->mb_linesize;
    s->dsp.prefetch(planes[0]+offset, s->linesize, 4);

    /* Chroma: the stride passed is the distance between the two chroma planes. */
    offset = (((mx>>1)+64)<<PIXEL_SHIFT) + ((my>>1) + (s->mb_x&7))*s->uvlinesize;
    s->dsp.prefetch(planes[1]+offset, planes[2]-planes[1], 2);
}
194

  
195
static void FUNC(hl_motion)(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
196
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
197
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
198
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
199
    MpegEncContext * const s = &h->s;
200
    const int mb_xy= h->mb_xy;
201
    const int mb_type= s->current_picture.mb_type[mb_xy];
202

  
203
    assert(IS_INTER(mb_type));
204

  
205
    if(HAVE_PTHREADS && s->avctx->active_thread_type&FF_THREAD_FRAME)
206
        await_references(h);
207
    FUNC(prefetch_motion)(h, 0);
208

  
209
    if(IS_16X16(mb_type)){
210
        FUNC(mc_part)(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
211
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
212
                weight_op, weight_avg,
213
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
214
    }else if(IS_16X8(mb_type)){
215
        FUNC(mc_part)(h, 0, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 0,
216
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
217
                &weight_op[1], &weight_avg[1],
218
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
219
        FUNC(mc_part)(h, 8, 0, 4, (8<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, 0, 4,
220
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
221
                &weight_op[1], &weight_avg[1],
222
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
223
    }else if(IS_8X16(mb_type)){
224
        FUNC(mc_part)(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
225
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
226
                &weight_op[2], &weight_avg[2],
227
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
228
        FUNC(mc_part)(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
229
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
230
                &weight_op[2], &weight_avg[2],
231
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
232
    }else{
233
        int i;
234

  
235
        assert(IS_8X8(mb_type));
236

  
237
        for(i=0; i<4; i++){
238
            const int sub_mb_type= h->sub_mb_type[i];
239
            const int n= 4*i;
240
            int x_offset= (i&1)<<2;
241
            int y_offset= (i&2)<<1;
242

  
243
            if(IS_SUB_8X8(sub_mb_type)){
244
                FUNC(mc_part)(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
245
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
246
                    &weight_op[3], &weight_avg[3],
247
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
248
            }else if(IS_SUB_8X4(sub_mb_type)){
249
                FUNC(mc_part)(h, n  , 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset,
250
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
251
                    &weight_op[4], &weight_avg[4],
252
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
253
                FUNC(mc_part)(h, n+2, 0, 2, (4<<PIXEL_SHIFT), dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
254
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
255
                    &weight_op[4], &weight_avg[4],
256
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
257
            }else if(IS_SUB_4X8(sub_mb_type)){
258
                FUNC(mc_part)(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
259
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
260
                    &weight_op[5], &weight_avg[5],
261
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
262
                FUNC(mc_part)(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
263
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
264
                    &weight_op[5], &weight_avg[5],
265
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
266
            }else{
267
                int j;
268
                assert(IS_SUB_4X4(sub_mb_type));
269
                for(j=0; j<4; j++){
270
                    int sub_x_offset= x_offset + 2*(j&1);
271
                    int sub_y_offset= y_offset +   (j&2);
272
                    FUNC(mc_part)(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
273
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
274
                        &weight_op[6], &weight_avg[6],
275
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
276
                }
277
            }
278
        }
279
    }
280

  
281
    FUNC(prefetch_motion)(h, 1);
282
}

Also available in: Unified diff