Revision 0d21a846 libavcodec/motion_est.c

View differences:

libavcodec/motion_est.c
1 1
/*
2 2
 * Motion estimation 
3 3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002 Michael Niedermayer
4 5
 * 
5 6
 *
6 7
 * This library is free software; you can redistribute it and/or
......
25 26
#include "dsputil.h"
26 27
#include "mpegvideo.h"
27 28

  
28
//#define ABS(a) ((a)>0 ? (a) : -(a))
29
#define MAX(a,b) ((a) > (b) ? (a) : (b))
29
#define SQ(a) ((a)*(a))
30 30
#define INTER_BIAS	257
31 31

  
32
static int halfpel_motion_search(MpegEncContext * s,
33
				  int *mx_ptr, int *my_ptr, int dmin,
34
				  int xmin, int ymin, int xmax, int ymax,
35
                                  int pred_x, int pred_y, uint8_t *ref_picture);
32
/*
 * Symbolic names for the rows of the predictor table P[10][2]
 * (column 0 = x component, column 1 = y component).
 * As filled in by mv4_search():
 *   P_LAST        - vector currently stored in motion_val for this block
 *   P_LEFT        - vector of the block to the left   (mot_xy - 1)
 *   P_TOP         - vector of the block above         (mot_xy - mot_stride)
 *   P_TOPRIGHT    - vector of the block above-right
 *   P_MEDIAN      - mid_pred() of LEFT, TOP and TOPRIGHT
 *   P_LAST_RIGHT  - stored vector of the block to the right (mot_xy + 1)
 *   P_LAST_BOTTOM - stored vector of the block below (mot_xy + mot_stride)
 *   P_MV1         - the (mx, my) vector handed to mv4_search()
 * P_LAST_LEFT and P_LAST_TOP are not assigned in the code visible here.
 */
#define P_LAST P[0]
33
#define P_LEFT P[1]
34
#define P_TOP P[2]
35
#define P_TOPRIGHT P[3]
36
#define P_MEDIAN P[4]
37
#define P_LAST_LEFT P[5]
38
#define P_LAST_RIGHT P[6]
39
#define P_LAST_TOP P[7]
40
#define P_LAST_BOTTOM P[8]
41
#define P_MV1 P[9]
42

  
36 43

  
37 44
static int pix_sum(UINT8 * pix, int line_size)
38 45
{
......
359 366

  
360 367
#define CHECK_MV(x,y)\
361 368
{\
362
    d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
363
    d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
364
    if(d<dmin){\
365
        best[0]=x;\
366
        best[1]=y;\
367
        dmin=d;\
369
    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
370
    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
371
    if(map[index]!=key){\
372
        d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
373
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
374
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
375
        map[index]= key;\
376
        score_map[index]= d;\
368 377
    }\
369 378
}
370 379

  
371 380
#define CHECK_MV_DIR(x,y,new_dir)\
372 381
{\
373
    d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
374
    d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
375
    if(d<dmin){\
376
        best[0]=x;\
377
        best[1]=y;\
378
        dmin=d;\
379
        next_dir= new_dir;\
382
    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
383
    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
384
    if(map[index]!=key){\
385
        d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
386
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
387
        if(d<dmin){\
388
            best[0]=x;\
389
            best[1]=y;\
390
            dmin=d;\
391
            next_dir= new_dir;\
392
        }\
393
        map[index]= key;\
394
        score_map[index]= d;\
380 395
    }\
381 396
}
382 397

  
383 398
#define CHECK_MV4(x,y)\
384 399
{\
385
    d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
386
    d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
387
    if(d<dmin){\
388
        best[0]=x;\
389
        best[1]=y;\
390
        dmin=d;\
391
    }\
392
}
393

  
394
#define CHECK_MV4_DIR(x,y,new_dir)\
395
{\
396
    d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
397
    d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
398
    if(d<dmin){\
399
        best[0]=x;\
400
        best[1]=y;\
401
        dmin=d;\
402
        next_dir= new_dir;\
400
    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
401
    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
402
    if(map[index]!=key){\
403
        d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
404
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
405
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
406
        map[index]= key;\
407
        score_map[index]= d;\
403 408
    }\
404 409
}
405 410

  
406

  
407 411
#define check(x,y,S,v)\
408
if( (x)<(xmin<<(S)) ) printf("%d %d %d %d xmin" #v, (x), (y), s->mb_x, s->mb_y);\
409
if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\
410
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\
411
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\
412
if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
413
if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
414
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
415
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
412 416

  
413 417

  
414 418
static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
415 419
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
416 420
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
417
                                       int xmin, int ymin, int xmax, int ymax, int shift)
421
                                       int xmin, int ymin, int xmax, int ymax, int shift,
422
                                       uint32_t *map, uint16_t *score_map, int map_generation,
423
                                       op_pixels_abs_func pix_abs)
418 424
{
419 425
    int next_dir=-1;
420 426

  
......
462 468
    */
463 469
}
464 470

  
465
/*
 * Iterative 4-neighbour ("small diamond") refinement for an 8x8 block.
 * (This function appears on the removed side of the diff.)
 *
 * best      in/out: best[0]/best[1] hold the current best x/y and are
 *           updated by CHECK_MV4_DIR whenever a neighbour scores lower.
 * dmin      score of the starting position; the final best score is returned.
 * xmin..ymax  inclusive search window; neighbours outside it are not tested.
 * The remaining parameters are consumed by the CHECK_MV4_DIR macro.
 *
 * Returns the best score once no neighbour improves on it.
 */
static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin,
466
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
467
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
468
                                       int xmin, int ymin, int xmax, int ymax, int shift)
469
{
470
    /* direction of the last successful step: 0=left 1=up 2=right 3=down, -1=none */
    int next_dir=-1;
471

  
472
    for(;;){
473
        int d;
474
        const int dir= next_dir;
475
        const int x= best[0];
476
        const int y= best[1];
477
        next_dir=-1;
478

  
479
//printf("%d", dir);
480
        /* test each neighbour, skipping the one we just came from
           (the direction opposite to the last step) */
        if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y  , 0)
481
        if(dir!=3 && y>ymin) CHECK_MV4_DIR(x  , y-1, 1)
482
        if(dir!=0 && x<xmax) CHECK_MV4_DIR(x+1, y  , 2)
483
        if(dir!=1 && y<ymax) CHECK_MV4_DIR(x  , y+1, 3)
484

  
485
        /* no neighbour was better: local minimum reached */
        if(next_dir==-1){
486
            return dmin;
487
        }
488
    }
489
}
490

  
471
/*
 * Tuning constants for snake_search():
 *   SNAKE_1 - the search returns once the consecutive-failure counter
 *             reaches SNAKE_1+1
 *   SNAKE_2 - multiplier applied to the turn step added to the direction
 *             after a failed probe
 * The #else branch keeps an alternative parameter set that is compiled out.
 */
#if 1
472
#define SNAKE_1 3
473
#define SNAKE_2 2
474
#else
475
#define SNAKE_1 7
476
#define SNAKE_2 3
477
#endif
491 478
static inline int snake_search(MpegEncContext * s, int *best, int dmin,
492 479
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
493 480
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
494
                                       int xmin, int ymin, int xmax, int ymax, int shift)
481
                                       int xmin, int ymin, int xmax, int ymax, int shift,
482
                                       uint32_t *map, uint16_t *score_map,int map_generation,
483
                                       op_pixels_abs_func pix_abs)
495 484
{
496 485
    int dir=0;
497 486
    int c=1;
......
517 506
        x+=x_dir[dir];
518 507
        y+=y_dir[dir];
519 508
        if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
520
            d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
521
            d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
509
            const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
510
            const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
511
            if(map[index]!=key){
512
                d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
513
                d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
514
                map[index]=key;
515
                score_map[index]=d;
516
            }else
517
                d= dmin+1;
522 518
        }else{
523 519
            d = dmin + 10000; //FIXME smarter boundary handling
524 520
        }
......
537 533
        }else{
538 534
//bad++;
539 535
            if(fails){
540
                if(fails>=3) return dmin;
536
                if(fails>=SNAKE_1+1) return dmin;
541 537
            }else{
542
                c= -c;
538
                if(dir&1) dir-= c*3;
539
                else      c= -c;
540
//                c= -c;
543 541
            }
544
            dir+=c*2;
542
            dir+=c*SNAKE_2;
545 543
            fails++;
546 544
        }
547 545
        dir&=7;
548 546
    }
549 547
}
550 548

  
549
/*
 * Axis-alternating ("cross") integer motion search.
 *
 * Starting from best[], the search repeatedly probes one step along either
 * the horizontal or the vertical axis, preferring the axis whose last probe
 * gave the larger improvement, and reverses direction on an axis when a
 * probe fails.
 *
 * s               not referenced in this function body
 * best            in/out: current best x (best[0]) and y (best[1])
 * dmin            score of the starting position
 * new_pic/old_pic current and reference block pointers passed to pix_abs
 * pic_stride      stride passed to pix_abs and used to offset old_pic
 * pred_x/pred_y   predictor subtracted from the shifted candidate when
 *                 indexing mv_penalty
 * mv_penalty      penalty table, scaled by quant and added to the score
 * xmin..ymax      inclusive search window
 * shift           left shift applied to candidates before subtracting the
 *                 predictor
 * map/score_map   cache of already evaluated positions and their scores
 * map_generation  value mixed into the cache key for the current search
 * pix_abs         block comparison function
 *
 * Returns the best score found; best[] holds the matching position.
 */
static inline int cross_search(MpegEncContext * s, int *best, int dmin,
550
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
551
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
552
                                       int xmin, int ymin, int xmax, int ymax, int shift,
553
                                       uint32_t *map, uint16_t *score_map,int map_generation,
554
                                       op_pixels_abs_func pix_abs)
555
{
556
    /* direction index -> step: 0=(-1,0) 1=(0,-1) 2=(+1,0) 3=(0,+1);
       dir&1 selects the axis (0 horizontal, 1 vertical), dir^2 reverses it */
    static int x_dir[4]= {-1, 0, 1, 0};
557
    static int y_dir[4]= { 0,-1, 0, 1};
558
    /* per-axis score gain of the last probe; -1 marks an exhausted axis */
    int improvement[2]={100000, 100000};
559
    /* direction to try next on each axis */
    int dirs[2]={2, 3};
560
    int dir;
561
    int last_dir= -1;
562
    
563
    for(;;){
564
        /* pick the axis with the larger recorded improvement (ties go to axis 1) */
        dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ];
565
        if(improvement[dir&1]==-1) return dmin;
566
        
567
        {
568
            const int x= best[0] + x_dir[dir];
569
            const int y= best[1] + y_dir[dir];
570
            const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
571
            const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
572
            int d;
573
            if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
574
                /* not yet evaluated during this generation: score and cache it */
                if(map[index]!=key){
575
                    d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
576
                    d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant;
577
                    map[index]=key;
578
                    score_map[index]=d;
579
                    if(d<dmin){
580
                        /* success: record the gain, move there and keep going */
                        improvement[dir&1]= dmin-d;
581
                        improvement[(dir&1)^1]++;
582
                        dmin=d;
583
                        best[0]= x;
584
                        best[1]= y;
585
                        last_dir=dir;
586
                        continue;
587
                    }
588
                }else{
589
                    /* cached position: reuse its stored score, never treated as an improvement */
                    d= score_map[index];
590
                }
591
            }else{
592
                d= dmin + 1000; //FIXME is this a good idea?
593
            }
594
            /* evaluated point was cached or checked and worse */
595

  
596
            if(last_dir==dir){
597
                /* the previous iteration used this same direction: mark the axis exhausted */
                improvement[dir&1]= -1;
598
            }else{
599
                /* remember how much worse it was and flip the direction for this axis */
                improvement[dir&1]= d-dmin;
600
                last_dir= dirs[dir&1]= dir^2;
601
            }
602
        }
603
    }
604
}
605

  
606
/*
 * Advance the generation counter used in the keys of the evaluated-position
 * cache (s->me_map) and return the new value.
 *
 * The search code builds its cache keys as
 * (y<<ME_MAP_MV_BITS) + x + map_generation, so bumping the generation makes
 * entries written by earlier searches compare unequal without clearing the map.
 *
 * When the counter becomes 0 it is restarted at 1<<(ME_MAP_MV_BITS*2) and the
 * whole map (ME_MAP_SIZE uint32_t entries) is zeroed.
 *
 * NOTE(review): the ==0 test only fires if the counter wraps around to zero;
 * the declared type of me_map_generation is not visible here - confirm it is
 * an unsigned type.
 */
static inline int update_map_generation(MpegEncContext * s)
607
{
608
    s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2);
609
    if(s->me_map_generation==0){
610
        s->me_map_generation= 1<<(ME_MAP_MV_BITS*2);
611
        memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
612
    }
613
    return s->me_map_generation;
614
}
615

  
551 616
static int epzs_motion_search(MpegEncContext * s,
552 617
                             int *mx_ptr, int *my_ptr,
553
                             int P[5][2], int pred_x, int pred_y,
618
                             int P[10][2], int pred_x, int pred_y,
554 619
                             int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
555 620
{
556 621
    int best[2]={0, 0};
......
561 626
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
562 627
    int quant= s->qscale; // qscale of the prev frame
563 628
    const int shift= 1+s->quarter_sample;
629
    uint32_t *map= s->me_map;
630
    uint16_t *score_map= s->me_score_map;
631
    int map_generation;
564 632

  
565 633
    new_pic = s->new_picture[0] + pic_xy;
566 634
    old_pic = ref_picture + pic_xy;
567
   
635
    
636
    map_generation= update_map_generation(s);
637

  
568 638
    dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
569
    if(dmin<Z_THRESHOLD){
570
        *mx_ptr= 0;
571
        *my_ptr= 0;
572
//printf("Z");
573
        return dmin;
574
    }
639
    map[0]= map_generation;
640
    score_map[0]= dmin;
575 641

  
576 642
    /* first line */
577
    if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
578
        CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
643
    if ((s->mb_y == 0 || s->first_slice_line)) {
644
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
645
        CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
579 646
    }else{
580
        CHECK_MV(P[4][0]>>shift, P[4][1]>>shift)
581
        if(dmin<Z_THRESHOLD){
582
            *mx_ptr= P[4][0]>>shift;
583
            *my_ptr= P[4][1]>>shift;
584
//printf("M\n");
647
        if(dmin<256 && ( P_LEFT[0]    |P_LEFT[1]
648
                        |P_TOP[0]     |P_TOP[1]
649
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
650
            *mx_ptr= 0;
651
            *my_ptr= 0;
652
            s->skip_me=1;
585 653
            return dmin;
586 654
        }
587
        CHECK_MV(P[1][0]>>shift, P[1][1]>>shift)
588
        CHECK_MV(P[2][0]>>shift, P[2][1]>>shift)
589
        CHECK_MV(P[3][0]>>shift, P[3][1]>>shift)
655
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
656
        if(dmin>256*2){
657
            CHECK_MV(P_LAST[0]    >>shift, P_LAST[1]    >>shift)
658
            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
659
            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
660
            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
661
        }
590 662
    }
591
    CHECK_MV(P[0][0]>>shift, P[0][1]>>shift)
592

  
663
    if(dmin>256*4){
664
        CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
665
        CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
666
    }
667
#if 0 //doest only slow things down
668
    if(dmin>512*3){
669
        int step;
670
        dmin= score_map[0];
671
        best[0]= best[1]=0;
672
        for(step=128; step>0; step>>=1){
673
            const int step2= step;
674
            int y;
675
            for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
676
                int x;
677
                if(y<ymin || y>ymax) continue;
678

  
679
                for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
680
                    if(x<xmin || x>xmax) continue;
681
                    if(x==best[0] && y==best[1]) continue;
682
                    CHECK_MV(x,y)
683
                }
684
            }
685
        }
686
    }
687
#endif
593 688
//check(best[0],best[1],0, b0)
594 689
    if(s->me_method==ME_EPZS)
595 690
        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
596
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
691
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
692
                                   shift, map, score_map, map_generation, pix_abs16x16);
597 693
    else
598
        dmin=         snake_search(s, best, dmin, new_pic, old_pic, pic_stride, 
599
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
694
        dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
695
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
696
                                   shift, map, score_map, map_generation, pix_abs16x16);
600 697
//check(best[0],best[1],0, b1)
601 698
    *mx_ptr= best[0];
602 699
    *my_ptr= best[1];    
......
607 704

  
608 705
static int epzs_motion_search4(MpegEncContext * s, int block,
609 706
                             int *mx_ptr, int *my_ptr,
610
                             int P[6][2], int pred_x, int pred_y,
707
                             int P[10][2], int pred_x, int pred_y,
611 708
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
612 709
{
613 710
    int best[2]={0, 0};
......
618 715
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
619 716
    int quant= s->qscale; // qscale of the prev frame
620 717
    const int shift= 1+s->quarter_sample;
718
    uint32_t *map= s->me_map;
719
    uint16_t *score_map= s->me_score_map;
720
    int map_generation;
621 721

  
622 722
    new_pic = s->new_picture[0] + pic_xy;
623 723
    old_pic = ref_picture + pic_xy;
624
   
625
    dmin = pix_abs8x8(new_pic, old_pic, pic_stride);
626 724

  
725
    map_generation= update_map_generation(s);
726

  
727
    dmin = 1000000;
728
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
627 729
    /* first line */
628
    if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
629
        CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
730
    if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
731
        CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
732
        CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
733
        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
630 734
    }else{
631
        CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift)
632
        if(dmin<Z_THRESHOLD){
633
            *mx_ptr= P[4][0]>>shift;
634
            *my_ptr= P[4][1]>>shift;
635
//printf("M\n");
636
            return dmin;
735
        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
736
        //FIXME try some early stop
737
        if(dmin>64*2){
738
            CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
739
            CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
740
            CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
741
            CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
742
            CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
637 743
        }
638
        CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift)
639
        CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift)
640
        CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift)
641 744
    }
642
    CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift)
643
    CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift)
745
    if(dmin>64*4){
746
        CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
747
        CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
748
    }
749

  
750
    if(s->me_method==ME_EPZS)
751
        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
752
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
753
                                   shift, map, score_map, map_generation, pix_abs8x8);
754
    else
755
        dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
756
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
757
                                   shift, map, score_map, map_generation, pix_abs8x8);
644 758

  
645
//check(best[0],best[1],0, b0)
646
    dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride, 
647
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift);
648
//check(best[0],best[1],0, b1)
649 759
    *mx_ptr= best[0];
650 760
    *my_ptr= best[1];    
651 761

  
......
654 764
}
655 765

  
656 766
#define CHECK_HALF_MV(suffix, x, y) \
657
    d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
767
{\
768
    d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
658 769
    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
659
    if(d<dminh){\
660
        dminh= d;\
661
        mx= mx1 + x;\
662
        my= my1 + y;\
663
    }
770
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
771
}
664 772

  
665
#define CHECK_HALF_MV4(suffix, x, y) \
666
    d= pix_abs8x8_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
667
    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
668
    if(d<dminh){\
669
        dminh= d;\
670
        mx= mx1 + x;\
671
        my= my1 + y;\
672
    }
673 773
    
674 774
/* The idea would be to make half pel ME after Inter/Intra decision to 
675 775
   save time. */
676 776
static inline int halfpel_motion_search(MpegEncContext * s,
677 777
				  int *mx_ptr, int *my_ptr, int dmin,
678 778
				  int xmin, int ymin, int xmax, int ymax,
679
                                  int pred_x, int pred_y, uint8_t *ref_picture)
779
                                  int pred_x, int pred_y, uint8_t *ref_picture,
780
                                  op_pixels_abs_func pix_abs_x2, 
781
                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
680 782
{
681 783
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
682 784
    const int quant= s->qscale;
683
    int pen_x, pen_y;
684
    int mx, my, mx1, my1, d, xx, yy, dminh;
785
    int mx, my, xx, yy, dminh;
685 786
    UINT8 *pix, *ptr;
686 787

  
687
    mx = *mx_ptr;
688
    my = *my_ptr;
689
    ptr = ref_picture + (my * s->linesize) + mx;
788
    if(s->skip_me){
789
        *mx_ptr = 0;
790
        *my_ptr = 0;
791
        return dmin;
792
    }else
690 793

  
691
    xx = 16 * s->mb_x;
692
    yy = 16 * s->mb_y;
794
    xx = 16 * s->mb_x + 8*(n&1);
795
    yy = 16 * s->mb_y + 8*(n>>1);
693 796
    pix =  s->new_picture[0] + (yy * s->linesize) + xx;
797

  
798
    mx = *mx_ptr;
799
    my = *my_ptr;
800
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
694 801
    
695 802
    dminh = dmin;
696 803

  
697 804
    if (mx > xmin && mx < xmax && 
698 805
        my > ymin && my < ymax) {
806
        int dx=0, dy=0;
807
        int d, pen_x, pen_y; 
699 808

  
700
        mx= mx1= 2*(mx - xx);
701
        my= my1= 2*(my - yy);
702
        if(dmin < Z_THRESHOLD && mx==0 && my==0){
703
            *mx_ptr = 0;
704
            *my_ptr = 0;
705
            return dmin;
706
        }
809
        mx<<=1;
810
        my<<=1;
707 811
        
708 812
        pen_x= pred_x + mx;
709 813
        pen_y= pred_y + my;
......
720 824
        CHECK_HALF_MV(y2 ,  0, +1)
721 825
        CHECK_HALF_MV(xy2, +1, +1)
722 826

  
827
        mx+=dx;
828
        my+=dy;
723 829
    }else{
724
        mx= 2*(mx - xx);
725
        my= 2*(my - yy);
830
        mx<<=1;
831
        my<<=1;
726 832
    }
727 833

  
728 834
    *mx_ptr = mx;
......
730 836
    return dminh;
731 837
}
732 838

  
733
static inline void halfpel_motion_search4(MpegEncContext * s,
839
static inline int fast_halfpel_motion_search(MpegEncContext * s,
734 840
				  int *mx_ptr, int *my_ptr, int dmin,
735 841
				  int xmin, int ymin, int xmax, int ymax,
736
                                  int pred_x, int pred_y, int block_x, int block_y,
737
                                  uint8_t *ref_picture)
842
                                  int pred_x, int pred_y, uint8_t *ref_picture,
843
                                  op_pixels_abs_func pix_abs_x2, 
844
                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
738 845
{
739 846
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
847
    uint16_t *score_map= s->me_score_map;
740 848
    const int quant= s->qscale;
741
    int pen_x, pen_y;
742
    int mx, my, mx1, my1, d, xx, yy, dminh;
849
    int mx, my, xx, yy, dminh;
743 850
    UINT8 *pix, *ptr;
744 851

  
745
    xx = 8 * block_x;
746
    yy = 8 * block_y;
852
    if(s->skip_me){
853
//    printf("S");
854
        *mx_ptr = 0;
855
        *my_ptr = 0;
856
        return dmin;
857
    }
858
//    printf("N");
859
        
860
    xx = 16 * s->mb_x + 8*(n&1);
861
    yy = 16 * s->mb_y + 8*(n>>1);
747 862
    pix =  s->new_picture[0] + (yy * s->linesize) + xx;
748
    
863

  
749 864
    mx = *mx_ptr;
750 865
    my = *my_ptr;
751
    ptr = ref_picture + ((yy+my) * s->linesize) + xx + mx;
752

  
866
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
867
    
753 868
    dminh = dmin;
754 869

  
755 870
    if (mx > xmin && mx < xmax && 
756 871
        my > ymin && my < ymax) {
872
        int dx=0, dy=0;
873
        int d, pen_x, pen_y; 
874
        const int index= (my<<ME_MAP_SHIFT) + mx;
875
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
876
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
877
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
878
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
879
        mx<<=1;
880
        my<<=1;
757 881

  
758
        mx= mx1= 2*mx;
759
        my= my1= 2*my;
760
        if(dmin < Z_THRESHOLD && mx==0 && my==0){
761
            *mx_ptr = 0;
762
            *my_ptr = 0;
763
            return;
764
        }
765 882
        
766 883
        pen_x= pred_x + mx;
767 884
        pen_y= pred_y + my;
768 885

  
769 886
        ptr-= s->linesize;
770
        CHECK_HALF_MV4(xy2, -1, -1)
771
        CHECK_HALF_MV4(y2 ,  0, -1)
772
        CHECK_HALF_MV4(xy2, +1, -1)
773
        
774
        ptr+= s->linesize;
775
        CHECK_HALF_MV4(x2 , -1,  0)
776
        CHECK_HALF_MV4(x2 , +1,  0)
777
        CHECK_HALF_MV4(xy2, -1, +1)
778
        CHECK_HALF_MV4(y2 ,  0, +1)
779
        CHECK_HALF_MV4(xy2, +1, +1)
887
        if(t<=b){
888
            CHECK_HALF_MV(y2 ,  0, -1)
889
            if(l<=r){
890
                CHECK_HALF_MV(xy2, -1, -1)
891
                if(t+r<=b+l){
892
                    CHECK_HALF_MV(xy2, +1, -1)
893
                    ptr+= s->linesize;
894
                }else{
895
                    ptr+= s->linesize;
896
                    CHECK_HALF_MV(xy2, -1, +1)
897
                }
898
                CHECK_HALF_MV(x2 , -1,  0)
899
            }else{
900
                CHECK_HALF_MV(xy2, +1, -1)
901
                if(t+l<=b+r){
902
                    CHECK_HALF_MV(xy2, -1, -1)
903
                    ptr+= s->linesize;
904
                }else{
905
                    ptr+= s->linesize;
906
                    CHECK_HALF_MV(xy2, +1, +1)
907
                }
908
                CHECK_HALF_MV(x2 , +1,  0)
909
            }
910
        }else{
911
            if(l<=r){
912
                if(t+l<=b+r){
913
                    CHECK_HALF_MV(xy2, -1, -1)
914
                    ptr+= s->linesize;
915
                }else{
916
                    ptr+= s->linesize;
917
                    CHECK_HALF_MV(xy2, +1, +1)
918
                }
919
                CHECK_HALF_MV(x2 , -1,  0)
920
                CHECK_HALF_MV(xy2, -1, +1)
921
            }else{
922
                if(t+r<=b+l){
923
                    CHECK_HALF_MV(xy2, +1, -1)
924
                    ptr+= s->linesize;
925
                }else{
926
                    ptr+= s->linesize;
927
                    CHECK_HALF_MV(xy2, -1, +1)
928
                }
929
                CHECK_HALF_MV(x2 , +1,  0)
930
                CHECK_HALF_MV(xy2, +1, +1)
931
            }
932
            CHECK_HALF_MV(y2 ,  0, +1)
933
        }
934
        mx+=dx;
935
        my+=dy;
780 936

  
781 937
    }else{
782
        mx*=2;
783
        my*=2;
938
        mx<<=1;
939
        my<<=1;
784 940
    }
785 941

  
786 942
    *mx_ptr = mx;
787 943
    *my_ptr = my;
944
    return dminh;
788 945
}
789 946

  
790
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my)
947
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
791 948
{
792 949
    const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2);
793 950
    
......
795 952
    s->p_mv_table[xy][1] = my;
796 953

  
797 954
    /* has allready been set to the 4 MV if 4MV is done */
798
    if(!(s->flags&CODEC_FLAG_4MV)){
955
    if(mv4){
799 956
        int mot_xy= s->block_index[0];
800 957

  
801 958
        s->motion_val[mot_xy  ][0]= mx;
......
840 997
    }
841 998
}
842 999

  
1000
/*
 * Motion search for the four 8x8 blocks of the current macroblock.
 *
 * For each block the predictor table P is filled from s->motion_val,
 * epzs_motion_search4() is run, the result is refined with
 * fast_halfpel_motion_search() using the 8x8 comparison functions, and the
 * resulting vector is written back to s->motion_val.
 *
 * s             encoder context; s->last_picture[0] is used as reference
 * xmin..ymax    search window, passed unchanged to both search functions
 * mx, my        vector stored as the P_MV1 candidate for every block
 * shift         left shift applied to the window limits before they are
 *               compared against the predictors
 *
 * Returns the sum of the four per-block scores.
 */
static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
1001
{
1002
    int block;
1003
    int P[10][2];
1004
    uint8_t *ref_picture= s->last_picture[0];
1005
    int dmin_sum=0;
1006

  
1007
    for(block=0; block<4; block++){
1008
        int mx4, my4;
1009
        int pred_x4, pred_y4;
1010
        int dmin4;
1011
        /* per-block offset, relative to the entry above, used to fetch P_TOPRIGHT */
        static const int off[4]= {2, 1, 1, -1};
1012
        const int mot_stride = s->block_wrap[0];
1013
        const int mot_xy = s->block_index[block];
1014
//        const int block_x= (block&1);
1015
//        const int block_y= (block>>1);
1016
#if 1 // this saves us a bit of clipping work and shouldn't affect compression in a negative way
1017
        const int rel_xmin4= xmin;
1018
        const int rel_xmax4= xmax;
1019
        const int rel_ymin4= ymin;
1020
        const int rel_ymax4= ymax;
1021
#else
1022
        const int rel_xmin4= xmin - block_x*8;
1023
        const int rel_xmax4= xmax - block_x*8 + 8;
1024
        const int rel_ymin4= ymin - block_y*8;
1025
        const int rel_ymax4= ymax - block_y*8 + 8;
1026
#endif
1027
        /* candidates read from motion_val; the entry for this block is read
           here before being overwritten at the end of the iteration */
        P_LAST[0] = s->motion_val[mot_xy    ][0];
1028
        P_LAST[1] = s->motion_val[mot_xy    ][1];
1029
        P_LEFT[0] = s->motion_val[mot_xy - 1][0];
1030
        P_LEFT[1] = s->motion_val[mot_xy - 1][1];
1031
        P_LAST_RIGHT[0] = s->motion_val[mot_xy + 1][0];
1032
        P_LAST_RIGHT[1] = s->motion_val[mot_xy + 1][1];
1033
        P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 1*mot_stride][0];
1034
        P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 1*mot_stride][1];
1035

  
1036
        /* one-sided clamps of selected components to the search window */
        if(P_LEFT[0]       > (rel_xmax4<<shift)) P_LEFT[0]       = (rel_xmax4<<shift);
1037
        if(P_LAST_RIGHT[0] < (rel_xmin4<<shift)) P_LAST_RIGHT[0] = (rel_xmin4<<shift);
1038
        if(P_LAST_BOTTOM[1]< (rel_ymin4<<shift)) P_LAST_BOTTOM[1]= (rel_ymin4<<shift);
1039

  
1040
        /* special case for first line */
1041
        /* P_TOP, P_TOPRIGHT and P_MEDIAN are left unset in this branch */
        if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
1042
            pred_x4= P_LEFT[0];
1043
            pred_y4= P_LEFT[1];
1044
        } else {
1045
            P_TOP[0]      = s->motion_val[mot_xy - mot_stride             ][0];
1046
            P_TOP[1]      = s->motion_val[mot_xy - mot_stride             ][1];
1047
            P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
1048
            P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
1049
            if(P_TOP[1]      > (rel_ymax4<<shift)) P_TOP[1]     = (rel_ymax4<<shift);
1050
            if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
1051
            if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
1052
            if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);
1053
    
1054
            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1055
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1056

  
1057
            /* the predictor used for the vector cost depends on the output format */
            if(s->out_format == FMT_H263){
1058
                pred_x4 = P_MEDIAN[0];
1059
                pred_y4 = P_MEDIAN[1];
1060
            }else { /* mpeg1 at least */
1061
                pred_x4= P_LEFT[0];
1062
                pred_y4= P_LEFT[1];
1063
            }
1064
        }
1065
        /* the caller-supplied vector is offered as an extra candidate */
        P_MV1[0]= mx;
1066
        P_MV1[1]= my;
1067

  
1068
        dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
1069

  
1070
        dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
1071
                                   pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, 
1072
                                   pix_abs8x8_y2, pix_abs8x8_xy2, block);
1073
 
1074
        /* store the refined vector for this block and accumulate its score */
        s->motion_val[ s->block_index[block] ][0]= mx4;
1075
        s->motion_val[ s->block_index[block] ][1]= my4;
1076
        dmin_sum+= dmin4;
1077
    }
1078
    return dmin_sum;
1079
}
1080

  
843 1081
void ff_estimate_p_frame_motion(MpegEncContext * s,
844 1082
                                int mb_x, int mb_y)
845 1083
{
......
848 1086
    int xmin, ymin, xmax, ymax;
849 1087
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
850 1088
    int pred_x=0, pred_y=0;
851
    int P[6][2];
1089
    int P[10][2];
852 1090
    const int shift= 1+s->quarter_sample;
853 1091
    int mb_type=0;
854 1092
    uint8_t *ref_picture= s->last_picture[0];
855 1093

  
856 1094
    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
1095
    rel_xmin= xmin - mb_x*16;
1096
    rel_xmax= xmax - mb_x*16;
1097
    rel_ymin= ymin - mb_y*16;
1098
    rel_ymax= ymax - mb_y*16;
1099
    s->skip_me=0;
857 1100

  
858 1101
    switch(s->me_method) {
859 1102
    case ME_ZERO:
860 1103
    default:
861 1104
	no_motion_search(s, &mx, &my);
1105
        mx-= mb_x*16;
1106
        my-= mb_y*16;
862 1107
        dmin = 0;
863 1108
        break;
864 1109
    case ME_FULL:
865 1110
	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
1111
        mx-= mb_x*16;
1112
        my-= mb_y*16;
866 1113
        break;
867 1114
    case ME_LOG:
868 1115
	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1116
        mx-= mb_x*16;
1117
        my-= mb_y*16;
869 1118
        break;
870 1119
    case ME_PHODS:
871 1120
	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1121
        mx-= mb_x*16;
1122
        my-= mb_y*16;
872 1123
        break;
873 1124
    case ME_X1:
874 1125
    case ME_EPZS:
......
876 1127
            const int mot_stride = s->block_wrap[0];
877 1128
            const int mot_xy = s->block_index[0];
878 1129

  
879
            rel_xmin= xmin - mb_x*16;
880
            rel_xmax= xmax - mb_x*16;
881
            rel_ymin= ymin - mb_y*16;
882
            rel_ymax= ymax - mb_y*16;
1130
            P_LAST[0]       = s->motion_val[mot_xy    ][0];
1131
            P_LAST[1]       = s->motion_val[mot_xy    ][1];
1132
            P_LEFT[0]       = s->motion_val[mot_xy - 1][0];
1133
            P_LEFT[1]       = s->motion_val[mot_xy - 1][1];
1134
            P_LAST_RIGHT[0] = s->motion_val[mot_xy + 2][0];
1135
            P_LAST_RIGHT[1] = s->motion_val[mot_xy + 2][1];
1136
            P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 2*mot_stride][0];
1137
            P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 2*mot_stride][1];
883 1138

  
884
            P[0][0] = s->motion_val[mot_xy    ][0];
885
            P[0][1] = s->motion_val[mot_xy    ][1];
886
            P[1][0] = s->motion_val[mot_xy - 1][0];
887
            P[1][1] = s->motion_val[mot_xy - 1][1];
888
            if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
1139
            if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
1140
            if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
1141
            if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
889 1142

  
890 1143
            /* special case for first line */
891
            if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
892
                P[4][0] = P[1][0];
893
                P[4][1] = P[1][1];
1144
            if ((mb_y == 0 || s->first_slice_line)) {
1145
                pred_x= P_LEFT[0];
1146
                pred_y= P_LEFT[1];
894 1147
            } else {
895
                P[2][0] = s->motion_val[mot_xy - mot_stride             ][0];
896
                P[2][1] = s->motion_val[mot_xy - mot_stride             ][1];
897
                P[3][0] = s->motion_val[mot_xy - mot_stride + 2         ][0];
898
                P[3][1] = s->motion_val[mot_xy - mot_stride + 2         ][1];
899
                if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
900
                if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
901
                if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
1148
                P_TOP[0]      = s->motion_val[mot_xy - mot_stride    ][0];
1149
                P_TOP[1]      = s->motion_val[mot_xy - mot_stride    ][1];
1150
                P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + 2][0];
1151
                P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + 2][1];
1152
                if(P_TOP[1]      > (rel_ymax<<shift)) P_TOP[1]     = (rel_ymax<<shift);
1153
                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
1154
                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
902 1155
        
903
                P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
904
                P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
905
            }
906
            if(s->out_format == FMT_H263){
907
                pred_x = P[4][0];
908
                pred_y = P[4][1];
909
            }else { /* mpeg1 at least */
910
                pred_x= P[1][0];
911
                pred_y= P[1][1];
1156
                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1157
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1158

  
1159
                if(s->out_format == FMT_H263){
1160
                    pred_x = P_MEDIAN[0];
1161
                    pred_y = P_MEDIAN[1];
1162
                }else { /* mpeg1 at least */
1163
                    pred_x= P_LEFT[0];
1164
                    pred_y= P_LEFT[1];
1165
                }
912 1166
            }
913 1167
        }
914 1168
        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
915 1169
 
916
        mx+= mb_x*16;
917
        my+= mb_y*16;
918 1170
        break;
919 1171
    }
920
    
921
    if(s->flags&CODEC_FLAG_4MV){
922
        int block;
923

  
924
        mb_type|= MB_TYPE_INTER4V;
925

  
926
        for(block=0; block<4; block++){
927
            int mx4, my4;
928
            int pred_x4, pred_y4;
929
            int dmin4;
930
            static const int off[4]= {2, 1, 1, -1};
931
            const int mot_stride = s->block_wrap[0];
932
            const int mot_xy = s->block_index[block];
933
            const int block_x= mb_x*2 + (block&1);
934
            const int block_y= mb_y*2 + (block>>1);
935

  
936
            const int rel_xmin4= xmin - block_x*8;
937
            const int rel_xmax4= xmax - block_x*8 + 8;
938
            const int rel_ymin4= ymin - block_y*8;
939
            const int rel_ymax4= ymax - block_y*8 + 8;
940

  
941
            P[0][0] = s->motion_val[mot_xy    ][0];
942
            P[0][1] = s->motion_val[mot_xy    ][1];
943
            P[1][0] = s->motion_val[mot_xy - 1][0];
944
            P[1][1] = s->motion_val[mot_xy - 1][1];
945
            if(P[1][0] > (rel_xmax4<<shift)) P[1][0]= (rel_xmax4<<shift);
946

  
947
            /* special case for first line */
948
            if ((mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) {
949
                P[4][0] = P[1][0];
950
                P[4][1] = P[1][1];
951
            } else {
952
                P[2][0] = s->motion_val[mot_xy - mot_stride             ][0];
953
                P[2][1] = s->motion_val[mot_xy - mot_stride             ][1];
954
                P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
955
                P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
956
                if(P[2][1] > (rel_ymax4<<shift)) P[2][1]= (rel_ymax4<<shift);
957
                if(P[3][0] < (rel_xmin4<<shift)) P[3][0]= (rel_xmin4<<shift);
958
                if(P[3][0] > (rel_xmax4<<shift)) P[3][0]= (rel_xmax4<<shift);
959
                if(P[3][1] > (rel_ymax4<<shift)) P[3][1]= (rel_ymax4<<shift);
960
        
961
                P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
962
                P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
963
            }
964
            if(s->out_format == FMT_H263){
965
                pred_x4 = P[4][0];
966
                pred_y4 = P[4][1];
967
            }else { /* mpeg1 at least */
968
                pred_x4= P[1][0];
969
                pred_y4= P[1][1];
970
            }
971
            P[5][0]= mx - mb_x*16;
972
            P[5][1]= my - mb_y*16;
973

  
974
            dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
975

  
976
            halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
977
                                   pred_x4, pred_y4, block_x, block_y, ref_picture);
978
     
979
            s->motion_val[ s->block_index[block] ][0]= mx4;
980
            s->motion_val[ s->block_index[block] ][1]= my4;
981
        }
982
    }
983 1172

  
984 1173
    /* intra / predictive decision */
985 1174
    xx = mb_x * 16;
986 1175
    yy = mb_y * 16;
987 1176

  
988 1177
    pix = s->new_picture[0] + (yy * s->linesize) + xx;
989
    /* At this point (mx,my) are full-pell and the absolute displacement */
990
    ppix = ref_picture + (my * s->linesize) + mx;
1178
    /* At this point (mx,my) is in full-pel units and is the displacement relative to the macroblock position (xx,yy) */
1179
    ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
991 1180
    
992 1181
    sum = pix_sum(pix, s->linesize);
993
#if 0
994
    varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS;
995
    vard = pix_abs16x16(pix, ppix, s->linesize);
996
#else
1182
    
997 1183
    sum= (sum+8)>>4;
998
    varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8);
1184
    varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
999 1185
    vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
1000
#endif
1001

  
1002
    s->mb_var[s->mb_width * mb_y + mb_x] = varc;
1003
    s->avg_mb_var+= varc;
1004
    s->mc_mb_var += vard;
1005

  
1186
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1187
    s->mb_var   [s->mb_width * mb_y + mb_x] = varc;
1188
    s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
1189
    s->mb_var_sum    += varc;
1190
    s->mc_mb_var_sum += vard;
1191
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1006 1192
    
1007 1193
#if 0
1008 1194
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
......
1013 1199
            mb_type|= MB_TYPE_INTRA;
1014 1200
        if (varc*2 + 200 > vard){
1015 1201
            mb_type|= MB_TYPE_INTER;
1016
            halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
1202
            if(s->me_method >= ME_EPZS)
1203
                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1204
                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1205
                                           pix_abs16x16_xy2, 0);
1206
            else
1207
                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1208
                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1209
                                           pix_abs16x16_xy2, 0);                                           
1017 1210
        }else{
1018
            mx = mx*2 - mb_x*32;
1019
            my = my*2 - mb_y*32;
1211
            mx <<=1;
1212
            my <<=1;
1020 1213
        }
1214
        if((s->flags&CODEC_FLAG_4MV)
1215
           && !s->skip_me && varc>50 && vard>10){
1216
            mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1217
            mb_type|=MB_TYPE_INTER4V;
1218

  
1219
            set_p_mv_tables(s, mx, my, 0);
1220
        }else
1221
            set_p_mv_tables(s, mx, my, 1);
1021 1222
    }else{
1022 1223
        if (vard <= 64 || vard < varc) {
1023 1224
            mb_type|= MB_TYPE_INTER;
1024 1225
            if (s->me_method != ME_ZERO) {
1025
                halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
1226
                if(s->me_method >= ME_EPZS)
1227
                    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1228
                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1229
                                           pix_abs16x16_xy2, 0);
1230
                else
1231
                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1232
                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1233
                                           pix_abs16x16_xy2, 0);
1234
                if((s->flags&CODEC_FLAG_4MV)
1235
                   && !s->skip_me && varc>50 && vard>10){
1236
                    int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1237
                    if(dmin4 + 128 <dmin)
1238
                        mb_type= MB_TYPE_INTER4V;
1239
                }
1240
                set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
1241

  
1026 1242
            } else {
1027
                mx -= 16 * mb_x;
1028
                my -= 16 * mb_y;
1243
                mx <<=1;
1244
                my <<=1;
1029 1245
            }
1030 1246
#if 0
1031 1247
            if (vard < 10) {
......
1036 1252
#endif
1037 1253
        }else{
1038 1254
            mb_type|= MB_TYPE_INTRA;
1039
            mx = 0;//mx*2 - 32 * mb_x;
1040
            my = 0;//my*2 - 32 * mb_y;
1255
            mx = 0;
1256
            my = 0;
1041 1257
        }
1042 1258
    }
1043 1259

  
1044 1260
    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1045
    set_p_mv_tables(s, mx, my);
1046 1261
}
1047 1262

  
1048 1263
int ff_estimate_motion_b(MpegEncContext * s,
......
1052 1267
    int xmin, ymin, xmax, ymax;
1053 1268
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1054 1269
    int pred_x=0, pred_y=0;
1055
    int P[6][2];
1270
    int P[10][2];
1056 1271
    const int shift= 1+s->quarter_sample;
1057 1272
    const int mot_stride = s->mb_width + 2;
1058 1273
    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1059 1274
    
1060 1275
    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
1276
    rel_xmin= xmin - mb_x*16;
1277
    rel_xmax= xmax - mb_x*16;
1278
    rel_ymin= ymin - mb_y*16;
1279
    rel_ymax= ymax - mb_y*16;
1061 1280

  
1062 1281
    switch(s->me_method) {
1063 1282
    case ME_ZERO:
1064 1283
    default:
1065 1284
	no_motion_search(s, &mx, &my);
1066 1285
        dmin = 0;
1286
        mx-= mb_x*16;
1287
        my-= mb_y*16;
1067 1288
        break;
1068 1289
    case ME_FULL:
1069 1290
	dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
1291
        mx-= mb_x*16;
1292
        my-= mb_y*16;
1070 1293
        break;
1071 1294
    case ME_LOG:
1072 1295
	dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1296
        mx-= mb_x*16;
1297
        my-= mb_y*16;
1073 1298
        break;
1074 1299
    case ME_PHODS:
1075 1300
	dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
1301
        mx-= mb_x*16;
1302
        my-= mb_y*16;
1076 1303
        break;
1077 1304
    case ME_X1:
1078 1305
    case ME_EPZS:
1079 1306
       {
1080 1307

  
1081
            rel_xmin= xmin - mb_x*16;
1082
            rel_xmax= xmax - mb_x*16;
1083
            rel_ymin= ymin - mb_y*16;
1084
            rel_ymax= ymax - mb_y*16;
1308
            P_LAST[0]        = mv_table[mot_xy    ][0];
1309
            P_LAST[1]        = mv_table[mot_xy    ][1];
1310
            P_LEFT[0]        = mv_table[mot_xy - 1][0];
1311
            P_LEFT[1]        = mv_table[mot_xy - 1][1];
1312
            P_LAST_RIGHT[0]  = mv_table[mot_xy + 1][0];
1313
            P_LAST_RIGHT[1]  = mv_table[mot_xy + 1][1];
1314
            P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
1315
            P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
1085 1316

  
1086
            P[0][0] = mv_table[mot_xy    ][0];
1087
            P[0][1] = mv_table[mot_xy    ][1];
1088
            P[1][0] = mv_table[mot_xy - 1][0];
1089
            P[1][1] = mv_table[mot_xy - 1][1];
1090
            if(P[1][0] > (rel_xmax<<shift)) P[1][0]= (rel_xmax<<shift);
1317
            if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
1318
            if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
1319
            if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
1091 1320

  
1092 1321
            /* special case for first line */
1093
            if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
1094
                P[4][0] = P[1][0];
1095
                P[4][1] = P[1][1];
1322
            if ((mb_y == 0 || s->first_slice_line)) {
1096 1323
            } else {
1097
                P[2][0] = mv_table[mot_xy - mot_stride             ][0];
1098
                P[2][1] = mv_table[mot_xy - mot_stride             ][1];
1099
                P[3][0] = mv_table[mot_xy - mot_stride + 1         ][0];
1100
                P[3][1] = mv_table[mot_xy - mot_stride + 1         ][1];
1101
                if(P[2][1] > (rel_ymax<<shift)) P[2][1]= (rel_ymax<<shift);
1102
                if(P[3][0] < (rel_xmin<<shift)) P[3][0]= (rel_xmin<<shift);
1103
                if(P[3][1] > (rel_ymax<<shift)) P[3][1]= (rel_ymax<<shift);
1324
                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
1325
                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
1326
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
1327
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
1328
                if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1]= (rel_ymax<<shift);
1329
                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
1330
                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
1104 1331
        
1105
                P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
1106
                P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
1332
                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1333
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1107 1334
            }
1108
            pred_x= P[1][0];
1109
            pred_y= P[1][1];
1335
            pred_x= P_LEFT[0];
1336
            pred_y= P_LEFT[1];
1110 1337
        }
1111 1338
        dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
1112 1339
 
1113
        mx+= mb_x*16;
1114
        my+= mb_y*16;
1115 1340
        break;
1116 1341
    }
1117 1342
    
1118
    /* intra / predictive decision */
1119
//    xx = mb_x * 16;
1120
//    yy = mb_y * 16;
1121

  
1122
//    pix = s->new_picture[0] + (yy * s->linesize) + xx;
1123
    /* At this point (mx,my) are full-pell and the absolute displacement */
1124
//    ppix = ref_picture + (my * s->linesize) + mx;
1125
    
1126
    dmin= halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture);
1127

  
1343
    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
1344
                                pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
1345
                                pix_abs16x16_xy2, 0);
1346
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1128 1347
//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1129 1348
    mv_table[mot_xy][0]= mx;
1130 1349
    mv_table[mot_xy][1]= my;
......
1140 1359
                   int pred_bx, int pred_by)
1141 1360
{
1142 1361
    //FIXME optimize?
1362
    //FIXME direct mode penalty
1143 1363
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1144 1364
    uint8_t *dest_y = s->me_scratchpad;
1145 1365
    uint8_t *ptr;
......
1201 1421
static inline int direct_search(MpegEncContext * s,
1202 1422
                                int mb_x, int mb_y)
1203 1423
{
1204
    int P[6][2];
1424
    int P[10][2];
1205 1425
    const int mot_stride = s->mb_width + 2;
1206 1426
    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1207 1427
    int dmin, dmin2;
......
1266 1486
        }
1267 1487
    }
1268 1488

  
1269
    P[0][0] = mv_table[mot_xy    ][0];
1270
    P[0][1] = mv_table[mot_xy    ][1];
1271
    P[1][0] = mv_table[mot_xy - 1][0];
1272
    P[1][1] = mv_table[mot_xy - 1][1];
1273

  
1489
    P_LAST[0]        = mv_table[mot_xy    ][0];
1490
    P_LAST[1]        = mv_table[mot_xy    ][1];
1491
    P_LEFT[0]        = mv_table[mot_xy - 1][0];
1492
    P_LEFT[1]        = mv_table[mot_xy - 1][1];
1493
    P_LAST_RIGHT[0]  = mv_table[mot_xy + 1][0];
1494
    P_LAST_RIGHT[1]  = mv_table[mot_xy + 1][1];
1495
    P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
1496
    P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];
1497
/*
1498
    if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
1499
    if(P_LAST_RIGHT[0] < (rel_xmin<<shift)) P_LAST_RIGHT[0] = (rel_xmin<<shift);
1500
    if(P_LAST_BOTTOM[1]< (rel_ymin<<shift)) P_LAST_BOTTOM[1]= (rel_ymin<<shift);
1501
*/
1274 1502
    /* special case for first line */
1275
    if ((mb_y == 0 || s->first_slice_line || s->first_gob_line)) {
1276
        P[4][0] = P[1][0];
1277
        P[4][1] = P[1][1];
1503
    if ((mb_y == 0 || s->first_slice_line)) {
1278 1504
    } else {
1279
        P[2][0] = mv_table[mot_xy - mot_stride             ][0];
1280
        P[2][1] = mv_table[mot_xy - mot_stride             ][1];
1281
        P[3][0] = mv_table[mot_xy - mot_stride + 1         ][0];
1282
        P[3][1] = mv_table[mot_xy - mot_stride + 1         ][1];
1505
        P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
1506
        P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
1507
        P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
1508
        P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
1283 1509
    
1284
        P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]);
1285
        P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]);
1510
        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1511
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1286 1512
    }
1287 1513
    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
1288 1514
    if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
......
1332 1558
void ff_estimate_b_frame_motion(MpegEncContext * s,
1333 1559
                             int mb_x, int mb_y)
1334 1560
{
1335
    const int mot_stride = s->mb_width + 2;
1336
    const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
1337 1561
    const int quant= s->qscale;
1338 1562
    int fmin, bmin, dmin, fbmin;
1339 1563
    int type=0;
1340
    int motion_fx, motion_fy, motion_bx, motion_by;
1341 1564
    
1342 1565
    dmin= direct_search(s, mb_x, mb_y);
1343 1566

  
......
1365 1588
            score=fbmin;
1366 1589
            type= MB_TYPE_BIDIR;
1367 1590
        }
1368
        s->mc_mb_var += score;
1591
        s->mc_mb_var_sum += score;
1592
        s->mc_mb_var[mb_y*s->mb_width + mb_x] = score;
1369 1593
    }
1594
/*
1595
{
1596
static int count=0;
1597
static int sum=0;
1598
if(type==MB_TYPE_DIRECT){
1599
  int diff= ABS(s->b_forw_mv_table)
1600
}
1601
}*/
1370 1602

  
1371 1603
    s->mb_type[mb_y*s->mb_width + mb_x]= type;
1604
/*    if(mb_y==0 && mb_x==0) printf("\n");
1605
    if(mb_x==0) printf("\n");
1606
    printf("%d", av_log2(type));
1607
*/
1372 1608
}
1373 1609

  
1374 1610
/* find the best f_code for ME methods which do unlimited searches */
1375 1611
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1376 1612
{
1377
    int f_code;
1378

  
1379 1613
    if(s->me_method>=ME_EPZS){
1380
        int mv_num[8];
1614
        int score[8];
1381 1615
        int i, y;
1382
        int loose=0;
1383 1616
        UINT8 * fcode_tab= s->fcode_tab;
1617
        int best_fcode=-1;
1618
        int best_score=-10000000;
1384 1619

  
1385
        for(i=0; i<8; i++) mv_num[i]=0;
1620
        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer
1386 1621

  
1387 1622
        for(y=0; y<s->mb_height; y++){
1388 1623
            int x;
......
1390 1625
            i= y*s->mb_width;
1391 1626
            for(x=0; x<s->mb_width; x++){
1392 1627
                if(s->mb_type[i] & type){
1393
                    mv_num[ fcode_tab[mv_table[xy][0] + MAX_MV] ]++;
1394
                    mv_num[ fcode_tab[mv_table[xy][1] + MAX_MV] ]++;
1395
//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i);
1628
                    int fcode= MAX(fcode_tab[mv_table[xy][0] + MAX_MV],
1629
                                   fcode_tab[mv_table[xy][1] + MAX_MV]);
1630
                    int j;
1631
                    
1632
                    for(j=0; j<fcode && j<8; j++){
1633
                        if(s->pict_type==B_TYPE || s->mc_mb_var[i] < s->mb_var[i])
1634
                            score[j]-= 170;
1635
                    }
1396 1636
                }
1397 1637
                i++;
1398 1638
                xy++;
1399 1639
            }
1400 1640
        }
1401

  
1402
        for(i=MAX_FCODE; i>1; i--){
1403
            int threshold;
1404
            loose+= mv_num[i];
1405

  
1406
            if(s->pict_type==B_TYPE) threshold= 0;
1407
            else                     threshold= s->mb_num/20; //FIXME 
1408
            if(loose > threshold) break;
1641
        
1642
        for(i=1; i<8; i++){
1643
            if(score[i] > best_score){
1644
                best_score= score[i];
1645
                best_fcode= i;
1646
            }
1647
//            printf("%d %d\n", i, score[i]);
1409 1648
        }
1649

  
1410 1650
//    printf("fcode: %d type: %d\n", i, s->pict_type);
1411
        return i;
1651
        return best_fcode;
1412 1652
/*        for(i=0; i<=MAX_FCODE; i++){
1413 1653
            printf("%d ", mv_num[i]);
1414 1654
        }
......
1423 1663
    const int f_code= s->f_code;
1424 1664
    int y;
1425 1665
    UINT8 * fcode_tab= s->fcode_tab;
1426

  
1666
//int clip=0;
1667
//int noclip=0;
1427 1668
    /* clip / convert to intra 16x16 type MVs */
1428 1669
    for(y=0; y<s->mb_height; y++){
1429 1670
        int x;
......
1439 1680
                    s->mb_type[i] |= MB_TYPE_INTRA;
1440 1681
                    s->p_mv_table[xy][0] = 0;
1441 1682
                    s->p_mv_table[xy][1] = 0;
1683
//clip++;
1442 1684
                }
1685
//else
1686
//  noclip++;
1443 1687
            }
1444 1688
            xy++;
1445 1689
            i++;
1446 1690
        }
1447 1691
    }
1448

  
1692
//printf("%d no:%d %d//\n", clip, noclip, f_code);
1449 1693
    if(s->flags&CODEC_FLAG_4MV){
1450 1694
        const int wrap= 2+ s->mb_width*2;
1451 1695

  

Also available in: Unified diff