Revision 622348f9

View differences:

ffmpeg.c
111 111
static int same_quality = 0;
112 112
static int b_frames = 0;
113 113
static int mb_decision = FF_MB_DECISION_SIMPLE;
114
static int ildct_cmp = FF_CMP_VSAD;
114 115
static int mb_cmp = FF_CMP_SAD;
115 116
static int sub_cmp = FF_CMP_SAD;
116 117
static int cmp = FF_CMP_SAD;
......
1639 1640

  
1640 1641
static void opt_video_buffer_size(const char *arg)
1641 1642
{
1642
    video_rc_buffer_size = atoi(arg) * 1024;
1643
    video_rc_buffer_size = atoi(arg) * 8*1024;
1643 1644
}
1644 1645

  
1645 1646
static void opt_video_rc_eq(char *arg)
......
1841 1842
    mb_cmp = atoi(arg);
1842 1843
}
1843 1844

  
1845
static void opt_ildct_cmp(const char *arg)
1846
{
1847
    ildct_cmp = atoi(arg);
1848
}
1849

  
1844 1850
static void opt_sub_cmp(const char *arg)
1845 1851
{
1846 1852
    sub_cmp = atoi(arg);
......
2372 2378

  
2373 2379
                video_enc->mb_decision = mb_decision;
2374 2380
                video_enc->mb_cmp = mb_cmp;
2381
                video_enc->ildct_cmp = ildct_cmp;
2375 2382
                video_enc->me_sub_cmp = sub_cmp;
2376 2383
                video_enc->me_cmp = cmp;
2377 2384
                
......
3000 3007
    { "bt", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance (in kbit/s)", "tolerance" },
3001 3008
    { "maxrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_max}, "set max video bitrate tolerance (in kbit/s)", "bitrate" },
3002 3009
    { "minrate", HAS_ARG | OPT_VIDEO, {(void*)opt_video_bitrate_min}, "set min video bitrate tolerance (in kbit/s)", "bitrate" },
3003
    { "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kbit)", "size" },
3010
    { "bufsize", HAS_ARG | OPT_VIDEO, {(void*)opt_video_buffer_size}, "set ratecontrol buffere size (in kByte)", "size" },
3004 3011
    { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
3005 3012
    { "me", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_motion_estimation}, "set motion estimation method", 
3006 3013
      "method" },
......
3012 3019
    { "hq", OPT_BOOL, {(void*)&mb_decision}, "activate high quality settings" },
3013 3020
    { "mbd", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_decision}, "macroblock decision", "mode" },
3014 3021
    { "mbcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_mb_cmp}, "macroblock compare function", "cmp function" },
3022
    { "ildctcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_ildct_cmp}, "ildct compare function", "cmp function" },
3015 3023
    { "subcmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_sub_cmp}, "subpel compare function", "cmp function" },
3016 3024
    { "cmp", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_cmp}, "fullpel compare function", "cmp function" },
3017 3025
    { "4mv", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&use_4mv}, "use four motion vector by macroblock (MPEG4)" },
libavcodec/avcodec.h
17 17

  
18 18
#define FFMPEG_VERSION_INT     0x000408
19 19
#define FFMPEG_VERSION         "0.4.8"
20
#define LIBAVCODEC_BUILD       4698
20
#define LIBAVCODEC_BUILD       4699
21 21

  
22 22
#define LIBAVCODEC_VERSION_INT FFMPEG_VERSION_INT
23 23
#define LIBAVCODEC_VERSION     FFMPEG_VERSION
......
1196 1196
     * - decoding: unused
1197 1197
     */
1198 1198
    int mb_cmp;
1199
    /**
1200
     * interlaced dct compare function
1201
     * - encoding: set by user.
1202
     * - decoding: unused
1203
     */
1204
    int ildct_cmp;
1199 1205
#define FF_CMP_SAD  0
1200 1206
#define FF_CMP_SSE  1
1201 1207
#define FF_CMP_SATD 2
......
1204 1210
#define FF_CMP_BIT  5
1205 1211
#define FF_CMP_RD   6
1206 1212
#define FF_CMP_ZERO 7
1213
#define FF_CMP_VSAD 8
1214
#define FF_CMP_VSSE 9
1207 1215
#define FF_CMP_CHROMA 256
1208 1216
    
1209 1217
    /**
libavcodec/dsputil.c
2560 2560
    }
2561 2561
}
2562 2562

  
2563
/**
 * Compare function that scores every block as 0.
 *
 * All arguments are ignored; ff_set_cmp() installs it for FF_CMP_ZERO.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}
2566

  
2567
/**
 * Fill a 5-entry compare-function table from the tables of a DSPContext.
 *
 * @param c    context whose sad/sse/hadamard8_diff/dct_sad/quant_psnr/bit/
 *             rd/vsad/vsse tables are read
 * @param cmp  destination table; exactly 5 entries are written
 * @param type FF_CMP_* selector; only the low 8 bits are examined here
 *
 * The destination is cleared first. If type is not recognised all five
 * entries stay cleared and one error message is logged.
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    const me_cmp_func *table= NULL;
    int i;

    /* size the clear by the element type instead of assuming that a
       function pointer has the size of a void* */
    memset(cmp, 0, sizeof(*cmp)*5);

    /* the selector does not depend on the index, so resolve it once */
    switch(type&0xFF){
    case FF_CMP_SAD:
        table= c->sad;
        break;
    case FF_CMP_SATD:
        table= c->hadamard8_diff;
        break;
    case FF_CMP_SSE:
        table= c->sse;
        break;
    case FF_CMP_DCT:
        table= c->dct_sad;
        break;
    case FF_CMP_PSNR:
        table= c->quant_psnr;
        break;
    case FF_CMP_BIT:
        table= c->bit;
        break;
    case FF_CMP_RD:
        table= c->rd;
        break;
    case FF_CMP_VSAD:
        table= c->vsad;
        break;
    case FF_CMP_VSSE:
        table= c->vsse;
        break;
    case FF_CMP_ZERO:
        /* no source table: every slot gets the constant-zero function */
        for(i=0; i<5; i++)
            cmp[i]= zero_cmp;
        return;
    default:
        av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        return;
    }

    for(i=0; i<5; i++)
        cmp[i]= table[i];
}
2609

  
2563 2610
/**
2564 2611
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
2565 2612
 */
......
2685 2732
    return sum;
2686 2733
}
2687 2734

  
2688
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
2735
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2689 2736
    int i;
2690 2737
    int temp[64];
2691 2738
    int sum=0;
2692
//FIXME OOOPS ignore 0 term instead of mean mess
2739
    
2740
    assert(h==8);
2741
    
2693 2742
    for(i=0; i<8; i++){
2694 2743
        //FIXME try pointer walks
2695
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
2696
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
2697
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
2698
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
2744
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2745
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2746
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2747
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2699 2748
        
2700 2749
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2701 2750
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
......
2726 2775
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2727 2776
    }
2728 2777
    
2778
    sum -= ABS(temp[8*0] + temp[8*4]); // -mean
2779
    
2729 2780
    return sum;
2730 2781
}
2731 2782

  
......
2911 2962
    return bits;
2912 2963
}
2913 2964

  
2965
/**
 * Sum of absolute differences between vertically adjacent pixels of a
 * 16 pixel wide block.
 *
 * @param c      unused
 * @param s      first pixel of the first row
 * @param dummy  unused
 * @param stride byte offset from one row to the next
 * @param h      number of rows; h-1 pairs of consecutive rows are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    const uint8_t *upper= s;
    int total= 0;
    int row, col;

    for(row= h-1; row > 0; row--){
        const uint8_t *lower= upper + stride;

        for(col=0; col<16; col++)
            total+= ABS(upper[col] - lower[col]);

        upper= lower;
    }

    return total;
}
2979

  
2980
/**
 * Vertical SAD of the difference between two 16 pixel wide blocks.
 *
 * For every pixel the difference s1-s2 is formed in the current row and in
 * the row below; the absolute value of the change between the two is
 * accumulated.
 *
 * @param c      unused
 * @param s1     first pixel of the first block
 * @param s2     first pixel of the second block
 * @param stride byte offset from one row to the next, used for both blocks
 * @param h      number of rows; h-1 pairs of consecutive rows are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *below1= s1 + stride;
        uint8_t *below2= s2 + stride;

        for(col=0; col<16; col++){
            const int here = s1[col] - s2[col];
            const int under= below1[col] - below2[col];

            total+= ABS(here - under);
        }

        s1= below1;
        s2= below2;
    }

    return total;
}
2994

  
2995
#define SQ(a) ((a)*(a))
/**
 * Sum of squared differences between vertically adjacent pixels of a
 * 16 pixel wide block.
 *
 * @param c      unused
 * @param s      first pixel of the first row
 * @param dummy  unused
 * @param stride byte offset from one row to the next
 * @param h      number of rows; h-1 pairs of consecutive rows are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
    const uint8_t *upper= s;
    int total= 0;
    int row, col;

    for(row= h-1; row > 0; row--){
        const uint8_t *lower= upper + stride;

        for(col=0; col<16; col++)
            total+= SQ(upper[col] - lower[col]);

        upper= lower;
    }

    return total;
}
3010

  
3011
/**
 * Vertical SSE of the difference between two 16 pixel wide blocks.
 *
 * For every pixel the difference s1-s2 is formed in the current row and in
 * the row below; the square of the change between the two is accumulated.
 *
 * @param c      unused
 * @param s1     first pixel of the first block
 * @param s2     first pixel of the second block
 * @param stride byte offset from one row to the next, used for both blocks
 * @param h      number of rows; h-1 pairs of consecutive rows are compared
 * @return the accumulated score (0 when h <= 1)
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int total= 0;
    int row, col;

    for(row=1; row<h; row++){
        uint8_t *below1= s1 + stride;
        uint8_t *below2= s2 + stride;

        for(col=0; col<16; col++){
            const int delta= s1[col] - s2[col] - below1[col] + below2[col];

            total+= delta*delta;
        }

        s1= below1;
        s2= below2;
    }

    return total;
}
3025

  
2914 3026
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
3027
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2915 3028
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2916 3029
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2917 3030
WARPER8_16_SQ(rd8x8_c, rd16_c)
......
3095 3208
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3096 3209
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
3097 3210
        
3098
    c->hadamard8_abs = hadamard8_abs_c;
3099

  
3100 3211
#define SET_CMP_FUNC(name) \
3101 3212
    c->name[0]= name ## 16_c;\
3102 3213
    c->name[1]= name ## 8x8_c;
3103 3214
    
3104 3215
    SET_CMP_FUNC(hadamard8_diff)
3216
    c->hadamard8_diff[4]= hadamard8_intra16_c;
3105 3217
    SET_CMP_FUNC(dct_sad)
3106 3218
    c->sad[0]= pix_abs16_c;
3107 3219
    c->sad[1]= pix_abs8_c;
......
3110 3222
    SET_CMP_FUNC(quant_psnr)
3111 3223
    SET_CMP_FUNC(rd)
3112 3224
    SET_CMP_FUNC(bit)
3225
    c->vsad[0]= vsad16_c;
3226
    c->vsad[4]= vsad_intra16_c;
3227
    c->vsse[0]= vsse16_c;
3228
    c->vsse[4]= vsse_intra16_c;
3113 3229
        
3114 3230
    c->add_bytes= add_bytes_c;
3115 3231
    c->diff_bytes= diff_bytes_c;
libavcodec/dsputil.h
138 138
    int (*pix_norm1)(uint8_t * pix, int line_size);
139 139
// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
140 140
    
141
    me_cmp_func sad[4]; /* identical to pix_absAxA except additional void * */
142
    me_cmp_func sse[4];
143
    me_cmp_func hadamard8_diff[4];
144
    me_cmp_func dct_sad[4];
145
    me_cmp_func quant_psnr[4];
146
    me_cmp_func bit[4];
147
    me_cmp_func rd[4];
148
    int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
141
    me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
142
    me_cmp_func sse[5];
143
    me_cmp_func hadamard8_diff[5];
144
    me_cmp_func dct_sad[5];
145
    me_cmp_func quant_psnr[5];
146
    me_cmp_func bit[5];
147
    me_cmp_func rd[5];
148
    me_cmp_func vsad[5];
149
    me_cmp_func vsse[5];
149 150

  
150 151
    me_cmp_func me_pre_cmp[5];
151 152
    me_cmp_func me_cmp[5];
152 153
    me_cmp_func me_sub_cmp[5];
153 154
    me_cmp_func mb_cmp[5];
155
    me_cmp_func ildct_cmp[5]; //only width 16 used
154 156

  
155
    /* maybe create an array for 16/8/4/2 functions */
156 157
    /**
157 158
     * Halfpel motion compensation with rounding (a+b+1)>>1.
158 159
     * this is an array[4][4] of motion compensation functions for 4 
......
293 294
 */
294 295
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
295 296

  
297
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
298

  
296 299
#define	BYTE_VEC32(c)	((c)*0x01010101UL)
297 300

  
298 301
static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
libavcodec/i386/dsputil_mmx.c
22 22
#include "../dsputil.h"
23 23
#include "../simple_idct.h"
24 24

  
25
//#undef NDEBUG
26
//#include <assert.h>
27

  
25 28
extern const uint8_t ff_h263_loop_filter_strength[32];
26 29

  
27 30
int mm_flags; /* multimedia extension flags */
......
747 750
    return tmp;
748 751
}
749 752

  
753
/**
 * MMX vertical SAD of a 16 pixel wide block: for each pair of consecutive
 * rows the absolute differences of the 16 bytes are added up.
 *
 * @param v         unused
 * @param pix       first row; asserted to be 8-byte aligned
 * @param dummy     unused
 * @param line_size byte step between rows; asserted to be a multiple of 8
 * @param h         row count, copied into ecx. ecx is decremented by 2 and
 *                  tested with jnz, so the loop only ends for even h >= 4;
 *                  h-1 row pairs are then summed.
 * @return low 16 bits of the accumulated sum
 *
 * SUM(in0, in1, out0, out1) loads the next row into out0/out1, forms the
 * absolute difference against in0/in1 (two psubusb results joined by por),
 * widens it to words with the zeroed mm7 and adds it to the word
 * accumulator mm6.
 */
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
754
    int tmp;
755
    
756
    assert( (((int)pix) & 7) == 0);
757
    assert((line_size &7) ==0);
758
    
759
#define SUM(in0, in1, out0, out1) \
760
      "movq (%0), %%mm2\n"\
761
      "movq 8(%0), %%mm3\n"\
762
      "addl %2,%0\n"\
763
      "movq %%mm2, " #out0 "\n"\
764
      "movq %%mm3, " #out1 "\n"\
765
      "psubusb " #in0 ", %%mm2\n"\
766
      "psubusb " #in1 ", %%mm3\n"\
767
      "psubusb " #out0 ", " #in0 "\n"\
768
      "psubusb " #out1 ", " #in1 "\n"\
769
      "por %%mm2, " #in0 "\n"\
770
      "por %%mm3, " #in1 "\n"\
771
      "movq " #in0 ", %%mm2\n"\
772
      "movq " #in1 ", %%mm3\n"\
773
      "punpcklbw %%mm7, " #in0 "\n"\
774
      "punpcklbw %%mm7, " #in1 "\n"\
775
      "punpckhbw %%mm7, %%mm2\n"\
776
      "punpckhbw %%mm7, %%mm3\n"\
777
      "paddw " #in1 ", " #in0 "\n"\
778
      "paddw %%mm3, %%mm2\n"\
779
      "paddw %%mm2, " #in0 "\n"\
780
      "paddw " #in0 ", %%mm6\n"
781

  
782
    
783
  asm volatile (
784
      "movl %3,%%ecx\n"
785
      "pxor %%mm6,%%mm6\n"
786
      "pxor %%mm7,%%mm7\n"
787
      "movq (%0),%%mm0\n"
788
      "movq 8(%0),%%mm1\n"
789
      "addl %2,%0\n"
790
      "subl $2, %%ecx\n"
791
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
792
      "1:\n"
793
      
794
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
795
      
796
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
797
      
798
      "subl $2, %%ecx\n"
799
      "jnz 1b\n"
800

  
801
      /* fold the four word sums of mm6 into the low word of mm0 */
      "movq %%mm6,%%mm0\n"
802
      "psrlq $32, %%mm6\n"
803
      "paddw %%mm6,%%mm0\n"
804
      "movq %%mm0,%%mm6\n"
805
      "psrlq $16, %%mm0\n"
806
      "paddw %%mm6,%%mm0\n"
807
      "movd %%mm0,%1\n"
808
      : "+r" (pix), "=r"(tmp) 
809
      : "r" (line_size) , "m" (h)
810
      : "%ecx");
811
    return tmp & 0xFFFF;
812
}
813
#undef SUM
814

  
815
/**
 * psadbw based vertical SAD of a 16 pixel wide block: for each pair of
 * consecutive rows the absolute differences of the 16 bytes are added up.
 *
 * @param v         unused
 * @param pix       first row; asserted to be 8-byte aligned
 * @param dummy     unused
 * @param line_size byte step between rows; asserted to be a multiple of 8
 * @param h         row count, copied into ecx. ecx is decremented by 2 and
 *                  tested with jnz, so the loop only ends for even h >= 4;
 *                  h-1 row pairs are then summed.
 * @return the value moved out of the accumulator mm6
 *
 * SUM(in0, in1, out0, out1) loads the next row into out0/out1, lets psadbw
 * replace in0/in1 by their SAD against that row and adds both to mm6.
 * mm7 is cleared below but SUM does not reference it.
 */
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
816
    int tmp;
817
    
818
    assert( (((int)pix) & 7) == 0);
819
    assert((line_size &7) ==0);
820
    
821
#define SUM(in0, in1, out0, out1) \
822
      "movq (%0), " #out0 "\n"\
823
      "movq 8(%0), " #out1 "\n"\
824
      "addl %2,%0\n"\
825
      "psadbw " #out0 ", " #in0 "\n"\
826
      "psadbw " #out1 ", " #in1 "\n"\
827
      "paddw " #in1 ", " #in0 "\n"\
828
      "paddw " #in0 ", %%mm6\n"
829

  
830
  asm volatile (
831
      "movl %3,%%ecx\n"
832
      "pxor %%mm6,%%mm6\n"
833
      "pxor %%mm7,%%mm7\n"
834
      "movq (%0),%%mm0\n"
835
      "movq 8(%0),%%mm1\n"
836
      "addl %2,%0\n"
837
      "subl $2, %%ecx\n"
838
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
839
      "1:\n"
840
      
841
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
842
      
843
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
844
      
845
      "subl $2, %%ecx\n"
846
      "jnz 1b\n"
847

  
848
      "movd %%mm6,%1\n"
849
      : "+r" (pix), "=r"(tmp) 
850
      : "r" (line_size) , "m" (h)
851
      : "%ecx");
852
    return tmp;
853
}
854
#undef SUM
855

  
856
/**
 * MMX vertical SAD of the difference between two 16 pixel wide blocks.
 *
 * Each row is reduced to the byte-wise difference pix1-pix2 (psubb, so it
 * wraps modulo 256) XORed with mm7; the absolute differences between those
 * values in consecutive rows are accumulated.
 *
 * @param v         unused
 * @param pix1      first row of the first block; asserted 8-byte aligned
 * @param pix2      first row of the second block; asserted 8-byte aligned
 * @param line_size byte step between rows of both blocks; asserted to be a
 *                  multiple of 8
 * @param h         row count, copied into ecx. ecx is decremented by 2 and
 *                  tested with jnz, so the loop only ends for even h >= 4;
 *                  h-1 row pairs are then summed.
 * @return low 15 bits of the accumulated sum
 *
 * mm7 is built with pcmpeqw / psllw $15 / packsswb, which leaves 0x80 in
 * every byte.
 * NOTE(review): SUM also passes mm7 to punpck*bw, so each widened word
 * carries 0x80 in its high byte; presumably the four word additions per SUM
 * make these offsets cancel modulo 2^16 -- confirm.
 */
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
857
    int tmp;
858
    
859
    assert( (((int)pix1) & 7) == 0);
860
    assert( (((int)pix2) & 7) == 0);
861
    assert((line_size &7) ==0);
862
    
863
#define SUM(in0, in1, out0, out1) \
864
      "movq (%0),%%mm2\n"\
865
      "movq (%1)," #out0 "\n"\
866
      "movq 8(%0),%%mm3\n"\
867
      "movq 8(%1)," #out1 "\n"\
868
      "addl %3,%0\n"\
869
      "addl %3,%1\n"\
870
      "psubb " #out0 ", %%mm2\n"\
871
      "psubb " #out1 ", %%mm3\n"\
872
      "pxor %%mm7, %%mm2\n"\
873
      "pxor %%mm7, %%mm3\n"\
874
      "movq %%mm2, " #out0 "\n"\
875
      "movq %%mm3, " #out1 "\n"\
876
      "psubusb " #in0 ", %%mm2\n"\
877
      "psubusb " #in1 ", %%mm3\n"\
878
      "psubusb " #out0 ", " #in0 "\n"\
879
      "psubusb " #out1 ", " #in1 "\n"\
880
      "por %%mm2, " #in0 "\n"\
881
      "por %%mm3, " #in1 "\n"\
882
      "movq " #in0 ", %%mm2\n"\
883
      "movq " #in1 ", %%mm3\n"\
884
      "punpcklbw %%mm7, " #in0 "\n"\
885
      "punpcklbw %%mm7, " #in1 "\n"\
886
      "punpckhbw %%mm7, %%mm2\n"\
887
      "punpckhbw %%mm7, %%mm3\n"\
888
      "paddw " #in1 ", " #in0 "\n"\
889
      "paddw %%mm3, %%mm2\n"\
890
      "paddw %%mm2, " #in0 "\n"\
891
      "paddw " #in0 ", %%mm6\n"
892

  
893
    
894
  asm volatile (
895
      "movl %4,%%ecx\n"
896
      "pxor %%mm6,%%mm6\n"
897
      "pcmpeqw %%mm7,%%mm7\n"
898
      "psllw $15, %%mm7\n"
899
      "packsswb %%mm7, %%mm7\n"
900
      "movq (%0),%%mm0\n"
901
      "movq (%1),%%mm2\n"
902
      "movq 8(%0),%%mm1\n"
903
      "movq 8(%1),%%mm3\n"
904
      "addl %3,%0\n"
905
      "addl %3,%1\n"
906
      "subl $2, %%ecx\n"
907
      "psubb %%mm2, %%mm0\n"
908
      "psubb %%mm3, %%mm1\n"
909
      "pxor %%mm7, %%mm0\n"
910
      "pxor %%mm7, %%mm1\n"
911
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
912
      "1:\n"
913
      
914
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
915
      
916
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
917
      
918
      "subl $2, %%ecx\n"
919
      "jnz 1b\n"
920

  
921
      /* fold the four word sums of mm6 into the low word of mm0 */
      "movq %%mm6,%%mm0\n"
922
      "psrlq $32, %%mm6\n"
923
      "paddw %%mm6,%%mm0\n"
924
      "movq %%mm0,%%mm6\n"
925
      "psrlq $16, %%mm0\n"
926
      "paddw %%mm6,%%mm0\n"
927
      "movd %%mm0,%2\n"
928
      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
929
      : "r" (line_size) , "m" (h)
930
      : "%ecx");
931
    return tmp & 0x7FFF;
932
}
933
#undef SUM
934

  
935
/**
 * psadbw based vertical SAD of the difference between two 16 pixel wide
 * blocks.
 *
 * Each row is reduced to the byte-wise difference pix1-pix2 (psubb, so it
 * wraps modulo 256) XORed with mm7; psadbw then sums the absolute
 * differences between those values in consecutive rows.
 *
 * @param v         unused
 * @param pix1      first row of the first block; asserted 8-byte aligned
 * @param pix2      first row of the second block; asserted 8-byte aligned
 * @param line_size byte step between rows of both blocks; asserted to be a
 *                  multiple of 8
 * @param h         row count, copied into ecx. ecx is decremented by 2 and
 *                  tested with jnz, so the loop only ends for even h >= 4;
 *                  h-1 row pairs are then summed.
 * @return the value moved out of the accumulator mm6
 *
 * mm7 is built with pcmpeqw / psllw $15 / packsswb, which leaves 0x80 in
 * every byte.
 */
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
936
    int tmp;
937
    
938
    assert( (((int)pix1) & 7) == 0);
939
    assert( (((int)pix2) & 7) == 0);
940
    assert((line_size &7) ==0);
941
    
942
#define SUM(in0, in1, out0, out1) \
943
      "movq (%0)," #out0 "\n"\
944
      "movq (%1),%%mm2\n"\
945
      "movq 8(%0)," #out1 "\n"\
946
      "movq 8(%1),%%mm3\n"\
947
      "addl %3,%0\n"\
948
      "addl %3,%1\n"\
949
      "psubb %%mm2, " #out0 "\n"\
950
      "psubb %%mm3, " #out1 "\n"\
951
      "pxor %%mm7, " #out0 "\n"\
952
      "pxor %%mm7, " #out1 "\n"\
953
      "psadbw " #out0 ", " #in0 "\n"\
954
      "psadbw " #out1 ", " #in1 "\n"\
955
      "paddw " #in1 ", " #in0 "\n"\
956
      "paddw " #in0 ", %%mm6\n"
957

  
958
  asm volatile (
959
      "movl %4,%%ecx\n"
960
      "pxor %%mm6,%%mm6\n"
961
      "pcmpeqw %%mm7,%%mm7\n"
962
      "psllw $15, %%mm7\n"
963
      "packsswb %%mm7, %%mm7\n"
964
      "movq (%0),%%mm0\n"
965
      "movq (%1),%%mm2\n"
966
      "movq 8(%0),%%mm1\n"
967
      "movq 8(%1),%%mm3\n"
968
      "addl %3,%0\n"
969
      "addl %3,%1\n"
970
      "subl $2, %%ecx\n"
971
      "psubb %%mm2, %%mm0\n"
972
      "psubb %%mm3, %%mm1\n"
973
      "pxor %%mm7, %%mm0\n"
974
      "pxor %%mm7, %%mm1\n"
975
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
976
      "1:\n"
977
      
978
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
979
      
980
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
981
      
982
      "subl $2, %%ecx\n"
983
      "jnz 1b\n"
984

  
985
      "movd %%mm6,%2\n"
986
      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
987
      : "r" (line_size) , "m" (h)
988
      : "%ecx");
989
    return tmp;
990
}
991
#undef SUM
992

  
750 993
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
751 994
    int i=0;
752 995
    asm volatile(
......
1874 2117
        
1875 2118
	c->pix_norm1 = pix_norm1_mmx;
1876 2119
	c->sse[0] = sse16_mmx;
2120
        c->vsad[4]= vsad_intra16_mmx;
2121

  
2122
        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2123
            c->vsad[0] = vsad16_mmx;
2124
        }
1877 2125
#endif //CONFIG_ENCODERS
1878 2126

  
1879 2127
        c->h263_v_loop_filter= h263_v_loop_filter_mmx;
......
1897 2145
#ifdef CONFIG_ENCODERS
1898 2146
            c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
1899 2147
            c->hadamard8_diff[1]= hadamard8_diff_mmx2;
2148
            c->vsad[4]= vsad_intra16_mmx2;
1900 2149
#endif //CONFIG_ENCODERS
1901 2150

  
1902 2151
            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
......
1906 2155
                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
1907 2156
                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
1908 2157
                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2158
                c->vsad[0] = vsad16_mmx2;
1909 2159
            }
1910 2160

  
1911 2161
#if 1
libavcodec/motion_est.c
277 277
#undef INIT
278 278
#undef CMP__DIRECT
279 279

  
280

  
281
/**
 * Compare function that scores every block as 0.
 *
 * All arguments are ignored; set_cmp() installs it for FF_CMP_ZERO.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;

    return 0;
}
284

  
285
/**
 * Fill a compare-function table from the DSPContext embedded in s.
 *
 * @param s    encoder context; s->dsp supplies the source tables and
 *             s->avctx is passed to av_log()
 * @param cmp  destination table; it is cleared first, then entries 0..3
 *             are assigned
 * @param type FF_CMP_* selector; only the low 8 bits are examined here
 *
 * For an unrecognised selector nothing is assigned and the error message is
 * logged once per entry.
 */
static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
    DSPContext * const dsp= &s->dsp;
    const int kind= type&0xFF;
    int idx;

    memset(cmp, 0, sizeof(void*)*5);

    for(idx=0; idx<4; idx++){
        me_cmp_func * const slot= &cmp[idx];

        if     (kind == FF_CMP_SAD )
            *slot= dsp->sad[idx];
        else if(kind == FF_CMP_SATD)
            *slot= dsp->hadamard8_diff[idx];
        else if(kind == FF_CMP_SSE )
            *slot= dsp->sse[idx];
        else if(kind == FF_CMP_DCT )
            *slot= dsp->dct_sad[idx];
        else if(kind == FF_CMP_PSNR)
            *slot= dsp->quant_psnr[idx];
        else if(kind == FF_CMP_BIT )
            *slot= dsp->bit[idx];
        else if(kind == FF_CMP_RD  )
            *slot= dsp->rd[idx];
        else if(kind == FF_CMP_ZERO)
            *slot= zero_cmp;
        else
            av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
    }
}
322

  
323 280
static inline int get_penalty_factor(MpegEncContext *s, int type){
324 281
    switch(type&0xFF){
325 282
    default:
......
340 297
}
341 298

  
342 299
void ff_init_me(MpegEncContext *s){
343
    set_cmp(s, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
344
    set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
345
    set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
346
    set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
300
    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, s->avctx->me_pre_cmp);
301
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
302
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
303
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, s->avctx->mb_cmp);
347 304

  
348 305
    if(s->flags&CODEC_FLAG_QPEL){
349 306
        if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
......
1783 1740
        }
1784 1741
         //FIXME something smarter
1785 1742
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
1743
#if 0        
1744
        if(s->out_format == FMT_MPEG1)
1745
            type |= CANDIDATE_MB_TYPE_INTRA;
1746
#endif
1786 1747
    }
1787 1748

  
1788 1749
    s->mb_type[mb_y*s->mb_stride + mb_x]= type;
libavcodec/mpegvideo.c
973 973
    s->progressive_frame= 
974 974
    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
975 975
    
976
    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
977
    
976 978
    ff_init_me(s);
977 979

  
978 980
#ifdef CONFIG_ENCODERS
......
3168 3170
        av_log(s->avctx, AV_LOG_INFO, "warning, cliping %d dct coefficents to %d..%d\n", overflow, minlevel, maxlevel);
3169 3171
}
3170 3172

  
3171
#if 0
/* Variant compiled out by the #if 0: scores use absolute differences. */

/**
 * Sum of absolute differences between vertically adjacent pixels over
 * 7 row pairs (8 rows) of a 16 pixel wide block.
 * FIXME move to dsputil & optimize
 */
static int pix_vcmp16x8(uint8_t *s, int stride){
    int total= 0;
    int row, col;

    for(row=7; row>0; row--){
        uint8_t *below= s + stride;

        for(col=0; col<16; col++)
            total+= ABS(s[col] - below[col]);

        s= below;
    }

    return total;
}

/**
 * Same measure applied to the pixel-wise difference s1-s2 of two blocks.
 * FIXME move to dsputil & optimize
 */
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){
    int total= 0;
    int row, col;

    for(row=7; row>0; row--){
        uint8_t *below1= s1 + stride;
        uint8_t *below2= s2 + stride;

        for(col=0; col<16; col++)
            total+= ABS(s1[col] - s2[col] - below1[col] + below2[col]);

        s1= below1;
        s2= below2;
    }

    return total;
}
#else
#define SQ(a) ((a)*(a))

/**
 * Sum of squared differences between vertically adjacent pixels over
 * 7 row pairs (8 rows) of a 16 pixel wide block.
 * FIXME move to dsputil & optimize
 *
 * @param s      first pixel of the first row
 * @param stride byte offset from one row to the next
 */
static int pix_vcmp16x8(uint8_t *s, int stride){
    int total= 0;
    int row, col;

    for(row=7; row>0; row--){
        uint8_t *below= s + stride;

        for(col=0; col<16; col++)
            total+= SQ(s[col] - below[col]);

        s= below;
    }

    return total;
}

/**
 * Same measure applied to the pixel-wise difference s1-s2 of two blocks.
 * FIXME move to dsputil & optimize
 *
 * @param s1     first pixel of the first block
 * @param s2     first pixel of the second block
 * @param stride byte offset from one row to the next, used for both blocks
 */
static int pix_diff_vcmp16x8(uint8_t *s1, uint8_t*s2, int stride){
    int total= 0;
    int row, col;

    for(row=7; row>0; row--){
        uint8_t *below1= s1 + stride;
        uint8_t *below2= s2 + stride;

        for(col=0; col<16; col++)
            total+= SQ(s1[col] - s2[col] - below1[col] + below2[col]);

        s1= below1;
        s2= below2;
    }

    return total;
}

#endif
3235

  
3236 3173
#endif //CONFIG_ENCODERS
3237 3174

  
3238 3175
/**
......
3352 3289
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3353 3290
            int progressive_score, interlaced_score;
3354 3291

  
3355
            progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y );
3356
            interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);
3357
            
3358
            if(progressive_score > interlaced_score + 100){
3359
                s->interlaced_dct=1;
3292
            s->interlaced_dct=0;
3293
            progressive_score= s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y, 8) 
3294
                              +s->dsp.ildct_cmp[4](s, ptr + wrap_y*8, NULL, wrap_y, 8) - 400;
3295

  
3296
            if(progressive_score > 0){
3297
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr           , NULL, wrap_y*2, 8) 
3298
                                  +s->dsp.ildct_cmp[4](s, ptr + wrap_y  , NULL, wrap_y*2, 8);
3299
                if(progressive_score > interlaced_score){
3300
                    s->interlaced_dct=1;
3360 3301
            
3361
                dct_offset= wrap_y;
3362
                wrap_y<<=1;
3363
            }else
3364
                s->interlaced_dct=0;
3302
                    dct_offset= wrap_y;
3303
                    wrap_y<<=1;
3304
                }
3305
            }
3365 3306
        }
3366 3307
        
3367 3308
	s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
......
3430 3371
        
3431 3372
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
3432 3373
            int progressive_score, interlaced_score;
3374

  
3375
            s->interlaced_dct=0;
3376
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
3377
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
3433 3378
            
3434
            progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  ) 
3435
                             + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
3436
            interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
3437
                             + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);
3379
            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
3380

  
3381
            if(progressive_score>0){
3382
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
3383
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
3438 3384
            
3439
            if(progressive_score > interlaced_score + 600){
3440
                s->interlaced_dct=1;
3385
                if(progressive_score > interlaced_score){
3386
                    s->interlaced_dct=1;
3441 3387
            
3442
                dct_offset= wrap_y;
3443
                wrap_y<<=1;
3444
            }else
3445
                s->interlaced_dct=0;
3388
                    dct_offset= wrap_y;
3389
                    wrap_y<<=1;
3390
                }
3391
            }
3446 3392
        }
3447 3393
        
3448 3394
	s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
tests/ffmpeg.regression.ref
5 5
b588110bebb48b5a1815ac26d0f0c9cc *./data/a-mpeg2.mpg
6 6
ddfa5c618dab54df0f47976ddd55d90f *./data/out.yuv
7 7
stddev:  7.65 PSNR:30.44 bytes:7602176
8
826f088b9b3d051642f51e05860c9738 *./data/a-mpeg2i.mpg
9
af80cb3a57800a0870273f62697ba29f *./data/out.yuv
10
stddev:  7.93 PSNR:30.13 bytes:7602176
8
13336cffcba456ff4a7607b2a7e57b33 *./data/a-mpeg2i.mpg
9
4c9701eb83ed81dd9a328af83d7d7c8a *./data/out.yuv
10
stddev:  7.66 PSNR:30.43 bytes:7602176
11 11
d0dc46dd831398237a690ebbeff18b64 *./data/a-msmpeg4v2.avi
12 12
712aa6c959d1d90a78fe98657cbff19c *./data/out.yuv
13 13
stddev:  8.11 PSNR:29.94 bytes:7602176
tests/rotozoom.regression.ref
5 5
aa0f088777131d8ffb627e6ff37312ca *./data/a-mpeg2.mpg
6 6
830e7d798089ea6213e0867fd7676fde *./data/out.yuv
7 7
stddev:  4.95 PSNR:34.22 bytes:7602176
8
aff7511e16a07314cac0489d3dbc4477 *./data/a-mpeg2i.mpg
9
6199bac131333a8dba043e69b2071dd0 *./data/out.yuv
10
stddev:  4.97 PSNR:34.19 bytes:7602176
8
6da01fd0d910fbfcdc5b212ef3dd65cb *./data/a-mpeg2i.mpg
9
1e21fd7ed53abf352f9ea8548afa80a3 *./data/out.yuv
10
stddev:  4.96 PSNR:34.20 bytes:7602176
11 11
14db391f167b52b21a983157b410affc *./data/a-msmpeg4v2.avi
12 12
fc8881e0904af9491d5fa0163183954b *./data/out.yuv
13 13
stddev:  5.29 PSNR:33.64 bytes:7602176

Also available in: Unified diff