Revision 40d0e665 libavcodec/i386/dsputilenc_mmx.c
libavcodec/i386/dsputilenc_mmx.c  

51  51 
"add $32, %%"REG_a" \n\t" 
52  52 
"js 1b \n\t" 
53  53 
: "+r" (pixels) 
54 
: "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2)


54 
: "r" (block+64), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*2)


55  55 
: "%"REG_a 
56  56 
); 
57  57 
} 
...  ...  
80  80 
"add $16, %%"REG_a" \n\t" 
81  81 
"jnz 1b \n\t" 
82  82 
: "+r" (s1), "+r" (s2) 
83 
: "r" (block+64), "r" ((long)stride)


83 
: "r" (block+64), "r" ((x86_reg)stride)


84  84 
: "%"REG_a 
85  85 
); 
86  86 
} 
...  ...  
88  88 
static int pix_sum16_mmx(uint8_t * pix, int line_size){ 
89  89 
const int h=16; 
90  90 
int sum; 
91 
long index= -line_size*h;


91 
x86_reg index= -line_size*h;


92  92  
93  93 
asm volatile( 
94  94 
"pxor %%mm7, %%mm7 \n\t" 
...  ...  
117  117 
"movd %%mm6, %0 \n\t" 
118  118 
"andl $0xFFFF, %0 \n\t" 
119  119 
: "=&r" (sum), "+r" (index) 
120 
: "r" (pix - index), "r" ((long)line_size)


120 
: "r" (pix - index), "r" ((x86_reg)line_size)


121  121 
); 
122  122  
123  123 
return sum; 
...  ...  
162  162 
"psrlq $32, %%mm7\n" /* shift hi dword to lo */ 
163  163 
"paddd %%mm7,%%mm1\n" 
164  164 
"movd %%mm1,%1\n" 
165 
: "+r" (pix), "=r"(tmp) : "r" ((long)line_size) : "%ecx" );


165 
: "+r" (pix), "=r"(tmp) : "r" ((x86_reg)line_size) : "%ecx" );


166  166 
return tmp; 
167  167 
} 
168  168  
...  ...  
222  222 
"paddd %%mm7,%%mm1\n" 
223  223 
"movd %%mm1,%2\n" 
224  224 
: "+r" (pix1), "+r" (pix2), "=r"(tmp) 
225 
: "r" ((long)line_size) , "m" (h)


225 
: "r" ((x86_reg)line_size) , "m" (h)


226  226 
: "%ecx"); 
227  227 
return tmp; 
228  228 
} 
...  ...  
282  282 
"paddd %%mm7,%%mm1\n" 
283  283 
"movd %%mm1,%2\n" 
284  284 
: "+r" (pix1), "+r" (pix2), "=r"(tmp) 
285 
: "r" ((long)line_size) , "m" (h)


285 
: "r" ((x86_reg)line_size) , "m" (h)


286  286 
: "%ecx"); 
287  287 
return tmp; 
288  288 
} 
...  ...  
345  345 
"paddd %%xmm1,%%xmm7\n" 
346  346 
"movd %%xmm7,%3\n" 
347  347 
: "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp) 
348 
: "r" ((long)line_size));


348 
: "r" ((x86_reg)line_size));


349  349 
return tmp; 
350  350 
} 
351  351  
...  ...  
469  469 
"paddd %%mm6,%%mm0\n" 
470  470 
"movd %%mm0,%1\n" 
471  471 
: "+r" (pix1), "=r"(tmp) 
472 
: "r" ((long)line_size) , "g" (h2)


472 
: "r" ((x86_reg)line_size) , "g" (h2)


473  473 
: "%ecx"); 
474  474 
return tmp; 
475  475 
} 
...  ...  
583  583 
"paddd %%mm6,%%mm0\n" 
584  584 
"movd %%mm0,%1\n" 
585  585 
: "+r" (pix1), "=r"(tmp) 
586 
: "r" ((long)line_size) , "g" (h2)


586 
: "r" ((x86_reg)line_size) , "g" (h2)


587  587 
: "%ecx"); 
588  588 
return tmp + hf_noise8_mmx(pix+8, line_size, h); 
589  589 
} 
...  ...  
665  665 
"paddw %%mm6,%%mm0\n" 
666  666 
"movd %%mm0,%1\n" 
667  667 
: "+r" (pix), "=r"(tmp) 
668 
: "r" ((long)line_size) , "m" (h)


668 
: "r" ((x86_reg)line_size) , "m" (h)


669  669 
: "%ecx"); 
670  670 
return tmp & 0xFFFF; 
671  671 
} 
...  ...  
706  706  
707  707 
"movd %%mm6,%1\n" 
708  708 
: "+r" (pix), "=r"(tmp) 
709 
: "r" ((long)line_size) , "m" (h)


709 
: "r" ((x86_reg)line_size) , "m" (h)


710  710 
: "%ecx"); 
711  711 
return tmp; 
712  712 
} 
...  ...  
785  785 
"paddw %%mm6,%%mm0\n" 
786  786 
"movd %%mm0,%2\n" 
787  787 
: "+r" (pix1), "+r" (pix2), "=r"(tmp) 
788 
: "r" ((long)line_size) , "m" (h)


788 
: "r" ((x86_reg)line_size) , "m" (h)


789  789 
: "%ecx"); 
790  790 
return tmp & 0x7FFF; 
791  791 
} 
...  ...  
843  843  
844  844 
"movd %%mm6,%2\n" 
845  845 
: "+r" (pix1), "+r" (pix2), "=r"(tmp) 
846 
: "r" ((long)line_size) , "m" (h)


846 
: "r" ((x86_reg)line_size) , "m" (h)


847  847 
: "%ecx"); 
848  848 
return tmp; 
849  849 
} 
850  850 
#undef SUM 
851  851  
852  852 
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 
853 
long i=0;


853 
x86_reg i=0;


854  854 
asm volatile( 
855  855 
"1: \n\t" 
856  856 
"movq (%2, %0), %%mm0 \n\t" 
...  ...  
865  865 
"cmp %4, %0 \n\t" 
866  866 
" jb 1b \n\t" 
867  867 
: "+r" (i) 
868 
: "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15)


868 
: "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)w-15)


869  869 
); 
870  870 
for(; i<w; i++) 
871  871 
dst[i+0] = src1[i+0]-src2[i+0]; 
872  872 
} 
873  873  
874  874 
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ 
875 
long i=0;


875 
x86_reg i=0;


876  876 
uint8_t l, lt; 
877  877  
878  878 
asm volatile( 
...  ...  
895  895 
"cmp %4, %0 \n\t" 
896  896 
" jb 1b \n\t" 
897  897 
: "+r" (i) 
898 
: "r"(src1), "r"(src2), "r"(dst), "r"((long)w)


898 
: "r"(src1), "r"(src2), "r"(dst), "r"((x86_reg)w)


899  899 
); 
900  900  
901  901 
l= *left; 
...  ...  
930  930 
DIFF_PIXELS_1(m0, mm##7, mm##0, (%1,%3,4), (%2,%3,4))\ 
931  931 
"mov"#m1" %0, "#mm"0 \n\t"\ 
932  932 
: "+m"(temp), "+r"(p1b), "+r"(p2b)\ 
933 
: "r"((long)stride), "r"((long)stride*3)\


933 
: "r"((x86_reg)stride), "r"((x86_reg)stride*3)\


934  934 
);\ 
935  935 
} 
936  936 
//the "+m"(temp) is needed as gcc 2.95 sometimes fails to compile "=m"(temp) 
...  ...  
1237  1237  
1238  1238 
static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){ 
1239  1239 
int sum; 
1240 
long i=size;


1240 
x86_reg i=size;


1241  1241 
asm volatile( 
1242  1242 
"pxor %%mm4, %%mm4 \n" 
1243  1243 
"1: \n" 
