Revision b32c9ca9
libavcodec/x86/h264_qpel_mmx.c  

299  299 
int h=8;\ 
300  300 
__asm__ volatile(\ 
301  301 
"pxor %%mm7, %%mm7 \n\t"\ 
302 
"movq %0, %%mm6 \n\t"\ 

303 
:: "m"(ff_pw_5)\ 

304 
);\ 

305 
do{\ 

306 
__asm__ volatile(\ 

302 
"movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ 

303 
"1: \n\t"\ 

307  304 
"movq (%0), %%mm0 \n\t"\ 
308  305 
"movq 1(%0), %%mm2 \n\t"\ 
309  306 
"movq %%mm0, %%mm1 \n\t"\ 
...  ...  
336  333 
"punpcklbw %%mm7, %%mm5 \n\t"\ 
337  334 
"paddw %%mm3, %%mm2 \n\t"\ 
338  335 
"paddw %%mm5, %%mm4 \n\t"\ 
339 
"movq %5, %%mm5 \n\t"\


336 
"movq "MANGLE(ff_pw_16)", %%mm5\n\t"\


340  337 
"paddw %%mm5, %%mm2 \n\t"\ 
341  338 
"paddw %%mm5, %%mm4 \n\t"\ 
342  339 
"paddw %%mm2, %%mm0 \n\t"\ 
...  ...  
347  344 
"packuswb %%mm1, %%mm0 \n\t"\ 
348  345 
PAVGB" %%mm4, %%mm0 \n\t"\ 
349  346 
OP(%%mm0, (%1),%%mm5, q)\ 
350 
"add %4, %0 \n\t"\ 

351 
"add %4, %1 \n\t"\ 

352 
"add %3, %2 \n\t"\ 

353 
: "+a"(src), "+c"(dst), "+d"(src2)\ 

354 
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ 

355 
"m"(ff_pw_16)\ 

347 
"add %5, %0 \n\t"\ 

348 
"add %5, %1 \n\t"\ 

349 
"add %4, %2 \n\t"\ 

350 
"decl %3 \n\t"\ 

351 
"jg 1b \n\t"\ 

352 
: "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ 

353 
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ 

356  354 
: "memory"\ 
357  355 
);\ 
358 
}while(h);\ 

359  356 
}\ 
360  357 
\ 
361  358 
static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ 
...  ...  
697  694 
int h=8;\ 
698  695 
__asm__ volatile(\ 
699  696 
"pxor %%xmm7, %%xmm7 \n\t"\ 
700 
"movdqa %0, %%xmm6 \n\t"\ 

701 
:: "m"(ff_pw_5)\ 

702 
);\ 

703 
do{\ 

704 
__asm__ volatile(\ 

697 
"movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\ 

698 
"1: \n\t"\ 

705  699 
"lddqu 2(%0), %%xmm1 \n\t"\ 
706  700 
"movdqa %%xmm1, %%xmm0 \n\t"\ 
707  701 
"punpckhbw %%xmm7, %%xmm1 \n\t"\ 
...  ...  
721  715 
"psllw $2, %%xmm2 \n\t"\ 
722  716 
"movq (%2), %%xmm3 \n\t"\ 
723  717 
"psubw %%xmm1, %%xmm2 \n\t"\ 
724 
"paddw %5, %%xmm0 \n\t"\


718 
"paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\


725  719 
"pmullw %%xmm6, %%xmm2 \n\t"\ 
726  720 
"paddw %%xmm0, %%xmm2 \n\t"\ 
727  721 
"psraw $5, %%xmm2 \n\t"\ 
728  722 
"packuswb %%xmm2, %%xmm2 \n\t"\ 
729  723 
"pavgb %%xmm3, %%xmm2 \n\t"\ 
730  724 
OP(%%xmm2, (%1), %%xmm4, q)\ 
731 
"add %4, %0 \n\t"\ 

732 
"add %4, %1 \n\t"\ 

733 
"add %3, %2 \n\t"\ 

734 
: "+a"(src), "+c"(dst), "+d"(src2)\ 

735 
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ 

736 
"m"(ff_pw_16)\ 

725 
"add %5, %0 \n\t"\ 

726 
"add %5, %1 \n\t"\ 

727 
"add %4, %2 \n\t"\ 

728 
"decl %3 \n\t"\ 

729 
"jg 1b \n\t"\ 

730 
: "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ 

731 
: "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ 

737  732 
: "memory"\ 
738  733 
);\ 
739 
}while(h);\ 

740  734 
}\ 
741  735 
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ 
742  736 
\ 
Also available in: Unified diff