Revision b4c2ada5 libavcodec/x86/h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c  

812  812 
// could do a special case for dir==0 && edges==1, but it only reduces the 
813  813 
// average filter time by 1.2% 
814  814 
for( dir=1; dir>=0; dir-- ) { 
815 
const int d_idx = dir ? 8 : 1;


815 
const x86_reg d_idx = dir ? 8 : 1;


816  816 
const int mask_mv = dir ? mask_mv1 : mask_mv0; 
817  817 
DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; 
818  818 
int b_idx, edge, l; 
...  ...  
825  825 
__asm__ volatile("pxor %%mm0, %%mm0 \n\t":); 
826  826 
for( l = bidir; l >= 0; l-- ) { 
827  827 
__asm__ volatile( 
828 
"movd %0, %%mm1 \n\t"


829 
"punpckldq %1, %%mm1 \n\t"


828 
"movd (%0), %%mm1 \n\t"


829 
"punpckldq (%0,%1), %%mm1 \n\t"


830  830 
"punpckldq %%mm1, %%mm2 \n\t" 
831  831 
"pcmpeqb %%mm2, %%mm1 \n\t" 
832  832 
"paddb %%mm6, %%mm1 \n\t" 
833  833 
"punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] 
834  834 
"por %%mm1, %%mm0 \n\t" 
835  835  
836 
"movq %2, %%mm1 \n\t"


837 
"movq 8+1*%2, %%mm2 \n\t"


838 
"psubw %3, %%mm1 \n\t"


839 
"psubw 8+1*%3, %%mm2 \n\t"


836 
"movq (%2), %%mm1 \n\t"


837 
"movq 8(%2), %%mm2 \n\t"


838 
"psubw (%2,%1,4), %%mm1 \n\t"


839 
"psubw 8(%2,%1,4), %%mm2 \n\t"


840  840 
"packsswb %%mm2, %%mm1 \n\t" 
841  841 
"paddb %%mm5, %%mm1 \n\t" 
842  842 
"pminub %%mm4, %%mm1 \n\t" 
843  843 
"pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit 
844  844 
"por %%mm1, %%mm0 \n\t" 
845 
::"m"(ref[l][b_idx]), 

846 
"m"(ref[l][b_idx+d_idx]), 

847 
"m"(mv[l][b_idx][0]), 

848 
"m"(mv[l][b_idx+d_idx][0]) 

845 
::"r"(ref[l]+b_idx), 

846 
"r"(d_idx), 

847 
"r"(mv[l]+b_idx) 

849  848 
); 
850  849 
} 
851  850 
if(bidir==1){ 
852  851 
__asm__ volatile("pxor %%mm3, %%mm3 \n\t":); 
853  852 
for( l = bidir; l >= 0; l-- ) { 
854  853 
__asm__ volatile( 
855 
"movd %0, %%mm1 \n\t"


856 
"punpckldq %1, %%mm1 \n\t"


854 
"movd (%0), %%mm1 \n\t"


855 
"punpckldq (%1), %%mm1 \n\t"


857  856 
"punpckldq %%mm1, %%mm2 \n\t" 
858  857 
"pcmpeqb %%mm2, %%mm1 \n\t" 
859  858 
"paddb %%mm6, %%mm1 \n\t" 
860  859 
"punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn] 
861  860 
"por %%mm1, %%mm3 \n\t" 
862  861  
863 
"movq %2, %%mm1 \n\t"


864 
"movq 8+1*%2, %%mm2 \n\t"


865 
"psubw %3, %%mm1 \n\t"


866 
"psubw 8+1*%3, %%mm2 \n\t"


862 
"movq (%2), %%mm1 \n\t"


863 
"movq 8(%2), %%mm2 \n\t"


864 
"psubw (%3), %%mm1 \n\t"


865 
"psubw 8(%3), %%mm2 \n\t"


867  866 
"packsswb %%mm2, %%mm1 \n\t" 
868  867 
"paddb %%mm5, %%mm1 \n\t" 
869  868 
"pminub %%mm4, %%mm1 \n\t" 
870  869 
"pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit 
871  870 
"por %%mm1, %%mm3 \n\t" 
872 
::"m"(ref[l][b_idx]),


873 
"m"(ref[1-l][b_idx+d_idx]),


874 
"m"(mv[l][b_idx][0]),


875 
"m"(mv[1-l][b_idx+d_idx][0])


871 
::"r"(ref[l]+b_idx),


872 
"r"(ref[1-l]+b_idx+d_idx),


873 
"r"(mv[l][b_idx]),


874 
"r"(mv[1-l][b_idx+d_idx])


876  875 
); 
877  876 
} 
878  877 
__asm__ volatile( 
Also available in: Unified diff