Revision 6d4c49a2 libavcodec/x86/dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c  

579  579 
dst[i+0] += src[i+0]; 
580  580 
} 
581  581  
582 
static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 

583 
x86_reg i=0; 

584 
__asm__ volatile( 

585 
"jmp 2f \n\t" 

586 
"1: \n\t" 

587 
"movq (%2, %0), %%mm0 \n\t" 

588 
"movq 8(%2, %0), %%mm1 \n\t" 

589 
"paddb (%3, %0), %%mm0 \n\t" 

590 
"paddb 8(%3, %0), %%mm1 \n\t" 

591 
"movq %%mm0, (%1, %0) \n\t" 

592 
"movq %%mm1, 8(%1, %0) \n\t" 

593 
"add $16, %0 \n\t" 

594 
"2: \n\t" 

595 
"cmp %4, %0 \n\t" 

596 
" js 1b \n\t" 

597 
: "+r" (i) 

598 
: "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15) 

599 
); 

600 
for(; i<w; i++) 

601 
dst[i] = src1[i] + src2[i]; 

602 
} 

603  
604  582 
#if HAVE_7REGS && HAVE_TEN_OPERANDS 
605  583 
static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) { 
606  584 
x86_reg w2 = w; 
...  ...  
876  854 
} 
877  855 
} 
878  856  
879 
#define PAETH(cpu, abs3)\ 

880 
static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\ 

881 
{\ 

882 
x86_reg i = bpp;\ 

883 
x86_reg end = w-3;\ 

884 
__asm__ volatile(\ 

885 
"pxor %%mm7, %%mm7 \n"\ 

886 
"movd (%1,%0), %%mm0 \n"\ 

887 
"movd (%2,%0), %%mm1 \n"\ 

888 
"punpcklbw %%mm7, %%mm0 \n"\ 

889 
"punpcklbw %%mm7, %%mm1 \n"\ 

890 
"add %4, %0 \n"\ 

891 
"1: \n"\ 

892 
"movq %%mm1, %%mm2 \n"\ 

893 
"movd (%2,%0), %%mm1 \n"\ 

894 
"movq %%mm2, %%mm3 \n"\ 

895 
"punpcklbw %%mm7, %%mm1 \n"\ 

896 
"movq %%mm2, %%mm4 \n"\ 

897 
"psubw %%mm1, %%mm3 \n"\ 

898 
"psubw %%mm0, %%mm4 \n"\ 

899 
"movq %%mm3, %%mm5 \n"\ 

900 
"paddw %%mm4, %%mm5 \n"\ 

901 
abs3\ 

902 
"movq %%mm4, %%mm6 \n"\ 

903 
"pminsw %%mm5, %%mm6 \n"\ 

904 
"pcmpgtw %%mm6, %%mm3 \n"\ 

905 
"pcmpgtw %%mm5, %%mm4 \n"\ 

906 
"movq %%mm4, %%mm6 \n"\ 

907 
"pand %%mm3, %%mm4 \n"\ 

908 
"pandn %%mm3, %%mm6 \n"\ 

909 
"pandn %%mm0, %%mm3 \n"\ 

910 
"movd (%3,%0), %%mm0 \n"\ 

911 
"pand %%mm1, %%mm6 \n"\ 

912 
"pand %%mm4, %%mm2 \n"\ 

913 
"punpcklbw %%mm7, %%mm0 \n"\ 

914 
"movq %6, %%mm5 \n"\ 

915 
"paddw %%mm6, %%mm0 \n"\ 

916 
"paddw %%mm2, %%mm3 \n"\ 

917 
"paddw %%mm3, %%mm0 \n"\ 

918 
"pand %%mm5, %%mm0 \n"\ 

919 
"movq %%mm0, %%mm3 \n"\ 

920 
"packuswb %%mm3, %%mm3 \n"\ 

921 
"movd %%mm3, (%1,%0) \n"\ 

922 
"add %4, %0 \n"\ 

923 
"cmp %5, %0 \n"\ 

924 
"jle 1b \n"\ 

925 
:"+r"(i)\ 

926 
:"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\ 

927 
"m"(ff_pw_255)\ 

928 
:"memory"\ 

929 
);\ 

930 
} 

931  
932 
#define ABS3_MMX2\ 

933 
"psubw %%mm5, %%mm7 \n"\ 

934 
"pmaxsw %%mm7, %%mm5 \n"\ 

935 
"pxor %%mm6, %%mm6 \n"\ 

936 
"pxor %%mm7, %%mm7 \n"\ 

937 
"psubw %%mm3, %%mm6 \n"\ 

938 
"psubw %%mm4, %%mm7 \n"\ 

939 
"pmaxsw %%mm6, %%mm3 \n"\ 

940 
"pmaxsw %%mm7, %%mm4 \n"\ 

941 
"pxor %%mm7, %%mm7 \n" 

942  
943 
#define ABS3_SSSE3\ 

944 
"pabsw %%mm3, %%mm3 \n"\ 

945 
"pabsw %%mm4, %%mm4 \n"\ 

946 
"pabsw %%mm5, %%mm5 \n" 

947  
948 
PAETH(mmx2, ABS3_MMX2) 

949 
#if HAVE_SSSE3 

950 
PAETH(ssse3, ABS3_SSSE3) 

951 
#endif 

952  
953  857 
#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ 
954  858 
"paddw " #m4 ", " #m3 " \n\t" /* x1 */\ 
955  859 
"movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\ 
...  ...  
2537  2441 
#endif 
2538  2442  
2539  2443 
c->add_bytes= add_bytes_mmx; 
2540 
c->add_bytes_l2= add_bytes_l2_mmx; 

2541  2444  
2542  2445 
if (!h264_high_depth) 
2543  2446 
c->draw_edges = draw_edges_mmx; 
...  ...  
2658  2561 
c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; 
2659  2562 
#endif 
2660  2563  
2661 
c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; 

2662  2564 
} else if (mm_flags & AV_CPU_FLAG_3DNOW) { 
2663  2565 
c->prefetch = prefetch_3dnow; 
2664  2566  
...  ...  
2772  2674 
H264_QPEL_FUNCS(3, 2, ssse3); 
2773  2675 
H264_QPEL_FUNCS(3, 3, ssse3); 
2774  2676 
} 
2775 
c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 

2776  2677 
#if HAVE_YASM 
2777  2678 
if (!h264_high_depth) { 
2778  2679 
c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; 
Also available in: Unified diff