Revision 622348f9 libavcodec/i386/dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c  

22  22 
#include "../dsputil.h" 
23  23 
#include "../simple_idct.h" 
24  24  
25 
//#undef NDEBUG 

26 
//#include <assert.h> 

27  
25  28 
extern const uint8_t ff_h263_loop_filter_strength[32]; 
26  29  
27  30 
int mm_flags; /* multimedia extension flags */ 
...  ...  
747  750 
return tmp; 
748  751 
} 
749  752  
753 
/**
 * Sum of absolute differences between each line of a 16-byte-wide block
 * and the line directly above it ("vertical SAD", intra variant: only
 * pix is read; dummy is unused).  Plain-MMX implementation.
 *
 * NOTE(review): x86-32 only — uses 32-bit addl/movl on the pointer
 * operands and casts a pointer to int in the assert below.
 *
 * @param v         unused context pointer (matches the dsp fn signature)
 * @param pix       source block, must be 8-byte aligned
 * @param dummy     unused second operand
 * @param line_size stride in bytes, must be a multiple of 8
 * @param h         number of lines; the loop consumes them two at a time,
 *                  so presumably h is even — TODO confirm against callers
 * @return sum of |pix[x,y] - pix[x,y-1]| over the block (low 16 bits)
 */
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
    int tmp;

    /* the movq loads below require 8-byte alignment of every line */
    assert( (((int)pix) & 7) == 0);
    assert((line_size &7) ==0);

/* SUM(in0,in1, out0,out1):
 * in0/in1 hold the previous line (two 8-byte halves); the current line
 * is loaded into out0/out1 and %0 is advanced by one stride.  The
 * per-byte absolute difference |cur - prev| is built with the unsigned
 * trick max(a-b,0) | max(b-a,0) (psubusb + por), the bytes are widened
 * to words against %mm7 (zeroed in the asm below), and the word sums
 * are accumulated into %mm6. */
#define SUM(in0, in1, out0, out1) \
      "movq (%0), %%mm2\n"\
      "movq 8(%0), %%mm3\n"\
      "addl %2,%0\n"\
      "movq %%mm2, " #out0 "\n"\
      "movq %%mm3, " #out1 "\n"\
      "psubusb " #in0 ", %%mm2\n"\
      "psubusb " #in1 ", %%mm3\n"\
      "psubusb " #out0 ", " #in0 "\n"\
      "psubusb " #out1 ", " #in1 "\n"\
      "por %%mm2, " #in0 "\n"\
      "por %%mm3, " #in1 "\n"\
      "movq " #in0 ", %%mm2\n"\
      "movq " #in1 ", %%mm3\n"\
      "punpcklbw %%mm7, " #in0 "\n"\
      "punpcklbw %%mm7, " #in1 "\n"\
      "punpckhbw %%mm7, %%mm2\n"\
      "punpckhbw %%mm7, %%mm3\n"\
      "paddw " #in1 ", " #in0 "\n"\
      "paddw %%mm3, %%mm2\n"\
      "paddw %%mm2, " #in0 "\n"\
      "paddw " #in0 ", %%mm6\n"


  asm volatile (
      "movl %3,%%ecx\n"               /* %ecx = h (line counter) */
      "pxor %%mm6,%%mm6\n"            /* %mm6 = word accumulator := 0 */
      "pxor %%mm7,%%mm7\n"            /* %mm7 = 0, used by punpck in SUM */
      "movq (%0),%%mm0\n"             /* preload first line into mm0/mm1 */
      "movq 8(%0),%%mm1\n"
      "addl %2,%0\n"
      "subl $2, %%ecx\n"
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
      "1:\n"
      /* two lines per iteration, ping-ponging between mm0/1 and mm4/5
       * so the "current" line of one SUM is the "previous" of the next */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)

      SUM(%%mm0, %%mm1, %%mm4, %%mm5)

      "subl $2, %%ecx\n"
      "jnz 1b\n"

      /* horizontal reduction: fold the four words of %mm6 into word 0 */
      "movq %%mm6,%%mm0\n"
      "psrlq $32, %%mm6\n"
      "paddw %%mm6,%%mm0\n"
      "movq %%mm0,%%mm6\n"
      "psrlq $16, %%mm0\n"
      "paddw %%mm6,%%mm0\n"
      "movd %%mm0,%1\n"
      : "+r" (pix), "=r"(tmp)
      : "r" (line_size) , "m" (h)
      : "%ecx");
  /* only the low word of the movd result is the complete sum */
  return tmp & 0xFFFF;
}
#undef SUM

814  
815 
/**
 * Vertical SAD of a 16-byte-wide block, intra variant (see
 * vsad_intra16_mmx) — MMX2 version using psadbw, which sums the eight
 * byte absolute differences of each half-line in a single instruction.
 *
 * NOTE(review): x86-32 only — 32-bit addl/movl and an (int) pointer cast.
 *
 * @param v         unused context pointer
 * @param pix       source block, must be 8-byte aligned
 * @param dummy     unused second operand
 * @param line_size stride in bytes, must be a multiple of 8
 * @param h         number of lines, consumed two per loop iteration
 * @return sum of |pix[x,y] - pix[x,y-1]| over the block
 */
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
    int tmp;

    /* movq loads require 8-byte alignment of every line */
    assert( (((int)pix) & 7) == 0);
    assert((line_size &7) ==0);

/* SUM(in0,in1, out0,out1): load the current line into out0/out1,
 * advance %0, psadbw against the previous line held in in0/in1 and
 * accumulate the two partial sums into %mm6. */
#define SUM(in0, in1, out0, out1) \
      "movq (%0), " #out0 "\n"\
      "movq 8(%0), " #out1 "\n"\
      "addl %2,%0\n"\
      "psadbw " #out0 ", " #in0 "\n"\
      "psadbw " #out1 ", " #in1 "\n"\
      "paddw " #in1 ", " #in0 "\n"\
      "paddw " #in0 ", %%mm6\n"

  asm volatile (
      "movl %3,%%ecx\n"               /* %ecx = h (line counter) */
      "pxor %%mm6,%%mm6\n"            /* accumulator := 0 */
      "pxor %%mm7,%%mm7\n"            /* cleared but unused by psadbw */
      "movq (%0),%%mm0\n"             /* preload first line */
      "movq 8(%0),%%mm1\n"
      "addl %2,%0\n"
      "subl $2, %%ecx\n"
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
      "1:\n"
      /* two lines per iteration, ping-ponging mm0/1 <-> mm4/5 */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)

      SUM(%%mm0, %%mm1, %%mm4, %%mm5)

      "subl $2, %%ecx\n"
      "jnz 1b\n"

      /* psadbw already produced scalar sums in word 0 of each half */
      "movd %%mm6,%1\n"
      : "+r" (pix), "=r"(tmp)
      : "r" (line_size) , "m" (h)
      : "%ecx");
  return tmp;
}
#undef SUM

855  
856 
/**
 * Vertical SAD of the difference signal d = pix1 - pix2 over a
 * 16-byte-wide block: sums |d(x,y) - d(x,y-1)|.  Plain-MMX version.
 *
 * The per-byte difference is computed with psubb (wrapping) and then
 * xored with 0x80 in every byte (%mm7), which flips the sign bit and
 * maps the signed difference to unsigned order-preservingly, so the
 * psubusb/por absolute-difference trick in SUM works on it.
 *
 * NOTE(review): %mm7 (0x80 bytes) is also the operand of the punpck
 * widening steps, so each word picks up a 0x8000 bias; 16 such biases
 * per SUM wrap to 0 mod 2^16, and the final & 0x7FFF masks what is
 * left — TODO confirm the intended value range.
 *
 * NOTE(review): x86-32 only — 32-bit addl/movl and (int) pointer casts.
 *
 * @param v         unused context pointer
 * @param pix1      first block, must be 8-byte aligned
 * @param pix2      second block, must be 8-byte aligned
 * @param line_size stride for both blocks, multiple of 8
 * @param h         number of lines, consumed two per loop iteration
 * @return vertical SAD of (pix1 - pix2), low 15 bits
 */
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
    int tmp;

    /* movq loads require 8-byte alignment of every line of both blocks */
    assert( (((int)pix1) & 7) == 0);
    assert( (((int)pix2) & 7) == 0);
    assert((line_size &7) ==0);

/* SUM(in0,in1, out0,out1):
 * in0/in1 hold the previous (sign-biased) difference line; the current
 * lines of pix1 and pix2 are loaded, both pointers advanced, their
 * psubb difference sign-biased via pxor %mm7 into out0/out1, then the
 * byte |cur - prev| is formed (psubusb + por), widened to words and
 * accumulated into %mm6. */
#define SUM(in0, in1, out0, out1) \
      "movq (%0),%%mm2\n"\
      "movq (%1)," #out0 "\n"\
      "movq 8(%0),%%mm3\n"\
      "movq 8(%1)," #out1 "\n"\
      "addl %3,%0\n"\
      "addl %3,%1\n"\
      "psubb " #out0 ", %%mm2\n"\
      "psubb " #out1 ", %%mm3\n"\
      "pxor %%mm7, %%mm2\n"\
      "pxor %%mm7, %%mm3\n"\
      "movq %%mm2, " #out0 "\n"\
      "movq %%mm3, " #out1 "\n"\
      "psubusb " #in0 ", %%mm2\n"\
      "psubusb " #in1 ", %%mm3\n"\
      "psubusb " #out0 ", " #in0 "\n"\
      "psubusb " #out1 ", " #in1 "\n"\
      "por %%mm2, " #in0 "\n"\
      "por %%mm3, " #in1 "\n"\
      "movq " #in0 ", %%mm2\n"\
      "movq " #in1 ", %%mm3\n"\
      "punpcklbw %%mm7, " #in0 "\n"\
      "punpcklbw %%mm7, " #in1 "\n"\
      "punpckhbw %%mm7, %%mm2\n"\
      "punpckhbw %%mm7, %%mm3\n"\
      "paddw " #in1 ", " #in0 "\n"\
      "paddw %%mm3, %%mm2\n"\
      "paddw %%mm2, " #in0 "\n"\
      "paddw " #in0 ", %%mm6\n"


  asm volatile (
      "movl %4,%%ecx\n"               /* %ecx = h (line counter) */
      "pxor %%mm6,%%mm6\n"            /* accumulator := 0 */
      /* build 0x80 in every byte of %mm7:
       * all-ones -> 0x8000 per word -> signed-saturate-pack to 0x80 */
      "pcmpeqw %%mm7,%%mm7\n"
      "psllw $15, %%mm7\n"
      "packsswb %%mm7, %%mm7\n"
      /* preload first difference line (sign-biased) into mm0/mm1 */
      "movq (%0),%%mm0\n"
      "movq (%1),%%mm2\n"
      "movq 8(%0),%%mm1\n"
      "movq 8(%1),%%mm3\n"
      "addl %3,%0\n"
      "addl %3,%1\n"
      "subl $2, %%ecx\n"
      "psubb %%mm2, %%mm0\n"
      "psubb %%mm3, %%mm1\n"
      "pxor %%mm7, %%mm0\n"
      "pxor %%mm7, %%mm1\n"
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
      "1:\n"
      /* two lines per iteration, ping-ponging mm0/1 <-> mm4/5 */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)

      SUM(%%mm0, %%mm1, %%mm4, %%mm5)

      "subl $2, %%ecx\n"
      "jnz 1b\n"

      /* horizontal reduction: fold the four words of %mm6 into word 0 */
      "movq %%mm6,%%mm0\n"
      "psrlq $32, %%mm6\n"
      "paddw %%mm6,%%mm0\n"
      "movq %%mm0,%%mm6\n"
      "psrlq $16, %%mm0\n"
      "paddw %%mm6,%%mm0\n"
      "movd %%mm0,%2\n"
      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
      : "r" (line_size) , "m" (h)
      : "%ecx");
  /* keep the low 15 bits; the residual bias lands in the top bit */
  return tmp & 0x7FFF;
}
#undef SUM

934  
935 
/**
 * Vertical SAD of the difference signal d = pix1 - pix2 over a
 * 16-byte-wide block (see vsad16_mmx) — MMX2 version using psadbw.
 *
 * The wrapping psubb difference is sign-biased with pxor 0x80 (%mm7),
 * mapping signed values to unsigned order-preservingly so that psadbw
 * between consecutive biased lines yields the correct |d(y) - d(y-1)|.
 *
 * NOTE(review): x86-32 only — 32-bit addl/movl and (int) pointer casts.
 *
 * @param v         unused context pointer
 * @param pix1      first block, must be 8-byte aligned
 * @param pix2      second block, must be 8-byte aligned
 * @param line_size stride for both blocks, multiple of 8
 * @param h         number of lines, consumed two per loop iteration
 * @return vertical SAD of (pix1 - pix2)
 */
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
    int tmp;

    /* movq loads require 8-byte alignment of every line of both blocks */
    assert( (((int)pix1) & 7) == 0);
    assert( (((int)pix2) & 7) == 0);
    assert((line_size &7) ==0);

/* SUM(in0,in1, out0,out1): load the current lines of pix1/pix2,
 * advance both pointers, form the sign-biased difference in out0/out1,
 * psadbw it against the previous difference line in in0/in1 and
 * accumulate into %mm6. */
#define SUM(in0, in1, out0, out1) \
      "movq (%0)," #out0 "\n"\
      "movq (%1),%%mm2\n"\
      "movq 8(%0)," #out1 "\n"\
      "movq 8(%1),%%mm3\n"\
      "addl %3,%0\n"\
      "addl %3,%1\n"\
      "psubb %%mm2, " #out0 "\n"\
      "psubb %%mm3, " #out1 "\n"\
      "pxor %%mm7, " #out0 "\n"\
      "pxor %%mm7, " #out1 "\n"\
      "psadbw " #out0 ", " #in0 "\n"\
      "psadbw " #out1 ", " #in1 "\n"\
      "paddw " #in1 ", " #in0 "\n"\
      "paddw " #in0 ", %%mm6\n"

  asm volatile (
      "movl %4,%%ecx\n"               /* %ecx = h (line counter) */
      "pxor %%mm6,%%mm6\n"            /* accumulator := 0 */
      /* build 0x80 in every byte of %mm7:
       * all-ones -> 0x8000 per word -> signed-saturate-pack to 0x80 */
      "pcmpeqw %%mm7,%%mm7\n"
      "psllw $15, %%mm7\n"
      "packsswb %%mm7, %%mm7\n"
      /* preload first difference line (sign-biased) into mm0/mm1 */
      "movq (%0),%%mm0\n"
      "movq (%1),%%mm2\n"
      "movq 8(%0),%%mm1\n"
      "movq 8(%1),%%mm3\n"
      "addl %3,%0\n"
      "addl %3,%1\n"
      "subl $2, %%ecx\n"
      "psubb %%mm2, %%mm0\n"
      "psubb %%mm3, %%mm1\n"
      "pxor %%mm7, %%mm0\n"
      "pxor %%mm7, %%mm1\n"
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
      "1:\n"
      /* two lines per iteration, ping-ponging mm0/1 <-> mm4/5 */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)

      SUM(%%mm0, %%mm1, %%mm4, %%mm5)

      "subl $2, %%ecx\n"
      "jnz 1b\n"

      /* psadbw already produced scalar sums in word 0 of each half */
      "movd %%mm6,%2\n"
      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
      : "r" (line_size) , "m" (h)
      : "%ecx");
  return tmp;
}
#undef SUM

992  
750  993 
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 
751  994 
int i=0; 
752  995 
asm volatile( 
...  ...  
1874  2117 

1875  2118 
c>pix_norm1 = pix_norm1_mmx; 
1876  2119 
c>sse[0] = sse16_mmx; 
2120 
c>vsad[4]= vsad_intra16_mmx; 

2121  
2122 
if(!(avctx>flags & CODEC_FLAG_BITEXACT)){ 

2123 
c>vsad[0] = vsad16_mmx; 

2124 
} 

1877  2125 
#endif //CONFIG_ENCODERS 
1878  2126  
1879  2127 
c>h263_v_loop_filter= h263_v_loop_filter_mmx; 
...  ...  
1897  2145 
#ifdef CONFIG_ENCODERS 
1898  2146 
c>hadamard8_diff[0]= hadamard8_diff16_mmx2; 
1899  2147 
c>hadamard8_diff[1]= hadamard8_diff_mmx2; 
2148 
c>vsad[4]= vsad_intra16_mmx2; 

1900  2149 
#endif //CONFIG_ENCODERS 
1901  2150  
1902  2151 
if(!(avctx>flags & CODEC_FLAG_BITEXACT)){ 
...  ...  
1906  2155 
c>put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; 
1907  2156 
c>avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; 
1908  2157 
c>avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; 
2158 
c>vsad[0] = vsad16_mmx2; 

1909  2159 
} 
1910  2160  
1911  2161 
#if 1 
Also available in: Unified diff