Revision ed8c0670 postproc/rgb2rgb.c

View differences:

postproc/rgb2rgb.c
811 811
		src  += srcStride;
812 812
	}
813 813
#ifdef HAVE_MMX
814
asm(    EMMS" \n\t"
815
        SFENCE" \n\t"
816
        :::"memory");
814
asm volatile(   EMMS" \n\t"
815
        	SFENCE" \n\t"
816
        	:::"memory");
817 817
#endif
818 818
}
819 819

  
......
830 830
	const int chromWidth= width>>1;
831 831
	for(y=0; y<height; y+=2)
832 832
	{
833
#ifdef HAVE_MMX
834
		asm volatile(
835
			"xorl %%eax, %%eax		\n\t"
836
			"pcmpeqw %%mm7, %%mm7		\n\t"
837
			"psrlw $8, %%mm7		\n\t" // FF,00,FF,00...
838
			".balign 16			\n\t"
839
			"1:				\n\t"
840
			PREFETCH" 64(%0, %%eax, 4)	\n\t"
841
			"movq (%0, %%eax, 4), %%mm0	\n\t" // UYVY UYVY(0)
842
			"movq 8(%0, %%eax, 4), %%mm1	\n\t" // UYVY UYVY(4)
843
			"movq %%mm0, %%mm2		\n\t" // UYVY UYVY(0)
844
			"movq %%mm1, %%mm3		\n\t" // UYVY UYVY(4)
845
			"pand %%mm7, %%mm0		\n\t" // U0V0 U0V0(0)
846
			"pand %%mm7, %%mm1		\n\t" // U0V0 U0V0(4)
847
			"psrlw $8, %%mm2		\n\t" // Y0Y0 Y0Y0(0)
848
			"psrlw $8, %%mm3		\n\t" // Y0Y0 Y0Y0(4)
849
			"packuswb %%mm1, %%mm0		\n\t" // UVUV UVUV(0)
850
			"packuswb %%mm3, %%mm2		\n\t" // YYYY YYYY(0)
851

  
852
			MOVNTQ" %%mm2, (%1, %%eax, 2)	\n\t"
853

  
854
			"movq 16(%0, %%eax, 4), %%mm1	\n\t" // UYVY UYVY(8)
855
			"movq 24(%0, %%eax, 4), %%mm2	\n\t" // UYVY UYVY(12)
856
			"movq %%mm1, %%mm3		\n\t" // UYVY UYVY(8)
857
			"movq %%mm2, %%mm4		\n\t" // UYVY UYVY(12)
858
			"pand %%mm7, %%mm1		\n\t" // U0V0 U0V0(8)
859
			"pand %%mm7, %%mm2		\n\t" // U0V0 U0V0(12)
860
			"psrlw $8, %%mm3		\n\t" // Y0Y0 Y0Y0(8)
861
			"psrlw $8, %%mm4		\n\t" // Y0Y0 Y0Y0(12)
862
			"packuswb %%mm2, %%mm1		\n\t" // UVUV UVUV(8)
863
			"packuswb %%mm4, %%mm3		\n\t" // YYYY YYYY(8)
864

  
865
			MOVNTQ" %%mm3, 8(%1, %%eax, 2)	\n\t"
866

  
867
			"movq %%mm0, %%mm2		\n\t" // UVUV UVUV(0)
868
			"movq %%mm1, %%mm3		\n\t" // UVUV UVUV(8)
869
			"psrlw $8, %%mm0		\n\t" // V0V0 V0V0(0)
870
			"psrlw $8, %%mm1		\n\t" // V0V0 V0V0(8)
871
			"pand %%mm7, %%mm2		\n\t" // U0U0 U0U0(0)
872
			"pand %%mm7, %%mm3		\n\t" // U0U0 U0U0(8)
873
			"packuswb %%mm1, %%mm0		\n\t" // VVVV VVVV(0)
874
			"packuswb %%mm3, %%mm2		\n\t" // UUUU UUUU(0)
875

  
876
			MOVNTQ" %%mm0, (%3, %%eax)	\n\t"
877
			MOVNTQ" %%mm2, (%2, %%eax)	\n\t"
878

  
879
			"addl $8, %%eax			\n\t"
880
			"cmpl %4, %%eax			\n\t"
881
			" jb 1b				\n\t"
882
			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
883
			: "memory", "%eax"
884
		);
885

  
886
		ydst += lumStride;
887
		src  += srcStride;
888

  
889
		asm volatile(
890
			"xorl %%eax, %%eax		\n\t"
891
			".balign 16			\n\t"
892
			"1:				\n\t"
893
			PREFETCH" 64(%0, %%eax, 4)	\n\t"
894
			"movq (%0, %%eax, 4), %%mm0	\n\t" // UYVY UYVY(0)
895
			"movq 8(%0, %%eax, 4), %%mm1	\n\t" // UYVY UYVY(4)
896
			"movq 16(%0, %%eax, 4), %%mm2	\n\t" // UYVY UYVY(8)
897
			"movq 24(%0, %%eax, 4), %%mm3	\n\t" // UYVY UYVY(12)
898
			"psrlw $8, %%mm0		\n\t" // Y0Y0 Y0Y0(0)
899
			"psrlw $8, %%mm1		\n\t" // Y0Y0 Y0Y0(4)
900
			"psrlw $8, %%mm2		\n\t" // Y0Y0 Y0Y0(8)
901
			"psrlw $8, %%mm3		\n\t" // Y0Y0 Y0Y0(12)
902
			"packuswb %%mm1, %%mm0		\n\t" // YYYY YYYY(0)
903
			"packuswb %%mm3, %%mm2		\n\t" // YYYY YYYY(8)
904

  
905
			MOVNTQ" %%mm0, (%1, %%eax, 2)	\n\t"
906
			MOVNTQ" %%mm2, 8(%1, %%eax, 2)	\n\t"
907

  
908
			"addl $8, %%eax			\n\t"
909
			"cmpl %4, %%eax			\n\t"
910
			" jb 1b				\n\t"
911

  
912
			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
913
			: "memory", "%eax"
914
		);
915
#else
833 916
		int i;
834 917
		for(i=0; i<chromWidth; i++)
835 918
		{
......
846 929
			ydst[2*i+0] 	= src[4*i+1];
847 930
			ydst[2*i+1] 	= src[4*i+3];
848 931
		}
932
#endif
849 933
		udst += chromStride;
850 934
		vdst += chromStride;
851 935
		ydst += lumStride;
852 936
		src  += srcStride;
853 937
	}
938
#ifdef HAVE_MMX
939
asm volatile(   EMMS" \n\t"
940
        	SFENCE" \n\t"
941
        	:::"memory");
942
#endif
854 943
}
855 944

  
856 945

  

Also available in: Unified diff