Revision 6e3bba72

View differences:

postproc/rgb2rgb.c
@@ -585,6 +585,7 @@
 #ifdef HAVE_MMX
 	asm volatile (
 		"xorl %%eax, %%eax		\n\t"
+		".align 16			\n\t"
 		"1:				\n\t"
 		PREFETCH" 32(%0, %%eax)		\n\t"
 		"movq (%0, %%eax), %%mm0	\n\t"
@@ -635,6 +636,7 @@
 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
 		asm volatile(
 			"xorl %%eax, %%eax		\n\t"
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 32(%1, %%eax, 2)	\n\t"
 			PREFETCH" 32(%2, %%eax)		\n\t"
@@ -708,6 +710,7 @@
 			"xorl %%eax, %%eax		\n\t"
 			"pcmpeqw %%mm7, %%mm7		\n\t"
 			"psrlw $8, %%mm7		\n\t" // FF,00,FF,00...
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%0, %%eax, 4)	\n\t"
 			"movq (%0, %%eax, 4), %%mm0	\n\t" // YUYV YUYV(0)
@@ -757,6 +760,7 @@
 
 		asm volatile(
 			"xorl %%eax, %%eax		\n\t"
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%0, %%eax, 4)	\n\t"
 			"movq (%0, %%eax, 4), %%mm0	\n\t" // YUYV YUYV(0)
postproc/rgb2rgb_template.c
@@ -585,6 +585,7 @@
 #ifdef HAVE_MMX
 	asm volatile (
 		"xorl %%eax, %%eax		\n\t"
+		".align 16			\n\t"
 		"1:				\n\t"
 		PREFETCH" 32(%0, %%eax)		\n\t"
 		"movq (%0, %%eax), %%mm0	\n\t"
@@ -635,6 +636,7 @@
 //FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
 		asm volatile(
 			"xorl %%eax, %%eax		\n\t"
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 32(%1, %%eax, 2)	\n\t"
 			PREFETCH" 32(%2, %%eax)		\n\t"
@@ -708,6 +710,7 @@
 			"xorl %%eax, %%eax		\n\t"
 			"pcmpeqw %%mm7, %%mm7		\n\t"
 			"psrlw $8, %%mm7		\n\t" // FF,00,FF,00...
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%0, %%eax, 4)	\n\t"
 			"movq (%0, %%eax, 4), %%mm0	\n\t" // YUYV YUYV(0)
@@ -757,6 +760,7 @@
 
 		asm volatile(
 			"xorl %%eax, %%eax		\n\t"
+			".align 16			\n\t"
 			"1:				\n\t"
 			PREFETCH" 64(%0, %%eax, 4)	\n\t"
 			"movq (%0, %%eax, 4), %%mm0	\n\t" // YUYV YUYV(0)
postproc/swscale.c
@@ -143,6 +143,7 @@
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%0, %%eax, 2), %%mm0	\n\t" /*buf0[eax]*/\
 		"movq (%1, %%eax, 2), %%mm1	\n\t" /*buf1[eax]*/\
@@ -196,6 +197,7 @@
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"movq %%mm5, asm_uvalpha1	\n\t"\
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
 		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
@@ -260,6 +262,7 @@
 
 #define YSCALEYUV2RGB1 \
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm3	\n\t" /* uvbuf0[eax]*/\
 		"movq 4096(%2, %%eax), %%mm4	\n\t" /* uvbuf0[eax+2048]*/\
@@ -308,6 +311,7 @@
 // do vertical chrominance interpolation
 #define YSCALEYUV2RGB1b \
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
 		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
@@ -1306,6 +1310,7 @@
 		"xorl %%eax, %%eax		\n\t" // i
 		"xorl %%ebx, %%ebx		\n\t" // xx
 		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
+		".align 16			\n\t"
 		"1:				\n\t"
 		"movzbl  (%0, %%ebx), %%edi	\n\t" //src[xx]
 		"movzbl 1(%0, %%ebx), %%esi	\n\t" //src[xx+1]
@@ -1437,6 +1442,7 @@
 		"xorl %%eax, %%eax		\n\t" // i
 		"xorl %%ebx, %%ebx		\n\t" // xx
 		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
+		".align 16			\n\t"
 		"1:				\n\t"
 		"movl %0, %%esi			\n\t"
 		"movzbl  (%%esi, %%ebx), %%edi	\n\t" //src[xx]
postproc/swscale_template.c
@@ -143,6 +143,7 @@
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%0, %%eax, 2), %%mm0	\n\t" /*buf0[eax]*/\
 		"movq (%1, %%eax, 2), %%mm1	\n\t" /*buf1[eax]*/\
@@ -196,6 +197,7 @@
 		"punpcklwd %%mm5, %%mm5		\n\t"\
 		"movq %%mm5, asm_uvalpha1	\n\t"\
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
 		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
@@ -260,6 +262,7 @@
 
 #define YSCALEYUV2RGB1 \
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm3	\n\t" /* uvbuf0[eax]*/\
 		"movq 4096(%2, %%eax), %%mm4	\n\t" /* uvbuf0[eax+2048]*/\
@@ -308,6 +311,7 @@
 // do vertical chrominance interpolation
 #define YSCALEYUV2RGB1b \
 		"xorl %%eax, %%eax		\n\t"\
+		".align 16			\n\t"\
 		"1:				\n\t"\
 		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
 		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
@@ -1306,6 +1310,7 @@
 		"xorl %%eax, %%eax		\n\t" // i
 		"xorl %%ebx, %%ebx		\n\t" // xx
 		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
+		".align 16			\n\t"
 		"1:				\n\t"
 		"movzbl  (%0, %%ebx), %%edi	\n\t" //src[xx]
 		"movzbl 1(%0, %%ebx), %%esi	\n\t" //src[xx+1]
@@ -1437,6 +1442,7 @@
 		"xorl %%eax, %%eax		\n\t" // i
 		"xorl %%ebx, %%ebx		\n\t" // xx
 		"xorl %%ecx, %%ecx		\n\t" // 2*xalpha
+		".align 16			\n\t"
 		"1:				\n\t"
 		"movl %0, %%esi			\n\t"
 		"movzbl  (%%esi, %%ebx), %%edi	\n\t" //src[xx]
