Revision 6e1c66bc postproc/swscale_template.c


postproc/swscale_template.c
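
This revision ports the MMX/MMX2 inline assembly in this template so it builds on x86_64 as well as x86: hard-coded 32-bit register names (%%eax, %%ebx, %%edx, %%esi, %%esp, %%ebp) become REG_* macros, size-suffixed mnemonics (movl/addl/cmpl/xorl/leal) become size-neutral ones, loop counters and "m" operands are widened to long, and the stringizing helper macros gain a REAL_ indirection so register-name macros get expanded inside macro arguments. The diff does not show where REG_a and friends are defined; a minimal sketch consistent with how they are used below might look like this (the string forms are concatenated into asm templates, the bare-token REGa/REGb/REGSP variants are used where arguments are later stringized):

    /* Sketch only - the real definitions live in a shared header that is
     * not part of this diff. */
    #if defined(ARCH_X86_64)
    #define REG_a  "rax"
    #define REG_b  "rbx"
    #define REG_c  "rcx"
    #define REG_d  "rdx"
    #define REG_S  "rsi"
    #define REG_D  "rdi"
    #define REG_SP "rsp"
    #define REG_BP "rbp"
    #define REGa   rax
    #define REGb   rbx
    #define REGSP  rsp
    #else
    #define REG_a  "eax"
    #define REG_b  "ebx"
    #define REG_c  "ecx"
    #define REG_d  "edx"
    #define REG_S  "esi"
    #define REG_D  "edi"
    #define REG_SP "esp"
    #define REG_BP "ebp"
    #define REGa   eax
    #define REGb   ebx
    #define REGSP  esp
    #endif
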
16 16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17 17
*/
18 18

  
19
#undef REAL_MOVNTQ
19 20
#undef MOVNTQ
20 21
#undef PAVGB
21 22
#undef PREFETCH
......
54 55
#endif
55 56

  
56 57
#ifdef HAVE_MMX2
57
#define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
58
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
58 59
#else
59
#define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
60
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
60 61
#endif
62
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
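
MOVNTQ (and, further down, the YSCALEYUV2*, WRITEBGR* and WRITEYUY2 macros) is split into a REAL_ worker plus a forwarding definition because '#' stringizes its argument without expanding it. With a single level, a call such as MOVNTQ(%%mm3, (%1, %%REGa)) would paste the literal text "REGa" into the asm string; the extra forwarding level expands REGa to the real register name first. A generic preprocessor demo of the difference (not code from this file):

    #include <stdio.h>

    #define REGa rax                    /* stand-in for the real define      */

    #define ONE_LEVEL(r)  #r            /* stringizes the unexpanded token   */
    #define REAL_STR(r)   #r
    #define TWO_LEVEL(r)  REAL_STR(r)   /* same trick as MOVNTQ/REAL_MOVNTQ  */

    int main(void)
    {
        puts(ONE_LEVEL(REGa));  /* prints "REGa" - useless in an asm string */
        puts(TWO_LEVEL(REGa));  /* prints "rax"  - what the template needs  */
        return 0;
    }
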
61 63

  
62 64
#ifdef HAVE_ALTIVEC
63 65
#include "swscale_altivec_template.c"
64 66
#endif
65 67

  
66 68
#define YSCALEYUV2YV12X(x, offset) \
67
			"xorl %%eax, %%eax		\n\t"\
69
			"xor %%"REG_a", %%"REG_a"	\n\t"\
68 70
			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
69 71
			"movq %%mm3, %%mm4		\n\t"\
70
			"leal " offset "(%0), %%edx	\n\t"\
71
			"movl (%%edx), %%esi		\n\t"\
72
			"lea " offset "(%0), %%"REG_d"	\n\t"\
73
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
72 74
			".balign 16			\n\t" /* FIXME Unroll? */\
73 75
			"1:				\n\t"\
74
			"movq 8(%%edx), %%mm0		\n\t" /* filterCoeff */\
75
			"movq " #x "(%%esi, %%eax, 2), %%mm2	\n\t" /* srcData */\
76
			"movq 8+" #x "(%%esi, %%eax, 2), %%mm5	\n\t" /* srcData */\
77
			"addl $16, %%edx		\n\t"\
78
			"movl (%%edx), %%esi		\n\t"\
79
			"testl %%esi, %%esi		\n\t"\
76
			"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
77
			"movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
78
			"movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
79
			"add $16, %%"REG_d"		\n\t"\
80
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
81
			"test %%"REG_S", %%"REG_S"	\n\t"\
80 82
			"pmulhw %%mm0, %%mm2		\n\t"\
81 83
			"pmulhw %%mm0, %%mm5		\n\t"\
82 84
			"paddw %%mm2, %%mm3		\n\t"\
......
85 87
			"psraw $3, %%mm3		\n\t"\
86 88
			"psraw $3, %%mm4		\n\t"\
87 89
			"packuswb %%mm4, %%mm3		\n\t"\
88
			MOVNTQ(%%mm3, (%1, %%eax))\
89
			"addl $8, %%eax			\n\t"\
90
			"cmpl %2, %%eax			\n\t"\
90
			MOVNTQ(%%mm3, (%1, %%REGa))\
91
			"add $8, %%"REG_a"		\n\t"\
92
			"cmp %2, %%"REG_a"		\n\t"\
91 93
			"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
92 94
			"movq %%mm3, %%mm4		\n\t"\
93
			"leal " offset "(%0), %%edx	\n\t"\
94
			"movl (%%edx), %%esi		\n\t"\
95
			"lea " offset "(%0), %%"REG_d"	\n\t"\
96
			"mov (%%"REG_d"), %%"REG_S"	\n\t"\
95 97
			"jb 1b				\n\t"
96 98
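
In the converted YSCALEYUV2YV12X loop the only functional change is register width: the index lives in REG_a (eax or rax), the filter-entry pointer in REG_d/REG_S, and the mnemonics drop their 'l' suffix so the assembler picks the operand size from the registers. The arithmetic is unchanged: starting from the rounding constant, each tap adds the high 16 bits of coeff*src (pmulhw), and the sum is shifted down 3 more bits and packed to bytes. A rough scalar equivalent of one output sample (a sketch, with names made up here):

    #include <stdint.h>

    static uint8_t vscale_one(const int16_t *const *src, const int16_t *coeff,
                              int taps, int x, int rounder)
    {
        int acc = rounder;                      /* VROUNDER_OFFSET constant  */
        for (int j = 0; j < taps; j++)
            acc += (src[j][x] * coeff[j]) >> 16;    /* pmulhw: high half     */
        acc >>= 3;                              /* psraw $3                  */
        return acc < 0 ? 0 : (acc > 255 ? 255 : acc);   /* packuswb clamps   */
    }
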

  
97 99
#define YSCALEYUV2YV121 \
98
			"movl %2, %%eax			\n\t"\
100
			"mov %2, %%"REG_a"		\n\t"\
99 101
			".balign 16			\n\t" /* FIXME Unroll? */\
100 102
			"1:				\n\t"\
101
			"movq (%0, %%eax, 2), %%mm0	\n\t"\
102
			"movq 8(%0, %%eax, 2), %%mm1	\n\t"\
103
			"movq (%0, %%"REG_a", 2), %%mm0	\n\t"\
104
			"movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
103 105
			"psraw $7, %%mm0		\n\t"\
104 106
			"psraw $7, %%mm1		\n\t"\
105 107
			"packuswb %%mm1, %%mm0		\n\t"\
106
			MOVNTQ(%%mm0, (%1, %%eax))\
107
			"addl $8, %%eax			\n\t"\
108
			MOVNTQ(%%mm0, (%1, %%REGa))\
109
			"add $8, %%"REG_a"		\n\t"\
108 110
			"jnc 1b				\n\t"
109 111
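
YSCALEYUV2YV121 is the 1:1 path: samples are already in place and only need >>7 and a clamp to bytes, so the port again just swaps eax for REG_a. Note the loop idiom it keeps: both pointers are pre-advanced by the width and the counter runs from -width up to zero, so a single add updates the index for both buffers and the carry flag doubles as the loop condition (jnc). A scalar sketch of the same idea (not the file's C fallback):

    #include <stdint.h>

    static void y_to_bytes(const int16_t *src, uint8_t *dst, long width)
    {
        const int16_t *s = src + width;   /* pointers parked at the end */
        uint8_t       *d = dst + width;
        for (long i = -width; i < 0; i++) {   /* counts up toward zero  */
            int v = s[i] >> 7;
            d[i] = v < 0 ? 0 : (v > 255 ? 255 : v);
        }
    }
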

  
110 112
/*
......
115 117
			: "%eax", "%ebx", "%ecx", "%edx", "%esi"
116 118
*/
117 119
#define YSCALEYUV2PACKEDX \
118
		"xorl %%eax, %%eax		\n\t"\
120
		"xor %%"REG_a", %%"REG_a"	\n\t"\
119 121
		".balign 16			\n\t"\
120 122
		"nop				\n\t"\
121 123
		"1:				\n\t"\
122
		"leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx	\n\t"\
123
		"movl (%%edx), %%esi		\n\t"\
124
		"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
125
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
124 126
		"movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
125 127
		"movq %%mm3, %%mm4		\n\t"\
126 128
		".balign 16			\n\t"\
127 129
		"2:				\n\t"\
128
		"movq 8(%%edx), %%mm0		\n\t" /* filterCoeff */\
129
		"movq (%%esi, %%eax), %%mm2	\n\t" /* UsrcData */\
130
		"movq 4096(%%esi, %%eax), %%mm5	\n\t" /* VsrcData */\
131
		"addl $16, %%edx		\n\t"\
132
		"movl (%%edx), %%esi		\n\t"\
130
		"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
131
		"movq (%%"REG_S", %%"REG_a"), %%mm2	\n\t" /* UsrcData */\
132
		"movq 4096(%%"REG_S", %%"REG_a"), %%mm5	\n\t" /* VsrcData */\
133
		"add $16, %%"REG_d"		\n\t"\
134
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
133 135
		"pmulhw %%mm0, %%mm2		\n\t"\
134 136
		"pmulhw %%mm0, %%mm5		\n\t"\
135 137
		"paddw %%mm2, %%mm3		\n\t"\
136 138
		"paddw %%mm5, %%mm4		\n\t"\
137
		"testl %%esi, %%esi		\n\t"\
139
		"test %%"REG_S", %%"REG_S"	\n\t"\
138 140
		" jnz 2b			\n\t"\
139 141
\
140
		"leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx	\n\t"\
141
		"movl (%%edx), %%esi		\n\t"\
142
		"lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
143
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
142 144
		"movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
143 145
		"movq %%mm1, %%mm7		\n\t"\
144 146
		".balign 16			\n\t"\
145 147
		"2:				\n\t"\
146
		"movq 8(%%edx), %%mm0		\n\t" /* filterCoeff */\
147
		"movq (%%esi, %%eax, 2), %%mm2	\n\t" /* Y1srcData */\
148
		"movq 8(%%esi, %%eax, 2), %%mm5	\n\t" /* Y2srcData */\
149
		"addl $16, %%edx		\n\t"\
150
		"movl (%%edx), %%esi		\n\t"\
148
		"movq 8(%%"REG_d"), %%mm0	\n\t" /* filterCoeff */\
149
		"movq (%%"REG_S", %%"REG_a", 2), %%mm2	\n\t" /* Y1srcData */\
150
		"movq 8(%%"REG_S", %%"REG_a", 2), %%mm5	\n\t" /* Y2srcData */\
151
		"add $16, %%"REG_d"		\n\t"\
152
		"mov (%%"REG_d"), %%"REG_S"	\n\t"\
151 153
		"pmulhw %%mm0, %%mm2		\n\t"\
152 154
		"pmulhw %%mm0, %%mm5		\n\t"\
153 155
		"paddw %%mm2, %%mm1		\n\t"\
154 156
		"paddw %%mm5, %%mm7		\n\t"\
155
		"testl %%esi, %%esi		\n\t"\
157
		"test %%"REG_S", %%"REG_S"	\n\t"\
156 158
		" jnz 2b			\n\t"\
157 159
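
YSCALEYUV2PACKEDX runs that accumulation twice per 8-pixel group before any pixel packing: first over the chroma coefficient list at CHR_MMX_FILTER_OFFSET (U into mm3, V into mm4, the 4096-byte offset selecting the V half of the buffer), then over the luma list at LUM_MMX_FILTER_OFFSET (into mm1/mm7). In both passes REG_d steps through 16-byte entries and the list ends at a NULL source pointer, which is what the mov/test pair on REG_S checks. A hypothetical picture of one such entry, matching the offsets the asm reads (pointer at 0, coefficient at 8, stride 16):

    #include <stdint.h>

    typedef struct {
        const int16_t *src;            /* source line; NULL terminates the list */
    #if !defined(ARCH_X86_64)
        int32_t        pad;            /* keeps coeff at offset 8 on x86-32      */
    #endif
        int16_t        coeff[4];       /* one coefficient, replicated four times */
    } VFilterEntry;                    /* struct name is an assumption           */
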

  
158 160

  
......
202 204
		"movd %7, %%mm5			\n\t" /*uvalpha1*/\
203 205
		"punpcklwd %%mm5, %%mm5		\n\t"\
204 206
		"punpcklwd %%mm5, %%mm5		\n\t"\
205
		"xorl %%eax, %%eax		\n\t"\
207
		"xor %%"REG_a", %%"REG_a"		\n\t"\
206 208
		".balign 16			\n\t"\
207 209
		"1:				\n\t"\
208
		"movq (%0, %%eax, 2), %%mm0	\n\t" /*buf0[eax]*/\
209
		"movq (%1, %%eax, 2), %%mm1	\n\t" /*buf1[eax]*/\
210
		"movq (%2, %%eax,2), %%mm2	\n\t" /* uvbuf0[eax]*/\
211
		"movq (%3, %%eax,2), %%mm3	\n\t" /* uvbuf1[eax]*/\
210
		"movq (%0, %%"REG_a", 2), %%mm0	\n\t" /*buf0[eax]*/\
211
		"movq (%1, %%"REG_a", 2), %%mm1	\n\t" /*buf1[eax]*/\
212
		"movq (%2, %%"REG_a",2), %%mm2	\n\t" /* uvbuf0[eax]*/\
213
		"movq (%3, %%"REG_a",2), %%mm3	\n\t" /* uvbuf1[eax]*/\
212 214
		"psubw %%mm1, %%mm0		\n\t" /* buf0[eax] - buf1[eax]*/\
213 215
		"psubw %%mm3, %%mm2		\n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
214 216
		"pmulhw %%mm6, %%mm0		\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
215 217
		"pmulhw %%mm5, %%mm2		\n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
216 218
		"psraw $4, %%mm1		\n\t" /* buf0[eax] - buf1[eax] >>4*/\
217
		"movq 4096(%2, %%eax,2), %%mm4	\n\t" /* uvbuf0[eax+2048]*/\
219
		"movq 4096(%2, %%"REG_a",2), %%mm4	\n\t" /* uvbuf0[eax+2048]*/\
218 220
		"psraw $4, %%mm3		\n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
219 221
		"paddw %%mm0, %%mm1		\n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
220
		"movq 4096(%3, %%eax,2), %%mm0	\n\t" /* uvbuf1[eax+2048]*/\
222
		"movq 4096(%3, %%"REG_a",2), %%mm0	\n\t" /* uvbuf1[eax+2048]*/\
221 223
		"paddw %%mm2, %%mm3		\n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
222 224
		"psubw %%mm0, %%mm4		\n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
223 225
		"psubw "MANGLE(w80)", %%mm1	\n\t" /* 8(Y-16)*/\
......
248 250
		"packuswb %%mm1, %%mm1		\n\t"
249 251
#endif
250 252

  
251
#define YSCALEYUV2PACKED(index, c) \
253
#define REAL_YSCALEYUV2PACKED(index, c) \
252 254
		"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
253 255
		"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
254 256
		"psraw $3, %%mm0		\n\t"\
255 257
		"psraw $3, %%mm1		\n\t"\
256 258
		"movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
257 259
		"movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
258
		"xorl "#index", "#index"		\n\t"\
260
		"xor "#index", "#index"		\n\t"\
259 261
		".balign 16			\n\t"\
260 262
		"1:				\n\t"\
261 263
		"movq (%2, "#index"), %%mm2	\n\t" /* uvbuf0[eax]*/\
......
284 286
		"paddw %%mm0, %%mm1		\n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
285 287
		"paddw %%mm6, %%mm7		\n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
286 288
                
287
#define YSCALEYUV2RGB(index, c) \
288
		"xorl "#index", "#index"	\n\t"\
289
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
290
                
291
#define REAL_YSCALEYUV2RGB(index, c) \
292
		"xor "#index", "#index"	\n\t"\
289 293
		".balign 16			\n\t"\
290 294
		"1:				\n\t"\
291 295
		"movq (%2, "#index"), %%mm2	\n\t" /* uvbuf0[eax]*/\
......
348 352
		"packuswb %%mm6, %%mm5		\n\t"\
349 353
		"packuswb %%mm3, %%mm4		\n\t"\
350 354
		"pxor %%mm7, %%mm7		\n\t"
355
#define YSCALEYUV2RGB(index, c)  REAL_YSCALEYUV2RGB(index, c)
351 356
                
352
#define YSCALEYUV2PACKED1(index, c) \
353
		"xorl "#index", "#index"		\n\t"\
357
#define REAL_YSCALEYUV2PACKED1(index, c) \
358
		"xor "#index", "#index"		\n\t"\
354 359
		".balign 16			\n\t"\
355 360
		"1:				\n\t"\
356 361
		"movq (%2, "#index"), %%mm3	\n\t" /* uvbuf0[eax]*/\
......
362 367
		"psraw $7, %%mm1		\n\t" \
363 368
		"psraw $7, %%mm7		\n\t" \
364 369
                
365
#define YSCALEYUV2RGB1(index, c) \
366
		"xorl "#index", "#index"	\n\t"\
370
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
371
                
372
#define REAL_YSCALEYUV2RGB1(index, c) \
373
		"xor "#index", "#index"	\n\t"\
367 374
		".balign 16			\n\t"\
368 375
		"1:				\n\t"\
369 376
		"movq (%2, "#index"), %%mm3	\n\t" /* uvbuf0[eax]*/\
......
409 416
		"packuswb %%mm6, %%mm5		\n\t"\
410 417
		"packuswb %%mm3, %%mm4		\n\t"\
411 418
		"pxor %%mm7, %%mm7		\n\t"
419
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
412 420

  
413
#define YSCALEYUV2PACKED1b(index, c) \
414
		"xorl "#index", "#index"		\n\t"\
421
#define REAL_YSCALEYUV2PACKED1b(index, c) \
422
		"xor "#index", "#index"		\n\t"\
415 423
		".balign 16			\n\t"\
416 424
		"1:				\n\t"\
417 425
		"movq (%2, "#index"), %%mm2	\n\t" /* uvbuf0[eax]*/\
......
426 434
		"movq 8(%0, "#index", 2), %%mm7	\n\t" /*buf0[eax]*/\
427 435
		"psraw $7, %%mm1		\n\t" \
428 436
		"psraw $7, %%mm7		\n\t" 
437
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
429 438
                
430 439
// do vertical chrominance interpolation
431
#define YSCALEYUV2RGB1b(index, c) \
432
		"xorl "#index", "#index"		\n\t"\
440
#define REAL_YSCALEYUV2RGB1b(index, c) \
441
		"xor "#index", "#index"		\n\t"\
433 442
		".balign 16			\n\t"\
434 443
		"1:				\n\t"\
435 444
		"movq (%2, "#index"), %%mm2	\n\t" /* uvbuf0[eax]*/\
......
479 488
		"packuswb %%mm6, %%mm5		\n\t"\
480 489
		"packuswb %%mm3, %%mm4		\n\t"\
481 490
		"pxor %%mm7, %%mm7		\n\t"
491
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
482 492

  
483
#define WRITEBGR32(dst, dstw, index) \
493
#define REAL_WRITEBGR32(dst, dstw, index) \
484 494
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
485 495
			"movq %%mm2, %%mm1		\n\t" /* B */\
486 496
			"movq %%mm5, %%mm6		\n\t" /* R */\
......
500 510
			MOVNTQ(%%mm1, 16(dst, index, 4))\
501 511
			MOVNTQ(%%mm3, 24(dst, index, 4))\
502 512
\
503
			"addl $8, "#index"		\n\t"\
504
			"cmpl "#dstw", "#index"		\n\t"\
513
			"add $8, "#index"		\n\t"\
514
			"cmp "#dstw", "#index"		\n\t"\
505 515
			" jb 1b				\n\t"
516
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
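
The WRITE* output macros get the same REAL_ forwarding treatment so that tokens like %%REGa passed as dst/dstw/index are expanded before being stringized, and their addl/cmpl pointer updates become add/cmp. What WRITEBGR32 itself emits is unchanged: 8 pixels whose B, G and R bytes sit in separate MMX registers are interleaved into B,G,R,0 quadruplets. A scalar sketch of that store pattern (not the file's C path):

    #include <stdint.h>

    static void write_bgr32(uint8_t *dst, const uint8_t *B, const uint8_t *G,
                            const uint8_t *R, int n)
    {
        for (int i = 0; i < n; i++) {
            dst[4*i + 0] = B[i];
            dst[4*i + 1] = G[i];
            dst[4*i + 2] = R[i];
            dst[4*i + 3] = 0;        /* the zero byte comes from mm7 above */
        }
    }
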
506 517

  
507
#define WRITEBGR16(dst, dstw, index) \
518
#define REAL_WRITEBGR16(dst, dstw, index) \
508 519
			"pand "MANGLE(bF8)", %%mm2	\n\t" /* B */\
509 520
			"pand "MANGLE(bFC)", %%mm4	\n\t" /* G */\
510 521
			"pand "MANGLE(bF8)", %%mm5	\n\t" /* R */\
......
527 538
			MOVNTQ(%%mm2, (dst, index, 2))\
528 539
			MOVNTQ(%%mm1, 8(dst, index, 2))\
529 540
\
530
			"addl $8, "#index"		\n\t"\
531
			"cmpl "#dstw", "#index"		\n\t"\
541
			"add $8, "#index"		\n\t"\
542
			"cmp "#dstw", "#index"		\n\t"\
532 543
			" jb 1b				\n\t"
544
#define WRITEBGR16(dst, dstw, index)  REAL_WRITEBGR16(dst, dstw, index)
533 545

  
534
#define WRITEBGR15(dst, dstw, index) \
546
#define REAL_WRITEBGR15(dst, dstw, index) \
535 547
			"pand "MANGLE(bF8)", %%mm2	\n\t" /* B */\
536 548
			"pand "MANGLE(bF8)", %%mm4	\n\t" /* G */\
537 549
			"pand "MANGLE(bF8)", %%mm5	\n\t" /* R */\
......
555 567
			MOVNTQ(%%mm2, (dst, index, 2))\
556 568
			MOVNTQ(%%mm1, 8(dst, index, 2))\
557 569
\
558
			"addl $8, "#index"		\n\t"\
559
			"cmpl "#dstw", "#index"		\n\t"\
570
			"add $8, "#index"		\n\t"\
571
			"cmp "#dstw", "#index"		\n\t"\
560 572
			" jb 1b				\n\t"
573
#define WRITEBGR15(dst, dstw, index)  REAL_WRITEBGR15(dst, dstw, index)
561 574

  
562 575
#define WRITEBGR24OLD(dst, dstw, index) \
563 576
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
......
609 622
			MOVNTQ(%%mm0, (dst))\
610 623
			MOVNTQ(%%mm2, 8(dst))\
611 624
			MOVNTQ(%%mm3, 16(dst))\
612
			"addl $24, "#dst"		\n\t"\
625
			"add $24, "#dst"		\n\t"\
613 626
\
614
			"addl $8, "#index"		\n\t"\
615
			"cmpl "#dstw", "#index"		\n\t"\
627
			"add $8, "#index"		\n\t"\
628
			"cmp "#dstw", "#index"		\n\t"\
616 629
			" jb 1b				\n\t"
617 630

  
618 631
#define WRITEBGR24MMX(dst, dstw, index) \
......
662 675
			"por %%mm3, %%mm5		\n\t" /* RGBRGBRG 2 */\
663 676
			MOVNTQ(%%mm5, 16(dst))\
664 677
\
665
			"addl $24, "#dst"		\n\t"\
678
			"add $24, "#dst"		\n\t"\
666 679
\
667
			"addl $8, "#index"			\n\t"\
668
			"cmpl "#dstw", "#index"			\n\t"\
680
			"add $8, "#index"			\n\t"\
681
			"cmp "#dstw", "#index"			\n\t"\
669 682
			" jb 1b				\n\t"
670 683

  
671 684
#define WRITEBGR24MMX2(dst, dstw, index) \
......
710 723
			"por %%mm3, %%mm6		\n\t"\
711 724
			MOVNTQ(%%mm6, 16(dst))\
712 725
\
713
			"addl $24, "#dst"		\n\t"\
726
			"add $24, "#dst"		\n\t"\
714 727
\
715
			"addl $8, "#index"		\n\t"\
716
			"cmpl "#dstw", "#index"		\n\t"\
728
			"add $8, "#index"		\n\t"\
729
			"cmp "#dstw", "#index"		\n\t"\
717 730
			" jb 1b				\n\t"
718 731

  
719 732
#ifdef HAVE_MMX2
720 733
#undef WRITEBGR24
721
#define WRITEBGR24 WRITEBGR24MMX2
734
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
722 735
#else
723 736
#undef WRITEBGR24
724
#define WRITEBGR24 WRITEBGR24MMX
737
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
725 738
#endif
726 739

  
727
#define WRITEYUY2(dst, dstw, index) \
740
#define REAL_WRITEYUY2(dst, dstw, index) \
728 741
			"packuswb %%mm3, %%mm3		\n\t"\
729 742
			"packuswb %%mm4, %%mm4		\n\t"\
730 743
			"packuswb %%mm7, %%mm1		\n\t"\
......
736 749
			MOVNTQ(%%mm1, (dst, index, 2))\
737 750
			MOVNTQ(%%mm7, 8(dst, index, 2))\
738 751
\
739
			"addl $8, "#index"		\n\t"\
740
			"cmpl "#dstw", "#index"		\n\t"\
752
			"add $8, "#index"		\n\t"\
753
			"cmp "#dstw", "#index"		\n\t"\
741 754
			" jb 1b				\n\t"
755
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
742 756

  
743 757

  
744 758
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
......
751 765
		asm volatile(
752 766
				YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET)
753 767
				:: "r" (&c->redDither),
754
				"r" (uDest), "m" (chrDstW)
755
				: "%eax", "%edx", "%esi"
768
				"r" (uDest), "m" ((long)chrDstW)
769
				: "%"REG_a, "%"REG_d, "%"REG_S
756 770
			);
757 771

  
758 772
		asm volatile(
759 773
				YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET)
760 774
				:: "r" (&c->redDither),
761
				"r" (vDest), "m" (chrDstW)
762
				: "%eax", "%edx", "%esi"
775
				"r" (vDest), "m" ((long)chrDstW)
776
				: "%"REG_a, "%"REG_d, "%"REG_S
763 777
			);
764 778
	}
765 779

  
766 780
	asm volatile(
767 781
			YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET)
768 782
			:: "r" (&c->redDither),
769
			   "r" (dest), "m" (dstW)
770
			: "%eax", "%edx", "%esi"
783
			   "r" (dest), "m" ((long)dstW)
784
			: "%"REG_a, "%"REG_d, "%"REG_S
771 785
		);
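
The asm statement interfaces change in two small ways visible here: the width passed through an "m" operand is cast to long so the memory slot matches the 64-bit compare against REG_a, and the clobber lists name the registers through the same REG_* strings. A minimal, self-contained sketch of the pattern (hypothetical helper, assuming width > 0 as in the callers here):

    #include <stdint.h>

    #if defined(ARCH_X86_64) || defined(__x86_64__)
    #define REG_a "rax"
    #else
    #define REG_a "eax"
    #endif

    static void clear_bytes(uint8_t *dst, int width)
    {
        __asm__ volatile(
            "xor %%"REG_a", %%"REG_a"  \n\t"
            "1:                        \n\t"
            "movb $0, (%0, %%"REG_a")  \n\t"
            "add $1, %%"REG_a"         \n\t"
            "cmp %1, %%"REG_a"         \n\t"
            "jb 1b                     \n\t"
            : : "r"(dst), "m"((long)width)    /* long-sized slot, as above */
            : "%"REG_a, "memory");
    }
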
772 786
#else
773 787
#ifdef HAVE_ALTIVEC
......
791 805
		asm volatile(
792 806
				YSCALEYUV2YV121
793 807
				:: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
794
				"g" (-chrDstW)
795
				: "%eax"
808
				"g" ((long)-chrDstW)
809
				: "%"REG_a
796 810
			);
797 811

  
798 812
		asm volatile(
799 813
				YSCALEYUV2YV121
800 814
				:: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
801
				"g" (-chrDstW)
802
				: "%eax"
815
				"g" ((long)-chrDstW)
816
				: "%"REG_a
803 817
			);
804 818
	}
805 819

  
806 820
	asm volatile(
807 821
		YSCALEYUV2YV121
808 822
		:: "r" (lumSrc + dstW), "r" (dest + dstW),
809
		"g" (-dstW)
810
		: "%eax"
823
		"g" ((long)-dstW)
824
		: "%"REG_a
811 825
	);
812 826
#else
813 827
	int i;
......
858 872
		{
859 873
			asm volatile(
860 874
				YSCALEYUV2RGBX
861
				WRITEBGR32(%4, %5, %%eax)
875
				WRITEBGR32(%4, %5, %%REGa)
862 876

  
863 877
			:: "r" (&c->redDither), 
864 878
			   "m" (dummy), "m" (dummy), "m" (dummy),
865 879
			   "r" (dest), "m" (dstW)
866
			: "%eax", "%edx", "%esi"
880
			: "%"REG_a, "%"REG_d, "%"REG_S
867 881
			);
868 882
		}
869 883
		break;
......
871 885
		{
872 886
			asm volatile(
873 887
				YSCALEYUV2RGBX
874
				"leal (%%eax, %%eax, 2), %%ebx	\n\t" //FIXME optimize
875
				"addl %4, %%ebx			\n\t"
876
				WRITEBGR24(%%ebx, %5, %%eax)
888
				"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t" //FIXME optimize
889
				"add %4, %%"REG_b"			\n\t"
890
				WRITEBGR24(%%REGb, %5, %%REGa)
877 891

  
878 892
			:: "r" (&c->redDither), 
879 893
			   "m" (dummy), "m" (dummy), "m" (dummy),
880 894
			   "r" (dest), "m" (dstW)
881
			: "%eax", "%ebx", "%edx", "%esi" //FIXME ebx
895
			: "%"REG_a, "%"REG_b, "%"REG_d, "%"REG_S //FIXME ebx
882 896
			);
883 897
		}
884 898
		break;
......
893 907
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
894 908
#endif
895 909

  
896
				WRITEBGR15(%4, %5, %%eax)
910
				WRITEBGR15(%4, %5, %%REGa)
897 911

  
898 912
			:: "r" (&c->redDither), 
899 913
			   "m" (dummy), "m" (dummy), "m" (dummy),
900 914
			   "r" (dest), "m" (dstW)
901
			: "%eax", "%edx", "%esi"
915
			: "%"REG_a, "%"REG_d, "%"REG_S
902 916
			);
903 917
		}
904 918
		break;
......
913 927
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
914 928
#endif
915 929

  
916
				WRITEBGR16(%4, %5, %%eax)
930
				WRITEBGR16(%4, %5, %%REGa)
917 931

  
918 932
			:: "r" (&c->redDither), 
919 933
			   "m" (dummy), "m" (dummy), "m" (dummy),
920 934
			   "r" (dest), "m" (dstW)
921
			: "%eax", "%edx", "%esi"
935
			: "%"REG_a, "%"REG_d, "%"REG_S
922 936
			);
923 937
		}
924 938
		break;
......
932 946
				"psraw $3, %%mm4		\n\t"
933 947
				"psraw $3, %%mm1		\n\t"
934 948
				"psraw $3, %%mm7		\n\t"
935
				WRITEYUY2(%4, %5, %%eax)
949
				WRITEYUY2(%4, %5, %%REGa)
936 950

  
937 951
			:: "r" (&c->redDither), 
938 952
			   "m" (dummy), "m" (dummy), "m" (dummy),
939 953
			   "r" (dest), "m" (dstW)
940
			: "%eax", "%edx", "%esi"
954
			: "%"REG_a, "%"REG_d, "%"REG_S
941 955
			);
942 956
		}
943 957
		break;
......
984 998
			"punpcklwd %%mm0, %%mm3		\n\t" // BGR0BGR0
985 999
			"punpckhwd %%mm0, %%mm1		\n\t" // BGR0BGR0
986 1000

  
987
			MOVNTQ(%%mm3, (%4, %%eax, 4))
988
			MOVNTQ(%%mm1, 8(%4, %%eax, 4))
1001
			MOVNTQ(%%mm3, (%4, %%REGa, 4))
1002
			MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
989 1003

  
990
			"addl $4, %%eax			\n\t"
991
			"cmpl %5, %%eax			\n\t"
1004
			"add $4, %%"REG_a"		\n\t"
1005
			"cmp %5, %%"REG_a"		\n\t"
992 1006
			" jb 1b				\n\t"
993 1007

  
994 1008

  
995
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1009
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
996 1010
			"m" (yalpha1), "m" (uvalpha1)
997
			: "%eax"
1011
			: "%"REG_a
998 1012
			);
999 1013
			break;
1000 1014
		case IMGFMT_BGR24:
......
1024 1038
			"psrlq $24, %%mm1		\n\t" // 0BGR0000
1025 1039
			"por %%mm2, %%mm1		\n\t" // RBGRR000
1026 1040

  
1027
			"movl %4, %%ebx			\n\t"
1028
			"addl %%eax, %%ebx		\n\t"
1041
			"mov %4, %%"REG_b"		\n\t"
1042
			"add %%"REG_a", %%"REG_b"	\n\t"
1029 1043

  
1030 1044
#ifdef HAVE_MMX2
1031 1045
			//FIXME Alignment
1032
			"movntq %%mm3, (%%ebx, %%eax, 2)\n\t"
1033
			"movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t"
1046
			"movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t"
1047
			"movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t"
1034 1048
#else
1035
			"movd %%mm3, (%%ebx, %%eax, 2)	\n\t"
1049
			"movd %%mm3, (%%"REG_b", %%"REG_a", 2)	\n\t"
1036 1050
			"psrlq $32, %%mm3		\n\t"
1037
			"movd %%mm3, 4(%%ebx, %%eax, 2)	\n\t"
1038
			"movd %%mm1, 8(%%ebx, %%eax, 2)	\n\t"
1051
			"movd %%mm3, 4(%%"REG_b", %%"REG_a", 2)	\n\t"
1052
			"movd %%mm1, 8(%%"REG_b", %%"REG_a", 2)	\n\t"
1039 1053
#endif
1040
			"addl $4, %%eax			\n\t"
1041
			"cmpl %5, %%eax			\n\t"
1054
			"add $4, %%"REG_a"		\n\t"
1055
			"cmp %5, %%"REG_a"		\n\t"
1042 1056
			" jb 1b				\n\t"
1043 1057

  
1044 1058
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
1045 1059
			"m" (yalpha1), "m" (uvalpha1)
1046
			: "%eax", "%ebx"
1060
			: "%"REG_a, "%"REG_b
1047 1061
			);
1048 1062
			break;
1049 1063
		case IMGFMT_BGR15:
......
1068 1082
			"por %%mm3, %%mm1		\n\t"
1069 1083
			"por %%mm1, %%mm0		\n\t"
1070 1084

  
1071
			MOVNTQ(%%mm0, (%4, %%eax, 2))
1085
			MOVNTQ(%%mm0, (%4, %%REGa, 2))
1072 1086

  
1073
			"addl $4, %%eax			\n\t"
1074
			"cmpl %5, %%eax			\n\t"
1087
			"add $4, %%"REG_a"		\n\t"
1088
			"cmp %5, %%"REG_a"		\n\t"
1075 1089
			" jb 1b				\n\t"
1076 1090

  
1077 1091
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1078 1092
			"m" (yalpha1), "m" (uvalpha1)
1079
			: "%eax"
1093
			: "%"REG_a
1080 1094
			);
1081 1095
			break;
1082 1096
		case IMGFMT_BGR16:
......
1101 1115
			"por %%mm3, %%mm1		\n\t"
1102 1116
			"por %%mm1, %%mm0		\n\t"
1103 1117

  
1104
			MOVNTQ(%%mm0, (%4, %%eax, 2))
1118
			MOVNTQ(%%mm0, (%4, %%REGa, 2))
1105 1119

  
1106
			"addl $4, %%eax			\n\t"
1107
			"cmpl %5, %%eax			\n\t"
1120
			"add $4, %%"REG_a"		\n\t"
1121
			"cmp %5, %%"REG_a"		\n\t"
1108 1122
			" jb 1b				\n\t"
1109 1123

  
1110 1124
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1111 1125
			"m" (yalpha1), "m" (uvalpha1)
1112
			: "%eax"
1126
			: "%"REG_a
1113 1127
			);
1114 1128
		break;
1115 1129
#endif
......
1188 1202
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1189 1203
	case IMGFMT_BGR32:
1190 1204
			asm volatile(
1191
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1192
				"movl %4, %%esp				\n\t"
1193
				YSCALEYUV2RGB(%%eax, %5)
1194
				WRITEBGR32(%%esp, 8280(%5), %%eax)
1195
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1205
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1206
				"mov %4, %%"REG_SP"			\n\t"
1207
				YSCALEYUV2RGB(%%REGa, %5)
1208
				WRITEBGR32(%%REGSP, 8280(%5), %%REGa)
1209
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1196 1210

  
1197 1211
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1198 1212
			"r" (&c->redDither)
1199
			: "%eax"
1213
			: "%"REG_a
1200 1214
			);
1201 1215
			return;
1202 1216
	case IMGFMT_BGR24:
1203 1217
			asm volatile(
1204
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1205
				"movl %4, %%esp			\n\t"
1206
				YSCALEYUV2RGB(%%eax, %5)
1207
				WRITEBGR24(%%esp, 8280(%5), %%eax)
1208
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1218
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1219
				"mov %4, %%"REG_SP"			\n\t"
1220
				YSCALEYUV2RGB(%%REGa, %5)
1221
				WRITEBGR24(%%REGSP, 8280(%5), %%REGa)
1222
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1209 1223
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1210 1224
			"r" (&c->redDither)
1211
			: "%eax"
1225
			: "%"REG_a
1212 1226
			);
1213 1227
			return;
1214 1228
	case IMGFMT_BGR15:
1215 1229
			asm volatile(
1216
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1217
				"movl %4, %%esp				\n\t"
1218
				YSCALEYUV2RGB(%%eax, %5)
1230
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1231
				"mov %4, %%"REG_SP"			\n\t"
1232
				YSCALEYUV2RGB(%%REGa, %5)
1219 1233
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1220 1234
#ifdef DITHER1XBPP
1221 1235
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
......
1223 1237
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1224 1238
#endif
1225 1239

  
1226
				WRITEBGR15(%%esp, 8280(%5), %%eax)
1227
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1240
				WRITEBGR15(%%REGSP, 8280(%5), %%REGa)
1241
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1228 1242

  
1229 1243
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1230 1244
			"r" (&c->redDither)
1231
			: "%eax"
1245
			: "%"REG_a
1232 1246
			);
1233 1247
			return;
1234 1248
	case IMGFMT_BGR16:
1235 1249
			asm volatile(
1236
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1237
				"movl %4, %%esp				\n\t"
1238
				YSCALEYUV2RGB(%%eax, %5)
1250
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1251
				"mov %4, %%"REG_SP"			\n\t"
1252
				YSCALEYUV2RGB(%%REGa, %5)
1239 1253
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1240 1254
#ifdef DITHER1XBPP
1241 1255
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
......
1243 1257
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1244 1258
#endif
1245 1259

  
1246
				WRITEBGR16(%%esp, 8280(%5), %%eax)
1247
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1260
				WRITEBGR16(%%REGSP, 8280(%5), %%REGa)
1261
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1248 1262
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1249 1263
			"r" (&c->redDither)
1250
			: "%eax"
1264
			: "%"REG_a
1251 1265
			);
1252 1266
			return;
1253 1267
	case IMGFMT_YUY2:
1254 1268
			asm volatile(
1255
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1256
				"movl %4, %%esp				\n\t"
1257
				YSCALEYUV2PACKED(%%eax, %5)
1258
				WRITEYUY2(%%esp, 8280(%5), %%eax)
1259
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1269
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1270
				"mov %4, %%"REG_SP"			\n\t"
1271
				YSCALEYUV2PACKED(%%REGa, %5)
1272
				WRITEYUY2(%%REGSP, 8280(%5), %%REGa)
1273
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1260 1274
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1261 1275
			"r" (&c->redDither)
1262
			: "%eax"
1276
			: "%"REG_a
1263 1277
			);
1264 1278
			return;
1265 1279
	default: break;
......
1293 1307
		{
1294 1308
		case IMGFMT_BGR32:
1295 1309
			asm volatile(
1296
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1297
				"movl %4, %%esp				\n\t"
1298
				YSCALEYUV2RGB1(%%eax, %5)
1299
				WRITEBGR32(%%esp, 8280(%5), %%eax)
1300
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1310
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1311
				"mov %4, %%"REG_SP"			\n\t"
1312
				YSCALEYUV2RGB1(%%REGa, %5)
1313
				WRITEBGR32(%%REGSP, 8280(%5), %%REGa)
1314
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1301 1315

  
1302 1316
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1303 1317
			"r" (&c->redDither)
1304
			: "%eax"
1318
			: "%"REG_a
1305 1319
			);
1306 1320
			return;
1307 1321
		case IMGFMT_BGR24:
1308 1322
			asm volatile(
1309
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1310
				"movl %4, %%esp				\n\t"
1311
				YSCALEYUV2RGB1(%%eax, %5)
1312
				WRITEBGR24(%%esp, 8280(%5), %%eax)
1313
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1323
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1324
				"mov %4, %%"REG_SP"			\n\t"
1325
				YSCALEYUV2RGB1(%%REGa, %5)
1326
				WRITEBGR24(%%REGSP, 8280(%5), %%REGa)
1327
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1314 1328

  
1315 1329
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1316 1330
			"r" (&c->redDither)
1317
			: "%eax"
1331
			: "%"REG_a
1318 1332
			);
1319 1333
			return;
1320 1334
		case IMGFMT_BGR15:
1321 1335
			asm volatile(
1322
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1323
				"movl %4, %%esp				\n\t"
1324
				YSCALEYUV2RGB1(%%eax, %5)
1336
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1337
				"mov %4, %%"REG_SP"			\n\t"
1338
				YSCALEYUV2RGB1(%%REGa, %5)
1325 1339
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1326 1340
#ifdef DITHER1XBPP
1327 1341
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1328 1342
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1329 1343
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1330 1344
#endif
1331
				WRITEBGR15(%%esp, 8280(%5), %%eax)
1332
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1345
				WRITEBGR15(%%REGSP, 8280(%5), %%REGa)
1346
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1333 1347

  
1334 1348
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1335 1349
			"r" (&c->redDither)
1336
			: "%eax"
1350
			: "%"REG_a
1337 1351
			);
1338 1352
			return;
1339 1353
		case IMGFMT_BGR16:
1340 1354
			asm volatile(
1341
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1342
				"movl %4, %%esp				\n\t"
1343
				YSCALEYUV2RGB1(%%eax, %5)
1355
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1356
				"mov %4, %%"REG_SP"			\n\t"
1357
				YSCALEYUV2RGB1(%%REGa, %5)
1344 1358
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1345 1359
#ifdef DITHER1XBPP
1346 1360
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
......
1348 1362
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1349 1363
#endif
1350 1364

  
1351
				WRITEBGR16(%%esp, 8280(%5), %%eax)
1352
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1365
				WRITEBGR16(%%REGSP, 8280(%5), %%REGa)
1366
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1353 1367

  
1354 1368
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1355 1369
			"r" (&c->redDither)
1356
			: "%eax"
1370
			: "%"REG_a
1357 1371
			);
1358 1372
			return;
1359 1373
		case IMGFMT_YUY2:
1360 1374
			asm volatile(
1361
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1362
				"movl %4, %%esp				\n\t"
1363
				YSCALEYUV2PACKED1(%%eax, %5)
1364
				WRITEYUY2(%%esp, 8280(%5), %%eax)
1365
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1375
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1376
				"mov %4, %%"REG_SP"			\n\t"
1377
				YSCALEYUV2PACKED1(%%REGa, %5)
1378
				WRITEYUY2(%%REGSP, 8280(%5), %%REGa)
1379
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1366 1380

  
1367 1381
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1368 1382
			"r" (&c->redDither)
1369
			: "%eax"
1383
			: "%"REG_a
1370 1384
			);
1371 1385
			return;
1372 1386
		}
......
1377 1391
		{
1378 1392
		case IMGFMT_BGR32:
1379 1393
			asm volatile(
1380
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1381
				"movl %4, %%esp				\n\t"
1382
				YSCALEYUV2RGB1b(%%eax, %5)
1383
				WRITEBGR32(%%esp, 8280(%5), %%eax)
1384
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1394
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1395
				"mov %4, %%"REG_SP"			\n\t"
1396
				YSCALEYUV2RGB1b(%%REGa, %5)
1397
				WRITEBGR32(%%REGSP, 8280(%5), %%REGa)
1398
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1385 1399

  
1386 1400
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1387 1401
			"r" (&c->redDither)
1388
			: "%eax"
1402
			: "%"REG_a
1389 1403
			);
1390 1404
			return;
1391 1405
		case IMGFMT_BGR24:
1392 1406
			asm volatile(
1393
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1394
				"movl %4, %%esp				\n\t"
1395
				YSCALEYUV2RGB1b(%%eax, %5)
1396
				WRITEBGR24(%%esp, 8280(%5), %%eax)
1397
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1407
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1408
				"mov %4, %%"REG_SP"			\n\t"
1409
				YSCALEYUV2RGB1b(%%REGa, %5)
1410
				WRITEBGR24(%%REGSP, 8280(%5), %%REGa)
1411
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1398 1412

  
1399 1413
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1400 1414
			"r" (&c->redDither)
1401
			: "%eax"
1415
			: "%"REG_a
1402 1416
			);
1403 1417
			return;
1404 1418
		case IMGFMT_BGR15:
1405 1419
			asm volatile(
1406
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1407
				"movl %4, %%esp				\n\t"
1408
				YSCALEYUV2RGB1b(%%eax, %5)
1420
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1421
				"mov %4, %%"REG_SP"			\n\t"
1422
				YSCALEYUV2RGB1b(%%REGa, %5)
1409 1423
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1410 1424
#ifdef DITHER1XBPP
1411 1425
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1412 1426
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1413 1427
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1414 1428
#endif
1415
				WRITEBGR15(%%esp, 8280(%5), %%eax)
1416
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1429
				WRITEBGR15(%%REGSP, 8280(%5), %%REGa)
1430
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1417 1431

  
1418 1432
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1419 1433
			"r" (&c->redDither)
1420
			: "%eax"
1434
			: "%"REG_a
1421 1435
			);
1422 1436
			return;
1423 1437
		case IMGFMT_BGR16:
1424 1438
			asm volatile(
1425
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1426
				"movl %4, %%esp				\n\t"
1427
				YSCALEYUV2RGB1b(%%eax, %5)
1439
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1440
				"mov %4, %%"REG_SP"			\n\t"
1441
				YSCALEYUV2RGB1b(%%REGa, %5)
1428 1442
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1429 1443
#ifdef DITHER1XBPP
1430 1444
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
......
1432 1446
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1433 1447
#endif
1434 1448

  
1435
				WRITEBGR16(%%esp, 8280(%5), %%eax)
1436
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1449
				WRITEBGR16(%%REGSP, 8280(%5), %%REGa)
1450
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1437 1451

  
1438 1452
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1439 1453
			"r" (&c->redDither)
1440
			: "%eax"
1454
			: "%"REG_a
1441 1455
			);
1442 1456
			return;
1443 1457
		case IMGFMT_YUY2:
1444 1458
			asm volatile(
1445
				"movl %%esp, "ESP_OFFSET"(%5)		\n\t"
1446
				"movl %4, %%esp				\n\t"
1447
				YSCALEYUV2PACKED1b(%%eax, %5)
1448
				WRITEYUY2(%%esp, 8280(%5), %%eax)
1449
				"movl "ESP_OFFSET"(%5), %%esp		\n\t"
1459
				"mov %%"REG_SP", "ESP_OFFSET"(%5)	\n\t"
1460
				"mov %4, %%"REG_SP"			\n\t"
1461
				YSCALEYUV2PACKED1b(%%REGa, %5)
1462
				WRITEYUY2(%%REGSP, 8280(%5), %%REGa)
1463
				"mov "ESP_OFFSET"(%5), %%"REG_SP"	\n\t"
1450 1464

  
1451 1465
			:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest),
1452 1466
			"r" (&c->redDither)
1453
			: "%eax"
1467
			: "%"REG_a
1454 1468
			);
1455 1469
			return;
1456 1470
		}
......
1471 1485
#ifdef HAVE_MMX
1472 1486
	asm volatile(
1473 1487
		"movq "MANGLE(bm01010101)", %%mm2\n\t"
1474
		"movl %0, %%eax			\n\t"
1488
		"mov %0, %%"REG_a"		\n\t"
1475 1489
		"1:				\n\t"
1476
		"movq (%1, %%eax,2), %%mm0	\n\t"
1477
		"movq 8(%1, %%eax,2), %%mm1	\n\t"
1490
		"movq (%1, %%"REG_a",2), %%mm0	\n\t"
1491
		"movq 8(%1, %%"REG_a",2), %%mm1	\n\t"
1478 1492
		"pand %%mm2, %%mm0		\n\t"
1479 1493
		"pand %%mm2, %%mm1		\n\t"
1480 1494
		"packuswb %%mm1, %%mm0		\n\t"
1481
		"movq %%mm0, (%2, %%eax)	\n\t"
1482
		"addl $8, %%eax			\n\t"
1495
		"movq %%mm0, (%2, %%"REG_a")	\n\t"
1496
		"add $8, %%"REG_a"		\n\t"
1483 1497
		" js 1b				\n\t"
1484
		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
1485
		: "%eax"
1498
		: : "g" ((long)-width), "r" (src+width*2), "r" (dst+width)
1499
		: "%"REG_a
1486 1500
	);
1487 1501
#else
1488 1502
	int i;
......
1496 1510
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1497 1511
	asm volatile(
1498 1512
		"movq "MANGLE(bm01010101)", %%mm4\n\t"
1499
		"movl %0, %%eax			\n\t"
1513
		"mov %0, %%"REG_a"		\n\t"
1500 1514
		"1:				\n\t"
1501
		"movq (%1, %%eax,4), %%mm0	\n\t"
1502
		"movq 8(%1, %%eax,4), %%mm1	\n\t"
1503
		"movq (%2, %%eax,4), %%mm2	\n\t"
1504
		"movq 8(%2, %%eax,4), %%mm3	\n\t"
1515
		"movq (%1, %%"REG_a",4), %%mm0	\n\t"
1516
		"movq 8(%1, %%"REG_a",4), %%mm1	\n\t"
1517
		"movq (%2, %%"REG_a",4), %%mm2	\n\t"
1518
		"movq 8(%2, %%"REG_a",4), %%mm3	\n\t"
1505 1519
		PAVGB(%%mm2, %%mm0)
1506 1520
		PAVGB(%%mm3, %%mm1)
1507 1521
		"psrlw $8, %%mm0		\n\t"
......
1512 1526
		"pand %%mm4, %%mm1		\n\t"
1513 1527
		"packuswb %%mm0, %%mm0		\n\t"
1514 1528
		"packuswb %%mm1, %%mm1		\n\t"
1515
		"movd %%mm0, (%4, %%eax)	\n\t"
1516
		"movd %%mm1, (%3, %%eax)	\n\t"
1517
		"addl $4, %%eax			\n\t"
1529
		"movd %%mm0, (%4, %%"REG_a")	\n\t"
1530
		"movd %%mm1, (%3, %%"REG_a")	\n\t"
1531
		"add $4, %%"REG_a"		\n\t"
1518 1532
		" js 1b				\n\t"
1519
		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1520
		: "%eax"
1533
		: : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1534
		: "%"REG_a
1521 1535
	);
1522 1536
#else
1523 1537
	int i;
......
1534 1548
{
1535 1549
#ifdef HAVE_MMX
1536 1550
	asm volatile(
1537
		"movl %0, %%eax			\n\t"
1551
		"mov %0, %%"REG_a"		\n\t"
1538 1552
		"1:				\n\t"
1539
		"movq (%1, %%eax,2), %%mm0	\n\t"
1540
		"movq 8(%1, %%eax,2), %%mm1	\n\t"
1553
		"movq (%1, %%"REG_a",2), %%mm0	\n\t"
1554
		"movq 8(%1, %%"REG_a",2), %%mm1	\n\t"
1541 1555
		"psrlw $8, %%mm0		\n\t"
1542 1556
		"psrlw $8, %%mm1		\n\t"
1543 1557
		"packuswb %%mm1, %%mm0		\n\t"
1544
		"movq %%mm0, (%2, %%eax)	\n\t"
1545
		"addl $8, %%eax			\n\t"
1558
		"movq %%mm0, (%2, %%"REG_a")	\n\t"
1559
		"add $8, %%"REG_a"		\n\t"
1546 1560
		" js 1b				\n\t"
1547
		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
1548
		: "%eax"
1561
		: : "g" ((long)-width), "r" (src+width*2), "r" (dst+width)
1562
		: "%"REG_a
1549 1563
	);
1550 1564
#else
1551 1565
	int i;
......
1559 1573
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1560 1574
	asm volatile(
1561 1575
		"movq "MANGLE(bm01010101)", %%mm4\n\t"
1562
		"movl %0, %%eax			\n\t"
1576
		"mov %0, %%"REG_a"		\n\t"
1563 1577
		"1:				\n\t"
1564
		"movq (%1, %%eax,4), %%mm0	\n\t"
1565
		"movq 8(%1, %%eax,4), %%mm1	\n\t"
1566
		"movq (%2, %%eax,4), %%mm2	\n\t"
1567
		"movq 8(%2, %%eax,4), %%mm3	\n\t"
1578
		"movq (%1, %%"REG_a",4), %%mm0	\n\t"
1579
		"movq 8(%1, %%"REG_a",4), %%mm1	\n\t"
1580
		"movq (%2, %%"REG_a",4), %%mm2	\n\t"
1581
		"movq 8(%2, %%"REG_a",4), %%mm3	\n\t"
1568 1582
		PAVGB(%%mm2, %%mm0)
1569 1583
		PAVGB(%%mm3, %%mm1)
1570 1584
		"pand %%mm4, %%mm0		\n\t"
......
1575 1589
		"pand %%mm4, %%mm1		\n\t"
1576 1590
		"packuswb %%mm0, %%mm0		\n\t"
1577 1591
		"packuswb %%mm1, %%mm1		\n\t"
1578
		"movd %%mm0, (%4, %%eax)	\n\t"
1579
		"movd %%mm1, (%3, %%eax)	\n\t"
1580
		"addl $4, %%eax			\n\t"
1592
		"movd %%mm0, (%4, %%"REG_a")	\n\t"
1593
		"movd %%mm1, (%3, %%"REG_a")	\n\t"
1594
		"add $4, %%"REG_a"		\n\t"
1581 1595
		" js 1b				\n\t"
1582
		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1583
		: "%eax"
1596
		: : "g" ((long)-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
1597
		: "%"REG_a
1584 1598
	);
1585 1599
#else
1586 1600
	int i;
......
1635 1649
{
1636 1650
#ifdef HAVE_MMX
1637 1651
	asm volatile(
1638
		"movl %2, %%eax			\n\t"
1652
		"mov %2, %%"REG_a"		\n\t"
1639 1653
		"movq "MANGLE(bgr2YCoeff)", %%mm6		\n\t"
1640 1654
		"movq "MANGLE(w1111)", %%mm5		\n\t"
1641 1655
		"pxor %%mm7, %%mm7		\n\t"
1642
		"leal (%%eax, %%eax, 2), %%ebx	\n\t"
1656
		"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"\n\t"
1643 1657
		".balign 16			\n\t"
1644 1658
		"1:				\n\t"
1645
		PREFETCH" 64(%0, %%ebx)		\n\t"
1646
		"movd (%0, %%ebx), %%mm0	\n\t"
1647
		"movd 3(%0, %%ebx), %%mm1	\n\t"
1659
		PREFETCH" 64(%0, %%"REG_b")	\n\t"
1660
		"movd (%0, %%"REG_b"), %%mm0	\n\t"
1661
		"movd 3(%0, %%"REG_b"), %%mm1	\n\t"
1648 1662
		"punpcklbw %%mm7, %%mm0		\n\t"
1649 1663
		"punpcklbw %%mm7, %%mm1		\n\t"
1650
		"movd 6(%0, %%ebx), %%mm2	\n\t"
1651
		"movd 9(%0, %%ebx), %%mm3	\n\t"
1664
		"movd 6(%0, %%"REG_b"), %%mm2	\n\t"
1665
		"movd 9(%0, %%"REG_b"), %%mm3	\n\t"
1652 1666
		"punpcklbw %%mm7, %%mm2		\n\t"
1653 1667
		"punpcklbw %%mm7, %%mm3		\n\t"
1654 1668
		"pmaddwd %%mm6, %%mm0		\n\t"
......
1668 1682
		"packssdw %%mm2, %%mm0		\n\t"
1669 1683
		"psraw $7, %%mm0		\n\t"
1670 1684

  
1671
		"movd 12(%0, %%ebx), %%mm4	\n\t"
1672
		"movd 15(%0, %%ebx), %%mm1	\n\t"
1685
		"movd 12(%0, %%"REG_b"), %%mm4	\n\t"
1686
		"movd 15(%0, %%"REG_b"), %%mm1	\n\t"
1673 1687
		"punpcklbw %%mm7, %%mm4		\n\t"
1674 1688
		"punpcklbw %%mm7, %%mm1		\n\t"
1675
		"movd 18(%0, %%ebx), %%mm2	\n\t"
1676
		"movd 21(%0, %%ebx), %%mm3	\n\t"
1689
		"movd 18(%0, %%"REG_b"), %%mm2	\n\t"
1690
		"movd 21(%0, %%"REG_b"), %%mm3	\n\t"
1677 1691
		"punpcklbw %%mm7, %%mm2		\n\t"
1678 1692
		"punpcklbw %%mm7, %%mm3		\n\t"
1679 1693
		"pmaddwd %%mm6, %%mm4		\n\t"
......
1690 1704
		"packssdw %%mm3, %%mm2		\n\t"
1691 1705
		"pmaddwd %%mm5, %%mm4		\n\t"
1692 1706
		"pmaddwd %%mm5, %%mm2		\n\t"
1693
		"addl $24, %%ebx		\n\t"
1707
		"add $24, %%"REG_b"		\n\t"
1694 1708
		"packssdw %%mm2, %%mm4		\n\t"
1695 1709
		"psraw $7, %%mm4		\n\t"
1696 1710

  
1697 1711
		"packuswb %%mm4, %%mm0		\n\t"
1698 1712
		"paddusb "MANGLE(bgr2YOffset)", %%mm0	\n\t"
1699 1713

  
1700
		"movq %%mm0, (%1, %%eax)	\n\t"
1701
		"addl $8, %%eax			\n\t"
1714
		"movq %%mm0, (%1, %%"REG_a")	\n\t"
1715
		"add $8, %%"REG_a"		\n\t"
1702 1716
		" js 1b				\n\t"
1703
		: : "r" (src+width*3), "r" (dst+width), "g" (-width)
1704
		: "%eax", "%ebx"
1717
		: : "r" (src+width*3), "r" (dst+width), "g" ((long)-width)
1718
		: "%"REG_a, "%"REG_b
1705 1719
	);
1706 1720
#else
1707 1721
	int i;
......
1720 1734
{
1721 1735
#ifdef HAVE_MMX
1722 1736
	asm volatile(
1723
		"movl %4, %%eax			\n\t"
1737
		"mov %4, %%"REG_a"		\n\t"
1724 1738
		"movq "MANGLE(w1111)", %%mm5		\n\t"
1725 1739
		"movq "MANGLE(bgr2UCoeff)", %%mm6		\n\t"
1726 1740
		"pxor %%mm7, %%mm7		\n\t"
1727
		"leal (%%eax, %%eax, 2), %%ebx	\n\t"
1728
		"addl %%ebx, %%ebx		\n\t"
1741
		"lea (%%"REG_a", %%"REG_a", 2), %%"REG_b"	\n\t"
1742
		"add %%"REG_b", %%"REG_b"	\n\t"
1729 1743
		".balign 16			\n\t"
1730 1744
		"1:				\n\t"
1731
		PREFETCH" 64(%0, %%ebx)		\n\t"
1732
		PREFETCH" 64(%1, %%ebx)		\n\t"
1745
		PREFETCH" 64(%0, %%"REG_b")	\n\t"
1746
		PREFETCH" 64(%1, %%"REG_b")	\n\t"
1733 1747
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1734
		"movq (%0, %%ebx), %%mm0	\n\t"
1735
		"movq (%1, %%ebx), %%mm1	\n\t"
1736
		"movq 6(%0, %%ebx), %%mm2	\n\t"
1737
		"movq 6(%1, %%ebx), %%mm3	\n\t"
1748
		"movq (%0, %%"REG_b"), %%mm0	\n\t"
1749
		"movq (%1, %%"REG_b"), %%mm1	\n\t"
1750
		"movq 6(%0, %%"REG_b"), %%mm2	\n\t"
1751
		"movq 6(%1, %%"REG_b"), %%mm3	\n\t"
1738 1752
		PAVGB(%%mm1, %%mm0)
1739 1753
		PAVGB(%%mm3, %%mm2)
1740 1754
		"movq %%mm0, %%mm1		\n\t"
......
1746 1760
		"punpcklbw %%mm7, %%mm0		\n\t"
1747 1761
		"punpcklbw %%mm7, %%mm2		\n\t"
1748 1762
#else
1749
		"movd (%0, %%ebx), %%mm0	\n\t"
1750
		"movd (%1, %%ebx), %%mm1	\n\t"
1751
		"movd 3(%0, %%ebx), %%mm2	\n\t"
1752
		"movd 3(%1, %%ebx), %%mm3	\n\t"
1763
		"movd (%0, %%"REG_b"), %%mm0	\n\t"
1764
		"movd (%1, %%"REG_b"), %%mm1	\n\t"
1765
		"movd 3(%0, %%"REG_b"), %%mm2	\n\t"
1766
		"movd 3(%1, %%"REG_b"), %%mm3	\n\t"
1753 1767
		"punpcklbw %%mm7, %%mm0		\n\t"
1754 1768
		"punpcklbw %%mm7, %%mm1		\n\t"
1755 1769
		"punpcklbw %%mm7, %%mm2		\n\t"
......
1757 1771
		"paddw %%mm1, %%mm0		\n\t"
1758 1772
		"paddw %%mm3, %%mm2		\n\t"
1759 1773
		"paddw %%mm2, %%mm0		\n\t"
1760
		"movd 6(%0, %%ebx), %%mm4	\n\t"
1761
		"movd 6(%1, %%ebx), %%mm1	\n\t"
1762
		"movd 9(%0, %%ebx), %%mm2	\n\t"
1763
		"movd 9(%1, %%ebx), %%mm3	\n\t"
1774
		"movd 6(%0, %%"REG_b"), %%mm4	\n\t"
1775
		"movd 6(%1, %%"REG_b"), %%mm1	\n\t"
1776
		"movd 9(%0, %%"REG_b"), %%mm2	\n\t"
1777
		"movd 9(%1, %%"REG_b"), %%mm3	\n\t"
1764 1778
		"punpcklbw %%mm7, %%mm4		\n\t"
1765 1779
		"punpcklbw %%mm7, %%mm1		\n\t"
1766 1780
		"punpcklbw %%mm7, %%mm2		\n\t"
......
1792 1806
		"psraw $7, %%mm0		\n\t"
1793 1807

  
1794 1808
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1795
		"movq 12(%0, %%ebx), %%mm4	\n\t"
1796
		"movq 12(%1, %%ebx), %%mm1	\n\t"
1797
		"movq 18(%0, %%ebx), %%mm2	\n\t"
1798
		"movq 18(%1, %%ebx), %%mm3	\n\t"
1809
		"movq 12(%0, %%"REG_b"), %%mm4	\n\t"
1810
		"movq 12(%1, %%"REG_b"), %%mm1	\n\t"
1811
		"movq 18(%0, %%"REG_b"), %%mm2	\n\t"
1812
		"movq 18(%1, %%"REG_b"), %%mm3	\n\t"
1799 1813
		PAVGB(%%mm1, %%mm4)
1800 1814
		PAVGB(%%mm3, %%mm2)
1801 1815
		"movq %%mm4, %%mm1		\n\t"
......
1807 1821
		"punpcklbw %%mm7, %%mm4		\n\t"
1808 1822
		"punpcklbw %%mm7, %%mm2		\n\t"
1809 1823
#else
1810
		"movd 12(%0, %%ebx), %%mm4	\n\t"
1811
		"movd 12(%1, %%ebx), %%mm1	\n\t"
1812
		"movd 15(%0, %%ebx), %%mm2	\n\t"
1813
		"movd 15(%1, %%ebx), %%mm3	\n\t"
1824
		"movd 12(%0, %%"REG_b"), %%mm4	\n\t"
1825
		"movd 12(%1, %%"REG_b"), %%mm1	\n\t"
1826
		"movd 15(%0, %%"REG_b"), %%mm2	\n\t"
1827
		"movd 15(%1, %%"REG_b"), %%mm3	\n\t"
1814 1828
		"punpcklbw %%mm7, %%mm4		\n\t"
1815 1829
		"punpcklbw %%mm7, %%mm1		\n\t"
1816 1830
		"punpcklbw %%mm7, %%mm2		\n\t"
......
1818 1832
		"paddw %%mm1, %%mm4		\n\t"
1819 1833
		"paddw %%mm3, %%mm2		\n\t"
1820 1834
		"paddw %%mm2, %%mm4		\n\t"
1821
		"movd 18(%0, %%ebx), %%mm5	\n\t"
1822
		"movd 18(%1, %%ebx), %%mm1	\n\t"
1823
		"movd 21(%0, %%ebx), %%mm2	\n\t"
1824
		"movd 21(%1, %%ebx), %%mm3	\n\t"
1835
		"movd 18(%0, %%"REG_b"), %%mm5	\n\t"
1836
		"movd 18(%1, %%"REG_b"), %%mm1	\n\t"
1837
		"movd 21(%0, %%"REG_b"), %%mm2	\n\t"
1838
		"movd 21(%1, %%"REG_b"), %%mm3	\n\t"
1825 1839
		"punpcklbw %%mm7, %%mm5		\n\t"
1826 1840
		"punpcklbw %%mm7, %%mm1		\n\t"
1827 1841
		"punpcklbw %%mm7, %%mm2		\n\t"
......
1850 1864
		"packssdw %%mm3, %%mm1		\n\t"
1851 1865
		"pmaddwd %%mm5, %%mm4		\n\t"
1852 1866
		"pmaddwd %%mm5, %%mm1		\n\t"
1853
		"addl $24, %%ebx		\n\t"
1867
		"add $24, %%"REG_b"		\n\t"
1854 1868
		"packssdw %%mm1, %%mm4		\n\t" // V3 V2 U3 U2
1855 1869
		"psraw $7, %%mm4		\n\t"
1856 1870
		
......
1860 1874
		"packsswb %%mm1, %%mm0		\n\t"
1861 1875
		"paddb "MANGLE(bgr2UVOffset)", %%mm0	\n\t"
1862 1876

  
1863
		"movd %%mm0, (%2, %%eax)	\n\t"
1877
		"movd %%mm0, (%2, %%"REG_a")	\n\t"
1864 1878
		"punpckhdq %%mm0, %%mm0		\n\t"
1865
		"movd %%mm0, (%3, %%eax)	\n\t"
1866
		"addl $4, %%eax			\n\t"
1879
		"movd %%mm0, (%3, %%"REG_a")	\n\t"
1880
		"add $4, %%"REG_a"		\n\t"
1867 1881
		" js 1b				\n\t"
1868
		: : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
1869
		: "%eax", "%ebx"
1882
		: : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" ((long)-width)
1883
		: "%"REG_a, "%"REG_b
1870 1884
	);
1871 1885
#else
1872 1886
	int i;
......
2024 2038
	assert(filterSize % 4 == 0 && filterSize>0);
2025 2039
	if(filterSize==4) // allways true for upscaling, sometimes for down too
2026 2040
	{
2027
		int counter= -2*dstW;
2041
		long counter= -2*dstW;
2028 2042
		filter-= counter*2;
2029 2043
		filterPos-= counter/2;
2030 2044
		dst-= counter/2;
2031 2045
		asm volatile(
2032 2046
			"pxor %%mm7, %%mm7		\n\t"
2033 2047
			"movq "MANGLE(w02)", %%mm6	\n\t"
2034
			"pushl %%ebp			\n\t" // we use 7 regs here ...
2035
			"movl %%eax, %%ebp		\n\t"
2048
			"push %%"REG_BP"		\n\t" // we use 7 regs here ...
2049
			"mov %%"REG_a", %%"REG_BP"	\n\t"
2036 2050
			".balign 16			\n\t"
2037 2051
			"1:				\n\t"
2038
			"movzwl (%2, %%ebp), %%eax	\n\t"
2039
			"movzwl 2(%2, %%ebp), %%ebx	\n\t"
2040
			"movq (%1, %%ebp, 4), %%mm1	\n\t"
2041
			"movq 8(%1, %%ebp, 4), %%mm3	\n\t"
2042
			"movd (%3, %%eax), %%mm0	\n\t"
2043
			"movd (%3, %%ebx), %%mm2	\n\t"
2052
			"movzxw (%2, %%"REG_BP"), %%"REG_a"\n\t"
2053
			"movzxw 2(%2, %%"REG_BP"), %%"REG_b"\n\t"
2054
			"movq (%1, %%"REG_BP", 4), %%mm1\n\t"
2055
			"movq 8(%1, %%"REG_BP", 4), %%mm3\n\t"
2056
			"movd (%3, %%"REG_a"), %%mm0	\n\t"
2057
			"movd (%3, %%"REG_b"), %%mm2	\n\t"
2044 2058
			"punpcklbw %%mm7, %%mm0		\n\t"
2045 2059
			"punpcklbw %%mm7, %%mm2		\n\t"
2046 2060
			"pmaddwd %%mm1, %%mm0		\n\t"
......
2050 2064
			"packssdw %%mm3, %%mm0		\n\t"
2051 2065
			"pmaddwd %%mm6, %%mm0		\n\t"
2052 2066
			"packssdw %%mm0, %%mm0		\n\t"
2053
			"movd %%mm0, (%4, %%ebp)	\n\t"
2054
			"addl $4, %%ebp			\n\t"
2067
			"movd %%mm0, (%4, %%"REG_BP")	\n\t"
2068
			"add $4, %%"REG_BP"		\n\t"
2055 2069
			" jnc 1b			\n\t"
2056 2070

  
2057
			"popl %%ebp			\n\t"
2071
			"pop %%"REG_BP"			\n\t"
2058 2072
			: "+a" (counter)
2059 2073
			: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2060
			: "%ebx"
2074
			: "%"REG_b
2061 2075
		);
2062 2076
	}
2063 2077
	else if(filterSize==8)
2064 2078
	{
2065
		int counter= -2*dstW;
2079
		long counter= -2*dstW;
2066 2080
		filter-= counter*4;
2067 2081
		filterPos-= counter/2;
2068 2082
		dst-= counter/2;
2069 2083
		asm volatile(
2070 2084
			"pxor %%mm7, %%mm7		\n\t"
2071 2085
			"movq "MANGLE(w02)", %%mm6	\n\t"
2072
			"pushl %%ebp			\n\t" // we use 7 regs here ...
2073
			"movl %%eax, %%ebp		\n\t"
2086
			"push %%"REG_BP"		\n\t" // we use 7 regs here ...
2087
			"mov %%"REG_a", %%"REG_BP"	\n\t"
2074 2088
			".balign 16			\n\t"
2075 2089
			"1:				\n\t"
2076
			"movzwl (%2, %%ebp), %%eax	\n\t"
2077
			"movzwl 2(%2, %%ebp), %%ebx	\n\t"
2078
			"movq (%1, %%ebp, 8), %%mm1	\n\t"
2079
			"movq 16(%1, %%ebp, 8), %%mm3	\n\t"
2080
			"movd (%3, %%eax), %%mm0	\n\t"
2081
			"movd (%3, %%ebx), %%mm2	\n\t"
2090
			"movzxw (%2, %%"REG_BP"), %%"REG_a"\n\t"
2091
			"movzxw 2(%2, %%"REG_BP"), %%"REG_b"\n\t"
2092
			"movq (%1, %%"REG_BP", 8), %%mm1\n\t"
2093
			"movq 16(%1, %%"REG_BP", 8), %%mm3\n\t"
2094
			"movd (%3, %%"REG_a"), %%mm0	\n\t"
2095
			"movd (%3, %%"REG_b"), %%mm2	\n\t"
2082 2096
			"punpcklbw %%mm7, %%mm0		\n\t"
2083 2097
			"punpcklbw %%mm7, %%mm2		\n\t"
2084 2098
			"pmaddwd %%mm1, %%mm0		\n\t"
2085 2099
			"pmaddwd %%mm2, %%mm3		\n\t"
2086 2100

  
2087
			"movq 8(%1, %%ebp, 8), %%mm1	\n\t"
2088
			"movq 24(%1, %%ebp, 8), %%mm5	\n\t"
2089
			"movd 4(%3, %%eax), %%mm4	\n\t"
2090
			"movd 4(%3, %%ebx), %%mm2	\n\t"
2101
			"movq 8(%1, %%"REG_BP", 8), %%mm1\n\t"
2102
			"movq 24(%1, %%"REG_BP", 8), %%mm5\n\t"
2103
			"movd 4(%3, %%"REG_a"), %%mm4	\n\t"
2104
			"movd 4(%3, %%"REG_b"), %%mm2	\n\t"
2091 2105
			"punpcklbw %%mm7, %%mm4		\n\t"
2092 2106
			"punpcklbw %%mm7, %%mm2		\n\t"
2093 2107
			"pmaddwd %%mm1, %%mm4		\n\t"
......
2100 2114
			"packssdw %%mm3, %%mm0		\n\t"
2101 2115
			"pmaddwd %%mm6, %%mm0		\n\t"
2102 2116
			"packssdw %%mm0, %%mm0		\n\t"
2103
			"movd %%mm0, (%4, %%ebp)	\n\t"
2104
			"addl $4, %%ebp			\n\t"
2117
			"movd %%mm0, (%4, %%"REG_BP")	\n\t"
2118
			"add $4, %%"REG_BP"		\n\t"
2105 2119
			" jnc 1b			\n\t"
2106 2120

  
2107
			"popl %%ebp			\n\t"
2121
			"pop %%"REG_BP"			\n\t"
2108 2122
			: "+a" (counter)
2109 2123
			: "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2110
			: "%ebx"
2124
			: "%"REG_b
2111 2125
		);
2112 2126
	}
2113 2127
	else
2114 2128
	{
2115
		int counter= -2*dstW;
2129
		long counter= -2*dstW;
2116 2130
//		filter-= counter*filterSize/2;
2117 2131
		filterPos-= counter/2;
2118 2132
		dst-= counter/2;
......
2121 2135
			"movq "MANGLE(w02)", %%mm6	\n\t"
2122 2136
			".balign 16			\n\t"
2123 2137
			"1:				\n\t"
2124
			"movl %2, %%ecx			\n\t"
2125
			"movzwl (%%ecx, %0), %%eax	\n\t"
2126
			"movzwl 2(%%ecx, %0), %%ebx	\n\t"
2127
			"movl %5, %%ecx			\n\t"
2138
			"mov %2, %%"REG_c"		\n\t"
2139
			"movzxw (%%"REG_c", %0), %%"REG_a"\n\t"
2140
			"movzxw 2(%%"REG_c", %0), %%"REG_b"\n\t"
2141
			"mov %5, %%"REG_c"		\n\t"
2128 2142
			"pxor %%mm4, %%mm4		\n\t"
2129 2143
			"pxor %%mm5, %%mm5		\n\t"
2130 2144
			"2:				\n\t"
2131 2145
			"movq (%1), %%mm1		\n\t"
2132 2146
			"movq (%1, %6), %%mm3		\n\t"
2133
			"movd (%%ecx, %%eax), %%mm0	\n\t"
2134
			"movd (%%ecx, %%ebx), %%mm2	\n\t"
2147
			"movd (%%"REG_c", %%"REG_a"), %%mm0\n\t"
2148
			"movd (%%"REG_c", %%"REG_b"), %%mm2\n\t"
2135 2149
			"punpcklbw %%mm7, %%mm0		\n\t"
2136 2150
			"punpcklbw %%mm7, %%mm2		\n\t"
2137 2151
			"pmaddwd %%mm1, %%mm0		\n\t"
2138 2152
			"pmaddwd %%mm2, %%mm3		\n\t"
2139 2153
			"paddd %%mm3, %%mm5		\n\t"
2140 2154
			"paddd %%mm0, %%mm4		\n\t"
2141
			"addl $8, %1			\n\t"
2142
			"addl $4, %%ecx			\n\t"
2143
			"cmpl %4, %%ecx			\n\t"
2155
			"add $8, %1			\n\t"
2156
			"add $4, %%"REG_c"		\n\t"
2157
			"cmp %4, %%"REG_c"		\n\t"
2144 2158
			" jb 2b				\n\t"
2145
			"addl %6, %1			\n\t"
2159
			"add %6, %1			\n\t"
2146 2160
			"psrad $8, %%mm4		\n\t"
2147 2161
			"psrad $8, %%mm5		\n\t"
2148 2162
			"packssdw %%mm5, %%mm4		\n\t"
2149 2163
			"pmaddwd %%mm6, %%mm4		\n\t"
2150 2164
			"packssdw %%mm4, %%mm4		\n\t"
2151
			"movl %3, %%eax			\n\t"
2152
			"movd %%mm4, (%%eax, %0)	\n\t"
2153
			"addl $4, %0			\n\t"
2165
			"mov %3, %%"REG_a"		\n\t"
2166
			"movd %%mm4, (%%"REG_a", %0)	\n\t"
2167
			"add $4, %0			\n\t"
2154 2168
			" jnc 1b			\n\t"
2155 2169

  
2156 2170
			: "+r" (counter), "+r" (filter)
2157 2171
			: "m" (filterPos), "m" (dst), "m"(src+filterSize),
2158
			  "m" (src), "r" (filterSize*2)
2159
			: "%ebx", "%eax", "%ecx"
2172
			  "m" (src), "r" ((long)filterSize*2)
2173
			: "%"REG_b, "%"REG_a, "%"REG_c
2160 2174
		);
2161 2175
	}
2162 2176
#else
......
2241 2255
    }
2242 2256
    else // Fast Bilinear upscale / crap downscale
2243 2257
    {
2244
#ifdef ARCH_X86
2258
#if defined(ARCH_X86) || defined(ARCH_X86_64)
2245 2259
#ifdef HAVE_MMX2
2246 2260
	int i;
2247 2261
	if(canMMX2BeUsed)
2248 2262
	{
2249 2263
		asm volatile(
2250 2264
			"pxor %%mm7, %%mm7		\n\t"
2251
			"movl %0, %%ecx			\n\t"
2252
			"movl %1, %%edi			\n\t"
2253
			"movl %2, %%edx			\n\t"
2254
			"movl %3, %%ebx			\n\t"
2255
			"xorl %%eax, %%eax		\n\t" // i
2256
			PREFETCH" (%%ecx)		\n\t"
2257
			PREFETCH" 32(%%ecx)		\n\t"
2258
			PREFETCH" 64(%%ecx)		\n\t"
2265
			"mov %0, %%"REG_c"		\n\t"
2266
			"mov %1, %%"REG_D"		\n\t"
2267
			"mov %2, %%"REG_d"		\n\t"
2268
			"mov %3, %%"REG_b"		\n\t"
2269
			"xor %%"REG_a", %%"REG_a"	\n\t" // i
2270
			PREFETCH" (%%"REG_c")		\n\t"
... This diff was truncated because it exceeds the maximum size that can be displayed.
