Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale_template.c @ 911406f2

History | View | Annotate | Download (105 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 6e1c66bc Aurelien Jacobs
/*
 * Remove any earlier definitions of the instruction-helper macros so that
 * the definitions which follow do not clash with them.
 * Presumably this file is included more than once with different HAVE_*
 * settings -- confirm against the including file.
 */
#undef REAL_MOVNTQ
25 541c4eb9 Michael Niedermayer
#undef MOVNTQ
26 7d7f78b5 Michael Niedermayer
#undef PAVGB
27 48a05cec Michael Niedermayer
#undef PREFETCH
28
#undef PREFETCHW
29
#undef EMMS
30
#undef SFENCE
31
32
/*
 * EMMS: mnemonic string for the "leave MMX state" instruction.
 * "femms" is chosen when HAVE_3DNOW is defined, plain "emms" otherwise.
 */
#ifdef HAVE_3DNOW
33
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
34
#define EMMS     "femms"
35
#else
36
#define EMMS     "emms"
37
#endif
38
39
/*
 * PREFETCH / PREFETCHW: mnemonic strings for the prefetch instructions.
 *   HAVE_3DNOW : "prefetch"    / "prefetchw"
 *   HAVE_MMX2  : "prefetchnta" / "prefetcht0"
 *   otherwise  : " # nop", i.e. no instruction is emitted (assuming '#'
 *                starts a comment in the target assembler -- confirm).
 * HAVE_3DNOW takes precedence when both feature macros are defined.
 */
#ifdef HAVE_3DNOW
40
#define PREFETCH  "prefetch"
41
#define PREFETCHW "prefetchw"
42
#elif defined ( HAVE_MMX2 )
43
#define PREFETCH "prefetchnta"
44
#define PREFETCHW "prefetcht0"
45
#else
46 d904b5fc Nigel Pearson
#define PREFETCH  " # nop"
47
#define PREFETCHW " # nop"
48 48a05cec Michael Niedermayer
#endif
49
50
/*
 * SFENCE: "sfence" when HAVE_MMX2 is defined; otherwise " # nop", which
 * emits no instruction (assuming '#' starts a comment in the target
 * assembler -- confirm).
 */
#ifdef HAVE_MMX2
51
#define SFENCE "sfence"
52
#else
53 d904b5fc Nigel Pearson
#define SFENCE " # nop"
54 48a05cec Michael Niedermayer
#endif
55 d3f41512 Michael Niedermayer
56 d604bab9 Michael Niedermayer
/*
 * PAVGB(a,b): builds the assembler text for a packed byte average of the
 * two stringized operands, using "pavgb" with HAVE_MMX2 and "pavgusb" with
 * HAVE_3DNOW.
 * NOTE: when neither feature macro is defined PAVGB is left undefined, so
 * any use must be guarded by the same conditions.
 */
#ifdef HAVE_MMX2
57
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
58
#elif defined (HAVE_3DNOW)
59
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
60
#endif
61 d3f41512 Michael Niedermayer
62 d604bab9 Michael Niedermayer
/*
 * MOVNTQ(a,b): 8-byte store of `a` to `b`, emitted as "movntq" when
 * HAVE_MMX2 is defined and as a plain "movq" otherwise.
 *
 * REAL_MOVNTQ stringizes its arguments with '#'.  MOVNTQ is the extra
 * expansion level that lets macro arguments (for example a register-name
 * macro) be expanded before they are stringized; always use MOVNTQ.
 */
#ifdef HAVE_MMX2
63 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
64 d604bab9 Michael Niedermayer
#else
65 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
66 d604bab9 Michael Niedermayer
#endif
67 6e1c66bc Aurelien Jacobs
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
68 d604bab9 Michael Niedermayer
69 a2faa401 Romain Dolbeau
#ifdef HAVE_ALTIVEC
70
#include "swscale_altivec_template.c"
71
#endif
72
73 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Expands to one complete `asm volatile(...);` statement that applies a
 * multi-tap filter and stores 8 output bytes per outer iteration.
 *
 *   x      - stringized and used as a constant byte displacement on every
 *            source load
 *   offset - pasted between string literals, so it must itself expand to a
 *            string literal; displacement of the filter table relative to
 *            operand %0
 *   dest   - output pointer (operand %1), indexed by the byte counter REG_a
 *   width  - loop bound (operand %2); the outer loop repeats while
 *            REG_a < width (unsigned compare), REG_a advancing by 8
 *
 * Operand %0 is &c->redDither, so a variable named `c` must be in scope at
 * the expansion site.
 *
 * The filter table is walked in 16-byte steps: a source pointer is read at
 * +0 and the coefficients at +8; a zero pointer ends the inner loop.  The
 * accumulators mm3/mm4 start from the value at VROUNDER_OFFSET(%0); the
 * sums are shifted right by 3 and packed with unsigned saturation before
 * being stored through MOVNTQ.
 *
 * NOTE(review): only REG_a/REG_d/REG_S are declared as clobbered.  The MMX
 * registers used (mm0, mm2-mm5) and the store through %1 are not declared
 * (there is no "memory" clobber) -- confirm this is intentional.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
74
                asm volatile(\
75 6e1c66bc Aurelien Jacobs
                        "xor %%"REG_a", %%"REG_a"        \n\t"\
76 379a2036 Michael Niedermayer
                        "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
77
                        "movq %%mm3, %%mm4                \n\t"\
78 6e1c66bc Aurelien Jacobs
                        "lea " offset "(%0), %%"REG_d"        \n\t"\
79
                        "mov (%%"REG_d"), %%"REG_S"        \n\t"\
80 4bff9ef9 Diego Biurrun
                        ASMALIGN(4) /* FIXME Unroll? */\
81 c1b0bfb4 Michael Niedermayer
                        "1:                                \n\t"\
82 6e1c66bc Aurelien Jacobs
                        "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
83
                        "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
84
                        "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5\n\t" /* srcData */\
85
                        "add $16, %%"REG_d"                \n\t"\
86
                        "mov (%%"REG_d"), %%"REG_S"        \n\t"\
87
                        "test %%"REG_S", %%"REG_S"        \n\t"\
88 c1b0bfb4 Michael Niedermayer
                        "pmulhw %%mm0, %%mm2                \n\t"\
89
                        "pmulhw %%mm0, %%mm5                \n\t"\
90
                        "paddw %%mm2, %%mm3                \n\t"\
91
                        "paddw %%mm5, %%mm4                \n\t"\
92
                        " jnz 1b                        \n\t"\
93
                        "psraw $3, %%mm3                \n\t"\
94
                        "psraw $3, %%mm4                \n\t"\
95
                        "packuswb %%mm4, %%mm3                \n\t"\
96 6e1c66bc Aurelien Jacobs
                        MOVNTQ(%%mm3, (%1, %%REGa))\
97
                        "add $8, %%"REG_a"                \n\t"\
98
                        "cmp %2, %%"REG_a"                \n\t"\
99 379a2036 Michael Niedermayer
                        "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
100
                        "movq %%mm3, %%mm4                \n\t"\
101 6e1c66bc Aurelien Jacobs
                        "lea " offset "(%0), %%"REG_d"        \n\t"\
102
                        "mov (%%"REG_d"), %%"REG_S"        \n\t"\
103 bca11e75 Michael Niedermayer
                        "jb 1b                                \n\t"\
104
                        :: "r" (&c->redDither),\
105 e96da13b Reimar Döffinger
                        "r" (dest), "g" (width)\
106 bca11e75 Michael Niedermayer
                        : "%"REG_a, "%"REG_d, "%"REG_S\
107
                );
108
109
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same operands as the non-accurate variant: %0 = &c->redDither (so `c`
 * must be in scope), %1 = dest, %2 = width; `x` is stringized into the
 * load displacement and `offset` must expand to a string literal.
 * Expands to one complete `asm volatile(...);` statement.
 *
 * Each inner step reads two source pointers (at +0 and +4 of the current
 * table position) and one coefficient quadword at +8, then advances the
 * table position by 16 bytes; a zero pointer at the new position ends the
 * inner loop.  Words from the two sources are interleaved and accumulated
 * with pmaddwd as 32-bit sums in mm4..mm7.  After the loop the sums are
 * shifted right by 16, packed to words, offset by the value at
 * VROUNDER_OFFSET(%0), shifted right by 3, packed to unsigned bytes and
 * stored through MOVNTQ (8 bytes per outer iteration).
 *
 * NOTE(review): the second pointer is loaded from byte offset 4 with a
 * full-register mov.  If REG_S can be a 64-bit register this load would
 * overlap the first pointer -- confirm the table layout for such targets.
 * NOTE(review): mm0-mm7 and the store through %1 are not declared in the
 * clobber list -- confirm this is intentional.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
110
                asm volatile(\
111
                        "lea " offset "(%0), %%"REG_d"        \n\t"\
112
                        "xor %%"REG_a", %%"REG_a"        \n\t"\
113
                        "pxor %%mm4, %%mm4              \n\t"\
114
                        "pxor %%mm5, %%mm5              \n\t"\
115
                        "pxor %%mm6, %%mm6              \n\t"\
116
                        "pxor %%mm7, %%mm7              \n\t"\
117
                        "mov (%%"REG_d"), %%"REG_S"        \n\t"\
118 4bff9ef9 Diego Biurrun
                        ASMALIGN(4) \
119 bca11e75 Michael Niedermayer
                        "1:                                \n\t"\
120
                        "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0\n\t" /* srcData */\
121
                        "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2\n\t" /* srcData */\
122
                        "mov 4(%%"REG_d"), %%"REG_S"        \n\t"\
123
                        "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1\n\t" /* srcData */\
124
                        "movq %%mm0, %%mm3              \n\t"\
125
                        "punpcklwd %%mm1, %%mm0        \n\t"\
126
                        "punpckhwd %%mm1, %%mm3        \n\t"\
127
                        "movq 8(%%"REG_d"), %%mm1        \n\t" /* filterCoeff */\
128
                        "pmaddwd %%mm1, %%mm0           \n\t"\
129
                        "pmaddwd %%mm1, %%mm3           \n\t"\
130
                        "paddd %%mm0, %%mm4             \n\t"\
131
                        "paddd %%mm3, %%mm5             \n\t"\
132
                        "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3\n\t" /* srcData */\
133
                        "mov 16(%%"REG_d"), %%"REG_S"        \n\t"\
134
                        "add $16, %%"REG_d"                \n\t"\
135
                        "test %%"REG_S", %%"REG_S"      \n\t"\
136
                        "movq %%mm2, %%mm0              \n\t"\
137
                        "punpcklwd %%mm3, %%mm2        \n\t"\
138
                        "punpckhwd %%mm3, %%mm0        \n\t"\
139
                        "pmaddwd %%mm1, %%mm2           \n\t"\
140
                        "pmaddwd %%mm1, %%mm0           \n\t"\
141
                        "paddd %%mm2, %%mm6             \n\t"\
142
                        "paddd %%mm0, %%mm7             \n\t"\
143
                        " jnz 1b                        \n\t"\
144
                        "psrad $16, %%mm4                \n\t"\
145
                        "psrad $16, %%mm5                \n\t"\
146
                        "psrad $16, %%mm6                \n\t"\
147
                        "psrad $16, %%mm7                \n\t"\
148
                        "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
149
                        "packssdw %%mm5, %%mm4                \n\t"\
150
                        "packssdw %%mm7, %%mm6                \n\t"\
151
                        "paddw %%mm0, %%mm4             \n\t"\
152
                        "paddw %%mm0, %%mm6             \n\t"\
153
                        "psraw $3, %%mm4                \n\t"\
154
                        "psraw $3, %%mm6                \n\t"\
155
                        "packuswb %%mm6, %%mm4                \n\t"\
156
                        MOVNTQ(%%mm4, (%1, %%REGa))\
157
                        "add $8, %%"REG_a"                \n\t"\
158
                        "cmp %2, %%"REG_a"                \n\t"\
159
                        "lea " offset "(%0), %%"REG_d"        \n\t"\
160
                        "pxor %%mm4, %%mm4              \n\t"\
161
                        "pxor %%mm5, %%mm5              \n\t"\
162
                        "pxor %%mm6, %%mm6              \n\t"\
163
                        "pxor %%mm7, %%mm7              \n\t"\
164
                        "mov (%%"REG_d"), %%"REG_S"        \n\t"\
165
                        "jb 1b                                \n\t"\
166
                        :: "r" (&c->redDither),\
167 e96da13b Reimar Döffinger
                        "r" (dest), "g" (width)\
168 bca11e75 Michael Niedermayer
                        : "%"REG_a, "%"REG_d, "%"REG_S\
169
                );
170 c1b0bfb4 Michael Niedermayer
171
/*
 * YSCALEYUV2YV121
 *
 * Assembler text only (no asm() wrapper, operands or clobbers); the user
 * of the macro supplies those.  Operand usage inside the fragment:
 *   %0 - base of the 16-bit source samples, indexed by 2*REG_a
 *   %1 - base of the 8-bit destination, indexed by REG_a
 *   %2 - initial value loaded into REG_a
 *
 * Per iteration 8 source words are shifted right by 7, packed to unsigned
 * bytes with saturation and stored through MOVNTQ; REG_a then advances by
 * 8.  The loop repeats until the add produces a carry ("jnc"), so %2 has
 * to be a negative count that runs up towards zero; presumably %0 and %1
 * therefore point at the ends of their buffers -- confirm at the call site.
 */
#define YSCALEYUV2YV121 \
172 6e1c66bc Aurelien Jacobs
                        "mov %2, %%"REG_a"                \n\t"\
173 4bff9ef9 Diego Biurrun
                        ASMALIGN(4) /* FIXME Unroll? */\
174 c1b0bfb4 Michael Niedermayer
                        "1:                                \n\t"\
175 6e1c66bc Aurelien Jacobs
                        "movq (%0, %%"REG_a", 2), %%mm0        \n\t"\
176
                        "movq 8(%0, %%"REG_a", 2), %%mm1\n\t"\
177 c1b0bfb4 Michael Niedermayer
                        "psraw $7, %%mm0                \n\t"\
178
                        "psraw $7, %%mm1                \n\t"\
179
                        "packuswb %%mm1, %%mm0                \n\t"\
180 6e1c66bc Aurelien Jacobs
                        MOVNTQ(%%mm0, (%1, %%REGa))\
181
                        "add $8, %%"REG_a"                \n\t"\
182 c1b0bfb4 Michael Niedermayer
                        "jnc 1b                                \n\t"
183
184
/*
185
                        :: "m" (-lumFilterSize), "m" (-chrFilterSize),
186
                           "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
187
                           "r" (dest), "m" (dstW),
188
                           "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
189
                        : "%eax", "%ebx", "%ecx", "%edx", "%esi"
190
*/
191 25593e29 Michael Niedermayer
/*
 * YSCALEYUV2PACKEDX
 *
 * Opens an `asm volatile(` statement and emits the filtering stage for the
 * packed-output paths.  The statement is NOT closed here: further
 * assembler strings must follow, and the operand/clobber lists plus the
 * closing ");" have to be supplied afterwards (YSCALEYUV2PACKEDX_END does
 * that).
 *
 * Label "1:" marks the start of the per-group work; nothing in this macro
 * jumps back to it, so the loop branch is expected from the code appended
 * after the macro.  REG_a is the running index and starts at zero.
 *
 * Two inner loops (both labelled "2:") walk 16-byte filter entries
 * (pointer at +0, coefficients at +8, zero pointer terminates):
 *   - table at CHR_MMX_FILTER_OFFSET(%0): loads at (ptr + REG_a) and at
 *     4096 bytes beyond it, accumulated into mm3 (U) and mm4 (V)
 *   - table at LUM_MMX_FILTER_OFFSET(%0): loads at (ptr + 2*REG_a) and
 *     8 bytes beyond it, accumulated into mm1 (Y1) and mm7 (Y2)
 * All four accumulators start from the value at VROUNDER_OFFSET(%0).
 *
 * On exit: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (naming taken from the
 * per-line comments); REG_d and REG_S are overwritten.
 */
#define YSCALEYUV2PACKEDX \
192 8422aa88 Michael Niedermayer
        asm volatile(\
193 6e1c66bc Aurelien Jacobs
                "xor %%"REG_a", %%"REG_a"        \n\t"\
194 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
195 77a49659 Michael Niedermayer
                "nop                                \n\t"\
196 c1b0bfb4 Michael Niedermayer
                "1:                                \n\t"\
197 6e1c66bc Aurelien Jacobs
                "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
198
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
199 379a2036 Michael Niedermayer
                "movq "VROUNDER_OFFSET"(%0), %%mm3\n\t"\
200
                "movq %%mm3, %%mm4                \n\t"\
201 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
202 c1b0bfb4 Michael Niedermayer
                "2:                                \n\t"\
203 6e1c66bc Aurelien Jacobs
                "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
204
                "movq (%%"REG_S", %%"REG_a"), %%mm2        \n\t" /* UsrcData */\
205
                "movq 4096(%%"REG_S", %%"REG_a"), %%mm5        \n\t" /* VsrcData */\
206
                "add $16, %%"REG_d"                \n\t"\
207
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
208 c1b0bfb4 Michael Niedermayer
                "pmulhw %%mm0, %%mm2                \n\t"\
209
                "pmulhw %%mm0, %%mm5                \n\t"\
210
                "paddw %%mm2, %%mm3                \n\t"\
211
                "paddw %%mm5, %%mm4                \n\t"\
212 6e1c66bc Aurelien Jacobs
                "test %%"REG_S", %%"REG_S"        \n\t"\
213 c1b0bfb4 Michael Niedermayer
                " jnz 2b                        \n\t"\
214
\
215 6e1c66bc Aurelien Jacobs
                "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
216
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
217 379a2036 Michael Niedermayer
                "movq "VROUNDER_OFFSET"(%0), %%mm1\n\t"\
218
                "movq %%mm1, %%mm7                \n\t"\
219 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
220 c1b0bfb4 Michael Niedermayer
                "2:                                \n\t"\
221 6e1c66bc Aurelien Jacobs
                "movq 8(%%"REG_d"), %%mm0        \n\t" /* filterCoeff */\
222
                "movq (%%"REG_S", %%"REG_a", 2), %%mm2        \n\t" /* Y1srcData */\
223
                "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5        \n\t" /* Y2srcData */\
224
                "add $16, %%"REG_d"                \n\t"\
225
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
226 c1b0bfb4 Michael Niedermayer
                "pmulhw %%mm0, %%mm2                \n\t"\
227
                "pmulhw %%mm0, %%mm5                \n\t"\
228
                "paddw %%mm2, %%mm1                \n\t"\
229
                "paddw %%mm5, %%mm7                \n\t"\
230 6e1c66bc Aurelien Jacobs
                "test %%"REG_S", %%"REG_S"        \n\t"\
231 c1b0bfb4 Michael Niedermayer
                " jnz 2b                        \n\t"\
232 25593e29 Michael Niedermayer
233 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Supplies the operand and clobber lists and the closing ");" for an asm
 * statement that was opened without them.  There are no output operands.
 * Input operands:
 *   %0     = &c->redDither   (register)
 *   %1..%3 = dummy           (memory; they only occupy operand slots here)
 *   %4     = dest            (register)
 *   %5     = dstW            (memory)
 * Declared clobbers: REG_a, REG_d, REG_S.
 * Variables named `c`, `dummy`, `dest` and `dstW` must be in scope at the
 * expansion site.
 */
#define YSCALEYUV2PACKEDX_END\
234
        :: "r" (&c->redDither), \
235
            "m" (dummy), "m" (dummy), "m" (dummy),\
236
            "r" (dest), "m" (dstW)\
237
        : "%"REG_a, "%"REG_d, "%"REG_S\
238
        );
239
240 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * Opens an `asm volatile(` statement and emits a filtering stage that
 * accumulates 32-bit sums with pmaddwd.  As with the non-accurate variant
 * the statement is left open: more assembler text, the operand/clobber
 * lists and the closing ");" must follow.  Label "1:" is defined here but
 * the branch back to it is not part of this macro.
 *
 * Each inner step reads two source pointers (at +0 and +4 of the current
 * table position) and a coefficient quadword at +8, then advances by 16
 * bytes; a zero pointer at the new position ends the loop.
 *
 * Chroma pass (table at CHR_MMX_FILTER_OFFSET(%0)): sums are shifted right
 * by 16, packed to words, offset by the value at VROUNDER_OFFSET(%0) and
 * written to memory at U_TEMP(%0) and V_TEMP(%0), because every MMX
 * register is reused by the luma pass.
 * Luma pass (table at LUM_MMX_FILTER_OFFSET(%0)): same arithmetic, results
 * kept in mm1 and mm7.
 *
 * On exit: mm1 = Y1, mm7 = Y2, mm3 = value reloaded from U_TEMP(%0),
 * mm4 = value reloaded from V_TEMP(%0).
 *
 * NOTE(review): the second pointer is loaded from byte offset 4 with a
 * full-register mov.  If REG_S can be a 64-bit register this load would
 * overlap the first pointer -- confirm the table layout for such targets.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
241 8422aa88 Michael Niedermayer
        asm volatile(\
242 bca11e75 Michael Niedermayer
                "xor %%"REG_a", %%"REG_a"        \n\t"\
243 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
244 bca11e75 Michael Niedermayer
                "nop                                \n\t"\
245
                "1:                                \n\t"\
246
                "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
247
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
248
                "pxor %%mm4, %%mm4              \n\t"\
249
                "pxor %%mm5, %%mm5              \n\t"\
250
                "pxor %%mm6, %%mm6              \n\t"\
251
                "pxor %%mm7, %%mm7              \n\t"\
252 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
253 bca11e75 Michael Niedermayer
                "2:                                \n\t"\
254
                "movq (%%"REG_S", %%"REG_a"), %%mm0        \n\t" /* UsrcData */\
255
                "movq 4096(%%"REG_S", %%"REG_a"), %%mm2        \n\t" /* VsrcData */\
256
                "mov 4(%%"REG_d"), %%"REG_S"        \n\t"\
257
                "movq (%%"REG_S", %%"REG_a"), %%mm1        \n\t" /* UsrcData */\
258
                "movq %%mm0, %%mm3              \n\t"\
259
                "punpcklwd %%mm1, %%mm0        \n\t"\
260
                "punpckhwd %%mm1, %%mm3        \n\t"\
261
                "movq 8(%%"REG_d"), %%mm1        \n\t" /* filterCoeff */\
262
                "pmaddwd %%mm1, %%mm0           \n\t"\
263
                "pmaddwd %%mm1, %%mm3           \n\t"\
264
                "paddd %%mm0, %%mm4             \n\t"\
265
                "paddd %%mm3, %%mm5             \n\t"\
266
                "movq 4096(%%"REG_S", %%"REG_a"), %%mm3        \n\t" /* VsrcData */\
267
                "mov 16(%%"REG_d"), %%"REG_S"        \n\t"\
268
                "add $16, %%"REG_d"                \n\t"\
269
                "test %%"REG_S", %%"REG_S"      \n\t"\
270
                "movq %%mm2, %%mm0              \n\t"\
271
                "punpcklwd %%mm3, %%mm2        \n\t"\
272
                "punpckhwd %%mm3, %%mm0        \n\t"\
273
                "pmaddwd %%mm1, %%mm2           \n\t"\
274
                "pmaddwd %%mm1, %%mm0           \n\t"\
275
                "paddd %%mm2, %%mm6             \n\t"\
276
                "paddd %%mm0, %%mm7             \n\t"\
277
                " jnz 2b                        \n\t"\
278
                "psrad $16, %%mm4                \n\t"\
279
                "psrad $16, %%mm5                \n\t"\
280
                "psrad $16, %%mm6                \n\t"\
281
                "psrad $16, %%mm7                \n\t"\
282
                "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
283
                "packssdw %%mm5, %%mm4                \n\t"\
284
                "packssdw %%mm7, %%mm6                \n\t"\
285
                "paddw %%mm0, %%mm4             \n\t"\
286
                "paddw %%mm0, %%mm6             \n\t"\
287
                "movq %%mm4, "U_TEMP"(%0)       \n\t"\
288
                "movq %%mm6, "V_TEMP"(%0)       \n\t"\
289
\
290
                "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"\n\t"\
291
                "mov (%%"REG_d"), %%"REG_S"        \n\t"\
292
                "pxor %%mm1, %%mm1              \n\t"\
293
                "pxor %%mm5, %%mm5              \n\t"\
294
                "pxor %%mm7, %%mm7              \n\t"\
295
                "pxor %%mm6, %%mm6              \n\t"\
296 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
297 bca11e75 Michael Niedermayer
                "2:                                \n\t"\
298
                "movq (%%"REG_S", %%"REG_a", 2), %%mm0        \n\t" /* Y1srcData */\
299
                "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2        \n\t" /* Y2srcData */\
300
                "mov 4(%%"REG_d"), %%"REG_S"        \n\t"\
301
                "movq (%%"REG_S", %%"REG_a", 2), %%mm4        \n\t" /* Y1srcData */\
302
                "movq %%mm0, %%mm3              \n\t"\
303
                "punpcklwd %%mm4, %%mm0        \n\t"\
304
                "punpckhwd %%mm4, %%mm3        \n\t"\
305
                "movq 8(%%"REG_d"), %%mm4        \n\t" /* filterCoeff */\
306
                "pmaddwd %%mm4, %%mm0           \n\t"\
307
                "pmaddwd %%mm4, %%mm3           \n\t"\
308
                "paddd %%mm0, %%mm1             \n\t"\
309
                "paddd %%mm3, %%mm5             \n\t"\
310
                "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3        \n\t" /* Y2srcData */\
311
                "mov 16(%%"REG_d"), %%"REG_S"        \n\t"\
312
                "add $16, %%"REG_d"                \n\t"\
313
                "test %%"REG_S", %%"REG_S"      \n\t"\
314
                "movq %%mm2, %%mm0              \n\t"\
315
                "punpcklwd %%mm3, %%mm2        \n\t"\
316
                "punpckhwd %%mm3, %%mm0        \n\t"\
317
                "pmaddwd %%mm4, %%mm2           \n\t"\
318
                "pmaddwd %%mm4, %%mm0           \n\t"\
319
                "paddd %%mm2, %%mm7             \n\t"\
320
                "paddd %%mm0, %%mm6             \n\t"\
321
                " jnz 2b                        \n\t"\
322
                "psrad $16, %%mm1                \n\t"\
323
                "psrad $16, %%mm5                \n\t"\
324
                "psrad $16, %%mm7                \n\t"\
325
                "psrad $16, %%mm6                \n\t"\
326
                "movq "VROUNDER_OFFSET"(%0), %%mm0\n\t"\
327
                "packssdw %%mm5, %%mm1                \n\t"\
328
                "packssdw %%mm6, %%mm7                \n\t"\
329
                "paddw %%mm0, %%mm1             \n\t"\
330
                "paddw %%mm0, %%mm7             \n\t"\
331
                "movq  "U_TEMP"(%0), %%mm3      \n\t"\
332
                "movq  "V_TEMP"(%0), %%mm4      \n\t"\
333
334 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2RGBX
 *
 * Assembler text only (no asm() wrapper): converts filtered Y/U/V words
 * to packed colour bytes.  All constants are read relative to operand %0
 * (U_OFFSET, V_OFFSET, Y_OFFSET, UG/VG/UB/VR_COEFF, Y_COEFF).
 *
 * Expected on entry: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V.
 *
 * Steps: subtract the offsets, multiply by the coefficients (pmulhw),
 * duplicate each chroma term for two neighbouring pixels
 * (punpcklwd/punpckhwd of a register with itself), add the luma and pack
 * to unsigned bytes with saturation.
 *
 * On exit: mm2 holds the bytes built from the ub term (B), mm4 those from
 * ug+vg (G), mm5 those from the vr term (R); mm7 is cleared to zero.
 * mm0, mm3 and mm6 are overwritten as scratch.
 */
#define YSCALEYUV2RGBX \
335 77a49659 Michael Niedermayer
                "psubw "U_OFFSET"(%0), %%mm3        \n\t" /* (U-128)8*/\
336
                "psubw "V_OFFSET"(%0), %%mm4        \n\t" /* (V-128)8*/\
337 c1b0bfb4 Michael Niedermayer
                "movq %%mm3, %%mm2                \n\t" /* (U-128)8*/\
338
                "movq %%mm4, %%mm5                \n\t" /* (V-128)8*/\
339 77a49659 Michael Niedermayer
                "pmulhw "UG_COEFF"(%0), %%mm3        \n\t"\
340
                "pmulhw "VG_COEFF"(%0), %%mm4        \n\t"\
341 c1b0bfb4 Michael Niedermayer
        /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
342 77a49659 Michael Niedermayer
                "pmulhw "UB_COEFF"(%0), %%mm2        \n\t"\
343
                "pmulhw "VR_COEFF"(%0), %%mm5        \n\t"\
344
                "psubw "Y_OFFSET"(%0), %%mm1        \n\t" /* 8(Y-16)*/\
345
                "psubw "Y_OFFSET"(%0), %%mm7        \n\t" /* 8(Y-16)*/\
346
                "pmulhw "Y_COEFF"(%0), %%mm1        \n\t"\
347
                "pmulhw "Y_COEFF"(%0), %%mm7        \n\t"\
348 c1b0bfb4 Michael Niedermayer
        /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
349
                "paddw %%mm3, %%mm4                \n\t"\
350
                "movq %%mm2, %%mm0                \n\t"\
351
                "movq %%mm5, %%mm6                \n\t"\
352
                "movq %%mm4, %%mm3                \n\t"\
353
                "punpcklwd %%mm2, %%mm2                \n\t"\
354
                "punpcklwd %%mm5, %%mm5                \n\t"\
355
                "punpcklwd %%mm4, %%mm4                \n\t"\
356
                "paddw %%mm1, %%mm2                \n\t"\
357
                "paddw %%mm1, %%mm5                \n\t"\
358
                "paddw %%mm1, %%mm4                \n\t"\
359
                "punpckhwd %%mm0, %%mm0                \n\t"\
360
                "punpckhwd %%mm6, %%mm6                \n\t"\
361
                "punpckhwd %%mm3, %%mm3                \n\t"\
362
                "paddw %%mm7, %%mm0                \n\t"\
363
                "paddw %%mm7, %%mm6                \n\t"\
364
                "paddw %%mm7, %%mm3                \n\t"\
365
                /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
366
                "packuswb %%mm0, %%mm2                \n\t"\
367
                "packuswb %%mm6, %%mm5                \n\t"\
368
                "packuswb %%mm3, %%mm4                \n\t"\
369
                "pxor %%mm7, %%mm7                \n\t"
370 77a49659 Michael Niedermayer
/*
 * FULL_YSCALEYUV2RGB -- DISABLED: the whole definition sits inside
 * "#if 0", so the macro does not exist in a build.
 *
 * As written it is an assembler fragment that blends two luma buffers
 * (%0, %1) and two chroma buffers (%2, %3) using weights taken from
 * operands %6 and %7, and converts the result with global constants
 * referenced through MANGLE() (w80, w400, yCoeff, ubCoeff, ugCoeff,
 * vrCoeff, vgCoeff).  It leaves packed bytes in mm3 (B), mm1 (G) and
 * mm0 (R), per the per-line comments.
 */
#if 0
371 d604bab9 Michael Niedermayer
#define FULL_YSCALEYUV2RGB \
372
                "pxor %%mm7, %%mm7                \n\t"\
373
                "movd %6, %%mm6                        \n\t" /*yalpha1*/\
374
                "punpcklwd %%mm6, %%mm6                \n\t"\
375
                "punpcklwd %%mm6, %%mm6                \n\t"\
376
                "movd %7, %%mm5                        \n\t" /*uvalpha1*/\
377
                "punpcklwd %%mm5, %%mm5                \n\t"\
378
                "punpcklwd %%mm5, %%mm5                \n\t"\
379 6e1c66bc Aurelien Jacobs
                "xor %%"REG_a", %%"REG_a"                \n\t"\
380 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
381 d604bab9 Michael Niedermayer
                "1:                                \n\t"\
382 6e1c66bc Aurelien Jacobs
                "movq (%0, %%"REG_a", 2), %%mm0        \n\t" /*buf0[eax]*/\
383
                "movq (%1, %%"REG_a", 2), %%mm1        \n\t" /*buf1[eax]*/\
384
                "movq (%2, %%"REG_a",2), %%mm2        \n\t" /* uvbuf0[eax]*/\
385
                "movq (%3, %%"REG_a",2), %%mm3        \n\t" /* uvbuf1[eax]*/\
386 d604bab9 Michael Niedermayer
                "psubw %%mm1, %%mm0                \n\t" /* buf0[eax] - buf1[eax]*/\
387
                "psubw %%mm3, %%mm2                \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
388
                "pmulhw %%mm6, %%mm0                \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
389
                "pmulhw %%mm5, %%mm2                \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
390
                "psraw $4, %%mm1                \n\t" /* buf0[eax] - buf1[eax] >>4*/\
391 6e1c66bc Aurelien Jacobs
                "movq 4096(%2, %%"REG_a",2), %%mm4        \n\t" /* uvbuf0[eax+2048]*/\
392 d604bab9 Michael Niedermayer
                "psraw $4, %%mm3                \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
393
                "paddw %%mm0, %%mm1                \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
394 6e1c66bc Aurelien Jacobs
                "movq 4096(%3, %%"REG_a",2), %%mm0        \n\t" /* uvbuf1[eax+2048]*/\
395 d604bab9 Michael Niedermayer
                "paddw %%mm2, %%mm3                \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
396
                "psubw %%mm0, %%mm4                \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
397 9b464428 Felix Bünemann
                "psubw "MANGLE(w80)", %%mm1        \n\t" /* 8(Y-16)*/\
398
                "psubw "MANGLE(w400)", %%mm3        \n\t" /* 8(U-128)*/\
399
                "pmulhw "MANGLE(yCoeff)", %%mm1        \n\t"\
400 d604bab9 Michael Niedermayer
\
401
\
402
                "pmulhw %%mm5, %%mm4                \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
403
                "movq %%mm3, %%mm2                \n\t" /* (U-128)8*/\
404 9b464428 Felix Bünemann
                "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\
405 d604bab9 Michael Niedermayer
                "psraw $4, %%mm0                \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
406 9b464428 Felix Bünemann
                "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\
407 d604bab9 Michael Niedermayer
                "paddw %%mm4, %%mm0                \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
408 9b464428 Felix Bünemann
                "psubw "MANGLE(w400)", %%mm0        \n\t" /* (V-128)8*/\
409 d604bab9 Michael Niedermayer
\
410
\
411
                "movq %%mm0, %%mm4                \n\t" /* (V-128)8*/\
412 9b464428 Felix Bünemann
                "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\
413
                "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
414 d604bab9 Michael Niedermayer
                "paddw %%mm1, %%mm3                \n\t" /* B*/\
415
                "paddw %%mm1, %%mm0                \n\t" /* R*/\
416
                "packuswb %%mm3, %%mm3                \n\t"\
417
\
418
                "packuswb %%mm0, %%mm0                \n\t"\
419
                "paddw %%mm4, %%mm2                \n\t"\
420
                "paddw %%mm2, %%mm1                \n\t" /* G*/\
421
\
422
                "packuswb %%mm1, %%mm1                \n\t"
423 77a49659 Michael Niedermayer
#endif
424 d604bab9 Michael Niedermayer
425 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED(index, c)
 *
 * Assembler text only (no asm() wrapper): blends two luma lines and two
 * chroma lines.  Both arguments are stringized, so use the
 * YSCALEYUV2PACKED wrapper when passing macros.
 *
 *   index - register operand text used as the running index; cleared to
 *           zero before label "1:"
 *   c     - register operand text holding the base address for the
 *           CHR_MMX_FILTER_OFFSET / LUM_MMX_FILTER_OFFSET displacements
 *
 * Operand usage (names from the per-line comments): %0/%1 = buf0/buf1,
 * %2/%3 = uvbuf0/uvbuf1; the second chroma plane is read 4096 bytes
 * further on.
 *
 * NOTE: before the loop label the quadwords at CHR_MMX_FILTER_OFFSET+8(c)
 * and LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and written BACK
 * to memory, so the stored weights are modified every time this code runs.
 *
 * Per group the result is (buf1 >> 7) + pmulhw(buf0 - buf1, weight), and
 * likewise for chroma.  On exit: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V.
 * Label "1:" is defined here; the branch back to it is not part of this
 * macro.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
426 6542b44e Michael Niedermayer
                "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
427
                "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\
428
                "psraw $3, %%mm0                \n\t"\
429
                "psraw $3, %%mm1                \n\t"\
430
                "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\
431
                "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\
432 6e1c66bc Aurelien Jacobs
                "xor "#index", "#index"                \n\t"\
433 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
434 25593e29 Michael Niedermayer
                "1:                                \n\t"\
435 6542b44e Michael Niedermayer
                "movq (%2, "#index"), %%mm2        \n\t" /* uvbuf0[eax]*/\
436
                "movq (%3, "#index"), %%mm3        \n\t" /* uvbuf1[eax]*/\
437
                "movq 4096(%2, "#index"), %%mm5        \n\t" /* uvbuf0[eax+2048]*/\
438
                "movq 4096(%3, "#index"), %%mm4        \n\t" /* uvbuf1[eax+2048]*/\
439 25593e29 Michael Niedermayer
                "psubw %%mm3, %%mm2                \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
440
                "psubw %%mm4, %%mm5                \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
441 6542b44e Michael Niedermayer
                "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
442 25593e29 Michael Niedermayer
                "pmulhw %%mm0, %%mm2                \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
443
                "pmulhw %%mm0, %%mm5                \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
444
                "psraw $7, %%mm3                \n\t" /* uvbuf1[eax] >>7*/\
445
                "psraw $7, %%mm4                \n\t" /* uvbuf1[eax+2048] >>7*/\
446
                "paddw %%mm2, %%mm3                \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
447
                "paddw %%mm5, %%mm4                \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
448 6542b44e Michael Niedermayer
                "movq (%0, "#index", 2), %%mm0        \n\t" /*buf0[eax]*/\
449
                "movq (%1, "#index", 2), %%mm1        \n\t" /*buf1[eax]*/\
450
                "movq 8(%0, "#index", 2), %%mm6        \n\t" /*buf0[eax]*/\
451
                "movq 8(%1, "#index", 2), %%mm7        \n\t" /*buf1[eax]*/\
452 25593e29 Michael Niedermayer
                "psubw %%mm1, %%mm0                \n\t" /* buf0[eax] - buf1[eax]*/\
453
                "psubw %%mm7, %%mm6                \n\t" /* buf0[eax] - buf1[eax]*/\
454 6542b44e Michael Niedermayer
                "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
455
                "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
456 25593e29 Michael Niedermayer
                "psraw $7, %%mm1                \n\t" /* buf1[eax] >>7*/\
457
                "psraw $7, %%mm7                \n\t" /* buf1[eax] >>7*/\
458
                "paddw %%mm0, %%mm1                \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
459
                "paddw %%mm6, %%mm7                \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
460
                
461 6e1c66bc Aurelien Jacobs
/*
 * YSCALEYUV2PACKED(index, c): extra expansion level in front of
 * REAL_YSCALEYUV2PACKED, so that arguments which are themselves macros are
 * expanded before the REAL_ variant stringizes them with '#'.
 */
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
462
                
463
/*
 * Inline-asm fragment: blends two source lines and converts the result from
 * YUV to RGB, producing 8 pixels per loop iteration.
 *
 * index - register name, stringized into the asm text; zeroed here and used
 *         as the sample offset (byte offset for chroma, scaled by 2 for luma).
 * c     - register name, stringized; base register for the *_OFFSET and
 *         *_COEFF displacements (those constants are defined outside this
 *         fragment).
 *
 * Asm operands, as named by the inline comments: %0 = buf0, %1 = buf1 (luma),
 * %2 = uvbuf0, %3 = uvbuf1 (chroma).  The second chroma plane is read 4096
 * bytes after the first.
 *
 * Each sample is blended as  (b1 >> 4) + (((b0 - b1) * coeff) >> 16)  with
 * psubw / pmulhw / psraw / paddw.  Each chroma word is then duplicated with
 * punpck[lh]wd so that it is added to two neighbouring luma samples.
 *
 * After the fragment: mm2 = B, mm4 = G, mm5 = R (8 unsigned-saturated bytes
 * each) and mm7 = 0.  The "1:" label opened here is not branched to inside
 * this macro; the WRITEBGR* macros in this file end with the matching
 * "jb 1b".
 */
#define REAL_YSCALEYUV2RGB(index, c) \
464
                "xor "#index", "#index"        \n\t"\
465 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
466 d604bab9 Michael Niedermayer
                "1:                                \n\t"\
467 6542b44e Michael Niedermayer
                "movq (%2, "#index"), %%mm2        \n\t" /* uvbuf0[eax]*/\
468
                "movq (%3, "#index"), %%mm3        \n\t" /* uvbuf1[eax]*/\
469
                "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\
470
                "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\
471 d604bab9 Michael Niedermayer
                "psubw %%mm3, %%mm2                \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
472
                "psubw %%mm4, %%mm5                \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
473 6542b44e Michael Niedermayer
                "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\
474 d604bab9 Michael Niedermayer
                "pmulhw %%mm0, %%mm2                \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
475
                "pmulhw %%mm0, %%mm5                \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
476
                "psraw $4, %%mm3                \n\t" /* uvbuf1[eax] >>4 (mm3 still holds the plain uvbuf1 load) */\
477
                "psraw $4, %%mm4                \n\t" /* uvbuf1[eax+2048] >>4 */\
478
                "paddw %%mm2, %%mm3                \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
479
                "paddw %%mm5, %%mm4                \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
480 6542b44e Michael Niedermayer
                "psubw "U_OFFSET"("#c"), %%mm3        \n\t" /* (U-128)8*/\
481
                "psubw "V_OFFSET"("#c"), %%mm4        \n\t" /* (V-128)8*/\
482 d604bab9 Michael Niedermayer
                "movq %%mm3, %%mm2                \n\t" /* (U-128)8*/\
483
                "movq %%mm4, %%mm5                \n\t" /* (V-128)8*/\
484 6542b44e Michael Niedermayer
                "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
485
                "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
486 d604bab9 Michael Niedermayer
        /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
487 6542b44e Michael Niedermayer
                "movq (%0, "#index", 2), %%mm0        \n\t" /*buf0[eax]*/\
488
                "movq (%1, "#index", 2), %%mm1        \n\t" /*buf1[eax]*/\
489
                "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax+4] (next 4 samples)*/\
490
                "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax+4] (next 4 samples)*/\
491 d604bab9 Michael Niedermayer
                "psubw %%mm1, %%mm0                \n\t" /* buf0[eax] - buf1[eax]*/\
492
                "psubw %%mm7, %%mm6                \n\t" /* buf0[eax] - buf1[eax]*/\
493 6542b44e Michael Niedermayer
                "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
494
                "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
495 d604bab9 Michael Niedermayer
                "psraw $4, %%mm1                \n\t" /* buf1[eax] >>4 (mm1 still holds the plain buf1 load) */\
496
                "psraw $4, %%mm7                \n\t" /* buf1[eax+4] >>4 */\
497
                "paddw %%mm0, %%mm1                \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
498
                "paddw %%mm6, %%mm7                \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
499 6542b44e Michael Niedermayer
                "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
500
                "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
501
                "psubw "Y_OFFSET"("#c"), %%mm1        \n\t" /* 8(Y-16)*/\
502
                "psubw "Y_OFFSET"("#c"), %%mm7        \n\t" /* 8(Y-16)*/\
503
                "pmulhw "Y_COEFF"("#c"), %%mm1        \n\t"\
504
                "pmulhw "Y_COEFF"("#c"), %%mm7        \n\t"\
505 d604bab9 Michael Niedermayer
        /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
506
                "paddw %%mm3, %%mm4                \n\t"\
507
                "movq %%mm2, %%mm0                \n\t"\
508
                "movq %%mm5, %%mm6                \n\t"\
509
                "movq %%mm4, %%mm3                \n\t"\
510
                "punpcklwd %%mm2, %%mm2                \n\t"\
511
                "punpcklwd %%mm5, %%mm5                \n\t"\
512
                "punpcklwd %%mm4, %%mm4                \n\t"\
513
                "paddw %%mm1, %%mm2                \n\t"\
514
                "paddw %%mm1, %%mm5                \n\t"\
515
                "paddw %%mm1, %%mm4                \n\t"\
516
                "punpckhwd %%mm0, %%mm0                \n\t"\
517
                "punpckhwd %%mm6, %%mm6                \n\t"\
518
                "punpckhwd %%mm3, %%mm3                \n\t"\
519
                "paddw %%mm7, %%mm0                \n\t"\
520
                "paddw %%mm7, %%mm6                \n\t"\
521
                "paddw %%mm7, %%mm3                \n\t"\
522
                /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
523
                "packuswb %%mm0, %%mm2                \n\t"\
524
                "packuswb %%mm6, %%mm5                \n\t"\
525
                "packuswb %%mm3, %%mm4                \n\t"\
526
                "pxor %%mm7, %%mm7                \n\t"
527 6e1c66bc Aurelien Jacobs
/* Wrapper: lets `index` and `c` be macro-expanded before the REAL_ macro
 * stringizes them. */
#define YSCALEYUV2RGB(index, c)  REAL_YSCALEYUV2RGB(index, c)
528 25593e29 Michael Niedermayer
                
529 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: loads 8 luma and 4+4 chroma samples from a single
 * source line (no blending with a second line) and scales them down by an
 * arithmetic right shift of 7.
 *
 * index - register name, stringized; zeroed here and used as sample offset.
 * c     - accepted for symmetry with the other YSCALEYUV2* macros; it is not
 *         referenced in this body.
 *
 * Asm operands, as named by the inline comments: %0 = buf0 (luma),
 * %2 = uvbuf0 (chroma, second plane 4096 bytes after the first).
 *
 * After the fragment: mm3 / mm4 hold the two chroma planes, mm1 / mm7 the
 * first and second group of 4 luma samples.  The "1:" loop label is opened
 * here and left for a following fragment to branch back to.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
530
                "xor "#index", "#index"                \n\t"\
531 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
532 25593e29 Michael Niedermayer
                "1:                                \n\t"\
533 e54d94ba Michael Niedermayer
                "movq (%2, "#index"), %%mm3        \n\t" /* uvbuf0[eax]*/\
534
                "movq 4096(%2, "#index"), %%mm4        \n\t" /* uvbuf0[eax+2048]*/\
535 25593e29 Michael Niedermayer
                "psraw $7, %%mm3                \n\t" \
536
                "psraw $7, %%mm4                \n\t" \
537 e54d94ba Michael Niedermayer
                "movq (%0, "#index", 2), %%mm1        \n\t" /*buf0[eax]*/\
538
                "movq 8(%0, "#index", 2), %%mm7        \n\t" /*buf0[eax+4] (next 4 samples)*/\
539 25593e29 Michael Niedermayer
                "psraw $7, %%mm1                \n\t" \
540
                "psraw $7, %%mm7                \n\t" \
541
                
542 6e1c66bc Aurelien Jacobs
/* Wrapper: lets `index` and `c` be macro-expanded before the REAL_ macro
 * stringizes them. */
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
543
                
544
/*
 * Inline-asm fragment: YUV to RGB conversion of 8 pixels per loop iteration
 * from a single source line (no blending with a second line).
 *
 * index - register name, stringized; zeroed here and used as sample offset.
 * c     - register name, stringized; base register for the *_OFFSET and
 *         *_COEFF displacements (defined outside this fragment).
 *
 * Asm operands, as named by the inline comments: %0 = buf0 (luma),
 * %2 = uvbuf0 (chroma, second plane 4096 bytes after the first).
 *
 * Samples are scaled with an arithmetic right shift of 4, offsets are
 * subtracted, and the coefficient products are formed with pmulhw.  Each
 * chroma word is duplicated with punpck[lh]wd so it is added to two
 * neighbouring luma samples.
 *
 * After the fragment: mm2 = B, mm4 = G, mm5 = R (8 unsigned-saturated bytes
 * each) and mm7 = 0.  The "1:" label opened here is closed by the "jb 1b" at
 * the end of the WRITEBGR* macros in this file.
 */
#define REAL_YSCALEYUV2RGB1(index, c) \
545
                "xor "#index", "#index"        \n\t"\
546 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
547 d604bab9 Michael Niedermayer
                "1:                                \n\t"\
548 e54d94ba Michael Niedermayer
                "movq (%2, "#index"), %%mm3        \n\t" /* uvbuf0[eax]*/\
549
                "movq 4096(%2, "#index"), %%mm4        \n\t" /* uvbuf0[eax+2048]*/\
550 d604bab9 Michael Niedermayer
                "psraw $4, %%mm3                \n\t" /* uvbuf0[eax] >>4*/\
551
                "psraw $4, %%mm4                \n\t" /* uvbuf0[eax+2048] >>4*/\
552 e54d94ba Michael Niedermayer
                "psubw "U_OFFSET"("#c"), %%mm3        \n\t" /* (U-128)8*/\
553
                "psubw "V_OFFSET"("#c"), %%mm4        \n\t" /* (V-128)8*/\
554 d604bab9 Michael Niedermayer
                "movq %%mm3, %%mm2                \n\t" /* (U-128)8*/\
555
                "movq %%mm4, %%mm5                \n\t" /* (V-128)8*/\
556 e54d94ba Michael Niedermayer
                "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
557
                "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
558 d604bab9 Michael Niedermayer
        /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
559 e54d94ba Michael Niedermayer
                "movq (%0, "#index", 2), %%mm1        \n\t" /*buf0[eax]*/\
560
                "movq 8(%0, "#index", 2), %%mm7        \n\t" /*buf0[eax+4] (next 4 samples)*/\
561 497d4f99 Michael Niedermayer
                "psraw $4, %%mm1                \n\t" /* buf0[eax] >>4*/\
562
                "psraw $4, %%mm7                \n\t" /* buf0[eax+4] >>4*/\
563 e54d94ba Michael Niedermayer
                "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
564
                "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
565
                "psubw "Y_OFFSET"("#c"), %%mm1        \n\t" /* 8(Y-16)*/\
566
                "psubw "Y_OFFSET"("#c"), %%mm7        \n\t" /* 8(Y-16)*/\
567
                "pmulhw "Y_COEFF"("#c"), %%mm1        \n\t"\
568
                "pmulhw "Y_COEFF"("#c"), %%mm7        \n\t"\
569 497d4f99 Michael Niedermayer
        /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
570
                "paddw %%mm3, %%mm4                \n\t"\
571
                "movq %%mm2, %%mm0                \n\t"\
572
                "movq %%mm5, %%mm6                \n\t"\
573
                "movq %%mm4, %%mm3                \n\t"\
574
                "punpcklwd %%mm2, %%mm2                \n\t"\
575
                "punpcklwd %%mm5, %%mm5                \n\t"\
576
                "punpcklwd %%mm4, %%mm4                \n\t"\
577
                "paddw %%mm1, %%mm2                \n\t"\
578
                "paddw %%mm1, %%mm5                \n\t"\
579
                "paddw %%mm1, %%mm4                \n\t"\
580
                "punpckhwd %%mm0, %%mm0                \n\t"\
581
                "punpckhwd %%mm6, %%mm6                \n\t"\
582
                "punpckhwd %%mm3, %%mm3                \n\t"\
583
                "paddw %%mm7, %%mm0                \n\t"\
584
                "paddw %%mm7, %%mm6                \n\t"\
585
                "paddw %%mm7, %%mm3                \n\t"\
586
                /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
587
                "packuswb %%mm0, %%mm2                \n\t"\
588
                "packuswb %%mm6, %%mm5                \n\t"\
589
                "packuswb %%mm3, %%mm4                \n\t"\
590
                "pxor %%mm7, %%mm7                \n\t"
591 6e1c66bc Aurelien Jacobs
/* Wrapper: lets `index` and `c` be macro-expanded before the REAL_ macro
 * stringizes them. */
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
592 497d4f99 Michael Niedermayer
593 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: like REAL_YSCALEYUV2PACKED1, but chroma is formed from
 * two source lines: the uvbuf0 and uvbuf1 words are added and the sum is
 * shifted right logically by 8.  Luma comes from one line only and is
 * shifted right arithmetically by 7.
 *
 * index - register name, stringized; zeroed here and used as sample offset.
 * c     - accepted for symmetry with the other YSCALEYUV2* macros; it is not
 *         referenced in this body.
 *
 * Asm operands, as named by the inline comments: %0 = buf0 (luma),
 * %2 = uvbuf0, %3 = uvbuf1 (chroma, second plane 4096 bytes after the first).
 *
 * After the fragment: mm3 / mm4 hold the two chroma planes, mm1 / mm7 the
 * first and second group of 4 luma samples.  The "1:" loop label is opened
 * here and left for a following fragment to branch back to.
 */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
594
                "xor "#index", "#index"                \n\t"\
595 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
596 25593e29 Michael Niedermayer
                "1:                                \n\t"\
597 e54d94ba Michael Niedermayer
                "movq (%2, "#index"), %%mm2        \n\t" /* uvbuf0[eax]*/\
598
                "movq (%3, "#index"), %%mm3        \n\t" /* uvbuf1[eax]*/\
599
                "movq 4096(%2, "#index"), %%mm5        \n\t" /* uvbuf0[eax+2048]*/\
600
                "movq 4096(%3, "#index"), %%mm4        \n\t" /* uvbuf1[eax+2048]*/\
601 25593e29 Michael Niedermayer
                "paddw %%mm2, %%mm3                \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
602
                "paddw %%mm5, %%mm4                \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
603
                "psrlw $8, %%mm3                \n\t" \
604
                "psrlw $8, %%mm4                \n\t" \
605 e54d94ba Michael Niedermayer
                "movq (%0, "#index", 2), %%mm1        \n\t" /*buf0[eax]*/\
606
                "movq 8(%0, "#index", 2), %%mm7        \n\t" /*buf0[eax+4] (next 4 samples)*/\
607 25593e29 Michael Niedermayer
                "psraw $7, %%mm1                \n\t" \
608
                "psraw $7, %%mm7                \n\t" 
609 6e1c66bc Aurelien Jacobs
/* Wrapper: lets `index` and `c` be macro-expanded before the REAL_ macro
 * stringizes them. */
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
610 25593e29 Michael Niedermayer
                
611 497d4f99 Michael Niedermayer
// do vertical chrominance interpolation
612 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: YUV to RGB conversion of 8 pixels per loop iteration.
 * Luma is taken from one source line (arithmetic shift right by 4); chroma is
 * the sum of the uvbuf0 and uvbuf1 words shifted right logically by 5.  The
 * 16-bit sum is flagged below as able to overflow.
 *
 * index - register name, stringized; zeroed here and used as sample offset.
 * c     - register name, stringized; base register for the *_OFFSET and
 *         *_COEFF displacements (defined outside this fragment).
 *
 * Asm operands, as named by the inline comments: %0 = buf0 (luma),
 * %2 = uvbuf0, %3 = uvbuf1 (chroma, second plane 4096 bytes after the first).
 *
 * After the fragment: mm2 = B, mm4 = G, mm5 = R (8 unsigned-saturated bytes
 * each) and mm7 = 0.  The "1:" label opened here is closed by the "jb 1b" at
 * the end of the WRITEBGR* macros in this file.
 */
#define REAL_YSCALEYUV2RGB1b(index, c) \
613
                "xor "#index", "#index"                \n\t"\
614 4bff9ef9 Diego Biurrun
                ASMALIGN(4)\
615 497d4f99 Michael Niedermayer
                "1:                                \n\t"\
616 e54d94ba Michael Niedermayer
                "movq (%2, "#index"), %%mm2        \n\t" /* uvbuf0[eax]*/\
617
                "movq (%3, "#index"), %%mm3        \n\t" /* uvbuf1[eax]*/\
618
                "movq 4096(%2, "#index"), %%mm5        \n\t" /* uvbuf0[eax+2048]*/\
619
                "movq 4096(%3, "#index"), %%mm4        \n\t" /* uvbuf1[eax+2048]*/\
620 397c035e Michael Niedermayer
                "paddw %%mm2, %%mm3                \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
621
                "paddw %%mm5, %%mm4                \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
622 c1b0bfb4 Michael Niedermayer
                "psrlw $5, %%mm3                \n\t" /*FIXME might overflow*/\
623
                "psrlw $5, %%mm4                \n\t" /*FIXME might overflow*/\
624 e54d94ba Michael Niedermayer
                "psubw "U_OFFSET"("#c"), %%mm3        \n\t" /* (U-128)8*/\
625
                "psubw "V_OFFSET"("#c"), %%mm4        \n\t" /* (V-128)8*/\
626 497d4f99 Michael Niedermayer
                "movq %%mm3, %%mm2                \n\t" /* (U-128)8*/\
627
                "movq %%mm4, %%mm5                \n\t" /* (V-128)8*/\
628 e54d94ba Michael Niedermayer
                "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\
629
                "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\
630 497d4f99 Michael Niedermayer
        /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
631 e54d94ba Michael Niedermayer
                "movq (%0, "#index", 2), %%mm1        \n\t" /*buf0[eax]*/\
632
                "movq 8(%0, "#index", 2), %%mm7        \n\t" /*buf0[eax+4] (next 4 samples)*/\
633 d604bab9 Michael Niedermayer
                "psraw $4, %%mm1                \n\t" /* buf0[eax] >>4*/\
634
                "psraw $4, %%mm7                \n\t" /* buf0[eax+4] >>4*/\
635 e54d94ba Michael Niedermayer
                "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\
636
                "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\
637
                "psubw "Y_OFFSET"("#c"), %%mm1        \n\t" /* 8(Y-16)*/\
638
                "psubw "Y_OFFSET"("#c"), %%mm7        \n\t" /* 8(Y-16)*/\
639
                "pmulhw "Y_COEFF"("#c"), %%mm1        \n\t"\
640
                "pmulhw "Y_COEFF"("#c"), %%mm7        \n\t"\
641 d604bab9 Michael Niedermayer
        /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
642
                "paddw %%mm3, %%mm4                \n\t"\
643
                "movq %%mm2, %%mm0                \n\t"\
644
                "movq %%mm5, %%mm6                \n\t"\
645
                "movq %%mm4, %%mm3                \n\t"\
646
                "punpcklwd %%mm2, %%mm2                \n\t"\
647
                "punpcklwd %%mm5, %%mm5                \n\t"\
648
                "punpcklwd %%mm4, %%mm4                \n\t"\
649
                "paddw %%mm1, %%mm2                \n\t"\
650
                "paddw %%mm1, %%mm5                \n\t"\
651
                "paddw %%mm1, %%mm4                \n\t"\
652
                "punpckhwd %%mm0, %%mm0                \n\t"\
653
                "punpckhwd %%mm6, %%mm6                \n\t"\
654
                "punpckhwd %%mm3, %%mm3                \n\t"\
655
                "paddw %%mm7, %%mm0                \n\t"\
656
                "paddw %%mm7, %%mm6                \n\t"\
657
                "paddw %%mm7, %%mm3                \n\t"\
658
                /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = added to Y1, 2 = added to Y2) */\
659
                "packuswb %%mm0, %%mm2                \n\t"\
660
                "packuswb %%mm6, %%mm5                \n\t"\
661
                "packuswb %%mm3, %%mm4                \n\t"\
662
                "pxor %%mm7, %%mm7                \n\t"
663 6e1c66bc Aurelien Jacobs
/* Wrapper: lets `index` and `c` be macro-expanded before the REAL_ macro
 * stringizes them. */
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
664 d604bab9 Michael Niedermayer
665 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: stores 8 pixels as 32-bit words and closes the loop
 * opened at label "1:" by a preceding fragment.
 *
 * Expected register contents on entry: mm2 = B, mm4 = G, mm5 = R (8 bytes
 * each), mm7 = 0.
 *
 * dst   - stringized base register of the output.
 * dstw  - stringized operand compared against index to end the loop.
 * index - stringized counter register; the four quadwords are written with
 *         MOVNTQ at dst + index*4, then index is advanced by 8 and the loop
 *         repeats while index < dstw (unsigned compare, "jb").
 *
 * Clobbers mm0-mm3, mm5 and mm6.
 */
#define REAL_WRITEBGR32(dst, dstw, index) \
666 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
667
                        "movq %%mm2, %%mm1                \n\t" /* B */\
668
                        "movq %%mm5, %%mm6                \n\t" /* R */\
669
                        "punpcklbw %%mm4, %%mm2                \n\t" /* GBGBGBGB 0 */\
670
                        "punpcklbw %%mm7, %%mm5                \n\t" /* 0R0R0R0R 0 */\
671
                        "punpckhbw %%mm4, %%mm1                \n\t" /* GBGBGBGB 2 */\
672
                        "punpckhbw %%mm7, %%mm6                \n\t" /* 0R0R0R0R 2 */\
673
                        "movq %%mm2, %%mm0                \n\t" /* GBGBGBGB 0 */\
674
                        "movq %%mm1, %%mm3                \n\t" /* GBGBGBGB 2 */\
675
                        "punpcklwd %%mm5, %%mm0                \n\t" /* 0RGB0RGB 0 */\
676
                        "punpckhwd %%mm5, %%mm2                \n\t" /* 0RGB0RGB 1 */\
677
                        "punpcklwd %%mm6, %%mm1                \n\t" /* 0RGB0RGB 2 */\
678
                        "punpckhwd %%mm6, %%mm3                \n\t" /* 0RGB0RGB 3 */\
679
\
680 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm0, (dst, index, 4))\
681
                        MOVNTQ(%%mm2, 8(dst, index, 4))\
682
                        MOVNTQ(%%mm1, 16(dst, index, 4))\
683
                        MOVNTQ(%%mm3, 24(dst, index, 4))\
684 d604bab9 Michael Niedermayer
\
685 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
686
                        "cmp "#dstw", "#index"                \n\t"\
687 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
688 6e1c66bc Aurelien Jacobs
/* Wrapper: lets the arguments be macro-expanded before the REAL_ macro
 * stringizes them. */
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
689 d604bab9 Michael Niedermayer
690 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: stores 8 pixels as 16-bit words and closes the loop
 * opened at label "1:" by a preceding fragment.
 *
 * Expected register contents on entry: mm2 = B, mm4 = G, mm5 = R (8 bytes
 * each), mm7 = 0 (mm7 is used as the zero source of the byte unpacks).
 *
 * The channels are masked with the constants bF8 / bFC / bF8 (defined
 * elsewhere; NOTE(review): presumably 0xF8 / 0xFC replicated in every byte,
 * which would give a 5-6-5 layout -- confirm against their definition).
 * B is shifted right by 3, R is placed in the high byte of each word, and G
 * is widened to words and shifted left by 3 before everything is OR-ed.
 *
 * dst   - stringized base register of the output.
 * dstw  - stringized operand compared against index to end the loop.
 * index - stringized counter register; two quadwords are written with MOVNTQ
 *         at dst + index*2, then index is advanced by 8 and the loop repeats
 *         while index < dstw (unsigned compare, "jb").
 */
#define REAL_WRITEBGR16(dst, dstw, index) \
691 9b464428 Felix Bünemann
                        "pand "MANGLE(bF8)", %%mm2        \n\t" /* B */\
692
                        "pand "MANGLE(bFC)", %%mm4        \n\t" /* G */\
693
                        "pand "MANGLE(bF8)", %%mm5        \n\t" /* R */\
694 f62255fb Michael Niedermayer
                        "psrlq $3, %%mm2                \n\t"\
695 d604bab9 Michael Niedermayer
\
696 f62255fb Michael Niedermayer
                        "movq %%mm2, %%mm1                \n\t"\
697
                        "movq %%mm4, %%mm3                \n\t"\
698 d604bab9 Michael Niedermayer
\
699 f62255fb Michael Niedermayer
                        "punpcklbw %%mm7, %%mm3                \n\t"\
700
                        "punpcklbw %%mm5, %%mm2                \n\t"\
701
                        "punpckhbw %%mm7, %%mm4                \n\t"\
702
                        "punpckhbw %%mm5, %%mm1                \n\t"\
703 d604bab9 Michael Niedermayer
\
704 f62255fb Michael Niedermayer
                        "psllq $3, %%mm3                \n\t"\
705
                        "psllq $3, %%mm4                \n\t"\
706 d604bab9 Michael Niedermayer
\
707
                        "por %%mm3, %%mm2                \n\t"\
708
                        "por %%mm4, %%mm1                \n\t"\
709
\
710 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm2, (dst, index, 2))\
711
                        MOVNTQ(%%mm1, 8(dst, index, 2))\
712 d604bab9 Michael Niedermayer
\
713 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
714
                        "cmp "#dstw", "#index"                \n\t"\
715 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
716 6e1c66bc Aurelien Jacobs
/* Wrapper: lets the arguments be macro-expanded before the REAL_ macro
 * stringizes them. */
#define WRITEBGR16(dst, dstw, index)  REAL_WRITEBGR16(dst, dstw, index)
717 d604bab9 Michael Niedermayer
718 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm fragment: stores 8 pixels as 16-bit words and closes the loop
 * opened at label "1:" by a preceding fragment.
 *
 * Expected register contents on entry: mm2 = B, mm4 = G, mm5 = R (8 bytes
 * each), mm7 = 0 (mm7 is used as the zero source of the byte unpacks).
 *
 * All three channels are masked with the constant bF8 (defined elsewhere;
 * NOTE(review): presumably 0xF8 replicated in every byte, which would give a
 * 5-5-5 layout -- confirm against its definition).  B is shifted right by 3,
 * R is shifted right by 1 and placed in the high byte of each word, and G is
 * widened to words and shifted left by 2 before everything is OR-ed.
 *
 * dst   - stringized base register of the output.
 * dstw  - stringized operand compared against index to end the loop.
 * index - stringized counter register; two quadwords are written with MOVNTQ
 *         at dst + index*2, then index is advanced by 8 and the loop repeats
 *         while index < dstw (unsigned compare, "jb").
 */
#define REAL_WRITEBGR15(dst, dstw, index) \
719 9b464428 Felix Bünemann
                        "pand "MANGLE(bF8)", %%mm2        \n\t" /* B */\
720
                        "pand "MANGLE(bF8)", %%mm4        \n\t" /* G */\
721
                        "pand "MANGLE(bF8)", %%mm5        \n\t" /* R */\
722 f62255fb Michael Niedermayer
                        "psrlq $3, %%mm2                \n\t"\
723
                        "psrlq $1, %%mm5                \n\t"\
724 d604bab9 Michael Niedermayer
\
725 f62255fb Michael Niedermayer
                        "movq %%mm2, %%mm1                \n\t"\
726
                        "movq %%mm4, %%mm3                \n\t"\
727 d604bab9 Michael Niedermayer
\
728 f62255fb Michael Niedermayer
                        "punpcklbw %%mm7, %%mm3                \n\t"\
729
                        "punpcklbw %%mm5, %%mm2                \n\t"\
730
                        "punpckhbw %%mm7, %%mm4                \n\t"\
731
                        "punpckhbw %%mm5, %%mm1                \n\t"\
732 d604bab9 Michael Niedermayer
\
733 f62255fb Michael Niedermayer
                        "psllq $2, %%mm3                \n\t"\
734
                        "psllq $2, %%mm4                \n\t"\
735 d604bab9 Michael Niedermayer
\
736
                        "por %%mm3, %%mm2                \n\t"\
737
                        "por %%mm4, %%mm1                \n\t"\
738
\
739 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm2, (dst, index, 2))\
740
                        MOVNTQ(%%mm1, 8(dst, index, 2))\
741 d604bab9 Michael Niedermayer
\
742 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
743
                        "cmp "#dstw", "#index"                \n\t"\
744 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
745 6e1c66bc Aurelien Jacobs
/* Wrapper: lets the arguments be macro-expanded before the REAL_ macro
 * stringizes them. */
#define WRITEBGR15(dst, dstw, index)  REAL_WRITEBGR15(dst, dstw, index)
746 f62255fb Michael Niedermayer
747 6542b44e Michael Niedermayer
/*
 * Inline-asm fragment: stores 8 pixels as packed 24-bit values (24 bytes per
 * iteration) and closes the loop opened at label "1:" by a preceding
 * fragment.
 *
 * Expected register contents on entry: mm2 = B, mm4 = G, mm5 = R (8 bytes
 * each), mm7 = 0.
 *
 * The pixels are first expanded to four quadwords of two 0RGB dwords each
 * (same sequence as in WRITEBGR32); the padding bytes are then squeezed out
 * with shifts and the byte-mask constants bm00000111, bm11111000 and
 * bm00001111 (defined elsewhere).
 *
 * dst   - stringized output pointer register; three quadwords are written
 *         with MOVNTQ at dst, dst+8 and dst+16, and dst itself is advanced
 *         by 24 each iteration (index is not used for addressing).
 * dstw  - stringized operand compared against index to end the loop.
 * index - stringized counter register; advanced by 8, loop repeats while
 *         index < dstw (unsigned compare, "jb").
 */
#define WRITEBGR24OLD(dst, dstw, index) \
748 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
749
                        "movq %%mm2, %%mm1                \n\t" /* B */\
750
                        "movq %%mm5, %%mm6                \n\t" /* R */\
751
                        "punpcklbw %%mm4, %%mm2                \n\t" /* GBGBGBGB 0 */\
752
                        "punpcklbw %%mm7, %%mm5                \n\t" /* 0R0R0R0R 0 */\
753
                        "punpckhbw %%mm4, %%mm1                \n\t" /* GBGBGBGB 2 */\
754
                        "punpckhbw %%mm7, %%mm6                \n\t" /* 0R0R0R0R 2 */\
755
                        "movq %%mm2, %%mm0                \n\t" /* GBGBGBGB 0 */\
756
                        "movq %%mm1, %%mm3                \n\t" /* GBGBGBGB 2 */\
757 a525ce8d Michael Niedermayer
                        "punpcklwd %%mm5, %%mm0                \n\t" /* 0RGB0RGB 0 */\
758
                        "punpckhwd %%mm5, %%mm2                \n\t" /* 0RGB0RGB 1 */\
759
                        "punpcklwd %%mm6, %%mm1                \n\t" /* 0RGB0RGB 2 */\
760
                        "punpckhwd %%mm6, %%mm3                \n\t" /* 0RGB0RGB 3 */\
761 d604bab9 Michael Niedermayer
\
762
                        "movq %%mm0, %%mm4                \n\t" /* 0RGB0RGB 0 */\
763
                        "psrlq $8, %%mm0                \n\t" /* 00RGB0RG 0 */\
764 9b464428 Felix Bünemann
                        "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
765
                        "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
766 d604bab9 Michael Niedermayer
                        "por %%mm4, %%mm0                \n\t" /* 00RGBRGB 0 */\
767
                        "movq %%mm2, %%mm4                \n\t" /* 0RGB0RGB 1 */\
768
                        "psllq $48, %%mm2                \n\t" /* GB000000 1 */\
769
                        "por %%mm2, %%mm0                \n\t" /* GBRGBRGB 0 */\
770
\
771
                        "movq %%mm4, %%mm2                \n\t" /* 0RGB0RGB 1 */\
772
                        "psrld $16, %%mm4                \n\t" /* 000R000R 1 */\
773
                        "psrlq $24, %%mm2                \n\t" /* 0000RGB0 1.5 */\
774
                        "por %%mm4, %%mm2                \n\t" /* 000RRGBR 1 */\
775 9b464428 Felix Bünemann
                        "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
776 d604bab9 Michael Niedermayer
                        "movq %%mm1, %%mm4                \n\t" /* 0RGB0RGB 2 */\
777
                        "psrlq $8, %%mm1                \n\t" /* 00RGB0RG 2 */\
778 9b464428 Felix Bünemann
                        "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
779
                        "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
780 d604bab9 Michael Niedermayer
                        "por %%mm4, %%mm1                \n\t" /* 00RGBRGB 2 */\
781
                        "movq %%mm1, %%mm4                \n\t" /* 00RGBRGB 2 */\
782
                        "psllq $32, %%mm1                \n\t" /* BRGB0000 2 */\
783
                        "por %%mm1, %%mm2                \n\t" /* BRGBRGBR 1 */\
784
\
785
                        "psrlq $32, %%mm4                \n\t" /* 000000RG 2.5 */\
786
                        "movq %%mm3, %%mm5                \n\t" /* 0RGB0RGB 3 */\
787
                        "psrlq $8, %%mm3                \n\t" /* 00RGB0RG 3 */\
788 9b464428 Felix Bünemann
                        "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
789
                        "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
790 d604bab9 Michael Niedermayer
                        "por %%mm5, %%mm3                \n\t" /* 00RGBRGB 3 */\
791
                        "psllq $16, %%mm3                \n\t" /* RGBRGB00 3 */\
792
                        "por %%mm4, %%mm3                \n\t" /* RGBRGBRG 2.5 */\
793
\
794 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm0, (dst))\
795
                        MOVNTQ(%%mm2, 8(dst))\
796
                        MOVNTQ(%%mm3, 16(dst))\
797 6e1c66bc Aurelien Jacobs
                        "add $24, "#dst"                \n\t"\
798 d604bab9 Michael Niedermayer
\
799 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
800
                        "cmp "#dstw", "#index"                \n\t"\
801 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
802
803 6542b44e Michael Niedermayer
/*
 * Inline-asm fragment: stores 8 pixels as packed 24-bit values (24 bytes per
 * iteration) and closes the loop opened at label "1:" by a preceding
 * fragment.  Unlike WRITEBGR24OLD it uses no mask constants from memory: the
 * padding bytes are removed with psllq / punpckhdq / psrlq only.
 *
 * Expected register contents on entry: mm2 = B, mm4 = G, mm5 = R (8 bytes
 * each), mm7 = 0.  All of mm0-mm7 are overwritten; in particular mm7 is
 * reused as scratch and is no longer zero afterwards.
 *
 * dst   - stringized output pointer register; three quadwords are written
 *         with MOVNTQ at dst, dst+8 and dst+16, and dst itself is advanced
 *         by 24 each iteration (index is not used for addressing).
 * dstw  - stringized operand compared against index to end the loop.
 * index - stringized counter register; advanced by 8, loop repeats while
 *         index < dstw (unsigned compare, "jb").
 */
#define WRITEBGR24MMX(dst, dstw, index) \
804 99d2cb72 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
805
                        "movq %%mm2, %%mm1                \n\t" /* B */\
806
                        "movq %%mm5, %%mm6                \n\t" /* R */\
807
                        "punpcklbw %%mm4, %%mm2                \n\t" /* GBGBGBGB 0 */\
808
                        "punpcklbw %%mm7, %%mm5                \n\t" /* 0R0R0R0R 0 */\
809
                        "punpckhbw %%mm4, %%mm1                \n\t" /* GBGBGBGB 2 */\
810
                        "punpckhbw %%mm7, %%mm6                \n\t" /* 0R0R0R0R 2 */\
811
                        "movq %%mm2, %%mm0                \n\t" /* GBGBGBGB 0 */\
812
                        "movq %%mm1, %%mm3                \n\t" /* GBGBGBGB 2 */\
813
                        "punpcklwd %%mm5, %%mm0                \n\t" /* 0RGB0RGB 0 */\
814
                        "punpckhwd %%mm5, %%mm2                \n\t" /* 0RGB0RGB 1 */\
815
                        "punpcklwd %%mm6, %%mm1                \n\t" /* 0RGB0RGB 2 */\
816
                        "punpckhwd %%mm6, %%mm3                \n\t" /* 0RGB0RGB 3 */\
817
\
818
                        "movq %%mm0, %%mm4                \n\t" /* 0RGB0RGB 0 */\
819
                        "movq %%mm2, %%mm6                \n\t" /* 0RGB0RGB 1 */\
820
                        "movq %%mm1, %%mm5                \n\t" /* 0RGB0RGB 2 */\
821
                        "movq %%mm3, %%mm7                \n\t" /* 0RGB0RGB 3 */\
822
\
823
                        "psllq $40, %%mm0                \n\t" /* RGB00000 0 */\
824
                        "psllq $40, %%mm2                \n\t" /* RGB00000 1 */\
825
                        "psllq $40, %%mm1                \n\t" /* RGB00000 2 */\
826
                        "psllq $40, %%mm3                \n\t" /* RGB00000 3 */\
827
\
828
                        "punpckhdq %%mm4, %%mm0                \n\t" /* 0RGBRGB0 0 */\
829
                        "punpckhdq %%mm6, %%mm2                \n\t" /* 0RGBRGB0 1 */\
830
                        "punpckhdq %%mm5, %%mm1                \n\t" /* 0RGBRGB0 2 */\
831
                        "punpckhdq %%mm7, %%mm3                \n\t" /* 0RGBRGB0 3 */\
832
\
833
                        "psrlq $8, %%mm0                \n\t" /* 00RGBRGB 0 */\
834
                        "movq %%mm2, %%mm6                \n\t" /* 0RGBRGB0 1 */\
835
                        "psllq $40, %%mm2                \n\t" /* GB000000 1 */\
836
                        "por %%mm2, %%mm0                \n\t" /* GBRGBRGB 0 */\
837 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm0, (dst))\
838 99d2cb72 Michael Niedermayer
\
839
                        "psrlq $24, %%mm6                \n\t" /* 0000RGBR 1 */\
840
                        "movq %%mm1, %%mm5                \n\t" /* 0RGBRGB0 2 */\
841
                        "psllq $24, %%mm1                \n\t" /* BRGB0000 2 */\
842
                        "por %%mm1, %%mm6                \n\t" /* BRGBRGBR 1 */\
843 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm6, 8(dst))\
844 99d2cb72 Michael Niedermayer
\
845
                        "psrlq $40, %%mm5                \n\t" /* 000000RG 2 */\
846
                        "psllq $8, %%mm3                \n\t" /* RGBRGB00 3 */\
847
                        "por %%mm3, %%mm5                \n\t" /* RGBRGBRG 2 */\
848 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm5, 16(dst))\
849 99d2cb72 Michael Niedermayer
\
850 6e1c66bc Aurelien Jacobs
                        "add $24, "#dst"                \n\t"\
851 99d2cb72 Michael Niedermayer
\
852 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                        \n\t"\
853
                        "cmp "#dstw", "#index"                        \n\t"\
854 99d2cb72 Michael Niedermayer
                        " jb 1b                                \n\t"
855
856 6542b44e Michael Niedermayer
/*
 * WRITEBGR24MMX2(dst, dstw, index)
 *
 * Loop tail for the MMX2 24-bit store.  On entry mm2, mm4 and mm5 hold the
 * B, G and R bytes (eight values each, per the register diagrams below).
 * The values are rearranged with pshufw, selected with the M24A / M24B /
 * M24C masks and OR-ed together into three quadwords, which are written to
 * dst, 8(dst) and 16(dst) through MOVNTQ (24 bytes per iteration).
 * Afterwards dst is advanced by 24 and index by 8, and the code jumps back
 * to local label "1" while index is (unsigned) below dstw.
 *
 * Registers overwritten: mm0 (M24A), mm7 (M24C, so the "mm7=0" entry state
 * is not preserved), mm1, mm3, mm6; mm4 is shifted right by 8 in place.
 */
#define WRITEBGR24MMX2(dst, dstw, index) \
857 99d2cb72 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
858 9b464428 Felix Bünemann
                        "movq "MANGLE(M24A)", %%mm0        \n\t"\
859
                        "movq "MANGLE(M24C)", %%mm7        \n\t"\
860 99d2cb72 Michael Niedermayer
                        "pshufw $0x50, %%mm2, %%mm1        \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
861
                        "pshufw $0x50, %%mm4, %%mm3        \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
862
                        "pshufw $0x00, %%mm5, %%mm6        \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
863
\
864
                        "pand %%mm0, %%mm1                \n\t" /*    B2        B1       B0 */\
865
                        "pand %%mm0, %%mm3                \n\t" /*    G2        G1       G0 */\
866
                        "pand %%mm7, %%mm6                \n\t" /*       R1        R0       */\
867
\
868
                        "psllq $8, %%mm3                \n\t" /* G2        G1       G0    */\
869
                        "por %%mm1, %%mm6                \n\t"\
870
                        "por %%mm3, %%mm6                \n\t"\
871 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm6, (dst))\
872 99d2cb72 Michael Niedermayer
\
873
                        "psrlq $8, %%mm4                \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
874
                        "pshufw $0xA5, %%mm2, %%mm1        \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
875
                        "pshufw $0x55, %%mm4, %%mm3        \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
876
                        "pshufw $0xA5, %%mm5, %%mm6        \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
877
\
878 9b464428 Felix Bünemann
                        "pand "MANGLE(M24B)", %%mm1        \n\t" /* B5       B4        B3    */\
879 99d2cb72 Michael Niedermayer
                        "pand %%mm7, %%mm3                \n\t" /*       G4        G3       */\
880
                        "pand %%mm0, %%mm6                \n\t" /*    R4        R3       R2 */\
881
\
882
                        "por %%mm1, %%mm3                \n\t" /* B5    G4 B4     G3 B3    */\
883
                        "por %%mm3, %%mm6                \n\t"\
884 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm6, 8(dst))\
885 99d2cb72 Michael Niedermayer
\
886
                        "pshufw $0xFF, %%mm2, %%mm1        \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
887
                        "pshufw $0xFA, %%mm4, %%mm3        \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
888
                        "pshufw $0xFA, %%mm5, %%mm6        \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
889
\
890
                        "pand %%mm7, %%mm1                \n\t" /*       B7        B6       */\
891
                        "pand %%mm0, %%mm3                \n\t" /*    G7        G6       G5 */\
892 9b464428 Felix Bünemann
                        "pand "MANGLE(M24B)", %%mm6        \n\t" /* R7       R6        R5    */\
893 99d2cb72 Michael Niedermayer
\
894
                        "por %%mm1, %%mm3                \n\t"\
895
                        "por %%mm3, %%mm6                \n\t"\
896 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm6, 16(dst))\
897 99d2cb72 Michael Niedermayer
\
898 6e1c66bc Aurelien Jacobs
                        "add $24, "#dst"                \n\t"\
899 99d2cb72 Michael Niedermayer
\
900 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
901
                        "cmp "#dstw", "#index"                \n\t"\
902 99d2cb72 Michael Niedermayer
                        " jb 1b                                \n\t"
903
904
/*
 * Select the 24-bit writer for this instantiation of the template:
 * WRITEBGR24 maps to the pshufw-based WRITEBGR24MMX2 when HAVE_MMX2 is
 * defined and to WRITEBGR24MMX otherwise.  The #undef in each branch drops
 * any WRITEBGR24 definition that is already in effect before redefining it.
 */
#ifdef HAVE_MMX2
905 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
906 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
907 99d2cb72 Michael Niedermayer
#else
908 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
909 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
910 99d2cb72 Michael Niedermayer
#endif
911
912 6e1c66bc Aurelien Jacobs
/*
 * REAL_WRITEYUY2(dst, dstw, index)
 *
 * Loop tail that packs the 16-bit words in mm3, mm4 and in the mm1/mm7 pair
 * to unsigned bytes (packuswb), interleaves the bytes of mm3 with mm4 and
 * then the mm1/mm7 bytes with that result, and stores the resulting 16 bytes
 * at dst + 2*index with two MOVNTQ stores.  index is then advanced by 8 and
 * the code jumps back to local label "1" while index is (unsigned) below
 * dstw.  mm1, mm3, mm4 and mm7 are overwritten.
 *
 * NOTE(review): going by the macro name the stored byte order is YUY2
 * (Y U Y V), which would make mm1/mm7 the luma and mm3/mm4 the U/V chroma
 * -- confirm against the code that fills these registers.
 *
 * WRITEYUY2 is a plain forwarding wrapper; presumably it exists so that
 * arguments such as %%REGa are macro-expanded before REAL_WRITEYUY2
 * stringizes them with '#'.
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
913 25593e29 Michael Niedermayer
                        "packuswb %%mm3, %%mm3                \n\t"\
914
                        "packuswb %%mm4, %%mm4                \n\t"\
915
                        "packuswb %%mm7, %%mm1                \n\t"\
916
                        "punpcklbw %%mm4, %%mm3                \n\t"\
917
                        "movq %%mm1, %%mm7                \n\t"\
918
                        "punpcklbw %%mm3, %%mm1                \n\t"\
919
                        "punpckhbw %%mm3, %%mm7                \n\t"\
920
\
921 6542b44e Michael Niedermayer
                        MOVNTQ(%%mm1, (dst, index, 2))\
922
                        MOVNTQ(%%mm7, 8(dst, index, 2))\
923 25593e29 Michael Niedermayer
\
924 6e1c66bc Aurelien Jacobs
                        "add $8, "#index"                \n\t"\
925
                        "cmp "#dstw", "#index"                \n\t"\
926 25593e29 Michael Niedermayer
                        " jb 1b                                \n\t"
927 6e1c66bc Aurelien Jacobs
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
928 25593e29 Michael Niedermayer
929
930 77a49659 Michael Niedermayer
/**
 * Vertical scaler writing separate Y, U and V output lines.
 *
 * Dispatches to one of three implementations chosen at compile time:
 *  - HAVE_MMX:     the YSCALEYUV2YV12X / YSCALEYUV2YV12X_ACCURATE macros
 *                  (the _ACCURATE variant when c->flags has SWS_ACCURATE_RND);
 *  - HAVE_ALTIVEC: yuv2yuvX_altivec_real();
 *  - otherwise:    yuv2yuvXinC().
 *
 * @param c             scaler context; only read directly in the MMX path
 * @param lumFilter     luma filter coefficients (forwarded to the non-MMX paths)
 * @param lumSrc        luma source lines (forwarded to the non-MMX paths)
 * @param lumFilterSize number of luma filter taps (forwarded)
 * @param chrFilter     chroma filter coefficients (forwarded)
 * @param chrSrc        chroma source lines (forwarded)
 * @param chrFilterSize number of chroma filter taps (forwarded)
 * @param dest          luma output line
 * @param uDest         U output line; in the MMX path a NULL value skips both
 *                      chroma planes
 * @param vDest         V output line
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 *
 * NOTE(review): the MMX macros are defined outside this part of the file;
 * presumably they pick up the filter data through c rather than through the
 * lumFilter/chrFilter arguments -- confirm against their definitions.
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
931 c1b0bfb4 Michael Niedermayer
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
932 7f526efd Reimar Döffinger
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
933 38858470 Michael Niedermayer
{
934 c1b0bfb4 Michael Niedermayer
#ifdef HAVE_MMX
935 bca11e75 Michael Niedermayer
        if(c->flags & SWS_ACCURATE_RND){
936
                if(uDest){
937
                        /* U then V; the first macro argument is 0 for U and 4096 for V
                           (presumably the byte offset of the V samples -- TODO confirm) */
                        YSCALEYUV2YV12X_ACCURATE(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
938
                        YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
939
                }
940
941
                /* luma is always written */
                YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
942
        }else{
943
                if(uDest){
944
                        /* same U/V/luma sequence as above, using the non-ACCURATE macro */
                        YSCALEYUV2YV12X(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
945
                        YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
946
                }
947
948
                YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
949
        }
950 c1b0bfb4 Michael Niedermayer
#else
951 a2faa401 Romain Dolbeau
#ifdef HAVE_ALTIVEC
952
/* no MMX: hand everything to the AltiVec implementation */
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
953
                      chrFilter, chrSrc, chrFilterSize,
954
                      dest, uDest, vDest, dstW, chrDstW);
955
#else //HAVE_ALTIVEC
956 5859233b Michael Niedermayer
/* neither MMX nor AltiVec: plain C implementation */
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
957 e3d2500f Michael Niedermayer
            chrFilter, chrSrc, chrFilterSize,
958 5859233b Michael Niedermayer
            dest, uDest, vDest, dstW, chrDstW);
959 a2faa401 Romain Dolbeau
#endif //!HAVE_ALTIVEC
960 7630f2e0 Michael Niedermayer
#endif
961 c1b0bfb4 Michael Niedermayer
}
962 2add307d Michael Niedermayer
963 6118e52e Ville Syrjälä
/**
 * Vertical scaler for the NV12-style output path.
 *
 * There is no architecture-specific code here: every argument except c is
 * forwarded unchanged to yuv2nv12XinC().  c is unused.
 *
 * @param c             scaler context (unused)
 * @param lumFilter     luma filter coefficients
 * @param lumSrc        luma source lines
 * @param lumFilterSize number of luma filter taps
 * @param chrFilter     chroma filter coefficients
 * @param chrSrc        chroma source lines
 * @param chrFilterSize number of chroma filter taps
 * @param dest          luma output line
 * @param uDest         chroma output line
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 * @param dstFormat     destination pixel format, passed on to yuv2nv12XinC()
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
964
                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
965
                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
966
{
967
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
968
             chrFilter, chrSrc, chrFilterSize,
969
             dest, uDest, dstW, chrDstW, dstFormat);
970
}
971
972 c1b0bfb4 Michael Niedermayer
/**
 * Converts one line of 16-bit intermediate samples to 8-bit planar output
 * without vertical filtering.
 *
 * With HAVE_MMX each plane is processed by the YSCALEYUV2YV121 asm macro;
 * otherwise a C loop shifts every sample right by 7 and clamps it to 0..255.
 *
 * @param lumSrc   luma source samples
 * @param chrSrc   chroma source samples; U starts at index 0, V at index 2048
 * @param dest     luma output line
 * @param uDest    U output line, or NULL to skip both chroma planes
 * @param vDest    V output line (only used when uDest is non-NULL)
 * @param dstW     number of luma samples
 * @param chrDstW  number of chroma samples per plane
 */
static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
973 7f526efd Reimar Döffinger
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
974 c1b0bfb4 Michael Niedermayer
{
975
#ifdef HAVE_MMX
976
        /* Each asm statement receives the source and destination pointers
           advanced by the width, plus the negated width, and clobbers REG_a.
           NOTE(review): presumably YSCALEYUV2YV121 counts an index in REG_a
           up from -width to 0 -- confirm against the macro definition. */
        if(uDest != NULL)
977 38858470 Michael Niedermayer
        {
978 c1b0bfb4 Michael Niedermayer
                /* U plane */
                asm volatile(
979
                                YSCALEYUV2YV121
980 e616aa93 Michael Niedermayer
                                :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
981 7f526efd Reimar Döffinger
                                "g" (-chrDstW)
982 6e1c66bc Aurelien Jacobs
                                : "%"REG_a
983 c1b0bfb4 Michael Niedermayer
                        );
984
985
                /* V plane: its samples start 2048 elements after the U samples */
                asm volatile(
986
                                YSCALEYUV2YV121
987 e616aa93 Michael Niedermayer
                                :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
988 7f526efd Reimar Döffinger
                                "g" (-chrDstW)
989 6e1c66bc Aurelien Jacobs
                                : "%"REG_a
990 c1b0bfb4 Michael Niedermayer
                        );
991 38858470 Michael Niedermayer
        }
992
993 c1b0bfb4 Michael Niedermayer
        /* luma plane, always written */
        asm volatile(
994
                YSCALEYUV2YV121
995
                :: "r" (lumSrc + dstW), "r" (dest + dstW),
996 7f526efd Reimar Döffinger
                "g" (-dstW)
997 6e1c66bc Aurelien Jacobs
                : "%"REG_a
998 c1b0bfb4 Michael Niedermayer
        );
999
#else
1000
        int i;
1001
        for(i=0; i<dstW; i++)
1002 38858470 Michael Niedermayer
        {
1003 c1b0bfb4 Michael Niedermayer
                int val= lumSrc[i]>>7;
1004 44c1035c Michael Niedermayer
                
1005
                /* Cheap out-of-range test: bit 8 is set for any value outside
                   0..255 that fits in 9 signed bits.  An int16_t shifted right
                   by 7 lies in -256..255 (assuming an arithmetic shift), so in
                   practice only the negative case can trigger here. */
                if(val&256){
1006
                        if(val<0) val=0;
1007
                        else      val=255;
1008
                }
1009 c1b0bfb4 Michael Niedermayer
1010 44c1035c Michael Niedermayer
                dest[i]= val;
1011 c1b0bfb4 Michael Niedermayer
        }
1012
1013
        if(uDest != NULL)
1014 e616aa93 Michael Niedermayer
                for(i=0; i<chrDstW; i++)
1015 38858470 Michael Niedermayer
                {
1016 c1b0bfb4 Michael Niedermayer
                        int u=chrSrc[i]>>7;
1017
                        int v=chrSrc[i + 2048]>>7;
1018
1019 44c1035c Michael Niedermayer
                        /* one combined bit-8 test for both samples; the exact
                           clamping is only done when either of them trips it */
                        if((u|v)&256){
1020
                                if(u<0)         u=0;
1021
                                else if (u>255) u=255;
1022
                                if(v<0)         v=0;
1023
                                else if (v>255) v=255;
1024
                        }
1025
1026
                        uDest[i]= u;
1027
                        vDest[i]= v;
1028 38858470 Michael Niedermayer
                }
1029 c1b0bfb4 Michael Niedermayer
#endif
1030 38858470 Michael Niedermayer
}
1031
1032 c1b0bfb4 Michael Niedermayer
1033 d604bab9 Michael Niedermayer
/**
 * Vertical scaler producing one line of packed output.
 *
 * With HAVE_MMX, the destination formats RGB32, BGR24, BGR555, BGR565 and
 * YUYV422 are handled by inline asm built from the YSCALEYUV2PACKEDX* and
 * WRITE* macros, using the _ACCURATE variant when c->flags has
 * SWS_ACCURATE_RND; each of those cases returns directly.  Every other
 * format (and every format when HAVE_MMX is not defined) reaches the code
 * after the MMX section: altivec_yuv2packedX() for the formats listed there
 * when HAVE_ALTIVEC is defined, yuv2packedXinC() otherwise.
 *
 * @param c             scaler context (flags, dstFormat, dither data)
 * @param lumFilter     luma filter coefficients (forwarded to the fallbacks)
 * @param lumSrc        luma source lines (forwarded to the fallbacks)
 * @param lumFilterSize number of luma filter taps (forwarded)
 * @param chrFilter     chroma filter coefficients (forwarded)
 * @param chrSrc        chroma source lines (forwarded)
 * @param chrFilterSize number of chroma filter taps (forwarded)
 * @param dest          output line
 * @param dstW          output width
 * @param dstY          output line number (only forwarded to the fallbacks)
 */
1036 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1037 c1b0bfb4 Michael Niedermayer
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1038 065ee1ec Reimar Döffinger
                            uint8_t *dest, long dstW, long dstY)
1039 c1b0bfb4 Michael Niedermayer
{
1040 bca11e75 Michael Niedermayer
#ifdef HAVE_MMX
1041 f8d61128 Diego Biurrun
    /* dummy only fills asm operand slots %1..%3 so that dest is %4 and dstW
       is %5 (see the explicit operand lists in the BGR24 cases) */
    long dummy=0;
1042 bca11e75 Michael Niedermayer
    if(c->flags & SWS_ACCURATE_RND){
1043
                switch(c->dstFormat){
1044 e9e12f0e Luca Abeni
                case PIX_FMT_RGB32:
1045 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_ACCURATE
1046
                                YSCALEYUV2RGBX
1047 bca11e75 Michael Niedermayer
                                WRITEBGR32(%4, %5, %%REGa)
1048
1049 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1050 bca11e75 Michael Niedermayer
                        return;
1051 e9e12f0e Luca Abeni
                case PIX_FMT_BGR24:
1052 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_ACCURATE
1053
                                YSCALEYUV2RGBX
1054 83c89c78 Jason Tackaberry
                                /* REG_c = dest + 3*REG_a: byte address of the current 24-bit pixel */
                                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1055
                                "add %4, %%"REG_c"                        \n\t"
1056
                                WRITEBGR24(%%REGc, %5, %%REGa)
1057 bca11e75 Michael Niedermayer
1058 8422aa88 Michael Niedermayer
1059 bca11e75 Michael Niedermayer
                        /* operand list written out here instead of using
                           YSCALEYUV2PACKEDX_END; note the extra REG_c clobber */
                        :: "r" (&c->redDither), 
1060
                           "m" (dummy), "m" (dummy), "m" (dummy),
1061
                           "r" (dest), "m" (dstW)
1062 83c89c78 Jason Tackaberry
                        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1063 bca11e75 Michael Niedermayer
                        );
1064
                        return;
1065 e9e12f0e Luca Abeni
                case PIX_FMT_BGR555:
1066 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_ACCURATE
1067
                                YSCALEYUV2RGBX
1068 bca11e75 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1069
#ifdef DITHER1XBPP
1070
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1071
                                "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1072
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1073
#endif
1074
1075
                                WRITEBGR15(%4, %5, %%REGa)
1076 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1077 bca11e75 Michael Niedermayer
                        return;
1078 e9e12f0e Luca Abeni
                case PIX_FMT_BGR565:
1079 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_ACCURATE
1080
                                YSCALEYUV2RGBX
1081 bca11e75 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1082
#ifdef DITHER1XBPP
1083
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1084
                                "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1085
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1086
#endif
1087
1088
                                WRITEBGR16(%4, %5, %%REGa)
1089 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1090 bca11e75 Michael Niedermayer
                        return;
1091 e9e12f0e Luca Abeni
                case PIX_FMT_YUYV422:
1092 bca11e75 Michael Niedermayer
                                YSCALEYUV2PACKEDX_ACCURATE
1093
                /* NOTE(review): no RGB conversion in this case; mm3, mm4, mm1 and
                   mm7 are shifted right by 3 and consumed by WRITEYUY2 --
                   presumably chroma (mm3/mm4) and luma (mm1/mm7), confirm
                   against YSCALEYUV2PACKEDX_ACCURATE */
1094
1095
                                "psraw $3, %%mm3                \n\t"
1096
                                "psraw $3, %%mm4                \n\t"
1097
                                "psraw $3, %%mm1                \n\t"
1098
                                "psraw $3, %%mm7                \n\t"
1099
                                WRITEYUY2(%4, %5, %%REGa)
1100 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1101 bca11e75 Michael Niedermayer
                        return;
1102
                }
1103
    }else{
1104 cf7d1c1a Michael Niedermayer
        /* same set of cases as above, built on the non-ACCURATE YSCALEYUV2PACKEDX */
        switch(c->dstFormat)
1105 c1b0bfb4 Michael Niedermayer
        {
1106 e9e12f0e Luca Abeni
        case PIX_FMT_RGB32:
1107 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX
1108
                                YSCALEYUV2RGBX
1109 6e1c66bc Aurelien Jacobs
                                WRITEBGR32(%4, %5, %%REGa)
1110 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1111 bca11e75 Michael Niedermayer
                return;
1112 e9e12f0e Luca Abeni
        case PIX_FMT_BGR24:
1113 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX
1114
                                YSCALEYUV2RGBX
1115 83c89c78 Jason Tackaberry
                                /* REG_c = dest + 3*REG_a: byte address of the current 24-bit pixel */
                                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1116
                                "add %4, %%"REG_c"                        \n\t"
1117
                                WRITEBGR24(%%REGc, %5, %%REGa)
1118 c1b0bfb4 Michael Niedermayer
1119 77a49659 Michael Niedermayer
                        /* operand list written out here instead of using
                           YSCALEYUV2PACKEDX_END; note the extra REG_c clobber */
                        :: "r" (&c->redDither), 
1120
                           "m" (dummy), "m" (dummy), "m" (dummy),
1121
                           "r" (dest), "m" (dstW)
1122 83c89c78 Jason Tackaberry
                        : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1123 c1b0bfb4 Michael Niedermayer
                        );
1124 bca11e75 Michael Niedermayer
                return;
1125 e9e12f0e Luca Abeni
        case PIX_FMT_BGR555:
1126 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX
1127
                                YSCALEYUV2RGBX
1128 c1b0bfb4 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1129
#ifdef DITHER1XBPP
1130 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1131
                                "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1132
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1133 c1b0bfb4 Michael Niedermayer
#endif
1134
1135 6e1c66bc Aurelien Jacobs
                                WRITEBGR15(%4, %5, %%REGa)
1136 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1137 bca11e75 Michael Niedermayer
                return;
1138 e9e12f0e Luca Abeni
        case PIX_FMT_BGR565:
1139 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX
1140
                                YSCALEYUV2RGBX
1141 c1b0bfb4 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1142
#ifdef DITHER1XBPP
1143 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1144
                                "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1145
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1146 c1b0bfb4 Michael Niedermayer
#endif
1147
1148 6e1c66bc Aurelien Jacobs
                                WRITEBGR16(%4, %5, %%REGa)
1149 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1150 bca11e75 Michael Niedermayer
                return;
1151 e9e12f0e Luca Abeni
        case PIX_FMT_YUYV422:
1152 25593e29 Michael Niedermayer
                                YSCALEYUV2PACKEDX
1153
                /* NOTE(review): no RGB conversion in this case; mm3, mm4, mm1 and
                   mm7 are shifted right by 3 and consumed by WRITEYUY2 --
                   presumably chroma (mm3/mm4) and luma (mm1/mm7), confirm
                   against YSCALEYUV2PACKEDX */
1154
1155
                                "psraw $3, %%mm3                \n\t"
1156
                                "psraw $3, %%mm4                \n\t"
1157
                                "psraw $3, %%mm1                \n\t"
1158
                                "psraw $3, %%mm7                \n\t"
1159 6e1c66bc Aurelien Jacobs
                                WRITEYUY2(%4, %5, %%REGa)
1160 8422aa88 Michael Niedermayer
                                YSCALEYUV2PACKEDX_END
1161 bca11e75 Michael Niedermayer
                return;
1162
        }
1163
    }
1164 c1b0bfb4 Michael Niedermayer
    /* destination formats without a case above fall through to the generic
       code below */
#endif
1165 a31de956 Michael Niedermayer
#ifdef HAVE_ALTIVEC
1166 b9a6fae9 Alan Curry
                /* The following list of supported dstFormat values should
1167
                   match what's found in the body of altivec_yuv2packedX() */
1168 e9e12f0e Luca Abeni
                if(c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1169
                   c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1170
                   c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)
1171 b9a6fae9 Alan Curry
                        altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
1172
                                    chrFilter, chrSrc, chrFilterSize,
1173
                                    dest, dstW, dstY);
1174
                else
1175 a31de956 Michael Niedermayer
#endif
1176 b9a6fae9 Alan Curry
                        /* generic C implementation; with HAVE_ALTIVEC this is the
                           else-branch of the format test above */
                        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1177
                                    chrFilter, chrSrc, chrFilterSize,
1178
                                    dest, dstW, dstY);
1179 c1b0bfb4 Michael Niedermayer
}
1180
1181
/**
1182
 * vertical bilinear scale YV12 to RGB
1183
 */
1184 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
1185 cf7d1c1a Michael Niedermayer
                            uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1186 d604bab9 Michael Niedermayer
{
1187
        int yalpha1=yalpha^4095;
1188
        int uvalpha1=uvalpha^4095;
1189 cf7d1c1a Michael Niedermayer
        int i;
1190 d604bab9 Michael Niedermayer
1191 77a416e8 Gabucino
#if 0 //isn't used
1192 1e621b18 Michael Niedermayer
        if(flags&SWS_FULL_CHR_H_INT)
1193 d604bab9 Michael Niedermayer
        {
1194 cf7d1c1a Michael Niedermayer
                switch(dstFormat)
1195 d604bab9 Michael Niedermayer
                {
1196 cf7d1c1a Michael Niedermayer
#ifdef HAVE_MMX
1197 e9e12f0e Luca Abeni
                case PIX_FMT_RGB32:
1198 d604bab9 Michael Niedermayer
                        asm volatile(
1199

1200

1201
FULL_YSCALEYUV2RGB
1202
                        "punpcklbw %%mm1, %%mm3                \n\t" // BGBGBGBG
1203
                        "punpcklbw %%mm7, %%mm0                \n\t" // R0R0R0R0
1204

1205
                        "movq %%mm3, %%mm1                \n\t"
1206
                        "punpcklwd %%mm0, %%mm3                \n\t" // BGR0BGR0
1207
                        "punpckhwd %%mm0, %%mm1                \n\t" // BGR0BGR0
1208

1209 6e1c66bc Aurelien Jacobs
                        MOVNTQ(%%mm3, (%4, %%REGa, 4))
1210
                        MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
1211 d604bab9 Michael Niedermayer

1212 6e1c66bc Aurelien Jacobs
                        "add $4, %%"REG_a"                \n\t"
1213
                        "cmp %5, %%"REG_a"                \n\t"
1214 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
1215

1216

1217 6e1c66bc Aurelien Jacobs
                        :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
1218 d604bab9 Michael Niedermayer
                        "m" (yalpha1), "m" (uvalpha1)
1219 6e1c66bc Aurelien Jacobs
                        : "%"REG_a
1220 d604bab9 Michael Niedermayer
                        );
1221 cf7d1c1a Michael Niedermayer
                        break;
1222 e9e12f0e Luca Abeni
                case PIX_FMT_BGR24:
1223 d604bab9 Michael Niedermayer
                        asm volatile(
1224

1225
FULL_YSCALEYUV2RGB
1226

1227
                                                                // lsb ... msb
1228
                        "punpcklbw %%mm1, %%mm3                \n\t" // BGBGBGBG
1229
                        "punpcklbw %%mm7, %%mm0                \n\t" // R0R0R0R0
1230

1231
                        "movq %%mm3, %%mm1                \n\t"
1232
                        "punpcklwd %%mm0, %%mm3                \n\t" // BGR0BGR0
1233
                        "punpckhwd %%mm0, %%mm1                \n\t" // BGR0BGR0
1234

1235
                        "movq %%mm3, %%mm2                \n\t" // BGR0BGR0
1236
                        "psrlq $8, %%mm3                \n\t" // GR0BGR00
1237 9b464428 Felix Bünemann
                        "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000
1238
                        "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00
1239 d604bab9 Michael Niedermayer
                        "por %%mm2, %%mm3                \n\t" // BGRBGR00
1240
                        "movq %%mm1, %%mm2                \n\t"
1241
                        "psllq $48, %%mm1                \n\t" // 000000BG
1242
                        "por %%mm1, %%mm3                \n\t" // BGRBGRBG
1243

1244
                        "movq %%mm2, %%mm1                \n\t" // BGR0BGR0
1245
                        "psrld $16, %%mm2                \n\t" // R000R000
1246
                        "psrlq $24, %%mm1                \n\t" // 0BGR0000
1247
                        "por %%mm2, %%mm1                \n\t" // RBGRR000
1248

1249 6e1c66bc Aurelien Jacobs
                        "mov %4, %%"REG_b"                \n\t"
1250
                        "add %%"REG_a", %%"REG_b"        \n\t"
1251 d604bab9 Michael Niedermayer

1252
#ifdef HAVE_MMX2
1253
                        //FIXME Alignment
1254 6e1c66bc Aurelien Jacobs
                        "movntq %%mm3, (%%"REG_b", %%"REG_a", 2)\n\t"
1255
                        "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)\n\t"
1256 d604bab9 Michael Niedermayer
#else
1257 6e1c66bc Aurelien Jacobs
                        "movd %%mm3, (%%"REG_b", %%"REG_a", 2)        \n\t"
1258 d604bab9 Michael Niedermayer
                        "psrlq $32, %%mm3                \n\t"
1259 6e1c66bc Aurelien Jacobs
                        "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2)        \n\t"
1260
                        "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2)        \n\t"
1261 d604bab9 Michael Niedermayer
#endif
1262 6e1c66bc Aurelien Jacobs
                        "add $4, %%"REG_a"                \n\t"
1263
                        "cmp %5, %%"REG_a"                \n\t"
1264 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
1265
1266 d1fac6cf Michael Niedermayer
                        :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
1267 d604bab9 Michael Niedermayer
                        "m" (yalpha1), "m" (uvalpha1)
1268 6e1c66bc Aurelien Jacobs
                        : "%"REG_a, "%"REG_b
1269 d604bab9 Michael Niedermayer
                        );
1270 cf7d1c1a Michael Niedermayer
                        break;
1271 e9e12f0e Luca Abeni
                case PIX_FMT_BGR555:
1272 d604bab9 Michael Niedermayer
                        asm volatile(
1273
1274
FULL_YSCALEYUV2RGB
1275
#ifdef DITHER1XBPP
1276 9b464428 Felix Bünemann
                        "paddusb "MANGLE(g5Dither)", %%mm1\n\t"
1277
                        "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
1278
                        "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
1279 d604bab9 Michael Niedermayer
#endif
1280
                        "punpcklbw %%mm7, %%mm1                \n\t" // 0G0G0G0G
1281
                        "punpcklbw %%mm7, %%mm3                \n\t" // 0B0B0B0B
1282
                        "punpcklbw %%mm7, %%mm0                \n\t" // 0R0R0R0R
1283
1284
                        "psrlw $3, %%mm3                \n\t"
1285
                        "psllw $2, %%mm1                \n\t"
1286
                        "psllw $7, %%mm0                \n\t"
1287 9b464428 Felix Bünemann
                        "pand "MANGLE(g15Mask)", %%mm1        \n\t"
1288
                        "pand "MANGLE(r15Mask)", %%mm0        \n\t"
1289 d604bab9 Michael Niedermayer
1290
                        "por %%mm3, %%mm1                \n\t"
1291
                        "por %%mm1, %%mm0                \n\t"
1292
1293 6e1c66bc Aurelien Jacobs
                        MOVNTQ(%%mm0, (%4, %%REGa, 2))
1294 d604bab9 Michael Niedermayer
1295 6e1c66bc Aurelien Jacobs
                        "add $4, %%"REG_a"                \n\t"
1296
                        "cmp %5, %%"REG_a"                \n\t"
1297 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
1298
1299 d1fac6cf Michael Niedermayer
                        :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1300 d604bab9 Michael Niedermayer
                        "m" (yalpha1), "m" (uvalpha1)
1301 6e1c66bc Aurelien Jacobs
                        : "%"REG_a
1302 d604bab9 Michael Niedermayer
                        );
1303 cf7d1c1a Michael Niedermayer
                        break;
1304 e9e12f0e Luca Abeni
                case PIX_FMT_BGR565:
1305 d604bab9 Michael Niedermayer
                        asm volatile(
1306
1307
FULL_YSCALEYUV2RGB
1308
#ifdef DITHER1XBPP
1309 9b464428 Felix Bünemann
                        "paddusb "MANGLE(g6Dither)", %%mm1\n\t"
1310
                        "paddusb "MANGLE(r5Dither)", %%mm0\n\t"
1311
                        "paddusb "MANGLE(b5Dither)", %%mm3\n\t"
1312 d604bab9 Michael Niedermayer
#endif
1313
                        "punpcklbw %%mm7, %%mm1                \n\t" // 0G0G0G0G
1314
                        "punpcklbw %%mm7, %%mm3                \n\t" // 0B0B0B0B
1315
                        "punpcklbw %%mm7, %%mm0                \n\t" // 0R0R0R0R
1316
1317
                        "psrlw $3, %%mm3                \n\t"
1318
                        "psllw $3, %%mm1                \n\t"
1319
                        "psllw $8, %%mm0                \n\t"
1320 9b464428 Felix Bünemann
                        "pand "MANGLE(g16Mask)", %%mm1        \n\t"
1321
                        "pand "MANGLE(r16Mask)", %%mm0        \n\t"
1322 d604bab9 Michael Niedermayer
1323
                        "por %%mm3, %%mm1                \n\t"
1324
                        "por %%mm1, %%mm0                \n\t"
1325
1326 6e1c66bc Aurelien Jacobs
                        MOVNTQ(%%mm0, (%4, %%REGa, 2))
1327 d604bab9 Michael Niedermayer
1328 6e1c66bc Aurelien Jacobs
                        "add $4, %%"REG_a"                \n\t"
1329
                        "cmp %5, %%"REG_a"                \n\t"
1330 d604bab9 Michael Niedermayer
                        " jb 1b                                \n\t"
1331
1332 d1fac6cf Michael Niedermayer
                        :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1333 d604bab9 Michael Niedermayer
                        "m" (yalpha1), "m" (uvalpha1)
1334 6e1c66bc Aurelien Jacobs
                        : "%"REG_a
1335 d604bab9 Michael Niedermayer
                        );
1336 cf7d1c1a Michael Niedermayer
                break;
1337
#endif
1338 e9e12f0e Luca Abeni
                case PIX_FMT_BGR32:
1339 cf7d1c1a Michael Niedermayer
#ifndef HAVE_MMX
1340 e9e12f0e Luca Abeni
                case PIX_FMT_RGB32:
1341 cf7d1c1a Michael Niedermayer
#endif
1342 e9e12f0e Luca Abeni
                if(dstFormat==PIX_FMT_RGB32)
1343 28bf81c9 Michael Niedermayer
                {
1344 2ba1bff0 Michael Niedermayer
                        int i;
1345 df3c183a Michael Niedermayer
#ifdef WORDS_BIGENDIAN
1346
                        dest++;
1347
#endif
1348 28bf81c9 Michael Niedermayer
                        for(i=0;i<dstW;i++){
1349
                                // vertical linear interpolation && yuv2rgb in a single step:
1350
                                int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1351
                                int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1352
                                int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1353
                                dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1354
                                dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1355
                                dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1356
                                dest+= 4;
1357
                        }
1358
                }
1359 e9e12f0e Luca Abeni
                else if(dstFormat==PIX_FMT_BGR24)
1360 d604bab9 Michael Niedermayer
                {
1361 96034638 Michael Niedermayer
                        int i;
1362 d1fac6cf Michael Niedermayer
                        for(i=0;i<dstW;i++){
1363 d604bab9 Michael Niedermayer
                                // vertical linear interpolation && yuv2rgb in a single step:
1364
                                int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1365
                                int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1366
                                int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1367 390b20a6 Michael Niedermayer
                                dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1368
                                dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1369
                                dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1370 28bf81c9 Michael Niedermayer
                                dest+= 3;
1371 d604bab9 Michael Niedermayer
                        }
1372
                }
1373 e9e12f0e Luca Abeni
                else if(dstFormat==PIX_FMT_BGR565)
1374 d604bab9 Michael Niedermayer
                {
1375 96034638 Michael Niedermayer
                        int i;
1376 d1fac6cf Michael Niedermayer
                        for(i=0;i<dstW;i++){
1377 d604bab9 Michael Niedermayer
                                // vertical linear interpolation && yuv2rgb in a single step:
1378
                                int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1379
                                int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1380
                                int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1381
1382 d022ce5c Michael Niedermayer
                                ((uint16_t*)dest)[i] =
1383 b18ea156 Michael Niedermayer
                                        clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
1384
                                        clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1385
                                        clip_table16r[(Y + yuvtab_3343[V]) >>13];
1386 d604bab9 Michael Niedermayer
                        }
1387
                }
1388 e9e12f0e Luca Abeni
                else if(dstFormat==PIX_FMT_BGR555)
1389 d604bab9 Michael Niedermayer
                {
1390 96034638 Michael Niedermayer
                        int i;
1391 d1fac6cf Michael Niedermayer
                        for(i=0;i<dstW;i++){
1392 d604bab9 Michael Niedermayer
                                // vertical linear interpolation && yuv2rgb in a single step:
1393
                                int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1394
                                int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1395
                                int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1396
1397 d022ce5c Michael Niedermayer
                                ((uint16_t*)dest)[i] =
1398 b18ea156 Michael Niedermayer
                                        clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
1399
                                        clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1400
                                        clip_table15r[(Y + yuvtab_3343[V]) >>13];
1401 d604bab9 Michael Niedermayer
                        }
1402
                }
1403
        }//FULL_UV_IPOL
1404
        else
1405
        {
1406 cf7d1c1a Michael Niedermayer
#endif // if 0
1407 d604bab9 Michael Niedermayer
#ifdef HAVE_MMX
1408 cf7d1c1a Michael Niedermayer
        switch(c->dstFormat)
1409
        {
1410 77a416e8 Gabucino
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1411 e9e12f0e Luca Abeni
        case PIX_FMT_RGB32:
1412 d604bab9 Michael Niedermayer
                        asm volatile(
1413 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1414
                                "mov %4, %%"REG_b"                        \n\t"
1415
                                "push %%"REG_BP"                        \n\t"
1416
                                YSCALEYUV2RGB(%%REGBP, %5)
1417
                                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1418
                                "pop %%"REG_BP"                         \n\t"
1419
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1420
1421
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1422
                        "a" (&c->redDither)
1423 d604bab9 Michael Niedermayer
                        );
1424 cf7d1c1a Michael Niedermayer
                        return;
1425 e9e12f0e Luca Abeni
        case PIX_FMT_BGR24:
1426 d604bab9 Michael Niedermayer
                        asm volatile(
1427 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1428
                                "mov %4, %%"REG_b"                        \n\t"
1429
                                "push %%"REG_BP"                        \n\t"
1430
                                YSCALEYUV2RGB(%%REGBP, %5)
1431
                                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1432
                                "pop %%"REG_BP"                         \n\t"
1433
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1434
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1435
                        "a" (&c->redDither)
1436 d604bab9 Michael Niedermayer
                        );
1437 cf7d1c1a Michael Niedermayer
                        return;
1438 e9e12f0e Luca Abeni
        case PIX_FMT_BGR555:
1439 d604bab9 Michael Niedermayer
                        asm volatile(
1440 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1441
                                "mov %4, %%"REG_b"                        \n\t"
1442
                                "push %%"REG_BP"                        \n\t"
1443
                                YSCALEYUV2RGB(%%REGBP, %5)
1444 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1445
#ifdef DITHER1XBPP
1446 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1447
                                "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1448
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1449 d604bab9 Michael Niedermayer
#endif
1450
1451 46fe31a0 Michael Niedermayer
                                WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1452
                                "pop %%"REG_BP"                         \n\t"
1453
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1454 d604bab9 Michael Niedermayer
1455 46fe31a0 Michael Niedermayer
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1456
                        "a" (&c->redDither)
1457 d604bab9 Michael Niedermayer
                        );
1458 cf7d1c1a Michael Niedermayer
                        return;
1459 e9e12f0e Luca Abeni
        case PIX_FMT_BGR565:
1460 d604bab9 Michael Niedermayer
                        asm volatile(
1461 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1462
                                "mov %4, %%"REG_b"                        \n\t"
1463
                                "push %%"REG_BP"                        \n\t"
1464
                                YSCALEYUV2RGB(%%REGBP, %5)
1465 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1466
#ifdef DITHER1XBPP
1467 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1468
                                "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1469
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1470 d604bab9 Michael Niedermayer
#endif
1471
1472 46fe31a0 Michael Niedermayer
                                WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1473
                                "pop %%"REG_BP"                         \n\t"
1474
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1475
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1476
                        "a" (&c->redDither)
1477 d604bab9 Michael Niedermayer
                        );
1478 cf7d1c1a Michael Niedermayer
                        return;
1479 e9e12f0e Luca Abeni
        case PIX_FMT_YUYV422:
1480 25593e29 Michael Niedermayer
                        asm volatile(
1481 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1482
                                "mov %4, %%"REG_b"                        \n\t"
1483
                                "push %%"REG_BP"                        \n\t"
1484
                                YSCALEYUV2PACKED(%%REGBP, %5)
1485
                                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1486
                                "pop %%"REG_BP"                         \n\t"
1487
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1488
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1489
                        "a" (&c->redDither)
1490 25593e29 Michael Niedermayer
                        );
1491
                        return;
1492 cf7d1c1a Michael Niedermayer
        default: break;
1493
        }
1494
#endif //HAVE_MMX
1495 25593e29 Michael Niedermayer
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
1496 d604bab9 Michael Niedermayer
}
1497
1498
/**
 * Writes one packed output line from a single luma line (buf0) and two chroma
 * lines (uvbuf0, uvbuf1). Original note: "YV12 to RGB without scaling or
 * interpolating".
 *
 * Dispatch, as visible in this function:
 *  - If flags has SWS_FULL_CHR_H_INT set, the work is handed to
 *    RENAME(yuv2packed2) with buf0 passed for both of its buffer arguments
 *    after c and 0 for the argument preceding uvalpha; nothing else is done.
 *  - With HAVE_MMX, PIX_FMT_RGB32, PIX_FMT_BGR24, PIX_FMT_BGR555,
 *    PIX_FMT_BGR565 and PIX_FMT_YUYV422 are handled by inline asm and return
 *    directly. uvalpha < 2048 selects the YSCALEYUV2RGB1 / YSCALEYUV2PACKED1
 *    macros, otherwise the corresponding "1b" macros are used.
 *  - Any other case falls through to YSCALE_YUV_2_ANYRGB_C at the bottom,
 *    again split on uvalpha < 2048.
 *
 * @param c         scaler context; this function itself only takes the
 *                  address of c->redDither, which the asm uses as base pointer
 * @param buf0      luma input line (NOTE(review): layout defined by the
 *                  YSCALE* macros, not visible here)
 * @param uvbuf0    first chroma input line
 * @param uvbuf1    second chroma input line
 * @param dest      destination of the packed output line
 * @param dstW      forwarded to yuv2packed2; not referenced by name in the
 *                  asm paths (presumably read by the C macros -- confirm)
 * @param uvalpha   only compared against 2048 here and forwarded to
 *                  yuv2packed2
 * @param dstFormat PIX_FMT_* value selecting the output writer
 * @param flags     only SWS_FULL_CHR_H_INT is tested here
 * @param y         forwarded to yuv2packed2 (presumably also used by the C
 *                  macros -- confirm)
 */
1501 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1502 cf7d1c1a Michael Niedermayer
                            uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1503 d604bab9 Michael Niedermayer
{
        // NOTE(review): yalpha1, i, buf1 and yalpha are not referenced by name in
        // the visible body (apart from buf1 being an asm input); presumably the
        // YSCALE_YUV_2_*_C macros expand to code that uses them -- confirm
        // against the macro definitions before removing any of them.
1504 c1b0bfb4 Michael Niedermayer
        const int yalpha1=0;
1505 cf7d1c1a Michael Niedermayer
        int i;
1506
        
1507
        uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
1508
        const int yalpha= 4096; //FIXME ...
1509 96034638 Michael Niedermayer
1510 1e621b18 Michael Niedermayer
        // Full chroma interpolation requested: delegate to the two-input variant,
        // feeding it buf0 twice, and return without touching dest here.
        if(flags&SWS_FULL_CHR_H_INT)
1511 d604bab9 Michael Niedermayer
        {
1512 25593e29 Michael Niedermayer
                RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1513 d604bab9 Michael Niedermayer
                return;
1514
        }
1515 397c035e Michael Niedermayer
1516
#ifdef HAVE_MMX
        // MMX fast paths. Every asm statement below follows the same pattern:
        // save REG_b into the context at ESP_OFFSET(%5) (%5 = &c->redDither),
        // load dest (%4) into REG_b, push REG_BP, run the conversion and writer
        // macros, then pop REG_BP and restore REG_b. 8280(%5) is the dstW field
        // (see the "8280 == DSTW_OFFSET" note in yuv2packed2).
1517 497d4f99 Michael Niedermayer
        if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
1518
        {
1519 cf7d1c1a Michael Niedermayer
                // No default label: formats not listed leave the switch and reach
                // the C code after #endif.
                switch(dstFormat)
1520 d604bab9 Michael Niedermayer
                {
1521 e9e12f0e Luca Abeni
                case PIX_FMT_RGB32:
1522 d604bab9 Michael Niedermayer
                        asm volatile(
1523 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1524
                                "mov %4, %%"REG_b"                        \n\t"
1525
                                "push %%"REG_BP"                        \n\t"
1526
                                YSCALEYUV2RGB1(%%REGBP, %5)
1527
                                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1528
                                "pop %%"REG_BP"                         \n\t"
1529
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1530
1531
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1532
                        "a" (&c->redDither)
1533 d604bab9 Michael Niedermayer
                        );
1534 cf7d1c1a Michael Niedermayer
                        return;
1535 e9e12f0e Luca Abeni
                case PIX_FMT_BGR24:
1536 d604bab9 Michael Niedermayer
                        asm volatile(
1537 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1538
                                "mov %4, %%"REG_b"                        \n\t"
1539
                                "push %%"REG_BP"                        \n\t"
1540
                                YSCALEYUV2RGB1(%%REGBP, %5)
1541
                                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1542
                                "pop %%"REG_BP"                         \n\t"
1543
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1544
1545
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1546
                        "a" (&c->redDither)
1547 d604bab9 Michael Niedermayer
                        );
1548 cf7d1c1a Michael Niedermayer
                        return;
1549 e9e12f0e Luca Abeni
                case PIX_FMT_BGR555:
1550 d604bab9 Michael Niedermayer
                        asm volatile(
1551 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1552
                                "mov %4, %%"REG_b"                        \n\t"
1553
                                "push %%"REG_BP"                        \n\t"
1554
                                YSCALEYUV2RGB1(%%REGBP, %5)
1555 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1556
#ifdef DITHER1XBPP
1557 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1558
                                "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1559
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1560 d604bab9 Michael Niedermayer
#endif
1561 46fe31a0 Michael Niedermayer
                                WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1562
                                "pop %%"REG_BP"                         \n\t"
1563
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1564 e54d94ba Michael Niedermayer
1565 46fe31a0 Michael Niedermayer
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1566
                        "a" (&c->redDither)
1567 d604bab9 Michael Niedermayer
                        );
1568 cf7d1c1a Michael Niedermayer
                        return;
1569 e9e12f0e Luca Abeni
                case PIX_FMT_BGR565:
1570 d604bab9 Michael Niedermayer
                        asm volatile(
1571 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1572
                                "mov %4, %%"REG_b"                        \n\t"
1573
                                "push %%"REG_BP"                        \n\t"
1574
                                YSCALEYUV2RGB1(%%REGBP, %5)
1575 d604bab9 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1576
#ifdef DITHER1XBPP
1577 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1578
                                "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1579
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1580 d604bab9 Michael Niedermayer
#endif
1581
1582 46fe31a0 Michael Niedermayer
                                WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1583
                                "pop %%"REG_BP"                         \n\t"
1584
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1585 e54d94ba Michael Niedermayer
1586 46fe31a0 Michael Niedermayer
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1587
                        "a" (&c->redDither)
1588 d604bab9 Michael Niedermayer
                        );
1589 cf7d1c1a Michael Niedermayer
                        return;
1590 e9e12f0e Luca Abeni
                case PIX_FMT_YUYV422:
1591 25593e29 Michael Niedermayer
                        asm volatile(
1592 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1593
                                "mov %4, %%"REG_b"                        \n\t"
1594
                                "push %%"REG_BP"                        \n\t"
1595
                                YSCALEYUV2PACKED1(%%REGBP, %5)
1596
                                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1597
                                "pop %%"REG_BP"                         \n\t"
1598
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1599
1600
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1601
                        "a" (&c->redDither)
1602 25593e29 Michael Niedermayer
                        );
1603
                        return;
1604 d604bab9 Michael Niedermayer
                }
1605 497d4f99 Michael Niedermayer
        }
1606
        else
1607
        {
1608 cf7d1c1a Michael Niedermayer
                // uvalpha >= 2048: same set of formats and the same asm framing as
                // above, but built on the "1b" conversion macros.
                switch(dstFormat)
1609 d604bab9 Michael Niedermayer
                {
1610 e9e12f0e Luca Abeni
                case PIX_FMT_RGB32:
1611 497d4f99 Michael Niedermayer
                        asm volatile(
1612 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1613
                                "mov %4, %%"REG_b"                        \n\t"
1614
                                "push %%"REG_BP"                        \n\t"
1615
                                YSCALEYUV2RGB1b(%%REGBP, %5)
1616
                                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1617
                                "pop %%"REG_BP"                         \n\t"
1618
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1619
1620
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1621
                        "a" (&c->redDither)
1622 497d4f99 Michael Niedermayer
                        );
1623 cf7d1c1a Michael Niedermayer
                        return;
1624 e9e12f0e Luca Abeni
                case PIX_FMT_BGR24:
1625 497d4f99 Michael Niedermayer
                        asm volatile(
1626 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1627
                                "mov %4, %%"REG_b"                        \n\t"
1628
                                "push %%"REG_BP"                        \n\t"
1629
                                YSCALEYUV2RGB1b(%%REGBP, %5)
1630
                                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1631
                                "pop %%"REG_BP"                         \n\t"
1632
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1633
1634
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1635
                        "a" (&c->redDither)
1636 497d4f99 Michael Niedermayer
                        );
1637 cf7d1c1a Michael Niedermayer
                        return;
1638 e9e12f0e Luca Abeni
                case PIX_FMT_BGR555:
1639 497d4f99 Michael Niedermayer
                        asm volatile(
1640 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1641
                                "mov %4, %%"REG_b"                        \n\t"
1642
                                "push %%"REG_BP"                        \n\t"
1643
                                YSCALEYUV2RGB1b(%%REGBP, %5)
1644 497d4f99 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1645
#ifdef DITHER1XBPP
1646 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1647
                                "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1648
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1649 497d4f99 Michael Niedermayer
#endif
1650 46fe31a0 Michael Niedermayer
                                WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1651
                                "pop %%"REG_BP"                         \n\t"
1652
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1653 e54d94ba Michael Niedermayer
1654 46fe31a0 Michael Niedermayer
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1655
                        "a" (&c->redDither)
1656 497d4f99 Michael Niedermayer
                        );
1657 cf7d1c1a Michael Niedermayer
                        return;
1658 e9e12f0e Luca Abeni
                case PIX_FMT_BGR565:
1659 497d4f99 Michael Niedermayer
                        asm volatile(
1660 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1661
                                "mov %4, %%"REG_b"                        \n\t"
1662
                                "push %%"REG_BP"                        \n\t"
1663
                                YSCALEYUV2RGB1b(%%REGBP, %5)
1664 497d4f99 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1665
#ifdef DITHER1XBPP
1666 9b464428 Felix Bünemann
                                "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1667
                                "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1668
                                "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1669 497d4f99 Michael Niedermayer
#endif
1670 d604bab9 Michael Niedermayer
1671 46fe31a0 Michael Niedermayer
                                WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1672
                                "pop %%"REG_BP"                         \n\t"
1673
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1674 e54d94ba Michael Niedermayer
1675 46fe31a0 Michael Niedermayer
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1676
                        "a" (&c->redDither)
1677 497d4f99 Michael Niedermayer
                        );
1678 cf7d1c1a Michael Niedermayer
                        return;
1679 e9e12f0e Luca Abeni
                case PIX_FMT_YUYV422:
1680 25593e29 Michael Niedermayer
                        asm volatile(
1681 46fe31a0 Michael Niedermayer
                                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1682
                                "mov %4, %%"REG_b"                        \n\t"
1683
                                "push %%"REG_BP"                        \n\t"
1684
                                YSCALEYUV2PACKED1b(%%REGBP, %5)
1685
                                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1686
                                "pop %%"REG_BP"                         \n\t"
1687
                                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1688
1689
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1690
                        "a" (&c->redDither)
1691 25593e29 Michael Niedermayer
                        );
1692
                        return;
1693 d604bab9 Michael Niedermayer
                }
1694 497d4f99 Michael Niedermayer
        }
1695 df3c183a Michael Niedermayer
#endif
1696 cf7d1c1a Michael Niedermayer
        // Generic C path: reached without HAVE_MMX, or when dstFormat matched none
        // of the MMX cases above. Mirrors the uvalpha split of the asm paths.
        if( uvalpha < 2048 )
1697 497d4f99 Michael Niedermayer
        {
1698 25593e29 Michael Niedermayer
                YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
1699 cf7d1c1a Michael Niedermayer
        }else{
1700 25593e29 Michael Niedermayer
                YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
1701 497d4f99 Michael Niedermayer
        }
1702 d604bab9 Michael Niedermayer
}
1703
1704 6ff0ad6b Michael Niedermayer
//FIXME yuy2* can read up to 7 samples too much
1705
1706 7f526efd Reimar Döffinger
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
1707 1e621b18 Michael Niedermayer
{
1708 6ff0ad6b Michael Niedermayer
#ifdef HAVE_MMX
1709
        asm volatile(
1710
                "movq "MANGLE(bm01010101)", %%mm2\n\t"
1711 6e1c66bc Aurelien Jacobs
                "mov %0, %%"REG_a"                \n\t"
1712 6ff0ad6b Michael Niedermayer
                "1:                                \n\t"
1713 6e1c66bc Aurelien Jacobs
                "movq (%1, %%"REG_a",2), %%mm0        \n\t"
1714
                "movq 8(%1, %%"REG_a",2), %%mm1        \n\t"
1715 6ff0ad6b Michael Niedermayer
                "pand %%mm2, %%mm0                \n\t"
1716
                "pand %%mm2, %%mm1                \n\t"
1717
                "packuswb %%mm1, %%mm0                \n\t"
1718 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%2, %%"REG_a")        \n\t"
1719
                "add $8, %%"REG_a"                \n\t"
1720 6ff0ad6b Michael Niedermayer
                " js 1b                                \n\t"
1721 7f526efd Reimar Döffinger
                : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1722 6e1c66bc Aurelien Jacobs
                : "%"REG_a
1723 6ff0ad6b Michael Niedermayer
        );
1724 1e621b18 Michael Niedermayer
#else
1725
        int i;
1726
        for(i=0; i<width; i++)
1727
                dst[i]= src[2*i];
1728
#endif
1729
}
1730
1731 7f526efd Reimar Döffinger
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1732 1e621b18 Michael Niedermayer
{
1733 c2271987 Michael Niedermayer
#ifdef HAVE_MMX
1734 6ff0ad6b Michael Niedermayer
        asm volatile(
1735
                "movq "MANGLE(bm01010101)", %%mm4\n\t"
1736 6e1c66bc Aurelien Jacobs
                "mov %0, %%"REG_a"                \n\t"
1737 6ff0ad6b Michael Niedermayer
                "1:                                \n\t"
1738 6e1c66bc Aurelien Jacobs
                "movq (%1, %%"REG_a",4), %%mm0        \n\t"
1739
                "movq 8(%1, %%"REG_a",4), %%mm1        \n\t"
1740 6ff0ad6b Michael Niedermayer
                "psrlw $8, %%mm0                \n\t"
1741
                "psrlw $8, %%mm1                \n\t"
1742
                "packuswb %%mm1, %%mm0                \n\t"
1743
                "movq %%mm0, %%mm1                \n\t"
1744
                "psrlw $8, %%mm0                \n\t"
1745
                "pand %%mm4, %%mm1                \n\t"
1746
                "packuswb %%mm0, %%mm0                \n\t"
1747
                "packuswb %%mm1, %%mm1                \n\t"
1748 c2271987 Michael Niedermayer
                "movd %%mm0, (%3, %%"REG_a")        \n\t"
1749
                "movd %%mm1, (%2, %%"REG_a")        \n\t"
1750 6e1c66bc Aurelien Jacobs
                "add $4, %%"REG_a"                \n\t"
1751 6ff0ad6b Michael Niedermayer
                " js 1b                                \n\t"
1752 c2271987 Michael Niedermayer
                : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1753 6e1c66bc Aurelien Jacobs
                : "%"REG_a
1754 6ff0ad6b Michael Niedermayer
        );
1755 1e621b18 Michael Niedermayer
#else
1756
        int i;
1757
        for(i=0; i<width; i++)
1758
        {
1759 c2271987 Michael Niedermayer
                dstU[i]= src1[4*i + 1];
1760
                dstV[i]= src1[4*i + 3];
1761 1e621b18 Michael Niedermayer
        }
1762
#endif
1763 0683a5c5 Luca Abeni
        assert(src1 == src2);
1764 1e621b18 Michael Niedermayer
}
1765
1766 7322a67c Michael Niedermayer
//this is almost identical to the previous, and exists only because RENAME(yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
1767 7f526efd Reimar Döffinger
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
1768 7322a67c Michael Niedermayer
{
1769
#ifdef HAVE_MMX
1770
        asm volatile(
1771 6e1c66bc Aurelien Jacobs
                "mov %0, %%"REG_a"                \n\t"
1772 7322a67c Michael Niedermayer
                "1:                                \n\t"
1773 6e1c66bc Aurelien Jacobs
                "movq (%1, %%"REG_a",2), %%mm0        \n\t"
1774
                "movq 8(%1, %%"REG_a",2), %%mm1        \n\t"
1775 7322a67c Michael Niedermayer
                "psrlw $8, %%mm0                \n\t"
1776
                "psrlw $8, %%mm1                \n\t"
1777
                "packuswb %%mm1, %%mm0                \n\t"
1778 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%2, %%"REG_a")        \n\t"
1779
                "add $8, %%"REG_a"                \n\t"
1780 7322a67c Michael Niedermayer
                " js 1b                                \n\t"
1781 7f526efd Reimar Döffinger
                : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1782 6e1c66bc Aurelien Jacobs
                : "%"REG_a
1783 7322a67c Michael Niedermayer
        );
1784
#else
1785
        int i;
1786
        for(i=0; i<width; i++)
1787
                dst[i]= src[2*i+1];
1788
#endif
1789
}
1790
1791 7f526efd Reimar Döffinger
/*
 * Extract the two chroma planes from one packed input line.
 *
 * For every output sample i the C fallback copies byte 0 of the i-th 4-byte
 * input group to dstU[i] and byte 2 of that group to dstV[i].
 *
 * dstU, dstV  output lines, one byte per sample
 * src1        packed input line, 4 bytes consumed per output sample
 * src2        must be equal to src1 (asserted below); otherwise unused
 * width       number of samples to produce in each output line
 *
 * The MMX loop produces 4 samples per plane (16 input bytes) per iteration
 * and its body always executes at least once, so it can read and write past
 * `width` samples when width is not a positive multiple of 4.
 * NOTE(review): presumably callers pad their buffers accordingly -- confirm.
 */
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1792 7322a67c Michael Niedermayer
{
1793 c2271987 Michael Niedermayer
#ifdef HAVE_MMX
1794 7322a67c Michael Niedermayer
        asm volatile(
1795
                /* mm4 = byte mask; presumably keeps the low byte of each 16-bit word -- confirm against the definition of bm01010101 */
                "movq "MANGLE(bm01010101)", %%mm4\n\t"
1796 6e1c66bc Aurelien Jacobs
                /* REG_a = -width: negative index that counts up towards zero; the pointer operands are pre-advanced by width */
                "mov %0, %%"REG_a"                \n\t"
1797 7322a67c Michael Niedermayer
                "1:                                \n\t"
1798 6e1c66bc Aurelien Jacobs
                /* load 16 input bytes (4 input bytes per output sample, hence the scale of 4) */
                "movq (%1, %%"REG_a",4), %%mm0        \n\t"
1799
                "movq 8(%1, %%"REG_a",4), %%mm1        \n\t"
1800 7322a67c Michael Niedermayer
                /* mask each word and pack the words down to bytes: mm0 now holds 8 chroma bytes */
                "pand %%mm4, %%mm0                \n\t"
1801
                "pand %%mm4, %%mm1                \n\t"
1802
                "packuswb %%mm1, %%mm0                \n\t"
1803
                /* split the packed bytes: mm0 keeps the high byte of each word, mm1 the masked low byte */
                "movq %%mm0, %%mm1                \n\t"
1804
                "psrlw $8, %%mm0                \n\t"
1805
                "pand %%mm4, %%mm1                \n\t"
1806
                "packuswb %%mm0, %%mm0                \n\t"
1807
                "packuswb %%mm1, %%mm1                \n\t"
1808 c2271987 Michael Niedermayer
                /* store 4 bytes to dstV (operand 3) and 4 bytes to dstU (operand 2) */
                "movd %%mm0, (%3, %%"REG_a")        \n\t"
1809
                "movd %%mm1, (%2, %%"REG_a")        \n\t"
1810 6e1c66bc Aurelien Jacobs
                /* advance by 4 output samples and loop while the index is still negative */
                "add $4, %%"REG_a"                \n\t"
1811 7322a67c Michael Niedermayer
                " js 1b                                \n\t"
1812 c2271987 Michael Niedermayer
                : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1813 6e1c66bc Aurelien Jacobs
                : "%"REG_a
1814 7322a67c Michael Niedermayer
        );
1815
#else
1816
        int i;
1817
        for(i=0; i<width; i++)
1818
        {
1819 c2271987 Michael Niedermayer
                /* bytes 0 and 2 of each 4-byte group are the two chroma samples */
                dstU[i]= src1[4*i + 0];
1820
                dstV[i]= src1[4*i + 2];
1821 7322a67c Michael Niedermayer
        }
1822
#endif
1823 0683a5c5 Luca Abeni
        /* only src1 is read above; both source pointers are required to be identical */
        assert(src1 == src2);
1824 7322a67c Michael Niedermayer
}
1825
1826 1e621b18 Michael Niedermayer
static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
1827
{
1828
        int i;
1829
        for(i=0; i<width; i++)
1830
        {
1831 4e61e21c Michael Niedermayer
                int b=  ((uint32_t*)src)[i]&0xFF;
1832
                int g= (((uint32_t*)src)[i]>>8)&0xFF;
1833 3e499f53 Michael Niedermayer
                int r= (((uint32_t*)src)[i]>>16)&0xFF;
1834 1e621b18 Michael Niedermayer
1835 4e61e21c Michael Niedermayer
                dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
1836 1e621b18 Michael Niedermayer
        }
1837
}
1838
1839
static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
1840
{
1841
        int i;
1842 c2271987 Michael Niedermayer
        assert(src1 == src2);
1843 1e621b18 Michael Niedermayer
        for(i=0; i<width; i++)
1844
        {
1845 4e61e21c Michael Niedermayer
                const int a= ((uint32_t*)src1)[2*i+0];
1846
                const int e= ((uint32_t*)src1)[2*i+1];
1847 c2271987 Michael Niedermayer
                const int l= (a&0xFF00FF) + (e&0xFF00FF);
1848
                const int h= (a&0x00FF00) + (e&0x00FF00);
1849 4e61e21c Michael Niedermayer
                 const int b=  l&0x3FF;
1850
                const int g=  h>>8;
1851
                const int r=  l>>16;
1852 1e621b18 Michael Niedermayer
1853 c2271987 Michael Niedermayer
                dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
1854
                dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
1855 1e621b18 Michael Niedermayer
        }
1856
}
1857
1858 7f526efd Reimar Döffinger
/*
 * Convert one line of packed 24-bit pixels (3 bytes per pixel) to 8-bit luma.
 *
 * dst    output line, one byte per pixel
 * src    input line; the C fallback reads each pixel's bytes as b, g, r
 * width  number of pixels to convert
 *
 * C fallback: dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT.
 * The added constant is 16.5 in that fixed-point scale (+16 offset plus rounding).
 *
 * MMX path: handles 8 pixels (24 input bytes, 8 output bytes) per iteration and
 * the loop body always executes at least once, so it can read and write past
 * `width` pixels when width is not a positive multiple of 8. The last movd of
 * an iteration loads 4 bytes at offset 21, i.e. one byte beyond the 24-byte group.
 * NOTE(review): presumably callers pad their buffers accordingly -- confirm.
 * NOTE(review): the MMX path relies on the bgr2YCoeff, w1111 and bgr2YOffset
 * constants defined elsewhere; whether it is bit-exact with the C fallback
 * cannot be told from this function alone.
 */
static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
1859 1e621b18 Michael Niedermayer
{
1860 ac6a2e45 Michael Niedermayer
#ifdef HAVE_MMX
1861
        asm volatile(
1862 6e1c66bc Aurelien Jacobs
                /* REG_a = -width: negative output index counting up towards zero (dst is pre-advanced by width) */
                "mov %2, %%"REG_a"                \n\t"
1863 854288bb Felix Bünemann
                "movq "MANGLE(bgr2YCoeff)", %%mm6                \n\t"
1864
                "movq "MANGLE(w1111)", %%mm5                \n\t"
1865 ac6a2e45 Michael Niedermayer
                /* mm7 = 0, used to zero-extend bytes to words */
                "pxor %%mm7, %%mm7                \n\t"
1866 83c89c78 Jason Tackaberry
                /* REG_d = 3*REG_a: negative input byte offset (src is pre-advanced by width*3) */
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"\n\t"
1867 4bff9ef9 Diego Biurrun
                ASMALIGN(4)
1868 ac6a2e45 Michael Niedermayer
                "1:                                \n\t"
1869 83c89c78 Jason Tackaberry
                PREFETCH" 64(%0, %%"REG_d")        \n\t"
1870
                /* pixels 0-3: each movd loads 4 bytes starting at a pixel, so the 4th byte belongs to the next pixel */
                "movd (%0, %%"REG_d"), %%mm0        \n\t"
1871
                "movd 3(%0, %%"REG_d"), %%mm1        \n\t"
1872 ac6a2e45 Michael Niedermayer
                "punpcklbw %%mm7, %%mm0                \n\t"
1873
                "punpcklbw %%mm7, %%mm1                \n\t"
1874 83c89c78 Jason Tackaberry
                "movd 6(%0, %%"REG_d"), %%mm2        \n\t"
1875
                "movd 9(%0, %%"REG_d"), %%mm3        \n\t"
1876 ac6a2e45 Michael Niedermayer
                "punpcklbw %%mm7, %%mm2                \n\t"
1877
                "punpcklbw %%mm7, %%mm3                \n\t"
1878
                /* multiply the four words of each pixel by bgr2YCoeff and add adjacent products */
                "pmaddwd %%mm6, %%mm0                \n\t"
1879
                "pmaddwd %%mm6, %%mm1                \n\t"
1880
                "pmaddwd %%mm6, %%mm2                \n\t"
1881
                "pmaddwd %%mm6, %%mm3                \n\t"
1882
                /* this intermediate down-shift is skipped when FAST_BGR2YV12 is defined */
#ifndef FAST_BGR2YV12
1883
                "psrad $8, %%mm0                \n\t"
1884
                "psrad $8, %%mm1                \n\t"
1885
                "psrad $8, %%mm2                \n\t"
1886
                "psrad $8, %%mm3                \n\t"
1887
#endif
1888
                "packssdw %%mm1, %%mm0                \n\t"
1889
                "packssdw %%mm3, %%mm2                \n\t"
1890
                /* presumably w1111 is all ones, making this a sum of the two partial products per pixel -- confirm */
                "pmaddwd %%mm5, %%mm0                \n\t"
1891
                "pmaddwd %%mm5, %%mm2                \n\t"
1892
                "packssdw %%mm2, %%mm0                \n\t"
1893
                "psraw $7, %%mm0                \n\t"
1894
1895 83c89c78 Jason Tackaberry
                /* pixels 4-7: same sequence, accumulating into mm4 */
                "movd 12(%0, %%"REG_d"), %%mm4        \n\t"
1896
                "movd 15(%0, %%"REG_d"), %%mm1        \n\t"
1897 ac6a2e45 Michael Niedermayer
                "punpcklbw %%mm7, %%mm4                \n\t"
1898
                "punpcklbw %%mm7, %%mm1                \n\t"
1899 83c89c78 Jason Tackaberry
                "movd 18(%0, %%"REG_d"), %%mm2        \n\t"
1900
                "movd 21(%0, %%"REG_d"), %%mm3        \n\t"
1901 ac6a2e45 Michael Niedermayer
                "punpcklbw %%mm7, %%mm2                \n\t"
1902
                "punpcklbw %%mm7, %%mm3                \n\t"
1903
                "pmaddwd %%mm6, %%mm4                \n\t"
1904
                "pmaddwd %%mm6, %%mm1                \n\t"
1905
                "pmaddwd %%mm6, %%mm2                \n\t"
1906
                "pmaddwd %%mm6, %%mm3                \n\t"
1907
#ifndef FAST_BGR2YV12
1908
                "psrad $8, %%mm4                \n\t"
1909
                "psrad $8, %%mm1                \n\t"
1910
                "psrad $8, %%mm2                \n\t"
1911
                "psrad $8, %%mm3                \n\t"
1912
#endif
1913
                "packssdw %%mm1, %%mm4                \n\t"
1914
                "packssdw %%mm3, %%mm2                \n\t"
1915
                "pmaddwd %%mm5, %%mm4                \n\t"
1916
                "pmaddwd %%mm5, %%mm2                \n\t"
1917 83c89c78 Jason Tackaberry
                /* advance the input offset by 8 pixels * 3 bytes */
                "add $24, %%"REG_d"                \n\t"
1918 ac6a2e45 Michael Niedermayer
                "packssdw %%mm2, %%mm4                \n\t"
1919
                "psraw $7, %%mm4                \n\t"
1920
1921
                /* merge both 4-pixel halves into 8 unsigned bytes, then add bgr2YOffset with unsigned saturation */
                "packuswb %%mm4, %%mm0                \n\t"
1922 854288bb Felix Bünemann
                "paddusb "MANGLE(bgr2YOffset)", %%mm0        \n\t"
1923 ac6a2e45 Michael Niedermayer
1924 6e1c66bc Aurelien Jacobs
                /* store 8 luma bytes, advance the output index and loop while it is still negative */
                "movq %%mm0, (%1, %%"REG_a")        \n\t"
1925
                "add $8, %%"REG_a"                \n\t"
1926 ac6a2e45 Michael Niedermayer
                " js 1b                                \n\t"
1927 7f526efd Reimar Döffinger
                : : "r" (src+width*3), "r" (dst+width), "g" (-width)
1928 83c89c78 Jason Tackaberry
                : "%"REG_a, "%"REG_d
1929 ac6a2e45 Michael Niedermayer
        );
1930 1e621b18 Michael Niedermayer
#else
1931
        int i;
1932
        for(i=0; i<width; i++)
1933
        {
1934
                /* the three bytes of a pixel are read as b, g, r in that order */
                int b= src[i*3+0];
1935
                int g= src[i*3+1];
1936
                int r= src[i*3+2];
1937
1938 9902f4e2 Michael Niedermayer
                /* 33<<(RGB2YUV_SHIFT-1) is 16.5 in fixed point: +16 offset plus rounding */
                dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)) )>>RGB2YUV_SHIFT);
1939 1e621b18 Michael Niedermayer
        }
1940
#endif
1941
}
1942
1943 7f526efd Reimar Döffinger
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1944 1e621b18 Michael Niedermayer
{
1945 4342fc14 Michael Niedermayer
#ifdef HAVE_MMX
1946
        asm volatile(
1947 c2271987 Michael Niedermayer
                "mov %3, %%"REG_a"                \n\t"
1948 854288bb Felix Bünemann
                "movq "MANGLE(w1111)", %%mm5                \n\t"
1949
                "movq "MANGLE(bgr2UCoeff)", %%mm6                \n\t"
1950 4342fc14 Michael Niedermayer
                "pxor %%mm7, %%mm7                \n\t"
1951 83c89c78 Jason Tackaberry
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"        \n\t"
1952
                "add %%"REG_d", %%"REG_d"        \n\t"
1953 4bff9ef9 Diego Biurrun
                ASMALIGN(4)
1954 4342fc14 Michael Niedermayer
                "1:                                \n\t"
1955 83c89c78 Jason Tackaberry
                PREFETCH" 64(%0, %%"REG_d")        \n\t"
1956 4342fc14 Michael Niedermayer
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1957 83c89c78 Jason Tackaberry
                "movq (%0, %%"REG_d"), %%mm0        \n\t"
1958
                "movq 6(%0, %%"REG_d"), %%mm2        \n\t"
1959 4342fc14 Michael Niedermayer
                "movq %%mm0, %%mm1                \n\t"
1960
                "movq %%mm2, %%mm3                \n\t"
1961
                "psrlq $24, %%mm0                \n\t"
1962
                "psrlq $24, %%mm2                \n\t"
1963
                PAVGB(%%mm1, %%mm0)
1964
                PAVGB(%%mm3, %%mm2)
1965
                "punpcklbw %%mm7, %%mm0                \n\t"
1966
                "punpcklbw %%mm7, %%mm2                \n\t"
1967
#else
1968 83c89c78 Jason Tackaberry
                "movd (%0, %%"REG_d"), %%mm0        \n\t"
1969
                "movd 3(%0, %%"REG_d"), %%mm2        \n\t"
1970 4342fc14 Michael Niedermayer
                "punpcklbw %%mm7, %%mm0                \n\t"
1971
                "punpcklbw %%mm7, %%mm2                \n\t"
1972
                "paddw %%mm2, %%mm0                \n\t"
1973 83c89c78 Jason Tackaberry
                "movd 6(%0, %%"REG_d"), %%mm4        \n\t"
1974
                "movd 9(%0, %%"REG_d"), %%mm2        \n\t"
1975 4342fc14 Michael Niedermayer
                "punpcklbw %%mm7, %%mm4                \n\t"
1976
                "punpcklbw %%mm7, %%mm2                \n\t"
1977
                "paddw %%mm4, %%mm2                \n\t"
1978 c2271987 Michael Niedermayer
                "psrlw $1, %%mm0                \n\t"
1979
                "psrlw $1, %%mm2                \n\t"
1980 4342fc14 Michael Niedermayer
#endif
1981 854288bb Felix Bünemann
                "movq "MANGLE(bgr2VCoeff)", %%mm1                \n\t"
1982
                "movq "MANGLE(bgr2VCoeff)", %%mm3                \n\t"
1983 4342fc14 Michael Niedermayer
                
1984
                "pmaddwd %%mm0, %%mm1                \n\t"
1985
                "pmaddwd %%mm2, %%mm3                \n\t"
1986
                "pmaddwd %%mm6, %%mm0                \n\t"
1987
                "pmaddwd %%mm6, %%mm2                \n\t"
1988
#ifndef FAST_BGR2YV12
1989
                "psrad $8, %%mm0                \n\t"
1990
                "psrad $8, %%mm1                \n\t"
1991
                "psrad $8, %%mm2                \n\t"
1992