Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale_template.c @ 3164d25e

History | View | Annotate | Download (139 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 8a322796 Diego Biurrun
 * The C code (not assembly, MMX, ...) of this file can be used
21
 * under the LGPL license.
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 6e1c66bc Aurelien Jacobs
/*
 * Drop any earlier definitions of the helper macros that are (re)defined
 * below, so the definitions in this section always take effect.
 * NOTE(review): presumably needed because this template is included more
 * than once with different CPU feature settings - confirm in the includer.
 */
#undef REAL_MOVNTQ
25 541c4eb9 Michael Niedermayer
#undef MOVNTQ
26 7d7f78b5 Michael Niedermayer
#undef PAVGB
27 48a05cec Michael Niedermayer
#undef PREFETCH
28
#undef PREFETCHW
29
#undef EMMS
30
#undef SFENCE
31
32 f4406ec1 Diego Biurrun
/*
 * EMMS: mnemonic string for the instruction that empties the MMX state.
 * femms is selected when HAVE_AMD3DNOW is set, plain emms otherwise.
 */
#if HAVE_AMD3DNOW
33 aeb87a49 Diego Biurrun
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
34 48a05cec Michael Niedermayer
#define EMMS     "femms"
35
#else
36
#define EMMS     "emms"
37
#endif
38
39 f4406ec1 Diego Biurrun
/*
 * PREFETCH / PREFETCHW: mnemonic strings for the read and write prefetch
 * hints. 3DNow builds use prefetch/prefetchw, MMX2 builds use
 * prefetchnta/prefetcht0. Without either feature both expand to a string
 * starting with '#', which presumably is treated as an assembler comment
 * so that no instruction is emitted.
 */
#if HAVE_AMD3DNOW
40 48a05cec Michael Niedermayer
#define PREFETCH  "prefetch"
41
#define PREFETCHW "prefetchw"
42 b63f641e Aurelien Jacobs
#elif HAVE_MMX2
43 48a05cec Michael Niedermayer
#define PREFETCH "prefetchnta"
44
#define PREFETCHW "prefetcht0"
45
#else
46 d904b5fc Nigel Pearson
#define PREFETCH  " # nop"
47
#define PREFETCHW " # nop"
48 48a05cec Michael Niedermayer
#endif
49
50 b63f641e Aurelien Jacobs
/*
 * SFENCE: mnemonic string for the store fence. Only MMX2 builds get a real
 * sfence; otherwise the string starts with '#', which presumably is treated
 * as an assembler comment so that no instruction is emitted.
 */
#if HAVE_MMX2
51 48a05cec Michael Niedermayer
#define SFENCE "sfence"
52
#else
53 d904b5fc Nigel Pearson
#define SFENCE " # nop"
54 48a05cec Michael Niedermayer
#endif
55 d3f41512 Michael Niedermayer
56 b63f641e Aurelien Jacobs
/*
 * PAVGB(a,b): builds one asm line for the packed byte average of a and b,
 * using pavgb on MMX2 and pavgusb on 3DNow. Both arguments are stringified.
 * When neither feature is enabled PAVGB stays undefined, so code using it
 * must be guarded accordingly.
 */
#if HAVE_MMX2
57 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
58 f4406ec1 Diego Biurrun
#elif HAVE_AMD3DNOW
59 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
60
#endif
61 d3f41512 Michael Niedermayer
62 b63f641e Aurelien Jacobs
/*
 * MOVNTQ(a,b): builds one asm line that stores quadword a to b, using the
 * non-temporal movntq on MMX2 and a plain movq otherwise.
 *
 * REAL_MOVNTQ stringifies its arguments; the MOVNTQ wrapper exists so that
 * macros appearing inside the arguments are expanded by the preprocessor
 * before the stringification happens.
 */
#if HAVE_MMX2
63 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
64 d604bab9 Michael Niedermayer
#else
65 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
66 d604bab9 Michael Niedermayer
#endif
67 6e1c66bc Aurelien Jacobs
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
68 d604bab9 Michael Niedermayer
69 b63f641e Aurelien Jacobs
#if HAVE_ALTIVEC
70 a2faa401 Romain Dolbeau
#include "swscale_altivec_template.c"
71
#endif
72
73 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Complete inline-asm statement: multi-tap vertical filter that writes
 * 8 output bytes per outer iteration.
 *
 *   x      - string literal, displacement added to every source address
 *   offset - string literal, displacement from %0 of the filter list
 *   dest   - output pointer (asm operand %1)
 *   width  - loop bound compared against the byte index (asm operand %2)
 * Operand %0 is &c->redDither; c is taken from the expansion site.
 *
 * The filter list is walked in 16-byte entries: a source pointer at +0 and
 * a coefficient quadword at +8. A null source pointer ends the list.
 * For every entry two quadwords of 16-bit samples are read from
 * x + src + 2*REG_a, multiplied by the coefficients with pmulhw and added to
 * mm3/mm4, which start out as the rounder found at VROUNDER_OFFSET(%0).
 * The sums are shifted right by 3, packed to bytes with unsigned saturation
 * and stored at dest + REG_a. REG_a then advances by 8 while it is below
 * width.
 *
 * Label 1 is the target of both the tap loop (jnz) and the outer loop (jb);
 * the accumulators and the list pointer are reloaded before the jb.
 *
 * NOTE(review): only REG_a, REG_d and REG_S are declared as clobbered; the
 * MMX registers used and the store through dest are not declared.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
74 7ad6469e Diego Pettenò
    __asm__ volatile(\
75 2da0d70d Diego Biurrun
    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
76
    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
77
    "movq                             %%mm3, %%mm4      \n\t"\
78
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
79
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
80
    ASMALIGN(4) /* FIXME Unroll? */\
81
    "1:                                                 \n\t"\
82
    "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
83 8b2fce0d Michael Niedermayer
    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
84
    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
85 2da0d70d Diego Biurrun
    "add                                $16, %%"REG_d"  \n\t"\
86
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
87
    "test                         %%"REG_S", %%"REG_S"  \n\t"\
88
    "pmulhw                           %%mm0, %%mm2      \n\t"\
89
    "pmulhw                           %%mm0, %%mm5      \n\t"\
90
    "paddw                            %%mm2, %%mm3      \n\t"\
91
    "paddw                            %%mm5, %%mm4      \n\t"\
92
    " jnz                                1b             \n\t"\
93
    "psraw                               $3, %%mm3      \n\t"\
94
    "psraw                               $3, %%mm4      \n\t"\
95
    "packuswb                         %%mm4, %%mm3      \n\t"\
96
    MOVNTQ(%%mm3, (%1, %%REGa))\
97
    "add                                 $8, %%"REG_a"  \n\t"\
98
    "cmp                                 %2, %%"REG_a"  \n\t"\
99
    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
100
    "movq                             %%mm3, %%mm4      \n\t"\
101
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
102
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
103
    "jb                                  1b             \n\t"\
104
    :: "r" (&c->redDither),\
105
    "r" (dest), "g" (width)\
106
    : "%"REG_a, "%"REG_d, "%"REG_S\
107
    );
108 bca11e75 Michael Niedermayer
109
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same interface and operands as YSCALEYUV2YV12X (%0 = &c->redDither,
 * %1 = dest, %2 = width; x and offset are string-literal displacements),
 * but the taps are accumulated in 32 bits.
 *
 * Each filter-list entry is APCK_SIZE bytes and is read at three places:
 * a first source pointer at +0, a second source pointer at APCK_PTR2 and a
 * coefficient quadword at APCK_COEF. A null first pointer ends the list.
 * Samples of the two sources are interleaved word by word
 * (punpcklwd/punpckhwd) and combined with the coefficient quadword by
 * pmaddwd, so one entry handles two taps (presumably the coefficient
 * quadword holds the matching coefficient pair - confirm where the list is
 * built). The dword sums live in mm4..mm7.
 *
 * After the last entry the sums are shifted right by 16, packed to words,
 * the rounder from VROUNDER_OFFSET(%0) is added, the result is shifted right
 * by 3, packed to bytes with unsigned saturation and stored at
 * dest + REG_a. REG_a advances by 8 while it is below width.
 *
 * Label 1 is shared by the tap loop (jnz) and the outer loop (jb).
 *
 * NOTE(review): only REG_a, REG_d and REG_S are declared as clobbered.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
110 7ad6469e Diego Pettenò
    __asm__ volatile(\
111 2da0d70d Diego Biurrun
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
112
    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
113
    "pxor                             %%mm4, %%mm4      \n\t"\
114
    "pxor                             %%mm5, %%mm5      \n\t"\
115
    "pxor                             %%mm6, %%mm6      \n\t"\
116
    "pxor                             %%mm7, %%mm7      \n\t"\
117
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
118
    ASMALIGN(4) \
119
    "1:                                                 \n\t"\
120 8b2fce0d Michael Niedermayer
    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
121
    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
122 1625216e Michael Niedermayer
    "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
123 8b2fce0d Michael Niedermayer
    "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
124 2da0d70d Diego Biurrun
    "movq                             %%mm0, %%mm3      \n\t"\
125
    "punpcklwd                        %%mm1, %%mm0      \n\t"\
126
    "punpckhwd                        %%mm1, %%mm3      \n\t"\
127 1625216e Michael Niedermayer
    "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
128 2da0d70d Diego Biurrun
    "pmaddwd                          %%mm1, %%mm0      \n\t"\
129
    "pmaddwd                          %%mm1, %%mm3      \n\t"\
130
    "paddd                            %%mm0, %%mm4      \n\t"\
131
    "paddd                            %%mm3, %%mm5      \n\t"\
132 8b2fce0d Michael Niedermayer
    "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
133 1625216e Michael Niedermayer
    "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
134
    "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
135 2da0d70d Diego Biurrun
    "test                         %%"REG_S", %%"REG_S"  \n\t"\
136
    "movq                             %%mm2, %%mm0      \n\t"\
137
    "punpcklwd                        %%mm3, %%mm2      \n\t"\
138
    "punpckhwd                        %%mm3, %%mm0      \n\t"\
139
    "pmaddwd                          %%mm1, %%mm2      \n\t"\
140
    "pmaddwd                          %%mm1, %%mm0      \n\t"\
141
    "paddd                            %%mm2, %%mm6      \n\t"\
142
    "paddd                            %%mm0, %%mm7      \n\t"\
143
    " jnz                                1b             \n\t"\
144
    "psrad                              $16, %%mm4      \n\t"\
145
    "psrad                              $16, %%mm5      \n\t"\
146
    "psrad                              $16, %%mm6      \n\t"\
147
    "psrad                              $16, %%mm7      \n\t"\
148
    "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
149
    "packssdw                         %%mm5, %%mm4      \n\t"\
150
    "packssdw                         %%mm7, %%mm6      \n\t"\
151
    "paddw                            %%mm0, %%mm4      \n\t"\
152
    "paddw                            %%mm0, %%mm6      \n\t"\
153
    "psraw                               $3, %%mm4      \n\t"\
154
    "psraw                               $3, %%mm6      \n\t"\
155
    "packuswb                         %%mm6, %%mm4      \n\t"\
156
    MOVNTQ(%%mm4, (%1, %%REGa))\
157
    "add                                 $8, %%"REG_a"  \n\t"\
158
    "cmp                                 %2, %%"REG_a"  \n\t"\
159
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
160
    "pxor                             %%mm4, %%mm4      \n\t"\
161
    "pxor                             %%mm5, %%mm5      \n\t"\
162
    "pxor                             %%mm6, %%mm6      \n\t"\
163
    "pxor                             %%mm7, %%mm7      \n\t"\
164
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
165
    "jb                                  1b             \n\t"\
166
    :: "r" (&c->redDither),\
167
    "r" (dest), "g" (width)\
168
    : "%"REG_a, "%"REG_d, "%"REG_S\
169
    );
170 c1b0bfb4 Michael Niedermayer
171
/*
 * YSCALEYUV2YV121
 *
 * Asm instruction-string fragment (no asm statement wrapper, no operand
 * list): converts 16-bit samples to bytes without filtering.
 *
 * REG_a is loaded from operand %2. Each iteration reads two quadwords from
 * %0 + 2*REG_a, shifts the words right by 7, packs them to 8 bytes with
 * unsigned saturation and stores them at %1 + REG_a, then adds 8 to REG_a.
 * The loop repeats while that add produces no carry, so %2 is presumably a
 * negative offset counting up towards zero with %0 and %1 biased to match -
 * confirm at the expansion site.
 */
#define YSCALEYUV2YV121 \
172 2da0d70d Diego Biurrun
    "mov %2, %%"REG_a"                    \n\t"\
173
    ASMALIGN(4) /* FIXME Unroll? */\
174
    "1:                                   \n\t"\
175
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
176
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
177
    "psraw                 $7, %%mm0      \n\t"\
178
    "psraw                 $7, %%mm1      \n\t"\
179
    "packuswb           %%mm1, %%mm0      \n\t"\
180
    MOVNTQ(%%mm0, (%1, %%REGa))\
181
    "add                   $8, %%"REG_a"  \n\t"\
182
    "jnc                   1b             \n\t"
183 c1b0bfb4 Michael Niedermayer
184 bf2bdde6 Michael Niedermayer
/*
 * YSCALEYUV2YV121_ACCURATE
 *
 * Rounding variant of YSCALEYUV2YV121 (same operands %0, %1, %2 and the same
 * loop structure). Before the loop mm7 is set to 64 in every word
 * (all ones, logical shift right by 15 gives 1, shift left by 6 gives 64).
 * That constant, half of the 1<<7 divisor, is added with signed saturation
 * before the shift right by 7.
 */
#define YSCALEYUV2YV121_ACCURATE \
185
    "mov %2, %%"REG_a"                    \n\t"\
186
    "pcmpeqw %%mm7, %%mm7                 \n\t"\
187
    "psrlw                 $15, %%mm7     \n\t"\
188
    "psllw                  $6, %%mm7     \n\t"\
189
    ASMALIGN(4) /* FIXME Unroll? */\
190
    "1:                                   \n\t"\
191
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
192
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
193 33a67bd6 Michael Niedermayer
    "paddsw             %%mm7, %%mm0      \n\t"\
194
    "paddsw             %%mm7, %%mm1      \n\t"\
195 bf2bdde6 Michael Niedermayer
    "psraw                 $7, %%mm0      \n\t"\
196
    "psraw                 $7, %%mm1      \n\t"\
197
    "packuswb           %%mm1, %%mm0      \n\t"\
198
    MOVNTQ(%%mm0, (%1, %%REGa))\
199
    "add                   $8, %%"REG_a"  \n\t"\
200
    "jnc                   1b             \n\t"
201
202 c1b0bfb4 Michael Niedermayer
/*
203 2da0d70d Diego Biurrun
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
204
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
205
       "r" (dest), "m" (dstW),
206
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
207
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
208 c1b0bfb4 Michael Niedermayer
*/
209 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_UV
 *
 * Opens an asm statement and emits the chroma half of the multi-tap
 * vertical filter for packed output. The statement is left open: further
 * instruction strings and an operand list (see YSCALEYUV2PACKEDX_END) must
 * follow at the expansion site.
 *
 * REG_a is zeroed and label 1 marks the start of the per-output-group loop
 * (the backward jump to it is not part of this macro). Inside, the filter
 * list at CHR_MMX_FILTER_OFFSET(%0) is walked in 16-byte entries (source
 * pointer at +0, coefficient quadword at +8, null pointer ends the list,
 * loop label 2). U samples are read at src + REG_a and V samples at
 * VOF + src + REG_a, multiplied with pmulhw and accumulated.
 *
 * Result: mm3 = U sum, mm4 = V sum, both seeded with the rounder from
 * VROUNDER_OFFSET(%0). Uses mm0, mm2, mm5, REG_d and REG_S as scratch.
 */
#define YSCALEYUV2PACKEDX_UV \
210 7ad6469e Diego Pettenò
    __asm__ volatile(\
211 2da0d70d Diego Biurrun
    "xor                   %%"REG_a", %%"REG_a"     \n\t"\
212
    ASMALIGN(4)\
213
    "nop                                            \n\t"\
214
    "1:                                             \n\t"\
215
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
216
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
217
    "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
218
    "movq                      %%mm3, %%mm4         \n\t"\
219
    ASMALIGN(4)\
220
    "2:                                             \n\t"\
221
    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
222
    "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
223 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
224 2da0d70d Diego Biurrun
    "add                         $16, %%"REG_d"     \n\t"\
225
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
226
    "pmulhw                    %%mm0, %%mm2         \n\t"\
227
    "pmulhw                    %%mm0, %%mm5         \n\t"\
228
    "paddw                     %%mm2, %%mm3         \n\t"\
229
    "paddw                     %%mm5, %%mm4         \n\t"\
230
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
231
    " jnz                         2b                \n\t"\
232 df57ab14 Cédric Schieli
233 fe91924d Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
 *
 * Asm instruction-string fragment: multi-tap vertical filter over one
 * plane, with the registers chosen by the caller.
 *
 *   offset     - string literal, displacement from %0 of the filter list
 *   coeff      - register that receives each coefficient quadword
 *   src1, src2 - scratch registers for the two sample quadwords
 *   dst1, dst2 - accumulators; seeded with the rounder from
 *                VROUNDER_OFFSET(%0) and holding the two sums on exit
 * The register arguments are stringified, so they are written as bare
 * operand text (e.g. %%mm0).
 *
 * The list is walked in 16-byte entries (source pointer at +0, coefficient
 * quadword at +8, null pointer ends the list, loop label 2). Samples are
 * read at src + 2*REG_a and 8 + src + 2*REG_a. REG_a must already hold the
 * current index; REG_d and REG_S are overwritten.
 */
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
234 df57ab14 Cédric Schieli
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
235 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
236 fe91924d Cédric Schieli
    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
237
    "movq                    "#dst1", "#dst2"       \n\t"\
238 2da0d70d Diego Biurrun
    ASMALIGN(4)\
239
    "2:                                             \n\t"\
240 fe91924d Cédric Schieli
    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
241
    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
242
    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
243 2da0d70d Diego Biurrun
    "add                         $16, %%"REG_d"            \n\t"\
244
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
245 fe91924d Cédric Schieli
    "pmulhw                 "#coeff", "#src1"       \n\t"\
246
    "pmulhw                 "#coeff", "#src2"       \n\t"\
247
    "paddw                   "#src1", "#dst1"       \n\t"\
248
    "paddw                   "#src2", "#dst2"       \n\t"\
249 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
250
    " jnz                         2b                \n\t"\
251
252 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX
 *
 * Chroma pass (YSCALEYUV2PACKEDX_UV) followed by the luma pass over the list
 * at LUM_MMX_FILTER_OFFSET. On exit mm3 = U, mm4 = V, mm1 = first luma
 * quadword sum, mm7 = second luma quadword sum. The asm statement opened by
 * the chroma pass is still open.
 */
#define YSCALEYUV2PACKEDX \
253
    YSCALEYUV2PACKEDX_UV \
254 fe91924d Cédric Schieli
    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
255 df57ab14 Cédric Schieli
256 2da0d70d Diego Biurrun
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Operand list, clobber list and closing parenthesis for an asm statement
 * that was opened elsewhere (YSCALEYUV2PACKEDX_UV and
 * YSCALEYUV2PACKEDX_ACCURATE_UV both open one).
 *
 * Input operands: %0 = &c->redDither, %1..%3 = dummy (memory operands that
 * only occupy operand numbers), %4 = dest, %5 = dstW (memory operand).
 * Declared clobbers: REG_a, REG_d, REG_S.
 * c, dummy, dest and dstW are taken from the expansion site.
 */
#define YSCALEYUV2PACKEDX_END                 \
257
    :: "r" (&c->redDither),                   \
258
        "m" (dummy), "m" (dummy), "m" (dummy),\
259
        "r" (dest), "m" (dstW)                \
260
    : "%"REG_a, "%"REG_d, "%"REG_S            \
261
    );
262 8422aa88 Michael Niedermayer
263 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV
 *
 * Opens an asm statement and emits the chroma half of the vertical filter
 * for packed output, accumulating in 32 bits. The statement is left open
 * (see YSCALEYUV2PACKEDX_END for the operand list).
 *
 * REG_a is zeroed and label 1 marks the start of the per-output-group loop
 * (the backward jump to it is not part of this macro). The list at
 * CHR_MMX_FILTER_OFFSET(%0) is walked in APCK_SIZE-byte entries: first source
 * pointer at +0, second source pointer at APCK_PTR2, coefficient quadword at
 * APCK_COEF; a null first pointer ends the list (loop label 2).
 * U is read at src + REG_a and V at VOF + src + REG_a from both sources; the
 * two sources are interleaved word by word and combined with pmaddwd.
 * Dword sums: mm4/mm5 for U, mm6/mm7 for V.
 *
 * After the list the sums are shifted right by 16, packed to words, the
 * rounder from VROUNDER_OFFSET(%0) is added, and the results are written to
 * memory at U_TEMP(%0) and V_TEMP(%0), which frees the registers for the
 * luma pass.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
264 7ad6469e Diego Pettenò
    __asm__ volatile(\
265 2da0d70d Diego Biurrun
    "xor %%"REG_a", %%"REG_a"                       \n\t"\
266
    ASMALIGN(4)\
267
    "nop                                            \n\t"\
268
    "1:                                             \n\t"\
269
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
270
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
271
    "pxor                      %%mm4, %%mm4         \n\t"\
272
    "pxor                      %%mm5, %%mm5         \n\t"\
273
    "pxor                      %%mm6, %%mm6         \n\t"\
274
    "pxor                      %%mm7, %%mm7         \n\t"\
275
    ASMALIGN(4)\
276
    "2:                                             \n\t"\
277
    "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
278 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
279 1625216e Michael Niedermayer
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
280 2da0d70d Diego Biurrun
    "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
281
    "movq                      %%mm0, %%mm3         \n\t"\
282
    "punpcklwd                 %%mm1, %%mm0         \n\t"\
283
    "punpckhwd                 %%mm1, %%mm3         \n\t"\
284 1625216e Michael Niedermayer
    "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
285 2da0d70d Diego Biurrun
    "pmaddwd                   %%mm1, %%mm0         \n\t"\
286
    "pmaddwd                   %%mm1, %%mm3         \n\t"\
287
    "paddd                     %%mm0, %%mm4         \n\t"\
288
    "paddd                     %%mm3, %%mm5         \n\t"\
289 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
290 1625216e Michael Niedermayer
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
291
    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
292 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
293
    "movq                      %%mm2, %%mm0         \n\t"\
294
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
295
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
296
    "pmaddwd                   %%mm1, %%mm2         \n\t"\
297
    "pmaddwd                   %%mm1, %%mm0         \n\t"\
298
    "paddd                     %%mm2, %%mm6         \n\t"\
299
    "paddd                     %%mm0, %%mm7         \n\t"\
300
    " jnz                         2b                \n\t"\
301
    "psrad                       $16, %%mm4         \n\t"\
302
    "psrad                       $16, %%mm5         \n\t"\
303
    "psrad                       $16, %%mm6         \n\t"\
304
    "psrad                       $16, %%mm7         \n\t"\
305
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
306
    "packssdw                  %%mm5, %%mm4         \n\t"\
307
    "packssdw                  %%mm7, %%mm6         \n\t"\
308
    "paddw                     %%mm0, %%mm4         \n\t"\
309
    "paddw                     %%mm0, %%mm6         \n\t"\
310
    "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
311
    "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
312 df57ab14 Cédric Schieli
313
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
 *
 * Asm instruction-string fragment: 32-bit accumulating vertical filter over
 * one plane whose filter list sits at offset(%0) (offset is a string
 * literal). The list uses the same APCK_SIZE / APCK_PTR2 / APCK_COEF entry
 * layout as YSCALEYUV2PACKEDX_ACCURATE_UV and ends at a null first pointer
 * (loop label 2). Samples are read at src + 2*REG_a and 8 + src + 2*REG_a.
 *
 * Dword sums: mm1/mm5 for the first quadword, mm7/mm6 for the second. After
 * the list they are shifted right by 16, packed to words and the rounder
 * from VROUNDER_OFFSET(%0) is added.
 *
 * On exit: mm1 and mm7 hold the two filtered quadwords, and mm3 / mm4 are
 * reloaded from U_TEMP(%0) / V_TEMP(%0). REG_a must already hold the current
 * index; REG_d, REG_S, mm0, mm2, mm5 and mm6 are overwritten.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
314
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
315 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
316
    "pxor                      %%mm1, %%mm1         \n\t"\
317
    "pxor                      %%mm5, %%mm5         \n\t"\
318
    "pxor                      %%mm7, %%mm7         \n\t"\
319
    "pxor                      %%mm6, %%mm6         \n\t"\
320
    ASMALIGN(4)\
321
    "2:                                             \n\t"\
322
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
323
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
324 1625216e Michael Niedermayer
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
325 2da0d70d Diego Biurrun
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
326
    "movq                      %%mm0, %%mm3         \n\t"\
327
    "punpcklwd                 %%mm4, %%mm0         \n\t"\
328
    "punpckhwd                 %%mm4, %%mm3         \n\t"\
329 1625216e Michael Niedermayer
    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
330 2da0d70d Diego Biurrun
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
331
    "pmaddwd                   %%mm4, %%mm3         \n\t"\
332
    "paddd                     %%mm0, %%mm1         \n\t"\
333
    "paddd                     %%mm3, %%mm5         \n\t"\
334
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
335 1625216e Michael Niedermayer
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
336
    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
337 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
338
    "movq                      %%mm2, %%mm0         \n\t"\
339
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
340
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
341
    "pmaddwd                   %%mm4, %%mm2         \n\t"\
342
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
343
    "paddd                     %%mm2, %%mm7         \n\t"\
344
    "paddd                     %%mm0, %%mm6         \n\t"\
345
    " jnz                         2b                \n\t"\
346
    "psrad                       $16, %%mm1         \n\t"\
347
    "psrad                       $16, %%mm5         \n\t"\
348
    "psrad                       $16, %%mm7         \n\t"\
349
    "psrad                       $16, %%mm6         \n\t"\
350
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
351
    "packssdw                  %%mm5, %%mm1         \n\t"\
352
    "packssdw                  %%mm6, %%mm7         \n\t"\
353
    "paddw                     %%mm0, %%mm1         \n\t"\
354
    "paddw                     %%mm0, %%mm7         \n\t"\
355
    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
356
    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
357 bca11e75 Michael Niedermayer
358 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * 32-bit accumulating chroma pass followed by the matching luma pass over
 * the list at LUM_MMX_FILTER_OFFSET. On exit mm3 = U, mm4 = V, mm1 and mm7 =
 * the two luma quadwords; the asm statement opened by the chroma pass is
 * still open.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
359
    YSCALEYUV2PACKEDX_ACCURATE_UV \
360
    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
361
362 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2RGBX
 *
 * Asm instruction-string fragment: YUV to RGB conversion on registers.
 * All constants are read relative to operand %0.
 *
 * Input:  mm3 = U, mm4 = V, mm1 = first luma quadword (Y1),
 *         mm7 = second luma quadword (Y2).
 * Steps:  subtract U_OFFSET / V_OFFSET / Y_OFFSET, multiply by the
 *         UG/VG/UB/VR/Y coefficients with pmulhw, duplicate every chroma
 *         term so it pairs with two luma samples (punpcklwd / punpckhwd of
 *         a register with itself), add luma, pack to bytes with unsigned
 *         saturation.
 * Output: mm2 = 8 blue bytes, mm4 = 8 green bytes, mm5 = 8 red bytes.
 *         mm0, mm3 and mm6 are overwritten.
 */
#define YSCALEYUV2RGBX \
363 2da0d70d Diego Biurrun
    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
364
    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
365
    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
366
    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
367
    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
368
    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
369
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
370
    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
371
    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
372
    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
373
    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
374
    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
375
    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
376
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
377
    "paddw           %%mm3, %%mm4       \n\t"\
378
    "movq            %%mm2, %%mm0       \n\t"\
379
    "movq            %%mm5, %%mm6       \n\t"\
380
    "movq            %%mm4, %%mm3       \n\t"\
381
    "punpcklwd       %%mm2, %%mm2       \n\t"\
382
    "punpcklwd       %%mm5, %%mm5       \n\t"\
383
    "punpcklwd       %%mm4, %%mm4       \n\t"\
384
    "paddw           %%mm1, %%mm2       \n\t"\
385
    "paddw           %%mm1, %%mm5       \n\t"\
386
    "paddw           %%mm1, %%mm4       \n\t"\
387
    "punpckhwd       %%mm0, %%mm0       \n\t"\
388
    "punpckhwd       %%mm6, %%mm6       \n\t"\
389
    "punpckhwd       %%mm3, %%mm3       \n\t"\
390
    "paddw           %%mm7, %%mm0       \n\t"\
391
    "paddw           %%mm7, %%mm6       \n\t"\
392
    "paddw           %%mm7, %%mm3       \n\t"\
393
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: summed with Y1, 2: summed with Y2) */\
394
    "packuswb        %%mm0, %%mm2       \n\t"\
395
    "packuswb        %%mm6, %%mm5       \n\t"\
396
    "packuswb        %%mm3, %%mm4       \n\t"\
397 d604bab9 Michael Niedermayer
398 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED(index, c)
 *
 * Asm instruction-string fragment: blends two input lines vertically,
 * keeping the result at the scale obtained by shifting the inputs right
 * by 7.
 *
 *   index - register used as byte index (stringified, e.g. %%REGa)
 *   c     - register holding the base address for the *_OFFSET
 *           displacements (stringified)
 * Asm operands, per the inline comments: %0 = buf0, %1 = buf1,
 * %2 = uvbuf0, %3 = uvbuf1.
 *
 * Before the loop the quadwords stored at CHR_MMX_FILTER_OFFSET+8(c) and
 * LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and written back, i.e.
 * the coefficients in memory are modified by every expansion that runs.
 *
 * Loop body (label 1; the index increment and backward jump are not part of
 * this macro): for chroma and luma alike the result is
 *     ((line0 - line1) * coeff >> 16) + (line1 >> 7)
 * with chroma indexed by index and luma by 2*index.
 * On exit: mm3 = U, mm4 = V, mm1 = first luma quadword, mm7 = second luma
 * quadword. mm0, mm2, mm5 and mm6 are overwritten.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
399 2da0d70d Diego Biurrun
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
400
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
401
    "psraw                $3, %%mm0                           \n\t"\
402
    "psraw                $3, %%mm1                           \n\t"\
403
    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
404
    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
405
    "xor            "#index", "#index"                        \n\t"\
406
    ASMALIGN(4)\
407
    "1:                                 \n\t"\
408
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
409
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
410 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
411
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
412 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
413
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
414
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
415
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
416
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
417
    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
418
    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
419
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
420
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
421
    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
422
    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
423
    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
424
    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
425
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
426
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
427
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
428
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
429
    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
430
    "psraw                $7, %%mm7     \n\t" /* buf1[eax] >>7*/\
431
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
432
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
433 6a4970ab Diego Biurrun
434 6e1c66bc Aurelien Jacobs
/*
 * Public spelling of REAL_YSCALEYUV2PACKED: the extra macro level lets
 * macros passed as index or c be expanded before they are stringified.
 */
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
435 6a4970ab Diego Biurrun
436 df57ab14 Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_UV(index, c)
 *
 * Asm instruction-string fragment: chroma half of the two-line blend used
 * for RGB output.
 *
 *   index - register used as byte index (stringified); zeroed here
 *   c     - register holding the base address for the *_OFFSET and *_COEFF
 *           displacements (stringified)
 * Asm operands, per the inline comments: %2 = uvbuf0, %3 = uvbuf1.
 *
 * Starts the loop (label 1; the backward jump is not part of this macro),
 * blends U and V as ((line0 - line1) * coeff >> 16) + (line1 >> 4) using the
 * coefficient at CHR_MMX_FILTER_OFFSET+8(c), subtracts U_OFFSET / V_OFFSET
 * and computes the green contributions.
 * On exit: mm2 = offset-corrected U, mm5 = offset-corrected V,
 * mm3 = U * UG_COEFF, mm4 = V * VG_COEFF. mm0 is overwritten.
 */
#define REAL_YSCALEYUV2RGB_UV(index, c) \
437 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
438
    ASMALIGN(4)\
439
    "1:                                 \n\t"\
440
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
441
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
442 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
443
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
444 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
445
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
446
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
447
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
448
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
449
    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
450
    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
451
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
452
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
453
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
454
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
455
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
456
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
457
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
458
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
459
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
460 df57ab14 Cédric Schieli
461 786dcfef Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 *
 * Asm instruction-string fragment: two-line blend of one 16-bit plane.
 *
 *   index  - byte index register (stringified); samples are at 2*index
 *   c      - register holding the base for LUM_MMX_FILTER_OFFSET
 *   b1, b2 - base addresses of the two input lines (stringified operands)
 *
 * Computes ((b1 - b2) * coeff >> 16) + (b2 >> 4) with the coefficient at
 * LUM_MMX_FILTER_OFFSET+8(c) for two consecutive quadwords.
 * On exit: mm1 = first quadword, mm7 = second quadword; mm0 and mm6 are
 * overwritten.
 */
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
462
    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
463
    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
464
    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
465
    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
466 2da0d70d Diego Biurrun
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
467
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
468
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
469
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
470
    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
471
    "psraw                $4, %%mm7     \n\t" /* buf1[eax] >>4*/\
472
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
473
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
474 df57ab14 Cédric Schieli
475
/*
 * REAL_YSCALEYUV2RGB_COEFF(c)
 *
 * Asm instruction-string fragment: final stage of the YUV to RGB
 * conversion; c is the (stringified) register holding the base address for
 * the coefficient and offset displacements.
 *
 * Input:  mm2 = offset-corrected U, mm5 = offset-corrected V, mm3 = ug,
 *         mm4 = vg, mm1 = first luma quadword (Y1), mm7 = second (Y2).
 * Steps:  multiply U by UB_COEFF and V by VR_COEFF, subtract Y_OFFSET from
 *         luma and multiply by Y_COEFF, duplicate every chroma term so it
 *         pairs with two luma samples, add luma, pack to bytes with unsigned
 *         saturation.
 * Output: mm2 = 8 blue bytes, mm4 = 8 green bytes, mm5 = 8 red bytes.
 *         mm0, mm3 and mm6 are overwritten.
 */
#define REAL_YSCALEYUV2RGB_COEFF(c) \
476 2da0d70d Diego Biurrun
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
477
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
478
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
479
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
480
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
481
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
482
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
483
    "paddw             %%mm3, %%mm4     \n\t"\
484
    "movq              %%mm2, %%mm0     \n\t"\
485
    "movq              %%mm5, %%mm6     \n\t"\
486
    "movq              %%mm4, %%mm3     \n\t"\
487
    "punpcklwd         %%mm2, %%mm2     \n\t"\
488
    "punpcklwd         %%mm5, %%mm5     \n\t"\
489
    "punpcklwd         %%mm4, %%mm4     \n\t"\
490
    "paddw             %%mm1, %%mm2     \n\t"\
491
    "paddw             %%mm1, %%mm5     \n\t"\
492
    "paddw             %%mm1, %%mm4     \n\t"\
493
    "punpckhwd         %%mm0, %%mm0     \n\t"\
494
    "punpckhwd         %%mm6, %%mm6     \n\t"\
495
    "punpckhwd         %%mm3, %%mm3     \n\t"\
496
    "paddw             %%mm7, %%mm0     \n\t"\
497
    "paddw             %%mm7, %%mm6     \n\t"\
498
    "paddw             %%mm7, %%mm3     \n\t"\
499
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: summed with Y1, 2: summed with Y2) */\
500
    "packuswb          %%mm0, %%mm2     \n\t"\
501
    "packuswb          %%mm6, %%mm5     \n\t"\
502
    "packuswb          %%mm3, %%mm4     \n\t"\
503 40494418 Cédric Schieli
504 786dcfef Cédric Schieli
/*
 * Public spelling of REAL_YSCALEYUV2RGB_YA: the extra macro level lets
 * macros passed as arguments be expanded before they are stringified.
 */
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
505 df57ab14 Cédric Schieli
506
/*
 * YSCALEYUV2RGB(index, c)
 *
 * Full two-line blend plus YUV to RGB conversion: chroma blend, luma blend
 * of asm operands %0 and %1, then the coefficient stage. Because this macro
 * adds a level of expansion, macros passed as index or c are expanded before
 * the REAL_ macros stringify them.
 * On exit mm2 / mm4 / mm5 hold the blue / green / red bytes.
 */
#define YSCALEYUV2RGB(index, c) \
507
    REAL_YSCALEYUV2RGB_UV(index, c) \
508 786dcfef Cédric Schieli
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
509 df57ab14 Cédric Schieli
    REAL_YSCALEYUV2RGB_COEFF(c)
510 6a4970ab Diego Biurrun
511 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED1(index, c)
 *
 * Asm instruction-string fragment: single-line variant without blending.
 * index is the (stringified) byte index register and is zeroed here; c is
 * accepted but not referenced by this macro body.
 * Asm operands, per the inline comments: %0 = buf0, %2 = uvbuf0.
 *
 * Starts the loop (label 1; the backward jump is not part of this macro),
 * loads U from %2 + index, V from VOF + %2 + index and two luma quadwords
 * from %0 + 2*index, and shifts every word right by 7.
 * On exit: mm3 = U, mm4 = V, mm1 = first luma quadword, mm7 = second.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
512 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
513
    ASMALIGN(4)\
514
    "1:                                 \n\t"\
515
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
516 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
517 2da0d70d Diego Biurrun
    "psraw                $7, %%mm3     \n\t" \
518
    "psraw                $7, %%mm4     \n\t" \
519
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
520
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
521
    "psraw                $7, %%mm1     \n\t" \
522
    "psraw                $7, %%mm7     \n\t" \
523 6a4970ab Diego Biurrun
524 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
525 6a4970ab Diego Biurrun
526 6e1c66bc Aurelien Jacobs
/* Loop head + YUV -> RGB conversion for a single source line (no vertical
 * blend of either luma or chroma).
 *
 * Zeroes `index`, opens local label "1:" (the WRITE* macros close the loop
 * with "jb 1b"), loads 4 U / 4 V words from operand %2 and 8 Y words from
 * operand %0, shifts them right by 4, removes the U/V/Y offsets and applies
 * the UG/VG/UB/VR/Y coefficients, all read relative to register `c`.
 * Each chroma term is duplicated (punpck?wd reg,reg) so that one chroma
 * sample serves two adjacent luma samples.
 *
 * Result, packed to unsigned saturated bytes, 8 pixels per register:
 *   mm2 = B, mm4 = G, mm5 = R
 * Clobbers mm0, mm1, mm3, mm6, mm7. */
#define REAL_YSCALEYUV2RGB1(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ASMALIGN(4)\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[index]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[index+VOF]*/\
    "psraw                $4, %%mm3     \n\t" /* uvbuf0[index] >>4*/\
    "psraw                $4, %%mm4     \n\t" /* uvbuf0[index+VOF] >>4*/\
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ], 4 samples */\
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4], 4 samples */\
    "psraw                $4, %%mm1     \n\t" /* buf0[index  ] >>4*/\
    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw             %%mm3, %%mm4     \n\t"\
    "movq              %%mm2, %%mm0     \n\t"\
    "movq              %%mm5, %%mm6     \n\t"\
    "movq              %%mm4, %%mm3     \n\t"\
    "punpcklwd         %%mm2, %%mm2     \n\t"\
    "punpcklwd         %%mm5, %%mm5     \n\t"\
    "punpcklwd         %%mm4, %%mm4     \n\t"\
    "paddw             %%mm1, %%mm2     \n\t"\
    "paddw             %%mm1, %%mm5     \n\t"\
    "paddw             %%mm1, %%mm4     \n\t"\
    "punpckhwd         %%mm0, %%mm0     \n\t"\
    "punpckhwd         %%mm6, %%mm6     \n\t"\
    "punpckhwd         %%mm3, %%mm3     \n\t"\
    "paddw             %%mm7, %%mm0     \n\t"\
    "paddw             %%mm7, %%mm6     \n\t"\
    "paddw             %%mm7, %%mm3     \n\t"\
    /* pixels 0-3: mm2=B1, mm4=G1, mm5=R1; pixels 4-7: mm0=B2, mm3=G2, mm6=R2 */\
    "packuswb          %%mm0, %%mm2     \n\t"\
    "packuswb          %%mm6, %%mm5     \n\t"\
    "packuswb          %%mm3, %%mm4     \n\t"

/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
574 497d4f99 Michael Niedermayer
575 6e1c66bc Aurelien Jacobs
/* Loop head for packed output: single luma line, chroma taken as the sum of
 * two chroma lines.
 *
 * Zeroes `index`, opens local label "1:" (the WRITE* macros close the loop
 * with "jb 1b") and loads one iteration worth of samples:
 *   mm3 = (uvbuf0 U + uvbuf1 U) >> 8   (operands %2 and %3, logical shift)
 *   mm4 = (uvbuf0 V + uvbuf1 V) >> 8   (same, at byte offset VOF)
 *   mm1 = buf0 Y[index .. index+3] >> 7
 *   mm7 = buf0 Y[index+4 .. index+7] >> 7
 * Clobbers mm2 and mm5. `c` is not referenced here. */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
    "xor "#index", "#index"             \n\t"\
    ASMALIGN(4)\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index+VOF]*/\
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[index] + uvbuf1[index]*/\
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[index+VOF] + uvbuf1[index+VOF]*/\
    "psrlw                $8, %%mm3     \n\t" \
    "psrlw                $8, %%mm4     \n\t" \
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ], 4 samples */\
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4], 4 samples */\
    "psraw                $7, %%mm1     \n\t" \
    "psraw                $7, %%mm7     \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
592 6a4970ab Diego Biurrun
593 497d4f99 Michael Niedermayer
/* Loop head + YUV -> RGB conversion with vertical chrominance interpolation:
 * luma comes from a single line (operand %0), chroma is the sum of two lines
 * (operands %2 and %3) shifted right by 5.
 *
 * Zeroes `index`, opens local label "1:" (the WRITE* macros close the loop
 * with "jb 1b"). Offsets and coefficients are read relative to register `c`.
 * Each chroma term is duplicated (punpck?wd reg,reg) so that one chroma
 * sample serves two adjacent luma samples.
 *
 * Result, packed to unsigned saturated bytes, 8 pixels per register:
 *   mm2 = B, mm4 = G, mm5 = R
 * Clobbers mm0, mm1, mm3, mm6, mm7. */
#define REAL_YSCALEYUV2RGB1b(index, c) \
    "xor            "#index", "#index"  \n\t"\
    ASMALIGN(4)\
    "1:                                 \n\t"\
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index]*/\
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index]*/\
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index+VOF]*/\
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index+VOF]*/\
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[index] + uvbuf1[index]*/\
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[index+VOF] + uvbuf1[index+VOF]*/\
    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index  ], 4 samples */\
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4], 4 samples */\
    "psraw                $4, %%mm1     \n\t" /* buf0[index  ] >>4*/\
    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4*/\
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw             %%mm3, %%mm4     \n\t"\
    "movq              %%mm2, %%mm0     \n\t"\
    "movq              %%mm5, %%mm6     \n\t"\
    "movq              %%mm4, %%mm3     \n\t"\
    "punpcklwd         %%mm2, %%mm2     \n\t"\
    "punpcklwd         %%mm5, %%mm5     \n\t"\
    "punpcklwd         %%mm4, %%mm4     \n\t"\
    "paddw             %%mm1, %%mm2     \n\t"\
    "paddw             %%mm1, %%mm5     \n\t"\
    "paddw             %%mm1, %%mm4     \n\t"\
    "punpckhwd         %%mm0, %%mm0     \n\t"\
    "punpckhwd         %%mm6, %%mm6     \n\t"\
    "punpckhwd         %%mm3, %%mm3     \n\t"\
    "paddw             %%mm7, %%mm0     \n\t"\
    "paddw             %%mm7, %%mm6     \n\t"\
    "paddw             %%mm7, %%mm3     \n\t"\
    /* pixels 0-3: mm2=B1, mm4=G1, mm5=R1; pixels 4-7: mm0=B2, mm3=G2, mm6=R2 */\
    "packuswb          %%mm0, %%mm2     \n\t"\
    "packuswb          %%mm6, %%mm5     \n\t"\
    "packuswb          %%mm3, %%mm4     \n\t"

/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
646 d604bab9 Michael Niedermayer
647 6858492e Cédric Schieli
/* Load 8 alpha samples (16-bit words) from asm operand %1 at byte offset
 * 2*index, shift each right by 7 (arithmetic) and pack them with unsigned
 * saturation into the 8 bytes of mm7. Clobbers mm1. */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
    "packuswb          %%mm1, %%mm7     \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
654
655 9c77b26b Cédric Schieli
/* Interleave four byte planes into 32-bit pixels, store them, and close the
 * loop opened at label "1:".
 *
 * b, g, r, a   registers holding 8 bytes each of B, G, R and A
 * q0, q2, q3, t  scratch registers
 * dst          destination base; pixel `index` is stored at dst + 4*index
 * dstw         loop bound compared against index
 *
 * Per iteration 8 pixels (32 bytes) are written through MOVNTQ, with byte
 * order B,G,R,A in memory; `index` is then advanced by 8 and the loop
 * repeats while index < dstw (unsigned compare, "jb").
 * b and r are overwritten in addition to the scratch registers. */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
    "movq       "#b", "#q2"     \n\t" /* B */\
    "movq       "#r", "#t"      \n\t" /* R */\
    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
\
    MOVNTQ(   q0,   (dst, index, 4))\
    MOVNTQ(    b,  8(dst, index, 4))\
    MOVNTQ(   q2, 16(dst, index, 4))\
    MOVNTQ(   q3, 24(dst, index, 4))\
\
    "add      $8, "#index"      \n\t"\
    "cmp "#dstw", "#index"      \n\t"\
    " jb      1b                \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
678 d604bab9 Michael Niedermayer
679 27a90b04 Michael Niedermayer
/* Pack 8 pixels into 16-bit 5-6-5 words, store them, and close the loop
 * opened at label "1:".
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (8 bytes each) and mm7 = 0.
 * B and R are masked to their top 5 bits (bF8), G to its top 6 bits (bFC).
 * Resulting word layout: R in bits 15..11, G in bits 10..5, B in bits 4..0.
 * 16 bytes are written through MOVNTQ at dst + 2*index; `index` is advanced
 * by 8 and the loop repeats while index < dstw (unsigned compare, "jb").
 * Clobbers mm1..mm5. */
#define REAL_WRITERGB16(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
    "psrlq           $3, %%mm2  \n\t" /* low 3 bits of each byte are already 0, so nothing crosses a byte */\
\
    "movq         %%mm2, %%mm1  \n\t"\
    "movq         %%mm4, %%mm3  \n\t"\
\
    "punpcklbw    %%mm7, %%mm3  \n\t" /* 0G words, pixels 0-3 */\
    "punpcklbw    %%mm5, %%mm2  \n\t" /* RB words, pixels 0-3 */\
    "punpckhbw    %%mm7, %%mm4  \n\t" /* 0G words, pixels 4-7 */\
    "punpckhbw    %%mm5, %%mm1  \n\t" /* RB words, pixels 4-7 */\
\
    "psllq           $3, %%mm3  \n\t"\
    "psllq           $3, %%mm4  \n\t"\
\
    "por          %%mm3, %%mm2  \n\t"\
    "por          %%mm4, %%mm1  \n\t"\
\
    MOVNTQ(%%mm2,  (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add             $8, "#index"   \n\t"\
    "cmp        "#dstw", "#index"   \n\t"\
    " jb             1b             \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
706 d604bab9 Michael Niedermayer
707 27a90b04 Michael Niedermayer
/* Pack 8 pixels into 16-bit 5-5-5 words, store them, and close the loop
 * opened at label "1:".
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (8 bytes each) and mm7 = 0.
 * All three channels are masked to their top 5 bits (bF8).
 * Resulting word layout: bit 15 = 0, R in bits 14..10, G in bits 9..5,
 * B in bits 4..0.
 * 16 bytes are written through MOVNTQ at dst + 2*index; `index` is advanced
 * by 8 and the loop repeats while index < dstw (unsigned compare, "jb").
 * Clobbers mm1..mm5. */
#define REAL_WRITERGB15(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
    "psrlq           $3, %%mm2  \n\t" /* low 3 bits of each byte are already 0, so nothing crosses a byte */\
    "psrlq           $1, %%mm5  \n\t"\
\
    "movq         %%mm2, %%mm1  \n\t"\
    "movq         %%mm4, %%mm3  \n\t"\
\
    "punpcklbw    %%mm7, %%mm3  \n\t" /* 0G words, pixels 0-3 */\
    "punpcklbw    %%mm5, %%mm2  \n\t" /* RB words, pixels 0-3 */\
    "punpckhbw    %%mm7, %%mm4  \n\t" /* 0G words, pixels 4-7 */\
    "punpckhbw    %%mm5, %%mm1  \n\t" /* RB words, pixels 4-7 */\
\
    "psllq           $2, %%mm3  \n\t"\
    "psllq           $2, %%mm4  \n\t"\
\
    "por          %%mm3, %%mm2  \n\t"\
    "por          %%mm4, %%mm1  \n\t"\
\
    MOVNTQ(%%mm2,  (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add             $8, "#index"   \n\t"\
    "cmp        "#dstw", "#index"   \n\t"\
    " jb             1b             \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
735 f62255fb Michael Niedermayer
736 6542b44e Michael Niedermayer
/* Older 24-bit packed writer, built from shifts and byte masks
 * (bm00000111, bm11111000, bm00001111). The WRITEBGR24 selection further
 * down in this file picks WRITEBGR24MMX2 or WRITEBGR24MMX, not this macro.
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (8 bytes each) and mm7 = 0.
 * Writes 8 pixels (24 bytes) through MOVNTQ at `dst`, advances the `dst`
 * register by 24 and `index` by 8, and repeats from label "1:" while
 * index < dstw (unsigned compare, "jb").
 * The trailing digit in the per-line comments is the 2-pixel group
 * (0..3) the register currently holds. Clobbers mm0..mm6. */
#define WRITEBGR24OLD(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq      %%mm2, %%mm1             \n\t" /* B */\
    "movq      %%mm5, %%mm6             \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
\
    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
\
    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
\
    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
\
    MOVNTQ(%%mm0,   (dst))\
    MOVNTQ(%%mm2,  8(dst))\
    MOVNTQ(%%mm3, 16(dst))\
    "add         $24, "#dst"            \n\t"\
\
    "add          $8, "#index"          \n\t"\
    "cmp     "#dstw", "#index"          \n\t"\
    " jb          1b                    \n\t"
791 d604bab9 Michael Niedermayer
792 6542b44e Michael Niedermayer
/* 24-bit packed writer using only shifts, unpacks and ORs (no mask
 * constants); this is the WRITEBGR24 implementation when HAVE_MMX2 is false.
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (8 bytes each) and mm7 = 0.
 * Writes 8 pixels (24 bytes) through MOVNTQ at `dst`, advances the `dst`
 * register by 24 and `index` by 8, and repeats from label "1:" while
 * index < dstw (unsigned compare, "jb").
 * The trailing digit in the per-line comments is the 2-pixel group
 * (0..3) the register currently holds. Clobbers mm0..mm7. */
#define WRITEBGR24MMX(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq      %%mm2, %%mm1     \n\t" /* B */\
    "movq      %%mm5, %%mm6     \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
\
    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
\
    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
\
    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
\
    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
    MOVNTQ(%%mm0, (dst))\
\
    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
    MOVNTQ(%%mm6, 8(dst))\
\
    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
    MOVNTQ(%%mm5, 16(dst))\
\
    "add         $24, "#dst"    \n\t"\
\
    "add          $8, "#index"  \n\t"\
    "cmp     "#dstw", "#index"  \n\t"\
    " jb          1b            \n\t"
844 99d2cb72 Michael Niedermayer
845 6542b44e Michael Niedermayer
/* 24-bit packed writer built on pshufw and the ff_M24A / ff_M24B / ff_M24C
 * byte masks; this is the WRITEBGR24 implementation when HAVE_MMX2 is true.
 *
 * Expects mm2 = B, mm4 = G, mm5 = R (8 bytes each). mm0 and mm7 are loaded
 * with ff_M24A and ff_M24C at the top of every iteration, so their incoming
 * values are not used.
 * Writes 8 pixels (24 bytes) through MOVNTQ at `dst`, advances the `dst`
 * register by 24 and `index` by 8, and repeats from label "1:" while
 * index < dstw (unsigned compare, "jb").
 * mm4 is shifted in place; mm1, mm3 and mm6 are scratch. */
#define WRITEBGR24MMX2(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R; %%mm0 and %%mm7 are reloaded with masks below */\
    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
\
    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
\
    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
    "por    %%mm1, %%mm6        \n\t"\
    "por    %%mm3, %%mm6        \n\t"\
    MOVNTQ(%%mm6, (dst))\
\
    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
\
    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
\
    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
    "por    %%mm3, %%mm6        \n\t"\
    MOVNTQ(%%mm6, 8(dst))\
\
    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
\
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
\
    "por    %%mm1, %%mm3        \n\t"\
    "por    %%mm3, %%mm6        \n\t"\
    MOVNTQ(%%mm6, 16(dst))\
\
    "add      $24, "#dst"       \n\t"\
\
    "add       $8, "#index"     \n\t"\
    "cmp  "#dstw", "#index"     \n\t"\
    " jb       1b               \n\t"
892 99d2cb72 Michael Niedermayer
893 b63f641e Aurelien Jacobs
/* Select the 24-bit packed writer for this instantiation of the template.
 * Any previous definition is dropped first; the pshufw-based variant is used
 * when HAVE_MMX2 is true, the plain MMX one otherwise. */
#undef WRITEBGR24
#if HAVE_MMX2
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
#else
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
#endif
900
901 6e1c66bc Aurelien Jacobs
/* Pack 8 luma and 4+4 chroma samples into Y U Y V byte order, store them,
 * and close the loop opened at label "1:".
 *
 * Expects 16-bit words in mm1 (Y, samples 0-3), mm7 (Y, samples 4-7),
 * mm3 (U) and mm4 (V); each is packed to bytes with unsigned saturation.
 * 16 bytes are written through MOVNTQ at dst + 2*index; `index` is advanced
 * by 8 and the loop repeats while index < dstw (unsigned compare, "jb"). */
#define REAL_WRITEYUY2(dst, dstw, index) \
    "packuswb  %%mm3, %%mm3     \n\t" /* U bytes */\
    "packuswb  %%mm4, %%mm4     \n\t" /* V bytes */\
    "packuswb  %%mm7, %%mm1     \n\t" /* 8 Y bytes */\
    "punpcklbw %%mm4, %%mm3     \n\t" /* U V U V ... */\
    "movq      %%mm1, %%mm7     \n\t"\
    "punpcklbw %%mm3, %%mm1     \n\t" /* Y U Y V, samples 0-3 */\
    "punpckhbw %%mm3, %%mm7     \n\t" /* Y U Y V, samples 4-7 */\
\
    MOVNTQ(%%mm1, (dst, index, 2))\
    MOVNTQ(%%mm7, 8(dst, index, 2))\
\
    "add          $8, "#index"  \n\t"\
    "cmp     "#dstw", "#index"  \n\t"\
    " jb          1b            \n\t"
/* Expansion wrapper so macro arguments are expanded before being stringified. */
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
917 25593e29 Michael Niedermayer
918
919 77a49659 Michael Niedermayer
/**
 * Vertical filter producing planar 8-bit output (luma, optional chroma,
 * optional alpha).
 *
 * When built with HAVE_MMX and SWS_BITEXACT is not set in c->flags, the
 * YSCALEYUV2YV12X macro blocks do the work and the function returns; the
 * _ACCURATE variants are chosen when SWS_ACCURATE_RND is set. In every other
 * case the AltiVec implementation (HAVE_ALTIVEC) or the plain C
 * implementation is called with the filter/source arguments.
 *
 * @param uDest  chroma planes (uDest and vDest) are written by the MMX path
 *               only when uDest is non-NULL
 * @param aDest  the alpha plane is written by the MMX path only when
 *               CONFIG_SWSCALE_ALPHA is enabled and aDest is non-NULL
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t **alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
#if HAVE_MMX
    if(!(c->flags & SWS_BITEXACT)){
        if (c->flags & SWS_ACCURATE_RND){
            if (uDest){
                /* U at offset 0, V at offset VOF within the chroma buffers */
                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
            }
            if (CONFIG_SWSCALE_ALPHA && aDest){
                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
            }

            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
        }else{
            if (uDest){
                /* U at offset 0, V at offset VOF within the chroma buffers */
                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
            }
            if (CONFIG_SWSCALE_ALPHA && aDest){
                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
            }

            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
        }
        return;
    }
#endif
#if HAVE_ALTIVEC
    /* NOTE(review): alpSrc/aDest are not passed on this path, so no alpha
     * plane is produced here -- confirm this is intended. */
    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                          chrFilter, chrSrc, chrFilterSize,
                          dest, uDest, vDest, dstW, chrDstW);
#else //HAVE_ALTIVEC
    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                chrFilter, chrSrc, chrFilterSize,
                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
#endif //!HAVE_ALTIVEC
}
959 2add307d Michael Niedermayer
960 6118e52e Ville Syrjälä
/**
 * Vertical filter for the format handled by yuv2nv12XinC (selected through
 * dstFormat).
 *
 * This template instance has no accelerated variant: every argument except
 * the context c is forwarded unchanged to yuv2nv12XinC().
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
{
    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
                 chrFilter, chrSrc, chrFilterSize,
                 dest, uDest, dstW, chrDstW, dstFormat);
}
968
969 6858492e Cédric Schieli
/**
 * Unfiltered vertical pass: convert one line of 16-bit intermediate samples
 * to 8-bit planar output, i.e. out = clip_uint8((in + 64) >> 7) in the C path.
 *
 * When built with HAVE_MMX and SWS_BITEXACT is not set in c->flags, each
 * non-NULL destination plane is converted by the YSCALEYUV2YV121 asm block
 * (the _ACCURATE variant when SWS_ACCURATE_RND is set) and the function
 * returns. Otherwise the C loops below are used.
 *
 * @param lumSrc  luma samples, dstW entries
 * @param chrSrc  chroma samples; U at [0, chrDstW), V at [VOFW, VOFW + chrDstW)
 * @param alpSrc  alpha samples, dstW entries; only read when aDest is used
 * @param uDest   chroma is written (to uDest and vDest) only when non-NULL
 * @param aDest   alpha is written only when non-NULL (and, in the C path,
 *                when CONFIG_SWSCALE_ALPHA is enabled)
 */
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, int16_t *alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
    long i; /* same width as dstW/chrDstW */
#if HAVE_MMX
    if(!(c->flags & SWS_BITEXACT)){
        long p= 4;
        /* Plane bases are kept as int16_t pointers; the end-of-line offset is
         * added only after the destination has been checked, so no arithmetic
         * is ever done on the base of an absent plane. */
        const int16_t *src[4]= {alpSrc, lumSrc, chrSrc, chrSrc + VOFW};
        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};

        /* The asm is handed end-of-line pointers plus a negative count.
         * It reads and writes through those pointers, hence the "memory"
         * clobber. */
        if (c->flags & SWS_ACCURATE_RND){
            while(p--){
                if (dst[p]){
                    __asm__ volatile(
                        YSCALEYUV2YV121_ACCURATE
                        :: "r" (src[p] + counter[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a, "memory"
                    );
                }
            }
        }else{
            while(p--){
                if (dst[p]){
                    __asm__ volatile(
                        YSCALEYUV2YV121
                        :: "r" (src[p] + counter[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a, "memory"
                    );
                }
            }
        }
        return;
    }
#endif
    /* C path. (x + 64) >> 7 of an int16_t lies in [-256, 256], so a plain
     * clip to 0..255 gives the same result as testing bit 8. */
    for (i=0; i<dstW; i++){
        dest[i]= av_clip_uint8((lumSrc[i]+64)>>7);
    }

    if (uDest){
        for (i=0; i<chrDstW; i++){
            uDest[i]= av_clip_uint8((chrSrc[i       ]+64)>>7);
            vDest[i]= av_clip_uint8((chrSrc[i + VOFW]+64)>>7);
        }
    }

    if (CONFIG_SWSCALE_ALPHA && aDest){
        for (i=0; i<dstW; i++){
            aDest[i]= av_clip_uint8((alpSrc[i]+64)>>7);
        }
    }
}
1041
1042 c1b0bfb4 Michael Niedermayer
1043 d604bab9 Michael Niedermayer
/**
1044
 * vertical scale YV12 to RGB
1045
 */
1046 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1047 2da0d70d Diego Biurrun
                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1048 6858492e Cédric Schieli
                                       int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
1049 c1b0bfb4 Michael Niedermayer
{
1050 b63f641e Aurelien Jacobs
#if HAVE_MMX
1051 d0ce212a Ramiro Polla
    x86_reg dummy=0;
1052 f433c8ab Michael Niedermayer
    if(!(c->flags & SWS_BITEXACT)){
1053 14014d47 Michael Niedermayer
        if (c->flags & SWS_ACCURATE_RND){
1054
            switch(c->dstFormat){
1055
            case PIX_FMT_RGB32:
1056 6858492e Cédric Schieli
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1057
                    YSCALEYUV2PACKEDX_ACCURATE
1058
                    YSCALEYUV2RGBX
1059
                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
1060
                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
1061
                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
1062
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1063
                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
1064
                    "psraw                        $3, %%mm1         \n\t"
1065
                    "psraw                        $3, %%mm7         \n\t"
1066
                    "packuswb                  %%mm7, %%mm1         \n\t"
1067
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1068
1069
                    YSCALEYUV2PACKEDX_END
1070
                }else{
1071 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_ACCURATE
1072
                    YSCALEYUV2RGBX
1073
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1074
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1075 2da0d70d Diego Biurrun
1076 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_END
1077 6858492e Cédric Schieli
                }
1078 14014d47 Michael Niedermayer
                return;
1079
            case PIX_FMT_BGR24:
1080
                YSCALEYUV2PACKEDX_ACCURATE
1081
                YSCALEYUV2RGBX
1082 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1083 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1084
                "add %4, %%"REG_c"                        \n\t"
1085
                WRITEBGR24(%%REGc, %5, %%REGa)
1086 2da0d70d Diego Biurrun
1087
1088 14014d47 Michael Niedermayer
                :: "r" (&c->redDither),
1089
                "m" (dummy), "m" (dummy), "m" (dummy),
1090
                "r" (dest), "m" (dstW)
1091
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1092
                );
1093
                return;
1094
            case PIX_FMT_RGB555:
1095
                YSCALEYUV2PACKEDX_ACCURATE
1096
                YSCALEYUV2RGBX
1097 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1098 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1099 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1100 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1101
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1102
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1103 2da0d70d Diego Biurrun
#endif
1104
1105 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1106
                YSCALEYUV2PACKEDX_END
1107
                return;
1108
            case PIX_FMT_RGB565:
1109
                YSCALEYUV2PACKEDX_ACCURATE
1110
                YSCALEYUV2RGBX
1111 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1112 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1113 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1114 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1115
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1116
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1117 2da0d70d Diego Biurrun
#endif
1118
1119 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1120
                YSCALEYUV2PACKEDX_END
1121
                return;
1122
            case PIX_FMT_YUYV422:
1123
                YSCALEYUV2PACKEDX_ACCURATE
1124
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1125
1126
                "psraw $3, %%mm3    \n\t"
1127
                "psraw $3, %%mm4    \n\t"
1128
                "psraw $3, %%mm1    \n\t"
1129
                "psraw $3, %%mm7    \n\t"
1130
                WRITEYUY2(%4, %5, %%REGa)
1131
                YSCALEYUV2PACKEDX_END
1132
                return;
1133
            }
1134
        }else{
1135
            switch(c->dstFormat)
1136
            {
1137
            case PIX_FMT_RGB32:
1138 6858492e Cédric Schieli
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1139
                    YSCALEYUV2PACKEDX
1140
                    YSCALEYUV2RGBX
1141
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1142
                    "psraw                        $3, %%mm1         \n\t"
1143
                    "psraw                        $3, %%mm7         \n\t"
1144
                    "packuswb                  %%mm7, %%mm1         \n\t"
1145
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1146
                    YSCALEYUV2PACKEDX_END
1147
                }else{
1148 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX
1149
                    YSCALEYUV2RGBX
1150
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1151
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1152
                    YSCALEYUV2PACKEDX_END
1153 6858492e Cédric Schieli
                }
1154 14014d47 Michael Niedermayer
                return;
1155
            case PIX_FMT_BGR24:
1156
                YSCALEYUV2PACKEDX
1157
                YSCALEYUV2RGBX
1158 40494418 Cédric Schieli
                "pxor                    %%mm7, %%mm7       \n\t"
1159 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1160
                "add                        %4, %%"REG_c"   \n\t"
1161
                WRITEBGR24(%%REGc, %5, %%REGa)
1162
1163
                :: "r" (&c->redDither),
1164
                "m" (dummy), "m" (dummy), "m" (dummy),
1165
                "r" (dest),  "m" (dstW)
1166
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1167
                );
1168
                return;
1169
            case PIX_FMT_RGB555:
1170
                YSCALEYUV2PACKEDX
1171
                YSCALEYUV2RGBX
1172 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1173 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1174 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1175 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1176
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1177
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1178 2da0d70d Diego Biurrun
#endif
1179
1180 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1181
                YSCALEYUV2PACKEDX_END
1182
                return;
1183
            case PIX_FMT_RGB565:
1184
                YSCALEYUV2PACKEDX
1185
                YSCALEYUV2RGBX
1186 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1187 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1188 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1189 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1190
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1191
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1192 2da0d70d Diego Biurrun
#endif
1193
1194 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1195
                YSCALEYUV2PACKEDX_END
1196
                return;
1197
            case PIX_FMT_YUYV422:
1198
                YSCALEYUV2PACKEDX
1199
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1200
1201
                "psraw $3, %%mm3    \n\t"
1202
                "psraw $3, %%mm4    \n\t"
1203
                "psraw $3, %%mm1    \n\t"
1204
                "psraw $3, %%mm7    \n\t"
1205
                WRITEYUY2(%4, %5, %%REGa)
1206
                YSCALEYUV2PACKEDX_END
1207
                return;
1208
            }
1209 bca11e75 Michael Niedermayer
        }
1210
    }
1211 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1212 b63f641e Aurelien Jacobs
#if HAVE_ALTIVEC
1213 2da0d70d Diego Biurrun
    /* The following list of supported dstFormat values should
1214 780daf2b Diego Biurrun
       match what's found in the body of ff_yuv2packedX_altivec() */
1215 6858492e Cédric Schieli
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf
1216 12794f73 Kostya Shishkov
       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1217 2da0d70d Diego Biurrun
        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1218 12794f73 Kostya Shishkov
        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
1219 780daf2b Diego Biurrun
            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
1220
                                   chrFilter, chrSrc, chrFilterSize,
1221
                                   dest, dstW, dstY);
1222 2da0d70d Diego Biurrun
    else
1223
#endif
1224
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1225
                       chrFilter, chrSrc, chrFilterSize,
1226 6858492e Cédric Schieli
                       alpSrc, dest, dstW, dstY);
1227 c1b0bfb4 Michael Niedermayer
}
1228
1229
/**
 * Vertical bilinear scale YV12 to RGB (or another packed output format).
 *
 * Two source lines per plane are combined into one packed output line.
 * When MMX is available and SWS_BITEXACT is not set in c->flags, the
 * destination formats RGB32, BGR24, RGB555, RGB565 and YUYV422 are written
 * by inline assembly and the function returns right away. Every other case
 * falls through to the generic C code produced by YSCALE_YUV_2_ANYRGB_C.
 *
 * @param c       scaler context; c->flags, c->dstFormat and c->alpPixBuf are
 *                read here, and the asm addresses further fields relative to
 *                &c->redDither
 * @param buf0    first luma source line
 * @param buf1    second luma source line
 * @param uvbuf0  first chroma source line
 * @param uvbuf1  second chroma source line
 * @param abuf0   first alpha source line (used by the RGB32 alpha path)
 * @param abuf1   second alpha source line (used by the RGB32 alpha path)
 * @param dest    destination line
 * @param dstW    destination width; not referenced directly in this body,
 *                the asm reads the width from 8280(%5) (see the note below)
 * @param yalpha  luma blend weight; the complement 4095-yalpha is computed
 *                below
 * @param uvalpha chroma blend weight; the complement 4095-uvalpha is computed
 *                below
 * @param y       not referenced directly in this body
 *
 * NOTE(review): yalpha1, uvalpha1, i (and presumably dstW/y) are not used by
 * any statement visible here; presumably the YSCALE_YUV_2_*_C macros expand
 * to code that uses them -- confirm against the macro definitions.
 */
1232 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
1233 6858492e Cédric Schieli
                          uint16_t *abuf0, uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1234 d604bab9 Michael Niedermayer
{
1235 ac0ad729 Michael Niedermayer
    // complements of the two blend weights with respect to 4095
    int  yalpha1=4095- yalpha;
1236
    int uvalpha1=4095-uvalpha;
1237 2da0d70d Diego Biurrun
    int i;
1238 d604bab9 Michael Niedermayer
1239 b63f641e Aurelien Jacobs
#if HAVE_MMX
1240 f433c8ab Michael Niedermayer
    // The MMX paths are bypassed when bit-exact output is requested.
    if(!(c->flags & SWS_BITEXACT)){
1241 2da0d70d Diego Biurrun
        /*
         * Operand layout shared by all asm statements below:
         *   %0 = buf0 (c), %1 = buf1 (d), %2 = uvbuf0 (S), %3 = uvbuf1 (D),
         *   %4 = dest (memory operand), %5 = &c->redDither (a).
         * Except in the x86_64 alpha variant, REG_b is not listed as a
         * clobber: it is saved to ESP_OFFSET(%5) on entry and restored on
         * exit, and REG_BP is preserved with push/pop.
         */
        switch(c->dstFormat)
1242
        {
1243
            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1244
            case PIX_FMT_RGB32:
1245 6858492e Cédric Schieli
                // With an alpha plane the alpha lines are scaled as well and
                // packed into mm1, which WRITEBGR32 receives as 4th channel.
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1246
#if ARCH_X86_64
1247
                    // x86_64: abuf0/abuf1 are passed as extra register
                    // operands %6/%7; REG_b and REG_BP are declared clobbered
                    // instead of being saved by hand.
                    __asm__ volatile(
1248
                    "mov        %4, %%"REG_b"               \n\t"
1249
                    YSCALEYUV2RGB(%%REGBP, %5)
1250
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1251
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1252
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1253
                    "packuswb            %%mm7, %%mm1       \n\t"
1254
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1255
1256
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1257
                    "a" (&c->redDither)
1258
                    ,"r" (abuf0), "r" (abuf1)
1259
                    : "%"REG_b, "%"REG_BP
1260
                    );
1261
#else
1262
                    // Non-x86_64: no registers are left for the alpha
                    // pointers, so they are stored in c->u_temp / c->v_temp
                    // and loaded into %0/%1 inside the asm (presumably
                    // U_TEMP/V_TEMP are the offsets of those fields from
                    // &c->redDither -- confirm).
                    *(uint16_t **)(&c->u_temp)=abuf0;
1263
                    *(uint16_t **)(&c->v_temp)=abuf1;
1264
                    __asm__ volatile(
1265
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1266
                    "mov        %4, %%"REG_b"               \n\t"
1267
                    "push %%"REG_BP"                        \n\t"
1268
                    YSCALEYUV2RGB(%%REGBP, %5)
1269
                    /* save buf0/buf1 while %0/%1 temporarily hold the alpha line pointers */
                    "push                   %0              \n\t"
1270
                    "push                   %1              \n\t"
1271
                    "mov          "U_TEMP"(%5), %0          \n\t"
1272
                    "mov          "V_TEMP"(%5), %1          \n\t"
1273
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1274
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1275
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1276
                    "packuswb            %%mm7, %%mm1       \n\t"
1277
                    "pop                    %1              \n\t"
1278
                    "pop                    %0              \n\t"
1279
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1280
                    "pop %%"REG_BP"                         \n\t"
1281
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1282
1283
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1284
                    "a" (&c->redDither)
1285
                    );
1286
#endif
1287
                }else{
1288 3164d25e Cédric Schieli
                    // No alpha plane: pcmpeqd sets every bit of mm7, which is
                    // then passed to WRITEBGR32 as the 4th channel.
                    __asm__ volatile(
1289
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1290
                    "mov        %4, %%"REG_b"               \n\t"
1291
                    "push %%"REG_BP"                        \n\t"
1292
                    YSCALEYUV2RGB(%%REGBP, %5)
1293
                    "pcmpeqd %%mm7, %%mm7                   \n\t"
1294
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1295
                    "pop %%"REG_BP"                         \n\t"
1296
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1297 2da0d70d Diego Biurrun
1298 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1299
                    "a" (&c->redDither)
1300
                    );
1301 6858492e Cédric Schieli
                }
1302 2da0d70d Diego Biurrun
                return;
1303
            case PIX_FMT_BGR24:
1304 7ad6469e Diego Pettenò
                // mm7 is cleared before the 24-bit writer runs
                __asm__ volatile(
1305 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1306
                "mov        %4, %%"REG_b"               \n\t"
1307
                "push %%"REG_BP"                        \n\t"
1308
                YSCALEYUV2RGB(%%REGBP, %5)
1309 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1310 2da0d70d Diego Biurrun
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1311
                "pop %%"REG_BP"                         \n\t"
1312
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1313
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1314
                "a" (&c->redDither)
1315
                );
1316
                return;
1317 27a90b04 Michael Niedermayer
            case PIX_FMT_RGB555:
1318 7ad6469e Diego Pettenò
                // 15-bit output; the dither values are added to B/G/R only
                // when DITHER1XBPP is defined
                __asm__ volatile(
1319 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1320
                "mov        %4, %%"REG_b"               \n\t"
1321
                "push %%"REG_BP"                        \n\t"
1322
                YSCALEYUV2RGB(%%REGBP, %5)
1323 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1324 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1325 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1326 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1327
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1328
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1329 2da0d70d Diego Biurrun
#endif
1330
1331 27a90b04 Michael Niedermayer
                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1332 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1333
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1334
1335
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1336
                "a" (&c->redDither)
1337
                );
1338
                return;
1339 27a90b04 Michael Niedermayer
            case PIX_FMT_RGB565:
1340 7ad6469e Diego Pettenò
                // same sequence as RGB555 but finished with the 16-bit writer
                __asm__ volatile(
1341 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1342
                "mov        %4, %%"REG_b"               \n\t"
1343
                "push %%"REG_BP"                        \n\t"
1344
                YSCALEYUV2RGB(%%REGBP, %5)
1345 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1346 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1347 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1348 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1349
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1350
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1351 2da0d70d Diego Biurrun
#endif
1352
1353 27a90b04 Michael Niedermayer
                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1354 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1355
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1356
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1357
                "a" (&c->redDither)
1358
                );
1359
                return;
1360
            case PIX_FMT_YUYV422:
1361 7ad6469e Diego Pettenò
                // packed YUV output: uses YSCALEYUV2PACKED instead of the
                // RGB conversion macro
                __asm__ volatile(
1362 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1363
                "mov %4, %%"REG_b"                        \n\t"
1364
                "push %%"REG_BP"                        \n\t"
1365
                YSCALEYUV2PACKED(%%REGBP, %5)
1366
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1367
                "pop %%"REG_BP"                         \n\t"
1368
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1369
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1370
                "a" (&c->redDither)
1371
                );
1372
                return;
1373
            // any other destination format is handled by the C code below
            default: break;
1374
        }
1375 f433c8ab Michael Niedermayer
    }
1376 cf7d1c1a Michael Niedermayer
#endif //HAVE_MMX
1377 6858492e Cédric Schieli
// Generic C path: reached without MMX, with SWS_BITEXACT, or for formats the
// switch above does not handle.
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1378 d604bab9 Michael Niedermayer
}
1379
1380
/**
 * YV12 to RGB (or another packed output format) without scaling or
 * interpolating: a single luma line buf0 is converted.
 *
 * Control flow:
 *  - If SWS_FULL_CHR_H_INT is set in flags, the work is delegated to
 *    yuv2packed2() with buf0 and abuf0 each passed twice and yalpha = 0.
 *  - Otherwise, when MMX is available and SWS_BITEXACT is not set, RGB32,
 *    BGR24, RGB555, RGB565 and YUYV422 are written by inline assembly.
 *    uvalpha < 2048 selects the YSCALEYUV2RGB1 / YSCALEYUV2PACKED1 macros,
 *    any other value the "1b" variants.
 *  - Everything else is handled by the C macros at the end, which make the
 *    same uvalpha < 2048 distinction.
 *
 * Unlike yuv2packed2(), the format switch and the flag tests use the
 * dstFormat and flags parameters rather than c->dstFormat / c->flags.
 *
 * @param c         scaler context; c->alpPixBuf is read here and the asm
 *                  addresses further fields relative to &c->redDither
 * @param buf0      luma source line
 * @param uvbuf0    first chroma source line
 * @param uvbuf1    second chroma source line
 * @param abuf0     alpha source line (used by the RGB32 alpha paths)
 * @param dest      destination line
 * @param dstW      destination width; only passed on to yuv2packed2() in the
 *                  visible code, the asm reads the width from 8280(%5)
 * @param uvalpha   chroma weight, used to choose between the two code paths
 * @param dstFormat destination pixel format
 * @param flags     scaler flags (SWS_FULL_CHR_H_INT and SWS_BITEXACT are
 *                  tested)
 * @param y         only passed on to yuv2packed2() in the visible code
 *
 * NOTE(review): yalpha1, yalpha, buf1 and i are presumably consumed by the
 * YSCALE_YUV_2_*_C macro expansions -- confirm against their definitions.
 */
1383 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1384 6858492e Cédric Schieli
                          uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1385 d604bab9 Michael Niedermayer
{
1386 2da0d70d Diego Biurrun
    const int yalpha1=0;
1387
    int i;
1388 6a4970ab Diego Biurrun
1389 8a322796 Diego Biurrun
    // buf1 aliases buf0; it is also what the non-alpha asm below passes in
    // the "d" register
    uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1390 2da0d70d Diego Biurrun
    const int yalpha= 4096; //FIXME ...
1391 96034638 Michael Niedermayer
1392 2da0d70d Diego Biurrun
    // Full chroma interpolation is not implemented here: reuse the two-line
    // function with both lines identical.
    if (flags&SWS_FULL_CHR_H_INT)
1393
    {
1394 6858492e Cédric Schieli
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
1395 2da0d70d Diego Biurrun
        return;
1396
    }
1397 397c035e Michael Niedermayer
1398 b63f641e Aurelien Jacobs
#if HAVE_MMX
1399 f433c8ab Michael Niedermayer
    // The MMX paths are bypassed when bit-exact output is requested.
    /*
     * Operand layout shared by all asm statements below:
     *   %0 = buf0 (c), %1 = buf1 or, in the alpha variants, abuf0 (d),
     *   %2 = uvbuf0 (S), %3 = uvbuf1 (D), %4 = dest (memory operand),
     *   %5 = &c->redDither (a).
     * REG_b is saved to ESP_OFFSET(%5) on entry and restored on exit;
     * REG_BP is preserved with push/pop.
     */
    if(!(flags & SWS_BITEXACT)){
1400 14014d47 Michael Niedermayer
        if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1401 2da0d70d Diego Biurrun
        {
1402 14014d47 Michael Niedermayer
            switch(dstFormat)
1403
            {
1404
            case PIX_FMT_RGB32:
1405 6858492e Cédric Schieli
                // With an alpha plane, abuf0 replaces buf1 in the "d"
                // register and YSCALEYUV2RGB1_ALPHA runs before the writer.
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1406
                    __asm__ volatile(
1407
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1408
                    "mov        %4, %%"REG_b"               \n\t"
1409
                    "push %%"REG_BP"                        \n\t"
1410
                    YSCALEYUV2RGB1(%%REGBP, %5)
1411
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1412
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1413
                    "pop %%"REG_BP"                         \n\t"
1414
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1415
1416
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1417
                    "a" (&c->redDither)
1418
                    );
1419
                }else{
1420 3164d25e Cédric Schieli
                    // No alpha plane: pcmpeqd sets every bit of mm7, which is
                    // then passed to WRITEBGR32 as the 4th channel.
                    __asm__ volatile(
1421
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1422
                    "mov        %4, %%"REG_b"               \n\t"
1423
                    "push %%"REG_BP"                        \n\t"
1424
                    YSCALEYUV2RGB1(%%REGBP, %5)
1425
                    "pcmpeqd %%mm7, %%mm7                   \n\t"
1426
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1427
                    "pop %%"REG_BP"                         \n\t"
1428
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1429 14014d47 Michael Niedermayer
1430 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1431
                    "a" (&c->redDither)
1432
                    );
1433 6858492e Cédric Schieli
                }
1434 14014d47 Michael Niedermayer
                return;
1435
            case PIX_FMT_BGR24:
1436 7ad6469e Diego Pettenò
                // mm7 is cleared before the 24-bit writer runs
                __asm__ volatile(
1437 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1438
                "mov        %4, %%"REG_b"               \n\t"
1439
                "push %%"REG_BP"                        \n\t"
1440
                YSCALEYUV2RGB1(%%REGBP, %5)
1441 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1442 14014d47 Michael Niedermayer
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1443
                "pop %%"REG_BP"                         \n\t"
1444
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1445
1446
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1447
                "a" (&c->redDither)
1448
                );
1449
                return;
1450
            case PIX_FMT_RGB555:
1451 7ad6469e Diego Pettenò
                // 15-bit output; the dither values are added to B/G/R only
                // when DITHER1XBPP is defined
                __asm__ volatile(
1452 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1453
                "mov        %4, %%"REG_b"               \n\t"
1454
                "push %%"REG_BP"                        \n\t"
1455
                YSCALEYUV2RGB1(%%REGBP, %5)
1456 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1457 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1458 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1459 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1460
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1461
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1462 2da0d70d Diego Biurrun
#endif
1463 14014d47 Michael Niedermayer
                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1464
                "pop %%"REG_BP"                         \n\t"
1465
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1466 2da0d70d Diego Biurrun
1467 14014d47 Michael Niedermayer
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1468
                "a" (&c->redDither)
1469
                );
1470
                return;
1471
            case PIX_FMT_RGB565:
1472 7ad6469e Diego Pettenò
                // same sequence as RGB555 but finished with the 16-bit writer
                __asm__ volatile(
1473 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1474
                "mov        %4, %%"REG_b"               \n\t"
1475
                "push %%"REG_BP"                        \n\t"
1476
                YSCALEYUV2RGB1(%%REGBP, %5)
1477 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1478 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1479 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1480 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1481
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1482
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1483 2da0d70d Diego Biurrun
#endif
1484
1485 14014d47 Michael Niedermayer
                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1486
                "pop %%"REG_BP"                         \n\t"
1487
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1488 2da0d70d Diego Biurrun
1489 14014d47 Michael Niedermayer
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1490
                "a" (&c->redDither)
1491
                );
1492
                return;
1493
            case PIX_FMT_YUYV422:
1494 7ad6469e Diego Pettenò
                // packed YUV output: uses YSCALEYUV2PACKED1 instead of the
                // RGB conversion macro
                __asm__ volatile(
1495 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1496
                "mov        %4, %%"REG_b"               \n\t"
1497
                "push %%"REG_BP"                        \n\t"
1498
                YSCALEYUV2PACKED1(%%REGBP, %5)
1499
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1500
                "pop %%"REG_BP"                         \n\t"
1501
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1502
1503
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1504
                "a" (&c->redDither)
1505
                );
1506
                return;
1507
            }
1508 2da0d70d Diego Biurrun
        }
1509 14014d47 Michael Niedermayer
        else
1510 2da0d70d Diego Biurrun
        {
1511 14014d47 Michael Niedermayer
            // uvalpha >= 2048: same set of formats, but using the "1b"
            // variants of the conversion macros
            switch(dstFormat)
1512
            {
1513
            case PIX_FMT_RGB32:
1514 6858492e Cédric Schieli
                // alpha variant: abuf0 is passed in the "d" register
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1515
                    __asm__ volatile(
1516
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1517
                    "mov        %4, %%"REG_b"               \n\t"
1518
                    "push %%"REG_BP"                        \n\t"
1519
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1520
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1521
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1522
                    "pop %%"REG_BP"                         \n\t"
1523
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1524
1525
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1526
                    "a" (&c->redDither)
1527
                    );
1528
                }else{
1529 3164d25e Cédric Schieli
                    // no alpha plane: mm7 is set to all ones
                    __asm__ volatile(
1530
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1531
                    "mov        %4, %%"REG_b"               \n\t"
1532
                    "push %%"REG_BP"                        \n\t"
1533
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1534
                    "pcmpeqd %%mm7, %%mm7                   \n\t"
1535
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1536
                    "pop %%"REG_BP"                         \n\t"
1537
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1538 14014d47 Michael Niedermayer
1539 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1540
                    "a" (&c->redDither)
1541
                    );
1542 6858492e Cédric Schieli
                }
1543 14014d47 Michael Niedermayer
                return;
1544
            case PIX_FMT_BGR24:
1545 7ad6469e Diego Pettenò
                __asm__ volatile(
1546 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1547
                "mov        %4, %%"REG_b"               \n\t"
1548
                "push %%"REG_BP"                        \n\t"
1549
                YSCALEYUV2RGB1b(%%REGBP, %5)
1550 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1551 14014d47 Michael Niedermayer
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1552
                "pop %%"REG_BP"                         \n\t"
1553
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1554
1555
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1556
                "a" (&c->redDither)
1557
                );
1558
                return;
1559
            case PIX_FMT_RGB555:
1560 7ad6469e Diego Pettenò
                __asm__ volatile(
1561 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1562
                "mov        %4, %%"REG_b"               \n\t"
1563
                "push %%"REG_BP"                        \n\t"
1564
                YSCALEYUV2RGB1b(%%REGBP, %5)
1565 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1566 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1567 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1568 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1569
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1570
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1571 2da0d70d Diego Biurrun
#endif
1572 14014d47 Michael Niedermayer
                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1573
                "pop %%"REG_BP"                         \n\t"
1574
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1575 2da0d70d Diego Biurrun
1576 14014d47 Michael Niedermayer
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1577
                "a" (&c->redDither)
1578
                );
1579
                return;
1580
            case PIX_FMT_RGB565:
1581 7ad6469e Diego Pettenò
                __asm__ volatile(
1582 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1583
                "mov        %4, %%"REG_b"               \n\t"
1584
                "push %%"REG_BP"                        \n\t"
1585
                YSCALEYUV2RGB1b(%%REGBP, %5)
1586 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1587 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1588 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1589 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1590
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1591
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1592 2da0d70d Diego Biurrun
#endif
1593
1594 14014d47 Michael Niedermayer
                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1595
                "pop %%"REG_BP"                         \n\t"
1596
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1597 2da0d70d Diego Biurrun
1598 14014d47 Michael Niedermayer
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1599
                "a" (&c->redDither)
1600
                );
1601
                return;
1602
            case PIX_FMT_YUYV422:
1603 7ad6469e Diego Pettenò
                __asm__ volatile(
1604 14014d47 Michael Niedermayer
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1605
                "mov        %4, %%"REG_b"               \n\t"
1606
                "push %%"REG_BP"                        \n\t"
1607
                YSCALEYUV2PACKED1b(%%REGBP, %5)
1608
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1609
                "pop %%"REG_BP"                         \n\t"
1610
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1611
1612
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1613
                "a" (&c->redDither)
1614
                );
1615
                return;
1616
            }
1617 2da0d70d Diego Biurrun
        }
1618
    }
1619 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1620 e5091488 Benoit Fouet
    // Generic C path: reached without MMX, with SWS_BITEXACT, or for formats
    // the switches above do not handle.
    if (uvalpha < 2048)
1621 2da0d70d Diego Biurrun
    {
1622 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1623 2da0d70d Diego Biurrun
    }else{
1624 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1625 2da0d70d Diego Biurrun
    }
1626 d604bab9 Michael Niedermayer
}
1627
1628 8a322796 Diego Biurrun
//FIXME yuy2* can read up to 7 samples too many
1629 6ff0ad6b Michael Niedermayer
1630 896a22b8 Luca Barbato
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
1631 1e621b18 Michael Niedermayer
{
1632 b63f641e Aurelien Jacobs
#if HAVE_MMX
1633 7ad6469e Diego Pettenò
    __asm__ volatile(
1634 2da0d70d Diego Biurrun
    "movq "MANGLE(bm01010101)", %%mm2           \n\t"
1635
    "mov                    %0, %%"REG_a"       \n\t"
1636
    "1:                                         \n\t"
1637
    "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1638
    "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1639
    "pand                %%mm2, %%mm0           \n\t"
1640
    "pand                %%mm2, %%mm1           \n\t"
1641
    "packuswb            %%mm1, %%mm0           \n\t"
1642
    "movq                %%mm0, (%2, %%"REG_a") \n\t"
1643
    "add                    $8, %%"REG_a"       \n\t"
1644
    " js                    1b                  \n\t"
1645 d0ce212a Ramiro Polla
    : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1646 2da0d70d Diego Biurrun
    : "%"REG_a
1647
    );
1648 1e621b18 Michael Niedermayer
#else
1649 2da0d70d Diego Biurrun
    int i;
1650
    for (i=0; i<width; i++)
1651
        dst[i]= src[2*i];
1652 1e621b18 Michael Niedermayer
#endif
1653
}
1654
1655 896a22b8 Luca Barbato
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
1656 1e621b18 Michael Niedermayer
{
1657 b63f641e Aurelien Jacobs
#if HAVE_MMX
1658 7ad6469e Diego Pettenò
    __asm__ volatile(
1659 2da0d70d Diego Biurrun
    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1660
    "mov                    %0, %%"REG_a"       \n\t"
1661
    "1:                                         \n\t"
1662
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1663
    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1664
    "psrlw                  $8, %%mm0           \n\t"
1665
    "psrlw                  $8, %%mm1           \n\t"
1666
    "packuswb            %%mm1, %%mm0           \n\t"
1667
    "movq                %%mm0, %%mm1           \n\t"
1668
    "psrlw                  $8, %%mm0           \n\t"
1669
    "pand                %%mm4, %%mm1           \n\t"
1670
    "packuswb            %%mm0, %%mm0           \n\t"
1671
    "packuswb            %%mm1, %%mm1           \n\t"
1672
    "movd                %%mm0, (%3, %%"REG_a") \n\t"
1673
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1674
    "add                    $4, %%"REG_a"       \n\t"
1675
    " js                    1b                  \n\t"
1676 d0ce212a Ramiro Polla
    : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1677 2da0d70d Diego Biurrun
    : "%"REG_a
1678
    );
1679 1e621b18 Michael Niedermayer
#else
1680 2da0d70d Diego Biurrun
    int i;
1681
    for (i=0; i<width; i++)
1682
    {
1683
        dstU[i]= src1[4*i + 1];
1684
        dstV[i]= src1[4*i + 3];
1685
    }
1686
#endif
1687
    assert(src1 == src2);
1688 1e621b18 Michael Niedermayer
}
1689
1690 4cf16bbe Diego Biurrun
/* This is almost identical to the previous, and exists only because
1691
 * yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1692 896a22b8 Luca Barbato
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
1693 7322a67c Michael Niedermayer
{
1694 b63f641e Aurelien Jacobs
#if HAVE_MMX
1695 7ad6469e Diego Pettenò
    __asm__ volatile(
1696 2da0d70d Diego Biurrun
    "mov                  %0, %%"REG_a"         \n\t"
1697
    "1:                                         \n\t"
1698
    "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
1699
    "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
1700
    "psrlw                $8, %%mm0             \n\t"
1701
    "psrlw                $8, %%mm1             \n\t"
1702
    "packuswb          %%mm1, %%mm0             \n\t"
1703
    "movq              %%mm0, (%2, %%"REG_a")   \n\t"
1704
    "add                  $8, %%"REG_a"         \n\t"
1705
    " js                  1b                    \n\t"
1706 d0ce212a Ramiro Polla
    : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1707 2da0d70d Diego Biurrun
    : "%"REG_a
1708
    );
1709 7322a67c Michael Niedermayer
#else
1710 2da0d70d Diego Biurrun
    int i;
1711
    for (i=0; i<width; i++)
1712
        dst[i]= src[2*i+1];
1713 7322a67c Michael Niedermayer
#endif
1714
}
1715
1716 896a22b8 Luca Barbato
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
1717 7322a67c Michael Niedermayer
{
1718 b63f641e Aurelien Jacobs
#if HAVE_MMX
1719 7ad6469e Diego Pettenò
    __asm__ volatile(
1720 2da0d70d Diego Biurrun
    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1721
    "mov                    %0, %%"REG_a"       \n\t"
1722
    "1:                                         \n\t"
1723
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1724
    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1725
    "pand                %%mm4, %%mm0           \n\t"
1726
    "pand                %%mm4, %%mm1           \n\t"
1727
    "packuswb            %%mm1, %%mm0           \n\t"
1728
    "movq                %%mm0, %%mm1           \n\t"
1729
    "psrlw                  $8, %%mm0           \n\t"
1730
    "pand                %%mm4, %%mm1           \n\t"
1731
    "packuswb            %%mm0, %%mm0           \n\t"
1732
    "packuswb            %%mm1, %%mm1           \n\t"
1733
    "movd                %%mm0, (%3, %%"REG_a") \n\t"
1734
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1735
    "add                    $4, %%"REG_a"       \n\t"
1736
    " js                    1b                  \n\t"
1737 d0ce212a Ramiro Polla
    : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1738 2da0d70d Diego Biurrun
    : "%"REG_a
1739
    );
1740 7322a67c Michael Niedermayer
#else
1741 2da0d70d Diego Biurrun
    int i;
1742
    for (i=0; i<width; i++)
1743
    {
1744
        dstU[i]= src1[4*i + 0];
1745
        dstV[i]= src1[4*i + 2];
1746
    }
1747
#endif
1748
    assert(src1 == src2);
1749 7322a67c Michael Niedermayer
}
1750
1751 214892ee Michael Niedermayer
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1752 896a22b8 Luca Barbato
static inline void RENAME(name)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)\
1753 214892ee Michael Niedermayer
{\
1754
    int i;\
1755
    for (i=0; i<width; i++)\
1756
    {\
1757
        int b= (((type*)src)[i]>>shb)&maskb;\
1758
        int g= (((type*)src)[i]>>shg)&maskg;\
1759
        int r= (((type*)src)[i]>>shr)&maskr;\
1760
\
1761
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1762
    }\
1763 1e621b18 Michael Niedermayer
}
1764
1765 214892ee Michael Niedermayer
BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1766
BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1767
BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1768
BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1769
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1770
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1771
1772 6858492e Cédric Schieli
static inline void RENAME(abgrToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
1773
    int i;
1774
    for (i=0; i<width; i++){
1775
        dst[i]= src[4*i];
1776
    }
1777
}
1778
1779 f8a138be Cédric Schieli
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
1780 896a22b8 Luca Barbato
static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
1781 a0baa07a Michael Niedermayer
{\
1782
    int i;\
1783
    for (i=0; i<width; i++)\
1784
    {\
1785 ba83d862 Michael Niedermayer
        int b= (((type*)src)[i]&maskb)>>shb;\
1786
        int g= (((type*)src)[i]&maskg)>>shg;\
1787
        int r= (((type*)src)[i]&maskr)>>shr;\
1788 a0baa07a Michael Niedermayer
\
1789
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1790
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1791
    }\
1792 ba83d862 Michael Niedermayer
}\
1793 896a22b8 Luca Barbato
static inline void RENAME(name ## _half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
1794 ba83d862 Michael Niedermayer
{\
1795
    int i;\
1796
    for (i=0; i<width; i++)\
1797
    {\
1798
        int pix0= ((type*)src)[2*i+0];\
1799
        int pix1= ((type*)src)[2*i+1];\
1800 bcff32d1 Kostya Shishkov
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1801 ba83d862 Michael Niedermayer
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1802
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1803 f8a138be Cédric Schieli
        g&= maskg|(2*maskg);\
1804 ba83d862 Michael Niedermayer
\
1805
        g>>=shg;\
1806
\
1807 6b79dbce Michael Niedermayer
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1808
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1809 ba83d862 Michael Niedermayer
    }\
1810 2f60f629 Michael Niedermayer
}
1811
1812 f8a138be Cédric Schieli
BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1813
BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1814
BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1815
BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1816
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1817
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
1818 a0baa07a Michael Niedermayer
1819 b63f641e Aurelien Jacobs
#if HAVE_MMX
1820 a35acd7f Benjamin Zores
/*
 * MMX helper used by bgr24ToY and rgb24ToY: converts packed 3-byte pixels at
 * src into one luma byte per pixel at dst.
 *
 * dst       - output, one byte per pixel
 * src       - packed input, 3 bytes per pixel
 * width     - number of pixels
 * srcFormat - PIX_FMT_BGR24 selects the bgr24 coefficient pair, any other
 *             value selects the rgb24 pair
 *
 * Each loop iteration consumes 12 source bytes and stores 4 output bytes; the
 * counter is only tested after the store, so whole groups of 4 pixels are
 * processed.
 *
 * NOTE(review): mm5/mm6 are loaded in one asm statement and consumed in a
 * later one, and no MMX register appears in any clobber list, so this relies
 * on the compiler not touching MMX state in between.
 */
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, uint8_t *src, long width, int srcFormat)
1821 dfb09bd1 Michael Niedermayer
{
1822
1823
    /* mm5/mm6 = the two coefficient vectors for the requested channel order */
    if(srcFormat == PIX_FMT_BGR24){
1824 7ad6469e Diego Pettenò
        __asm__ volatile(
1825 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
1826
            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
1827
            :
1828 dfb09bd1 Michael Niedermayer
        );
1829
    }else{
1830 7ad6469e Diego Pettenò
        __asm__ volatile(
1831 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
1832
            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
1833
            :
1834 dfb09bd1 Michael Niedermayer
        );
1835
    }
1836
1837 7ad6469e Diego Pettenò
    __asm__ volatile(
1838 dfb09bd1 Michael Niedermayer
        /* mm4 = additive offset, REG_a = -width (counts up to 0), mm7 = 0 for byte->word unpacking */
        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
1839
        "mov                        %2, %%"REG_a"   \n\t"
1840
        "pxor                    %%mm7, %%mm7       \n\t"
1841
        "1:                                         \n\t"
1842
        PREFETCH"               64(%0)              \n\t"
1843
        /* four overlapping 4-byte loads covering the next 12 source bytes */
        "movd                     (%0), %%mm0       \n\t"
1844
        "movd                    2(%0), %%mm1       \n\t"
1845
        "movd                    6(%0), %%mm2       \n\t"
1846
        "movd                    8(%0), %%mm3       \n\t"
1847
        "add                       $12, %0          \n\t"
1848
        /* widen bytes to 16-bit words */
        "punpcklbw               %%mm7, %%mm0       \n\t"
1849
        "punpcklbw               %%mm7, %%mm1       \n\t"
1850
        "punpcklbw               %%mm7, %%mm2       \n\t"
1851
        "punpcklbw               %%mm7, %%mm3       \n\t"
1852
        /* multiply by the coefficients and add adjacent products into 32-bit lanes */
        "pmaddwd                 %%mm5, %%mm0       \n\t"
1853
        "pmaddwd                 %%mm6, %%mm1       \n\t"
1854
        "pmaddwd                 %%mm5, %%mm2       \n\t"
1855
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1856
        /* combine the partial sums (presumably one pixel per 32-bit lane; the coefficient layout is defined elsewhere) */
        "paddd                   %%mm1, %%mm0       \n\t"
1857
        "paddd                   %%mm3, %%mm2       \n\t"
1858
        "paddd                   %%mm4, %%mm0       \n\t"
1859
        "paddd                   %%mm4, %%mm2       \n\t"
1860
        /* scale back down by 15 bits */
        "psrad                     $15, %%mm0       \n\t"
1861
        "psrad                     $15, %%mm2       \n\t"
1862
        /* narrow 4 dwords -> 4 words -> 4 bytes and store them */
        "packssdw                %%mm2, %%mm0       \n\t"
1863
        "packuswb                %%mm0, %%mm0       \n\t"
1864
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1865
        "add                        $4, %%"REG_a"   \n\t"
1866
        /* loop while the counter is still negative */
        " js                        1b              \n\t"
1867
    : "+r" (src)
1868 d0ce212a Ramiro Polla
    : "r" (dst+width), "g" ((x86_reg)-width)
1869 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1870 2da0d70d Diego Biurrun
    );
1871 dfb09bd1 Michael Niedermayer
}
1872
1873 a35acd7f Benjamin Zores
/*
 * MMX helper used by bgr24ToUV and rgb24ToUV: converts packed 3-byte pixels
 * at src into one U byte and one V byte per pixel.
 *
 * dstU, dstV - outputs, one byte per pixel each
 * src        - packed input, 3 bytes per pixel
 * width      - number of pixels
 * srcFormat  - selects the coefficient table row:
 *              ff_bgr24toUV[srcFormat == PIX_FMT_RGB24]
 *
 * The selected row is passed as memory operand %4 and read at byte offsets
 * 0, 8, 16 and 24: offsets 0 and 8 feed the value stored through operand %1
 * (dstU), offsets 16 and 24 feed the value stored through operand %2 (dstV).
 *
 * Each loop iteration consumes 12 source bytes and stores 4 bytes to each
 * plane; the counter is only tested after the stores, so whole groups of 4
 * pixels are processed.
 */
static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, long width, int srcFormat)
1874 dfb09bd1 Michael Niedermayer
{
1875 7ad6469e Diego Pettenò
    __asm__ volatile(
1876 dfb09bd1 Michael Niedermayer
        /* mm6 = fourth coefficient vector, REG_a = -width (counts up to 0), mm7 = 0 for unpacking */
        "movq                    24+%4, %%mm6       \n\t"
1877
        "mov                        %3, %%"REG_a"   \n\t"
1878
        "pxor                    %%mm7, %%mm7       \n\t"
1879
        "1:                                         \n\t"
1880
        PREFETCH"               64(%0)              \n\t"
1881
        /* first half of the group: source bytes 0..5 */
        "movd                     (%0), %%mm0       \n\t"
1882
        "movd                    2(%0), %%mm1       \n\t"
1883
        "punpcklbw               %%mm7, %%mm0       \n\t"
1884
        "punpcklbw               %%mm7, %%mm1       \n\t"
1885
        /* keep copies so the same samples can be weighted for both outputs */
        "movq                    %%mm0, %%mm2       \n\t"
1886
        "movq                    %%mm1, %%mm3       \n\t"
1887
        "pmaddwd                    %4, %%mm0       \n\t"
1888
        "pmaddwd                  8+%4, %%mm1       \n\t"
1889
        "pmaddwd                 16+%4, %%mm2       \n\t"
1890
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1891
        /* mm0 = first-output sums, mm2 = second-output sums */
        "paddd                   %%mm1, %%mm0       \n\t"
1892
        "paddd                   %%mm3, %%mm2       \n\t"
1893
1894
        /* second half of the group: source bytes 6..11, same scheme into mm1/mm4 */
        "movd                    6(%0), %%mm1       \n\t"
1895
        "movd                    8(%0), %%mm3       \n\t"
1896
        "add                       $12, %0          \n\t"
1897
        "punpcklbw               %%mm7, %%mm1       \n\t"
1898
        "punpcklbw               %%mm7, %%mm3       \n\t"
1899
        "movq                    %%mm1, %%mm4       \n\t"
1900
        "movq                    %%mm3, %%mm5       \n\t"
1901
        "pmaddwd                    %4, %%mm1       \n\t"
1902
        "pmaddwd                  8+%4, %%mm3       \n\t"
1903
        "pmaddwd                 16+%4, %%mm4       \n\t"
1904
        "pmaddwd                 %%mm6, %%mm5       \n\t"
1905
        "paddd                   %%mm3, %%mm1       \n\t"
1906
        "paddd                   %%mm5, %%mm4       \n\t"
1907
1908
        /* add the offset constant to all four accumulators, then scale down by 15 bits */
        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
1909
        "paddd                   %%mm3, %%mm0       \n\t"
1910
        "paddd                   %%mm3, %%mm2       \n\t"
1911
        "paddd                   %%mm3, %%mm1       \n\t"
1912
        "paddd                   %%mm3, %%mm4       \n\t"
1913
        "psrad                     $15, %%mm0       \n\t"
1914
        "psrad                     $15, %%mm2       \n\t"
1915
        "psrad                     $15, %%mm1       \n\t"
1916
        "psrad                     $15, %%mm4       \n\t"
1917
        /* narrow dwords -> words -> bytes: mm0 holds 4 bytes for %1, mm2 holds 4 bytes for %2 */
        "packssdw                %%mm1, %%mm0       \n\t"
1918
        "packssdw                %%mm4, %%mm2       \n\t"
1919
        "packuswb                %%mm0, %%mm0       \n\t"
1920
        "packuswb                %%mm2, %%mm2       \n\t"
1921
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1922
        "movd                %%mm2, (%2, %%"REG_a") \n\t"
1923
        "add                        $4, %%"REG_a"   \n\t"
1924
        /* loop while the counter is still negative */
        " js                        1b              \n\t"
1925
    : "+r" (src)
1926 d0ce212a Ramiro Polla
    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
1927 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1928
    );
1929
}
1930
#endif
1931
1932 896a22b8 Luca Barbato
static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
1933 dfb09bd1 Michael Niedermayer
{
1934 b63f641e Aurelien Jacobs
#if HAVE_MMX
1935 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
1936 1e621b18 Michael Niedermayer
#else
1937 2da0d70d Diego Biurrun
    int i;
1938
    for (i=0; i<width; i++)
1939
    {
1940
        int b= src[i*3+0];
1941
        int g= src[i*3+1];
1942
        int r= src[i*3+2];
1943 1e621b18 Michael Niedermayer
1944 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1945 2da0d70d Diego Biurrun
    }
1946 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1947 1e621b18 Michael Niedermayer
}
1948
1949 896a22b8 Luca Barbato
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
1950 1e621b18 Michael Niedermayer
{
1951 b63f641e Aurelien Jacobs
#if HAVE_MMX
1952 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
1953 1e621b18 Michael Niedermayer
#else
1954 2da0d70d Diego Biurrun
    int i;
1955
    for (i=0; i<width; i++)
1956
    {
1957 dfb09bd1 Michael Niedermayer
        int b= src1[3*i + 0];
1958
        int g= src1[3*i + 1];
1959
        int r= src1[3*i + 2];
1960 2da0d70d Diego Biurrun
1961 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1962
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1963 2da0d70d Diego Biurrun
    }
1964 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1965 2da0d70d Diego Biurrun
    assert(src1 == src2);
1966 1e621b18 Michael Niedermayer
}
1967
1968 896a22b8 Luca Barbato
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
1969 2f60f629 Michael Niedermayer
{
1970
    int i;
1971
    for (i=0; i<width; i++)
1972
    {
1973
        int b= src1[6*i + 0] + src1[6*i + 3];
1974
        int g= src1[6*i + 1] + src1[6*i + 4];
1975
        int r= src1[6*i + 2] + src1[6*i + 5];
1976
1977
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1978
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1979
    }
1980
    assert(src1 == src2);
1981
}
1982
1983 896a22b8 Luca Barbato
static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
1984 a861d4d7 Michael Niedermayer
{
1985 b63f641e Aurelien Jacobs
#if HAVE_MMX
1986 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
1987 dfb09bd1 Michael Niedermayer
#else
1988 2da0d70d Diego Biurrun
    int i;
1989
    for (i=0; i<width; i++)
1990
    {
1991
        int r= src[i*3+0];
1992
        int g= src[i*3+1];
1993
        int b= src[i*3+2];
1994
1995 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1996 2da0d70d Diego Biurrun
    }
1997 dfb09bd1 Michael Niedermayer
#endif
1998 a861d4d7 Michael Niedermayer
}
1999
2000 896a22b8 Luca Barbato
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
2001 a861d4d7 Michael Niedermayer
{
2002 b63f641e Aurelien Jacobs
#if HAVE_MMX
2003 5155b839 Diego Biurrun
    assert(src1==src2);
2004 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
2005 dfb09bd1 Michael Niedermayer
#else
2006 5155b839 Diego Biurrun
    int i;
2007
    assert(src1==src2);
2008 2da0d70d Diego Biurrun
    for (i=0; i<width; i++)
2009
    {
2010 dfb09bd1 Michael Niedermayer
        int r= src1[3*i + 0];
2011
        int g= src1[3*i + 1];
2012
        int b= src1[3*i + 2];
2013 2da0d70d Diego Biurrun
2014 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2015
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2016 2da0d70d Diego Biurrun
    }
2017 dfb09bd1 Michael Niedermayer
#endif
2018 a861d4d7 Michael Niedermayer
}
2019
2020 896a22b8 Luca Barbato
static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
2021 2f60f629 Michael Niedermayer
{
2022
    int i;
2023
    assert(src1==src2);
2024
    for (i=0; i<width; i++)
2025
    {
2026 e09d7eef Michael Niedermayer
        int r= src1[6*i + 0] + src1[6*i + 3];
2027
        int g= src1[6*i + 1] + src1[6*i + 4];
2028
        int b= src1[6*i + 2] + src1[6*i + 5];
2029 2f60f629 Michael Niedermayer
2030
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2031
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
2032
    }
2033
}
2034
2035 1e621b18 Michael Niedermayer
2036 97b93389 Luca Barbato
static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, long width, uint32_t *pal)
2037 e28630fc Michael Niedermayer
{
2038 2da0d70d Diego Biurrun
    int i;
2039
    for (i=0; i<width; i++)
2040
    {
2041
        int d= src[i];
2042 e28630fc Michael Niedermayer
2043 2da0d70d Diego Biurrun
        dst[i]= pal[d] & 0xFF;
2044
    }
2045 e28630fc Michael Niedermayer
}
2046
2047 97b93389 Luca Barbato
static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *pal)
2048 e28630fc Michael Niedermayer
{
2049 2da0d70d Diego Biurrun
    int i;
2050
    assert(src1 == src2);
2051
    for (i=0; i<width; i++)
2052
    {
2053
        int p= pal[src1[i]];
2054
2055
        dstU[i]= p>>8;
2056
        dstV[i]= p>>16;
2057
    }
2058 e28630fc Michael Niedermayer
}
2059
2060 896a22b8 Luca Barbato
static inline void RENAME(monowhite2Y)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused)
2061 3d05e078 Michael Niedermayer
{
2062
    int i, j;
2063
    for (i=0; i<width/8; i++){
2064 3a5ba0c3 Luca Barbato
        int d= ~src[i];
2065
        for(j=0; j<8; j++)
2066
            dst[8*i+j]= ((d>>(7-j))&1)*255;
2067
    }
2068