Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale_template.c @ ad40b153

History | View | Annotate | Download (133 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 6e1c66bc Aurelien Jacobs
/* Clear any earlier definitions of the helper macros so that they can be
 * defined afresh below (presumably this template is included more than once
 * with different HAVE_* settings -- confirm against the including file). */
#undef REAL_MOVNTQ
25 541c4eb9 Michael Niedermayer
#undef MOVNTQ
26 7d7f78b5 Michael Niedermayer
#undef PAVGB
27 48a05cec Michael Niedermayer
#undef PREFETCH
28
#undef PREFETCHW
29
#undef EMMS
30
#undef SFENCE
31
32
/* EMMS expands to the instruction mnemonic string "femms" when HAVE_3DNOW is
 * defined and to "emms" otherwise. */
#ifdef HAVE_3DNOW
33
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
34
#define EMMS     "femms"
35
#else
36
#define EMMS     "emms"
37
#endif
38
39
/* PREFETCH / PREFETCHW: mnemonic strings for the prefetch instructions.
 *   HAVE_3DNOW : "prefetch"    / "prefetchw"
 *   HAVE_MMX2  : "prefetchnta" / "prefetcht0"
 *   otherwise  : " # nop", which begins with the '#' comment character of
 *                GNU as on x86, so no instruction is emitted. */
#ifdef HAVE_3DNOW
40
#define PREFETCH  "prefetch"
41
#define PREFETCHW "prefetchw"
42 e5091488 Benoit Fouet
#elif defined (HAVE_MMX2)
43 48a05cec Michael Niedermayer
#define PREFETCH "prefetchnta"
44
#define PREFETCHW "prefetcht0"
45
#else
46 d904b5fc Nigel Pearson
#define PREFETCH  " # nop"
47
#define PREFETCHW " # nop"
48 48a05cec Michael Niedermayer
#endif
49
50
/* SFENCE: "sfence" when HAVE_MMX2 is defined; otherwise " # nop", an
 * assembler comment that emits no instruction. */
#ifdef HAVE_MMX2
51
#define SFENCE "sfence"
52
#else
53 d904b5fc Nigel Pearson
#define SFENCE " # nop"
54 48a05cec Michael Niedermayer
#endif
55 d3f41512 Michael Niedermayer
56 d604bab9 Michael Niedermayer
/* PAVGB(a,b): emits one averaging instruction with operands a, b, followed by
 * " \n\t". Uses "pavgb" with HAVE_MMX2 and "pavgusb" with HAVE_3DNOW.
 * The macro is left undefined when neither symbol is defined. */
#ifdef HAVE_MMX2
57
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
58
#elif defined (HAVE_3DNOW)
59
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
60
#endif
61 d3f41512 Michael Niedermayer
62 d604bab9 Michael Niedermayer
/* MOVNTQ(a,b): 64-bit store of a to b. Emitted as "movntq" when HAVE_MMX2 is
 * defined and as a plain "movq" otherwise.
 * REAL_MOVNTQ stringifies its arguments with '#'; going through the MOVNTQ
 * wrapper lets the preprocessor macro-expand the arguments first (e.g. the
 * REGa token used by callers in this file) before they are stringified. */
#ifdef HAVE_MMX2
63 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
64 d604bab9 Michael Niedermayer
#else
65 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
66 d604bab9 Michael Niedermayer
#endif
67 6e1c66bc Aurelien Jacobs
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
68 d604bab9 Michael Niedermayer
69 a2faa401 Romain Dolbeau
/* Textually include the AltiVec template when HAVE_ALTIVEC is defined. */
#ifdef HAVE_ALTIVEC
70
#include "swscale_altivec_template.c"
71
#endif
72
73 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * A complete asm statement. Operands: %0 = &c->redDither, %1 = dest,
 * %2 = width. Clobbers REG_a, REG_d and REG_S.
 *
 * REG_a is the output byte index; it starts at 0 and advances by 8 per stored
 * quadword while it is (unsigned) below width. For every group of 8 outputs
 * the accumulators mm3/mm4 start from the quadword at VROUNDER_OFFSET(%0).
 * REG_d then walks a list of 16-byte entries that begins at offset(%0): the
 * value at +0 of an entry is loaded into REG_S and used as the source base,
 * the quadword at +8 holds the filter coefficients. Two quadwords of 16-bit
 * samples at x + REG_S + 2*REG_a are multiplied (pmulhw) by the coefficients
 * and added to mm3/mm4. The list ends when the next loaded pointer is zero.
 * The sums are shifted right by 3, packed to 8 unsigned saturated bytes and
 * stored at dest + REG_a with MOVNTQ.
 *
 * Label 1 serves both as the per-entry loop target ("jnz", flags set by the
 * earlier "test") and as the per-output loop target ("jb", flags set by the
 * earlier "cmp"); the MMX, lea and mov instructions in between leave the
 * flags untouched.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
74 2da0d70d Diego Biurrun
    asm volatile(\
75
    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
76
    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
77
    "movq                             %%mm3, %%mm4      \n\t"\
78
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
79
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
80
    ASMALIGN(4) /* FIXME Unroll? */\
81
    "1:                                                 \n\t"\
82
    "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
83
    "movq   " #x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
84
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
85
    "add                                $16, %%"REG_d"  \n\t"\
86
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
87
    "test                         %%"REG_S", %%"REG_S"  \n\t"\
88
    "pmulhw                           %%mm0, %%mm2      \n\t"\
89
    "pmulhw                           %%mm0, %%mm5      \n\t"\
90
    "paddw                            %%mm2, %%mm3      \n\t"\
91
    "paddw                            %%mm5, %%mm4      \n\t"\
92
    " jnz                                1b             \n\t"\
93
    "psraw                               $3, %%mm3      \n\t"\
94
    "psraw                               $3, %%mm4      \n\t"\
95
    "packuswb                         %%mm4, %%mm3      \n\t"\
96
    MOVNTQ(%%mm3, (%1, %%REGa))\
97
    "add                                 $8, %%"REG_a"  \n\t"\
98
    "cmp                                 %2, %%"REG_a"  \n\t"\
99
    "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
100
    "movq                             %%mm3, %%mm4      \n\t"\
101
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
102
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
103
    "jb                                  1b             \n\t"\
104
    :: "r" (&c->redDither),\
105
    "r" (dest), "g" (width)\
106
    : "%"REG_a, "%"REG_d, "%"REG_S\
107
    );
108 bca11e75 Michael Niedermayer
109
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same operands, clobbers and outer loop as YSCALEYUV2YV12X (%0 =
 * &c->redDither, %1 = dest, %2 = width; REG_a advances by 8 while below
 * width), but the sums are kept in 32-bit lanes.
 *
 * Each 16-byte list entry at offset(%0) is read as: source pointer at +0, a
 * second source pointer at +4, coefficient quadword at +8. Samples from the
 * two sources are interleaved with punpcklwd/punpckhwd and multiplied and
 * pair-summed with pmaddwd into mm4..mm7. The list ends when the pointer at
 * the start of the next entry (+16) is zero. The sums are then shifted right
 * by 16, packed to words, the quadword at VROUNDER_OFFSET(%0) is added, the
 * result is shifted right by 3, packed to unsigned bytes and stored at
 * dest + REG_a with MOVNTQ.
 *
 * NOTE(review): the second pointer is loaded from +4 with a full-width
 * "mov ..., REG_S"; if REG_S is a 64-bit register this load overlaps the
 * coefficient quadword at +8 -- confirm the entry layout for such builds.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
110 2da0d70d Diego Biurrun
    asm volatile(\
111
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
112
    "xor                          %%"REG_a", %%"REG_a"  \n\t"\
113
    "pxor                             %%mm4, %%mm4      \n\t"\
114
    "pxor                             %%mm5, %%mm5      \n\t"\
115
    "pxor                             %%mm6, %%mm6      \n\t"\
116
    "pxor                             %%mm7, %%mm7      \n\t"\
117
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
118
    ASMALIGN(4) \
119
    "1:                                                 \n\t"\
120
    "movq   " #x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
121
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
122
    "mov                       4(%%"REG_d"), %%"REG_S"  \n\t"\
123
    "movq   " #x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
124
    "movq                             %%mm0, %%mm3      \n\t"\
125
    "punpcklwd                        %%mm1, %%mm0      \n\t"\
126
    "punpckhwd                        %%mm1, %%mm3      \n\t"\
127
    "movq                      8(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
128
    "pmaddwd                          %%mm1, %%mm0      \n\t"\
129
    "pmaddwd                          %%mm1, %%mm3      \n\t"\
130
    "paddd                            %%mm0, %%mm4      \n\t"\
131
    "paddd                            %%mm3, %%mm5      \n\t"\
132
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
133
    "mov                      16(%%"REG_d"), %%"REG_S"  \n\t"\
134
    "add                                $16, %%"REG_d"  \n\t"\
135
    "test                         %%"REG_S", %%"REG_S"  \n\t"\
136
    "movq                             %%mm2, %%mm0      \n\t"\
137
    "punpcklwd                        %%mm3, %%mm2      \n\t"\
138
    "punpckhwd                        %%mm3, %%mm0      \n\t"\
139
    "pmaddwd                          %%mm1, %%mm2      \n\t"\
140
    "pmaddwd                          %%mm1, %%mm0      \n\t"\
141
    "paddd                            %%mm2, %%mm6      \n\t"\
142
    "paddd                            %%mm0, %%mm7      \n\t"\
143
    " jnz                                1b             \n\t"\
144
    "psrad                              $16, %%mm4      \n\t"\
145
    "psrad                              $16, %%mm5      \n\t"\
146
    "psrad                              $16, %%mm6      \n\t"\
147
    "psrad                              $16, %%mm7      \n\t"\
148
    "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
149
    "packssdw                         %%mm5, %%mm4      \n\t"\
150
    "packssdw                         %%mm7, %%mm6      \n\t"\
151
    "paddw                            %%mm0, %%mm4      \n\t"\
152
    "paddw                            %%mm0, %%mm6      \n\t"\
153
    "psraw                               $3, %%mm4      \n\t"\
154
    "psraw                               $3, %%mm6      \n\t"\
155
    "packuswb                         %%mm6, %%mm4      \n\t"\
156
    MOVNTQ(%%mm4, (%1, %%REGa))\
157
    "add                                 $8, %%"REG_a"  \n\t"\
158
    "cmp                                 %2, %%"REG_a"  \n\t"\
159
    "lea                     " offset "(%0), %%"REG_d"  \n\t"\
160
    "pxor                             %%mm4, %%mm4      \n\t"\
161
    "pxor                             %%mm5, %%mm5      \n\t"\
162
    "pxor                             %%mm6, %%mm6      \n\t"\
163
    "pxor                             %%mm7, %%mm7      \n\t"\
164
    "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
165
    "jb                                  1b             \n\t"\
166
    :: "r" (&c->redDither),\
167
    "r" (dest), "g" (width)\
168
    : "%"REG_a, "%"REG_d, "%"REG_S\
169
    );
170 c1b0bfb4 Michael Niedermayer
171
/*
 * YSCALEYUV2YV121
 *
 * An asm string fragment (no asm() wrapper, operands or clobbers -- the user
 * of the macro supplies them). REG_a is initialised from operand %2. Each
 * iteration loads two quadwords of 16-bit values from %0 + 2*REG_a, shifts
 * them right by 7, packs them to 8 unsigned saturated bytes, stores those at
 * %1 + REG_a with MOVNTQ and adds 8 to REG_a. The loop repeats while that
 * addition produces no carry ("jnc").
 * NOTE(review): this termination test presumably relies on %2 being a
 * negative count with %0/%1 biased to the end of the buffers -- confirm at
 * the call site.
 */
#define YSCALEYUV2YV121 \
172 2da0d70d Diego Biurrun
    "mov %2, %%"REG_a"                    \n\t"\
173
    ASMALIGN(4) /* FIXME Unroll? */\
174
    "1:                                   \n\t"\
175
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
176
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
177
    "psraw                 $7, %%mm0      \n\t"\
178
    "psraw                 $7, %%mm1      \n\t"\
179
    "packuswb           %%mm1, %%mm0      \n\t"\
180
    MOVNTQ(%%mm0, (%1, %%REGa))\
181
    "add                   $8, %%"REG_a"  \n\t"\
182
    "jnc                   1b             \n\t"
183 c1b0bfb4 Michael Niedermayer
184
/*
185 2da0d70d Diego Biurrun
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
186
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
187
       "r" (dest), "m" (dstW),
188
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
189
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
190 c1b0bfb4 Michael Niedermayer
*/
191 25593e29 Michael Niedermayer
/*
 * YSCALEYUV2PACKEDX
 *
 * Opens an "asm volatile(" statement and leaves it open: the user appends
 * further asm text and then YSCALEYUV2PACKEDX_END, which supplies the
 * operands, the clobber list and the closing ");".
 *
 * Label 1 marks the start of the per-output loop (REG_a, zeroed first, is the
 * index); the back-edge to it is not part of this macro. Inside, two loops
 * (both labelled 2) walk 16-byte filter entries (pointer at +0, coefficient
 * quadword at +8, terminated by a zero pointer):
 *   - chroma, list at CHR_MMX_FILTER_OFFSET(%0): sums into mm3 (samples at
 *     REG_S + REG_a) and mm4 (samples 4096 bytes further on);
 *   - luma, list at LUM_MMX_FILTER_OFFSET(%0): sums into mm1 and mm7 (two
 *     consecutive quadwords at REG_S + 2*REG_a).
 * All four accumulators start from the quadword at VROUNDER_OFFSET(%0).
 * On exit: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2.
 */
#define YSCALEYUV2PACKEDX \
192 2da0d70d Diego Biurrun
    asm volatile(\
193
    "xor                   %%"REG_a", %%"REG_a"     \n\t"\
194
    ASMALIGN(4)\
195
    "nop                                            \n\t"\
196
    "1:                                             \n\t"\
197
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
198
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
199
    "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
200
    "movq                      %%mm3, %%mm4         \n\t"\
201
    ASMALIGN(4)\
202
    "2:                                             \n\t"\
203
    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
204
    "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
205
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
206
    "add                         $16, %%"REG_d"     \n\t"\
207
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
208
    "pmulhw                    %%mm0, %%mm2         \n\t"\
209
    "pmulhw                    %%mm0, %%mm5         \n\t"\
210
    "paddw                     %%mm2, %%mm3         \n\t"\
211
    "paddw                     %%mm5, %%mm4         \n\t"\
212
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
213
    " jnz                         2b                \n\t"\
214 c1b0bfb4 Michael Niedermayer
\
215 2da0d70d Diego Biurrun
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
216
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
217
    "movq      "VROUNDER_OFFSET"(%0), %%mm1         \n\t"\
218
    "movq                      %%mm1, %%mm7         \n\t"\
219
    ASMALIGN(4)\
220
    "2:                                             \n\t"\
221
    "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
222
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y1srcData */\
223
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5         \n\t" /* Y2srcData */\
224
    "add                         $16, %%"REG_d"            \n\t"\
225
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
226
    "pmulhw                    %%mm0, %%mm2         \n\t"\
227
    "pmulhw                    %%mm0, %%mm5         \n\t"\
228
    "paddw                     %%mm2, %%mm1         \n\t"\
229
    "paddw                     %%mm5, %%mm7         \n\t"\
230
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
231
    " jnz                         2b                \n\t"\
232
233
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Closes an asm statement opened by YSCALEYUV2PACKEDX or
 * YSCALEYUV2PACKEDX_ACCURATE. There are no outputs. Inputs:
 *   %0 = &c->redDither ("r"), %1..%3 = dummy ("m"),
 *   %4 = dest ("r"),          %5 = dstW ("m").
 * Clobbers REG_a, REG_d and REG_S.
 * NOTE(review): the three dummy operands presumably only keep dest and dstW
 * at operand numbers 4 and 5 for the code placed between the two macros --
 * confirm against the users.
 */
#define YSCALEYUV2PACKEDX_END                 \
234
    :: "r" (&c->redDither),                   \
235
        "m" (dummy), "m" (dummy), "m" (dummy),\
236
        "r" (dest), "m" (dstW)                \
237
    : "%"REG_a, "%"REG_d, "%"REG_S            \
238
    );
239 8422aa88 Michael Niedermayer
240 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * Variant of YSCALEYUV2PACKEDX that accumulates in 32-bit lanes. Like that
 * macro it opens "asm volatile(" and leaves both the statement and the loop
 * at label 1 open; YSCALEYUV2PACKEDX_END closes the statement.
 *
 * Each 16-byte filter entry is read as: source pointer at +0, second source
 * pointer at +4, coefficient quadword at +8; the list ends when the pointer
 * at the start of the next entry (+16) is zero. Samples from the two sources
 * are interleaved and combined with pmaddwd.
 *   - chroma (CHR_MMX_FILTER_OFFSET): sums in mm4/mm5 (U) and mm6/mm7 (V) are
 *     shifted right by 16, packed to words, the quadword at
 *     VROUNDER_OFFSET(%0) is added, and the results are parked in
 *     U_TEMP(%0) / V_TEMP(%0);
 *   - luma (LUM_MMX_FILTER_OFFSET): sums in mm1/mm5 (Y1) and mm7/mm6 (Y2) are
 *     reduced the same way.
 * On exit: mm1 = Y1, mm7 = Y2, mm3 = U (reloaded from U_TEMP), mm4 = V
 * (reloaded from V_TEMP).
 *
 * NOTE(review): the second pointer is loaded from +4 with a full-width
 * "mov ..., REG_S"; if REG_S is a 64-bit register this load overlaps the
 * coefficient quadword at +8 -- confirm the entry layout for such builds.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
241 2da0d70d Diego Biurrun
    asm volatile(\
242
    "xor %%"REG_a", %%"REG_a"                       \n\t"\
243
    ASMALIGN(4)\
244
    "nop                                            \n\t"\
245
    "1:                                             \n\t"\
246
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
247
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
248
    "pxor                      %%mm4, %%mm4         \n\t"\
249
    "pxor                      %%mm5, %%mm5         \n\t"\
250
    "pxor                      %%mm6, %%mm6         \n\t"\
251
    "pxor                      %%mm7, %%mm7         \n\t"\
252
    ASMALIGN(4)\
253
    "2:                                             \n\t"\
254
    "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
255
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
256
    "mov                4(%%"REG_d"), %%"REG_S"     \n\t"\
257
    "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
258
    "movq                      %%mm0, %%mm3         \n\t"\
259
    "punpcklwd                 %%mm1, %%mm0         \n\t"\
260
    "punpckhwd                 %%mm1, %%mm3         \n\t"\
261
    "movq               8(%%"REG_d"), %%mm1         \n\t" /* filterCoeff */\
262
    "pmaddwd                   %%mm1, %%mm0         \n\t"\
263
    "pmaddwd                   %%mm1, %%mm3         \n\t"\
264
    "paddd                     %%mm0, %%mm4         \n\t"\
265
    "paddd                     %%mm3, %%mm5         \n\t"\
266
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
267
    "mov               16(%%"REG_d"), %%"REG_S"     \n\t"\
268
    "add                         $16, %%"REG_d"     \n\t"\
269
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
270
    "movq                      %%mm2, %%mm0         \n\t"\
271
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
272
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
273
    "pmaddwd                   %%mm1, %%mm2         \n\t"\
274
    "pmaddwd                   %%mm1, %%mm0         \n\t"\
275
    "paddd                     %%mm2, %%mm6         \n\t"\
276
    "paddd                     %%mm0, %%mm7         \n\t"\
277
    " jnz                         2b                \n\t"\
278
    "psrad                       $16, %%mm4         \n\t"\
279
    "psrad                       $16, %%mm5         \n\t"\
280
    "psrad                       $16, %%mm6         \n\t"\
281
    "psrad                       $16, %%mm7         \n\t"\
282
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
283
    "packssdw                  %%mm5, %%mm4         \n\t"\
284
    "packssdw                  %%mm7, %%mm6         \n\t"\
285
    "paddw                     %%mm0, %%mm4         \n\t"\
286
    "paddw                     %%mm0, %%mm6         \n\t"\
287
    "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
288
    "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
289 bca11e75 Michael Niedermayer
\
290 2da0d70d Diego Biurrun
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
291
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
292
    "pxor                      %%mm1, %%mm1         \n\t"\
293
    "pxor                      %%mm5, %%mm5         \n\t"\
294
    "pxor                      %%mm7, %%mm7         \n\t"\
295
    "pxor                      %%mm6, %%mm6         \n\t"\
296
    ASMALIGN(4)\
297
    "2:                                             \n\t"\
298
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
299
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
300
    "mov                4(%%"REG_d"), %%"REG_S"     \n\t"\
301
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
302
    "movq                      %%mm0, %%mm3         \n\t"\
303
    "punpcklwd                 %%mm4, %%mm0         \n\t"\
304
    "punpckhwd                 %%mm4, %%mm3         \n\t"\
305
    "movq               8(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
306
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
307
    "pmaddwd                   %%mm4, %%mm3         \n\t"\
308
    "paddd                     %%mm0, %%mm1         \n\t"\
309
    "paddd                     %%mm3, %%mm5         \n\t"\
310
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
311
    "mov               16(%%"REG_d"), %%"REG_S"     \n\t"\
312
    "add                         $16, %%"REG_d"     \n\t"\
313
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
314
    "movq                      %%mm2, %%mm0         \n\t"\
315
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
316
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
317
    "pmaddwd                   %%mm4, %%mm2         \n\t"\
318
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
319
    "paddd                     %%mm2, %%mm7         \n\t"\
320
    "paddd                     %%mm0, %%mm6         \n\t"\
321
    " jnz                         2b                \n\t"\
322
    "psrad                       $16, %%mm1         \n\t"\
323
    "psrad                       $16, %%mm5         \n\t"\
324
    "psrad                       $16, %%mm7         \n\t"\
325
    "psrad                       $16, %%mm6         \n\t"\
326
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
327
    "packssdw                  %%mm5, %%mm1         \n\t"\
328
    "packssdw                  %%mm6, %%mm7         \n\t"\
329
    "paddw                     %%mm0, %%mm1         \n\t"\
330
    "paddw                     %%mm0, %%mm7         \n\t"\
331
    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
332
    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
333 bca11e75 Michael Niedermayer
334 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2RGBX
 *
 * Asm string fragment converting the register set left by
 * YSCALEYUV2PACKEDX[_ACCURATE] to packed RGB bytes.
 * In : mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (16-bit lanes); %0 is the base
 *      for the *_OFFSET / *_COEFF constants.
 * Out: mm2 = B, mm4 = G, mm5 = R as 8 unsigned saturated bytes each (low four
 *      bytes built from Y1, high four from Y2); mm7 = 0.
 * Every chroma term is duplicated with punpcklwd/punpckhwd before being added
 * to luma, so one U/V pair is shared by two adjacent luma samples.
 * mm0, mm3 and mm6 are used as scratch.
 */
#define YSCALEYUV2RGBX \
335 2da0d70d Diego Biurrun
    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
336
    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
337
    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
338
    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
339
    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
340
    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
341
/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
342
    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
343
    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
344
    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
345
    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
346
    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
347
    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
348
/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
349
    "paddw           %%mm3, %%mm4       \n\t"\
350
    "movq            %%mm2, %%mm0       \n\t"\
351
    "movq            %%mm5, %%mm6       \n\t"\
352
    "movq            %%mm4, %%mm3       \n\t"\
353
    "punpcklwd       %%mm2, %%mm2       \n\t"\
354
    "punpcklwd       %%mm5, %%mm5       \n\t"\
355
    "punpcklwd       %%mm4, %%mm4       \n\t"\
356
    "paddw           %%mm1, %%mm2       \n\t"\
357
    "paddw           %%mm1, %%mm5       \n\t"\
358
    "paddw           %%mm1, %%mm4       \n\t"\
359
    "punpckhwd       %%mm0, %%mm0       \n\t"\
360
    "punpckhwd       %%mm6, %%mm6       \n\t"\
361
    "punpckhwd       %%mm3, %%mm3       \n\t"\
362
    "paddw           %%mm7, %%mm0       \n\t"\
363
    "paddw           %%mm7, %%mm6       \n\t"\
364
    "paddw           %%mm7, %%mm3       \n\t"\
365
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: from Y1, 2: from Y2) */\
366
    "packuswb        %%mm0, %%mm2       \n\t"\
367
    "packuswb        %%mm6, %%mm5       \n\t"\
368
    "packuswb        %%mm3, %%mm4       \n\t"\
369
    "pxor            %%mm7, %%mm7       \n\t"
370 77a49659 Michael Niedermayer
/* Disabled code: everything up to the matching #endif is excluded from
 * compilation by "#if 0". FULL_YSCALEYUV2RGB is an asm fragment that blends
 * two luma lines (%0, %1) and two chroma lines (%2, %3) using the weights
 * broadcast from operands %6 and %7, then converts to R/G/B using the
 * MANGLE()d constants, leaving B in mm3, R in mm0 and G in mm1. */
#if 0
371 d604bab9 Michael Niedermayer
#define FULL_YSCALEYUV2RGB \
372 2da0d70d Diego Biurrun
    "pxor                 %%mm7, %%mm7  \n\t"\
373
    "movd                    %6, %%mm6  \n\t" /*yalpha1*/\
374
    "punpcklwd            %%mm6, %%mm6  \n\t"\
375
    "punpcklwd            %%mm6, %%mm6  \n\t"\
376
    "movd                    %7, %%mm5  \n\t" /*uvalpha1*/\
377
    "punpcklwd            %%mm5, %%mm5  \n\t"\
378
    "punpcklwd            %%mm5, %%mm5  \n\t"\
379
    "xor              %%"REG_a", %%"REG_a"  \n\t"\
380
    ASMALIGN(4)\
381
    "1:                                 \n\t"\
382
    "movq     (%0, %%"REG_a",2), %%mm0  \n\t" /*buf0[eax]*/\
383
    "movq     (%1, %%"REG_a",2), %%mm1  \n\t" /*buf1[eax]*/\
384
    "movq     (%2, %%"REG_a",2), %%mm2  \n\t" /* uvbuf0[eax]*/\
385
    "movq     (%3, %%"REG_a",2), %%mm3  \n\t" /* uvbuf1[eax]*/\
386
    "psubw                %%mm1, %%mm0  \n\t" /* buf0[eax] - buf1[eax]*/\
387
    "psubw                %%mm3, %%mm2  \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
388
    "pmulhw               %%mm6, %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
389
    "pmulhw               %%mm5, %%mm2  \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
390
    "psraw                   $4, %%mm1  \n\t" /* buf0[eax] - buf1[eax] >>4*/\
391
    "movq 4096(%2, %%"REG_a",2), %%mm4  \n\t" /* uvbuf0[eax+2048]*/\
392
    "psraw                   $4, %%mm3  \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
393
    "paddw                %%mm0, %%mm1  \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
394
    "movq 4096(%3, %%"REG_a",2), %%mm0  \n\t" /* uvbuf1[eax+2048]*/\
395
    "paddw                %%mm2, %%mm3  \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
396
    "psubw                %%mm0, %%mm4  \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
397
    "psubw        "MANGLE(w80)", %%mm1  \n\t" /* 8(Y-16)*/\
398
    "psubw       "MANGLE(w400)", %%mm3  \n\t" /* 8(U-128)*/\
399
    "pmulhw    "MANGLE(yCoeff)", %%mm1  \n\t"\
400 d604bab9 Michael Niedermayer
\
401
\
402 2da0d70d Diego Biurrun
    "pmulhw               %%mm5, %%mm4  \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
403
    "movq                 %%mm3, %%mm2  \n\t" /* (U-128)8*/\
404
    "pmulhw   "MANGLE(ubCoeff)", %%mm3  \n\t"\
405
    "psraw                   $4, %%mm0  \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
406
    "pmulhw   "MANGLE(ugCoeff)", %%mm2  \n\t"\
407
    "paddw                %%mm4, %%mm0  \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
408
    "psubw       "MANGLE(w400)", %%mm0  \n\t" /* (V-128)8*/\
409 d604bab9 Michael Niedermayer
\
410
\
411 2da0d70d Diego Biurrun
    "movq                 %%mm0, %%mm4  \n\t" /* (V-128)8*/\
412
    "pmulhw   "MANGLE(vrCoeff)", %%mm0  \n\t"\
413
    "pmulhw   "MANGLE(vgCoeff)", %%mm4  \n\t"\
414
    "paddw                %%mm1, %%mm3  \n\t" /* B*/\
415
    "paddw                %%mm1, %%mm0  \n\t" /* R*/\
416
    "packuswb             %%mm3, %%mm3  \n\t"\
417 d604bab9 Michael Niedermayer
\
418 2da0d70d Diego Biurrun
    "packuswb             %%mm0, %%mm0  \n\t"\
419
    "paddw                %%mm4, %%mm2  \n\t"\
420
    "paddw                %%mm2, %%mm1  \n\t" /* G*/\
421 d604bab9 Michael Niedermayer
\
422 2da0d70d Diego Biurrun
    "packuswb             %%mm1, %%mm1  \n\t"
423 77a49659 Michael Niedermayer
#endif
424 d604bab9 Michael Niedermayer
425 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED(index, c) / YSCALEYUV2PACKED(index, c)
 *
 * Asm string fragment that blends two source lines for packed output.
 * index is the register used as loop counter (zeroed here); c is the base
 * register for the *_MMX_FILTER_OFFSET constants. Operands %0/%1 are the two
 * luma lines (buf0/buf1) and %2/%3 the two chroma lines (uvbuf0/uvbuf1).
 *
 * Before the loop, the quadwords at CHR_MMX_FILTER_OFFSET+8(c) and
 * LUM_MMX_FILTER_OFFSET+8(c) are shifted right by 3 and written back to
 * memory; the loop then uses them as the blend weights.
 * NOTE(review): because the shifted values are stored in place, running the
 * fragment again without refreshing those fields shifts them again --
 * confirm that callers rewrite them first.
 *
 * The loop opened at label 1 is left open (no back-edge in this macro).
 * Per iteration it yields mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2, each computed
 * as ((line0 - line1) * weight >> 16) + (line1 >> 7).
 *
 * The YSCALEYUV2PACKED wrapper macro-expands its arguments before
 * REAL_YSCALEYUV2PACKED stringifies them with '#'.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
426 2da0d70d Diego Biurrun
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
427
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
428
    "psraw                $3, %%mm0                           \n\t"\
429
    "psraw                $3, %%mm1                           \n\t"\
430
    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
431
    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
432
    "xor            "#index", "#index"                        \n\t"\
433
    ASMALIGN(4)\
434
    "1:                                 \n\t"\
435
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
436
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
437
    "movq 4096(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
438
    "movq 4096(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
439
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
440
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
441
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
442
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
443
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
444
    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
445
    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
446
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
447
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
448
    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
449
    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
450
    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
451
    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
452
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
453
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
454
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
455
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
456
    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
457
    "psraw                $7, %%mm7     \n\t" /* buf1[eax] (next 4 words) >>7*/\
458
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
459
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
460 6a4970ab Diego Biurrun
461 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
462 6a4970ab Diego Biurrun
463 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2RGB(index, c) / YSCALEYUV2RGB(index, c)
 *
 * Asm string fragment that blends two source lines and converts to RGB.
 * index is the register used as loop counter (zeroed here); c is the base
 * register for the *_OFFSET / *_COEFF constants. Operands %0/%1 are the two
 * luma lines (buf0/buf1) and %2/%3 the two chroma lines (uvbuf0/uvbuf1).
 *
 * The loop opened at label 1 is left open (no back-edge in this macro).
 * Per iteration: U, V, Y1 and Y2 are each computed as
 * ((line0 - line1) * weight >> 16) + (line1 >> 4), with the weights read
 * from CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c); the
 * conversion that follows matches YSCALEYUV2RGBX.
 * Out: mm2 = B, mm4 = G, mm5 = R as 8 unsigned saturated bytes each (low four
 * bytes built from Y1, high four from Y2); mm7 = 0.
 *
 * The YSCALEYUV2RGB wrapper macro-expands its arguments before
 * REAL_YSCALEYUV2RGB stringifies them with '#'.
 */
#define REAL_YSCALEYUV2RGB(index, c) \
464 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
465
    ASMALIGN(4)\
466
    "1:                                 \n\t"\
467
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
468
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
469
    "movq 4096(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
470
    "movq 4096(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
471
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
472
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
473
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
474
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
475
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
476
    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
477
    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
478
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
479
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
480
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
481
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
482
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
483
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
484
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
485
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
486
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
487
    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
488
    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
489
    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
490
    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
491
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
492
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
493
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
494
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
495
    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
496
    "psraw                $4, %%mm7     \n\t" /* buf1[eax] (next 4 words) >>4*/\
497
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
498
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
499
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
500
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
501
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
502
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
503
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
504
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
505
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
506
    "paddw             %%mm3, %%mm4     \n\t"\
507
    "movq              %%mm2, %%mm0     \n\t"\
508
    "movq              %%mm5, %%mm6     \n\t"\
509
    "movq              %%mm4, %%mm3     \n\t"\
510
    "punpcklwd         %%mm2, %%mm2     \n\t"\
511
    "punpcklwd         %%mm5, %%mm5     \n\t"\
512
    "punpcklwd         %%mm4, %%mm4     \n\t"\
513
    "paddw             %%mm1, %%mm2     \n\t"\
514
    "paddw             %%mm1, %%mm5     \n\t"\
515
    "paddw             %%mm1, %%mm4     \n\t"\
516
    "punpckhwd         %%mm0, %%mm0     \n\t"\
517
    "punpckhwd         %%mm6, %%mm6     \n\t"\
518
    "punpckhwd         %%mm3, %%mm3     \n\t"\
519
    "paddw             %%mm7, %%mm0     \n\t"\
520
    "paddw             %%mm7, %%mm6     \n\t"\
521
    "paddw             %%mm7, %%mm3     \n\t"\
522
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1: from Y1, 2: from Y2) */\
523
    "packuswb          %%mm0, %%mm2     \n\t"\
524
    "packuswb          %%mm6, %%mm5     \n\t"\
525
    "packuswb          %%mm3, %%mm4     \n\t"\
526
    "pxor              %%mm7, %%mm7     \n\t"
527 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2RGB(index, c)  REAL_YSCALEYUV2RGB(index, c)
528 6a4970ab Diego Biurrun
529 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED1(index, c) / YSCALEYUV2PACKED1(index, c)
 *
 * Single-line counterpart of YSCALEYUV2PACKED: no blending, only operand %0
 * (luma line) and operand %2 (chroma line) are read. index is zeroed and the
 * loop opened at label 1 is left open (no back-edge in this macro).
 * Per iteration: mm3 = chroma quadword at %2 + index, mm4 = chroma quadword
 * 4096 bytes further on, mm1/mm7 = two consecutive luma quadwords at
 * %0 + 2*index; all four are shifted right by 7.
 * The parameter c is not used in the expansion.
 *
 * The YSCALEYUV2PACKED1 wrapper macro-expands its arguments before
 * REAL_YSCALEYUV2PACKED1 stringifies them with '#'.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
530 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
531
    ASMALIGN(4)\
532
    "1:                                 \n\t"\
533
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
534
    "movq 4096(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
535
    "psraw                $7, %%mm3     \n\t" \
536
    "psraw                $7, %%mm4     \n\t" \
537
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
538
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
539
    "psraw                $7, %%mm1     \n\t" \
540
    "psraw                $7, %%mm7     \n\t" \
541 6a4970ab Diego Biurrun
542 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
543 6a4970ab Diego Biurrun
544 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop head: YUV -> RGB for 8 pixels per pass, reading one luma
 * line (%0) and one chroma line (%2) with no vertical blending.
 *
 * Steps, as coded below:
 *   - clear the index register and open local label "1:";
 *   - load 4 chroma words from (%2,index) and 4 from 4096(%2,index), >>4,
 *     subtract U_OFFSET / V_OFFSET, multiply (pmulhw) by the UG/VG/UB/VR
 *     coefficients addressed through `c`;
 *   - load 8 luma words from (%0,index,2), >>4, subtract Y_OFFSET, multiply
 *     by Y_COEFF;
 *   - duplicate every chroma term for two adjacent pixels (punpck?wd), add
 *     the luma, and pack to bytes with unsigned saturation.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * The loop is not closed here; a WRITE* macro placed after this fragment
 * supplies the index increment and the "jb 1b".
 */
#define REAL_YSCALEYUV2RGB1(index, c) \
545 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
546
    ASMALIGN(4)\
547
    "1:                                 \n\t"\
548
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
549
    "movq 4096(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
550
    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] >>4*/\
551
    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+2048] >>4*/\
552
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
553
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
554
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
555
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
556
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
557
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
558
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
559
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
560
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* next 4 words after buf0[eax] */\
561
    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
562
    "psraw                $4, %%mm7     \n\t" /* following 4 luma words >>4*/\
563
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
564
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
565
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
566
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
567
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
568
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
569
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
570
    "paddw             %%mm3, %%mm4     \n\t"\
571
    "movq              %%mm2, %%mm0     \n\t"\
572
    "movq              %%mm5, %%mm6     \n\t"\
573
    "movq              %%mm4, %%mm3     \n\t"\
574
    "punpcklwd         %%mm2, %%mm2     \n\t"\
575
    "punpcklwd         %%mm5, %%mm5     \n\t"\
576
    "punpcklwd         %%mm4, %%mm4     \n\t"\
577
    "paddw             %%mm1, %%mm2     \n\t"\
578
    "paddw             %%mm1, %%mm5     \n\t"\
579
    "paddw             %%mm1, %%mm4     \n\t"\
580
    "punpckhwd         %%mm0, %%mm0     \n\t"\
581
    "punpckhwd         %%mm6, %%mm6     \n\t"\
582
    "punpckhwd         %%mm3, %%mm3     \n\t"\
583
    "paddw             %%mm7, %%mm0     \n\t"\
584
    "paddw             %%mm7, %%mm6     \n\t"\
585
    "paddw             %%mm7, %%mm3     \n\t"\
586
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
587
    "packuswb          %%mm0, %%mm2     \n\t"\
588
    "packuswb          %%mm6, %%mm5     \n\t"\
589
    "packuswb          %%mm3, %%mm4     \n\t"\
590
    "pxor              %%mm7, %%mm7     \n\t"
591 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
592 497d4f99 Michael Niedermayer
593 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop head for packed output from one luma line (%0) and the
 * sum of two chroma lines (%2 and %3).
 *
 * Clears the index register, opens local label "1:" and, per pass:
 *   mm3 = ( (%2,index)     +     (%3,index) ) >> 8   (logical shift)
 *   mm4 = ( 4096(%2,index) + 4096(%3,index) ) >> 8   (logical shift)
 *   mm1 = 4 luma words at (%0,index,2) >> 7          (arithmetic shift)
 *   mm7 = the following 4 luma words   >> 7          (arithmetic shift)
 *
 * `c` is not referenced by this fragment. The loop is closed by the WRITE*
 * macro that follows it (its "jb 1b").
 */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
594 2da0d70d Diego Biurrun
    "xor "#index", "#index"             \n\t"\
595
    ASMALIGN(4)\
596
    "1:                                 \n\t"\
597
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
598
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
599
    "movq 4096(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
600
    "movq 4096(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
601
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
602
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
603
    "psrlw                $8, %%mm3     \n\t" \
604
    "psrlw                $8, %%mm4     \n\t" \
605
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
606
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* next 4 words after buf0[eax] */\
607
    "psraw                $7, %%mm1     \n\t" \
608
    "psraw                $7, %%mm7     \n\t"
609 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
610 6a4970ab Diego Biurrun
611 497d4f99 Michael Niedermayer
/*
 * do vertical chrominance interpolation
 *
 * Inline-asm loop head: YUV -> RGB for 8 pixels per pass, reading one luma
 * line (%0) and the sum of two chroma lines (%2 and %3).
 *
 * Differs from REAL_YSCALEYUV2RGB1 only in the chroma fetch: the two lines
 * are added and shifted right by 5 with a logical shift (see the FIXME on
 * those lines about overflow). Everything from the U_OFFSET / V_OFFSET
 * subtraction onwards is the same instruction sequence.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * The loop is closed by the WRITE* macro that follows (its "jb 1b").
 */
612 6e1c66bc Aurelien Jacobs
#define REAL_YSCALEYUV2RGB1b(index, c) \
613 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
614
    ASMALIGN(4)\
615
    "1:                                 \n\t"\
616
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
617
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
618
    "movq 4096(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
619
    "movq 4096(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
620
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
621
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
622
    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
623
    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
624
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
625
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
626
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
627
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
628
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
629
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
630
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
631
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
632
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* next 4 words after buf0[eax] */\
633
    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
634
    "psraw                $4, %%mm7     \n\t" /* following 4 luma words >>4*/\
635
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
636
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
637
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
638
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
639
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
640
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
641
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
642
    "paddw             %%mm3, %%mm4     \n\t"\
643
    "movq              %%mm2, %%mm0     \n\t"\
644
    "movq              %%mm5, %%mm6     \n\t"\
645
    "movq              %%mm4, %%mm3     \n\t"\
646
    "punpcklwd         %%mm2, %%mm2     \n\t"\
647
    "punpcklwd         %%mm5, %%mm5     \n\t"\
648
    "punpcklwd         %%mm4, %%mm4     \n\t"\
649
    "paddw             %%mm1, %%mm2     \n\t"\
650
    "paddw             %%mm1, %%mm5     \n\t"\
651
    "paddw             %%mm1, %%mm4     \n\t"\
652
    "punpckhwd         %%mm0, %%mm0     \n\t"\
653
    "punpckhwd         %%mm6, %%mm6     \n\t"\
654
    "punpckhwd         %%mm3, %%mm3     \n\t"\
655
    "paddw             %%mm7, %%mm0     \n\t"\
656
    "paddw             %%mm7, %%mm6     \n\t"\
657
    "paddw             %%mm7, %%mm3     \n\t"\
658
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
659
    "packuswb          %%mm0, %%mm2     \n\t"\
660
    "packuswb          %%mm6, %%mm5     \n\t"\
661
    "packuswb          %%mm3, %%mm4     \n\t"\
662
    "pxor              %%mm7, %%mm7     \n\t"
663 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
664 d604bab9 Michael Niedermayer
665 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop tail: store 8 pixels as 32 bits each and close the loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * Interleaves them into four quadwords (per pixel, lowest byte first:
 * B, G, R, 0) and writes 32 bytes at (dst, index, 4) with MOVNTQ.
 * Then adds 8 to index and jumps back to local label "1:" while index is
 * below dstw (unsigned compare); that label must be provided by the loop
 * head placed before this macro.
 *
 * Clobbers mm0-mm3, mm5 and mm6.
 */
#define REAL_WRITEBGR32(dst, dstw, index) \
666 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
667
    "movq      %%mm2, %%mm1     \n\t" /* B */\
668
    "movq      %%mm5, %%mm6     \n\t" /* R */\
669
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
670
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
671
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
672
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
673
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
674
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
675
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
676
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
677
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
678
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
679 d604bab9 Michael Niedermayer
\
680 2da0d70d Diego Biurrun
    MOVNTQ(%%mm0,   (dst, index, 4))\
681
    MOVNTQ(%%mm2,  8(dst, index, 4))\
682
    MOVNTQ(%%mm1, 16(dst, index, 4))\
683
    MOVNTQ(%%mm3, 24(dst, index, 4))\
684 d604bab9 Michael Niedermayer
\
685 2da0d70d Diego Biurrun
    "add      $8, "#index"      \n\t"\
686
    "cmp "#dstw", "#index"      \n\t"\
687
    " jb      1b                \n\t"
688 6e1c66bc Aurelien Jacobs
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
689 d604bab9 Michael Niedermayer
690 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop tail: store 8 pixels as 16 bits each and close the loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * Keeps the top 5 bits of B and R (mask bF8) and the top 6 bits of G
 * (mask bFC), then assembles each 16-bit pixel with B in the low 5 bits,
 * G shifted left by 3 above it and R in the top 5 bits.
 * Writes 16 bytes at (dst, index, 2) with MOVNTQ, adds 8 to index and jumps
 * back to local label "1:" while index is below dstw (unsigned compare).
 *
 * Clobbers mm1-mm4.
 */
#define REAL_WRITEBGR16(dst, dstw, index) \
691 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
692
    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
693
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
694
    "psrlq           $3, %%mm2  \n\t"\
695 d604bab9 Michael Niedermayer
\
696 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
697
    "movq         %%mm4, %%mm3  \n\t"\
698 d604bab9 Michael Niedermayer
\
699 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
700
    "punpcklbw    %%mm5, %%mm2  \n\t"\
701
    "punpckhbw    %%mm7, %%mm4  \n\t"\
702
    "punpckhbw    %%mm5, %%mm1  \n\t"\
703 d604bab9 Michael Niedermayer
\
704 2da0d70d Diego Biurrun
    "psllq           $3, %%mm3  \n\t"\
705
    "psllq           $3, %%mm4  \n\t"\
706 d604bab9 Michael Niedermayer
\
707 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
708
    "por          %%mm4, %%mm1  \n\t"\
709 d604bab9 Michael Niedermayer
\
710 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
711
    MOVNTQ(%%mm1, 8(dst, index, 2))\
712 d604bab9 Michael Niedermayer
\
713 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
714
    "cmp        "#dstw", "#index"   \n\t"\
715
    " jb             1b             \n\t"
716 6e1c66bc Aurelien Jacobs
#define WRITEBGR16(dst, dstw, index)  REAL_WRITEBGR16(dst, dstw, index)
717 d604bab9 Michael Niedermayer
718 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop tail: store 8 pixels as 15-bit colour in 16-bit words and
 * close the loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * Keeps the top 5 bits of every channel (mask bF8). B is shifted right by 3
 * into the low 5 bits, R right by 1 so it ends below the word's top bit, and
 * G (zero-extended to words) left by 2 in between.
 * Writes 16 bytes at (dst, index, 2) with MOVNTQ, adds 8 to index and jumps
 * back to local label "1:" while index is below dstw (unsigned compare).
 *
 * Clobbers mm1-mm5.
 */
#define REAL_WRITEBGR15(dst, dstw, index) \
719 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
720
    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
721
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
722
    "psrlq           $3, %%mm2  \n\t"\
723
    "psrlq           $1, %%mm5  \n\t"\
724 d604bab9 Michael Niedermayer
\
725 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
726
    "movq         %%mm4, %%mm3  \n\t"\
727 d604bab9 Michael Niedermayer
\
728 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
729
    "punpcklbw    %%mm5, %%mm2  \n\t"\
730
    "punpckhbw    %%mm7, %%mm4  \n\t"\
731
    "punpckhbw    %%mm5, %%mm1  \n\t"\
732 d604bab9 Michael Niedermayer
\
733 2da0d70d Diego Biurrun
    "psllq           $2, %%mm3  \n\t"\
734
    "psllq           $2, %%mm4  \n\t"\
735 d604bab9 Michael Niedermayer
\
736 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
737
    "por          %%mm4, %%mm1  \n\t"\
738 d604bab9 Michael Niedermayer
\
739 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
740
    MOVNTQ(%%mm1, 8(dst, index, 2))\
741 d604bab9 Michael Niedermayer
\
742 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
743
    "cmp        "#dstw", "#index"   \n\t"\
744
    " jb             1b             \n\t"
745 6e1c66bc Aurelien Jacobs
#define WRITEBGR15(dst, dstw, index)  REAL_WRITEBGR15(dst, dstw, index)
746 f62255fb Michael Niedermayer
747 6542b44e Michael Niedermayer
/*
 * Inline-asm loop tail: store 8 pixels as packed 24-bit (3 bytes per pixel)
 * using shift / mask / or sequences, then close the loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * First builds four "0RGB0RGB" quadwords exactly like WRITEBGR32, then
 * squeezes out the zero bytes to form three quadwords which are written to
 * (dst), 8(dst) and 16(dst) with MOVNTQ.
 *
 * Unlike the 16/32-bit writers, dst is not indexed: the macro itself adds 24
 * to dst, so dst must name a register the asm statement may modify.
 * Afterwards adds 8 to index and jumps back to local label "1:" while index
 * is below dstw (unsigned compare).
 *
 * The WRITEBGR24 selection further down in this file picks WRITEBGR24MMX or
 * WRITEBGR24MMX2, not this variant.
 */
#define WRITEBGR24OLD(dst, dstw, index) \
748 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
749
    "movq      %%mm2, %%mm1             \n\t" /* B */\
750
    "movq      %%mm5, %%mm6             \n\t" /* R */\
751
    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
752
    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
753
    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
754
    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
755
    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
756
    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
757
    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
758
    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
759
    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
760
    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
761 d604bab9 Michael Niedermayer
\
762 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
763
    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
764
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
765
    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
766
    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
767
    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
768
    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
769
    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
770 d604bab9 Michael Niedermayer
\
771 2da0d70d Diego Biurrun
    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
772
    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
773
    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
774
    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
775
    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
776
    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
777
    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
778
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
779
    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
780
    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
781
    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
782
    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
783
    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
784 d604bab9 Michael Niedermayer
\
785 2da0d70d Diego Biurrun
    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
786
    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
787
    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
788
    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
789
    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
790
    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
791
    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
792
    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
793 d604bab9 Michael Niedermayer
\
794 2da0d70d Diego Biurrun
    MOVNTQ(%%mm0,   (dst))\
795
    MOVNTQ(%%mm2,  8(dst))\
796
    MOVNTQ(%%mm3, 16(dst))\
797
    "add         $24, "#dst"            \n\t"\
798 d604bab9 Michael Niedermayer
\
799 2da0d70d Diego Biurrun
    "add          $8, "#index"          \n\t"\
800
    "cmp     "#dstw", "#index"          \n\t"\
801
    " jb          1b                    \n\t"
802 d604bab9 Michael Niedermayer
803 6542b44e Michael Niedermayer
/*
 * Inline-asm loop tail: store 8 pixels as packed 24-bit (3 bytes per pixel)
 * using only shifts, unpacks and ors (no mask constants), then close the
 * loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes, mm7 = 0.
 * Builds four "0RGB0RGB" quadwords, turns each into "0RGBRGB0" with
 * psllq $40 + punpckhdq, and then splices neighbouring quadwords together.
 * Three quadwords are written to (dst), 8(dst) and 16(dst) with MOVNTQ,
 * each as soon as it is complete.
 *
 * dst is advanced by 24 inside the macro, so it must name a register the
 * asm statement may modify. Afterwards adds 8 to index and jumps back to
 * local label "1:" while index is below dstw (unsigned compare).
 *
 * All of mm0-mm7 are overwritten (mm7 no longer holds 0 on exit).
 */
#define WRITEBGR24MMX(dst, dstw, index) \
804 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
805
    "movq      %%mm2, %%mm1     \n\t" /* B */\
806
    "movq      %%mm5, %%mm6     \n\t" /* R */\
807
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
808
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
809
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
810
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
811
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
812
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
813
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
814
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
815
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
816
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
817 99d2cb72 Michael Niedermayer
\
818 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
819
    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
820
    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
821
    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
822 99d2cb72 Michael Niedermayer
\
823 2da0d70d Diego Biurrun
    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
824
    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
825
    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
826
    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
827 99d2cb72 Michael Niedermayer
\
828 2da0d70d Diego Biurrun
    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
829
    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
830
    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
831
    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
832 99d2cb72 Michael Niedermayer
\
833 2da0d70d Diego Biurrun
    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
834
    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
835
    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
836
    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
837
    MOVNTQ(%%mm0, (dst))\
838 99d2cb72 Michael Niedermayer
\
839 2da0d70d Diego Biurrun
    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
840
    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
841
    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
842
    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
843
    MOVNTQ(%%mm6, 8(dst))\
844 99d2cb72 Michael Niedermayer
\
845 2da0d70d Diego Biurrun
    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
846
    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
847
    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
848
    MOVNTQ(%%mm5, 16(dst))\
849 99d2cb72 Michael Niedermayer
\
850 2da0d70d Diego Biurrun
    "add         $24, "#dst"    \n\t"\
851 99d2cb72 Michael Niedermayer
\
852 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
853
    "cmp     "#dstw", "#index"  \n\t"\
854
    " jb          1b            \n\t"
855 99d2cb72 Michael Niedermayer
856 6542b44e Michael Niedermayer
/*
 * Inline-asm loop tail: store 8 pixels as packed 24-bit (3 bytes per pixel)
 * using pshufw and the byte masks ff_M24A / ff_M24B / ff_M24C, then close
 * the loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes (mm7 is
 * overwritten with ff_M24C straight away, so its incoming value is unused).
 * Each of the three output quadwords is built the same way: pshufw copies
 * the needed source words of B, G and R into place, a mask keeps one byte
 * per 3-byte slot, and the three results are or-ed together. G is shifted
 * by 8 bits (left for the first quadword, right in mm4 for the other two)
 * so that it lands between B and R.
 *
 * Writes to (dst), 8(dst) and 16(dst) with MOVNTQ, adds 24 to dst (which
 * therefore must name a modifiable register), adds 8 to index and jumps
 * back to local label "1:" while index is below dstw (unsigned compare).
 *
 * Clobbers mm0, mm1, mm3, mm4, mm6 and mm7.
 */
#define WRITEBGR24MMX2(dst, dstw, index) \
857 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
858 5802683a Reimar Döffinger
    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
859
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
860 2da0d70d Diego Biurrun
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
861
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
862
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
863 99d2cb72 Michael Niedermayer
\
864 2da0d70d Diego Biurrun
    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
865
    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
866
    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
867 99d2cb72 Michael Niedermayer
\
868 2da0d70d Diego Biurrun
    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
869
    "por    %%mm1, %%mm6        \n\t"\
870
    "por    %%mm3, %%mm6        \n\t"\
871
    MOVNTQ(%%mm6, (dst))\
872 99d2cb72 Michael Niedermayer
\
873 2da0d70d Diego Biurrun
    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
874
    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
875
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
876
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
877 99d2cb72 Michael Niedermayer
\
878 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
879 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
880
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
881 99d2cb72 Michael Niedermayer
\
882 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
883
    "por    %%mm3, %%mm6        \n\t"\
884
    MOVNTQ(%%mm6, 8(dst))\
885 99d2cb72 Michael Niedermayer
\
886 2da0d70d Diego Biurrun
    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
887
    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
888
    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
889 99d2cb72 Michael Niedermayer
\
890 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
891
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
892 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
893 99d2cb72 Michael Niedermayer
\
894 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t"\
895
    "por    %%mm3, %%mm6        \n\t"\
896
    MOVNTQ(%%mm6, 16(dst))\
897 99d2cb72 Michael Niedermayer
\
898 2da0d70d Diego Biurrun
    "add      $24, "#dst"       \n\t"\
899 99d2cb72 Michael Niedermayer
\
900 2da0d70d Diego Biurrun
    "add       $8, "#index"     \n\t"\
901
    "cmp  "#dstw", "#index"     \n\t"\
902
    " jb       1b               \n\t"
903 99d2cb72 Michael Niedermayer
904
/*
 * Select the 24-bit writer for this instantiation of the template:
 * the pshufw-based WRITEBGR24MMX2 when HAVE_MMX2 is defined, otherwise the
 * plain-MMX WRITEBGR24MMX. The #undef in each branch drops any WRITEBGR24
 * definition that is already in effect before redefining it.
 */
#ifdef HAVE_MMX2
905 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
906 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
907 99d2cb72 Michael Niedermayer
#else
908 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
909 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
910 99d2cb72 Michael Niedermayer
#endif
911
912 6e1c66bc Aurelien Jacobs
/*
 * Inline-asm loop tail: store 8 pixels as interleaved luma/chroma bytes and
 * close the loop.
 *
 * Expects 16-bit words in mm1 and mm7 (4 luma samples each) and in mm3 and
 * mm4 (4 chroma samples each). All are packed to bytes with unsigned
 * saturation, mm3/mm4 bytes are interleaved with each other and the result
 * is interleaved with the luma bytes, giving the byte order
 *   Y0 c3_0 Y1 c4_0 Y2 c3_1 Y3 c4_1 ...
 * where c3_n / c4_n are the n-th samples from mm3 / mm4.
 * NOTE(review): going by the macro name and the loop heads above, mm3 is U
 * and mm4 is V -- confirm against the caller's loop head.
 *
 * Writes 16 bytes at (dst, index, 2) with MOVNTQ, adds 8 to index and jumps
 * back to local label "1:" while index is below dstw (unsigned compare).
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
913 2da0d70d Diego Biurrun
    "packuswb  %%mm3, %%mm3     \n\t"\
914
    "packuswb  %%mm4, %%mm4     \n\t"\
915
    "packuswb  %%mm7, %%mm1     \n\t"\
916
    "punpcklbw %%mm4, %%mm3     \n\t"\
917
    "movq      %%mm1, %%mm7     \n\t"\
918
    "punpcklbw %%mm3, %%mm1     \n\t"\
919
    "punpckhbw %%mm3, %%mm7     \n\t"\
920 25593e29 Michael Niedermayer
\
921 2da0d70d Diego Biurrun
    MOVNTQ(%%mm1, (dst, index, 2))\
922
    MOVNTQ(%%mm7, 8(dst, index, 2))\
923 25593e29 Michael Niedermayer
\
924 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
925
    "cmp     "#dstw", "#index"  \n\t"\
926
    " jb          1b            \n\t"
927 6e1c66bc Aurelien Jacobs
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
928 25593e29 Michael Niedermayer
929
930 77a49659 Michael Niedermayer
/*
 * Vertical filter to planar output (one luma line in dest, optional chroma
 * lines in uDest / vDest).
 *
 * With HAVE_MMX the work is done by the YSCALEYUV2YV12X macros (defined
 * earlier in this file): the *_ACCURATE variants when c->flags has
 * SWS_ACCURATE_RND set, the plain ones otherwise. Chroma is only produced
 * when uDest is non-NULL; the first macro argument is 0 for the uDest call
 * and 4096 for the vDest call.
 * NOTE(review): 4096 is presumably the byte offset of the V half inside the
 * chroma buffer (2048 int16_t entries, cf. chrSrc + 2048 in yuv2yuv1) --
 * confirm in the macro definition.
 *
 * Without MMX everything is forwarded unchanged to yuv2yuvX_altivec_real()
 * (HAVE_ALTIVEC) or yuv2yuvXinC(); `c` is not passed on in that case.
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
931 2da0d70d Diego Biurrun
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
932
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
933 38858470 Michael Niedermayer
{
934 c1b0bfb4 Michael Niedermayer
#ifdef HAVE_MMX
935 2da0d70d Diego Biurrun
    if (c->flags & SWS_ACCURATE_RND){
936
        if (uDest){
937
            YSCALEYUV2YV12X_ACCURATE(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
938
            YSCALEYUV2YV12X_ACCURATE(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
939
        }
940 bca11e75 Michael Niedermayer
941 2da0d70d Diego Biurrun
        YSCALEYUV2YV12X_ACCURATE(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
942
    }else{
943
        if (uDest){
944
            YSCALEYUV2YV12X(   0, CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
945
            YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
946 bca11e75 Michael Niedermayer
        }
947 2da0d70d Diego Biurrun
948
        YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET, dest, dstW)
949
    }
950 c1b0bfb4 Michael Niedermayer
#else
951 a2faa401 Romain Dolbeau
#ifdef HAVE_ALTIVEC
952
yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
953 2da0d70d Diego Biurrun
                      chrFilter, chrSrc, chrFilterSize,
954
                      dest, uDest, vDest, dstW, chrDstW);
955 a2faa401 Romain Dolbeau
#else //HAVE_ALTIVEC
956 5859233b Michael Niedermayer
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
957 2da0d70d Diego Biurrun
            chrFilter, chrSrc, chrFilterSize,
958
            dest, uDest, vDest, dstW, chrDstW);
959 a2faa401 Romain Dolbeau
#endif //!HAVE_ALTIVEC
960 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
961 c1b0bfb4 Michael Niedermayer
}
962 2add307d Michael Niedermayer
963 6118e52e Ville Syrjälä
/*
 * Vertical filter to NV12-style output. There is no architecture-specific
 * code here: every argument except `c` (which is unused) is forwarded
 * unchanged to yuv2nv12XinC().
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
964 2da0d70d Diego Biurrun
                                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
965
                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
966 6118e52e Ville Syrjälä
{
967
yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
968 2da0d70d Diego Biurrun
             chrFilter, chrSrc, chrFilterSize,
969
             dest, uDest, dstW, chrDstW, dstFormat);
970 6118e52e Ville Syrjälä
}
971
972 c1b0bfb4 Michael Niedermayer
/*
 * Unfiltered vertical pass: convert one line of 16-bit intermediate samples
 * to 8-bit planar output.
 *
 * lumSrc           luma samples, dstW entries
 * chrSrc           chroma samples; U at chrSrc[i], V at chrSrc[i + 2048]
 * dest             luma output, dstW bytes
 * uDest, vDest     chroma outputs, chrDstW bytes each; chroma is skipped
 *                  entirely when uDest is NULL
 *
 * MMX path: runs the YSCALEYUV2YV121 asm fragment (defined earlier in this
 * file) once per plane, handing it the end of the source, the end of the
 * destination and the negated width.
 * NOTE(review): this operand shape suggests the fragment counts an index up
 * from -width to 0 -- confirm in YSCALEYUV2YV121.
 *
 * C path: each sample is shifted right by 7 and clamped to 0..255.
 */
static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc,
973 2da0d70d Diego Biurrun
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
974 c1b0bfb4 Michael Niedermayer
{
975
#ifdef HAVE_MMX
976 1b0a4572 Benoit Fouet
    if (uDest)
977 2da0d70d Diego Biurrun
    {
978
        /* U plane */
        asm volatile(
979
            YSCALEYUV2YV121
980
            :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW),
981
            "g" (-chrDstW)
982
            : "%"REG_a
983
        );
984
985
        /* V plane: same source buffer, 2048 entries further on */
        asm volatile(
986
            YSCALEYUV2YV121
987
            :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW),
988
            "g" (-chrDstW)
989
            : "%"REG_a
990
        );
991
    }
992
993
    /* luma plane */
    asm volatile(
994
        YSCALEYUV2YV121
995
        :: "r" (lumSrc + dstW), "r" (dest + dstW),
996
        "g" (-dstW)
997
        : "%"REG_a
998
    );
999 c1b0bfb4 Michael Niedermayer
#else
1000 2da0d70d Diego Biurrun
    int i;
1001
    for (i=0; i<dstW; i++)
1002
    {
1003
        int val= lumSrc[i]>>7;
1004
1005
        /* bit 8 set means val is outside 0..255; only then pay for the clamp */
        if (val&256){
1006
            if (val<0) val=0;
1007
            else       val=255;
1008
        }
1009
1010
        dest[i]= val;
1011
    }
1012
1013 1b0a4572 Benoit Fouet
    if (uDest)
1014 2da0d70d Diego Biurrun
        for (i=0; i<chrDstW; i++)
1015
        {
1016
            int u=chrSrc[i]>>7;
1017
            int v=chrSrc[i + 2048]>>7;
1018
1019
            /* one combined range test for both samples, then clamp each */
            if ((u|v)&256){
1020
                if (u<0)        u=0;
1021
                else if (u>255) u=255;
1022
                if (v<0)        v=0;
1023
                else if (v>255) v=255;
1024
            }
1025
1026
            uDest[i]= u;
1027
            vDest[i]= v;
1028
        }
1029 c1b0bfb4 Michael Niedermayer
#endif
1030 38858470 Michael Niedermayer
}
1031
1032 c1b0bfb4 Michael Niedermayer
1033 d604bab9 Michael Niedermayer
/**
 * vertical scale YV12 to RGB
 *
 * Vertically filters luma and chroma and writes one line of packed output
 * to dest in the format given by c->dstFormat.
 *
 * With HAVE_MMX, the formats PIX_FMT_RGB32, PIX_FMT_BGR24, PIX_FMT_BGR555,
 * PIX_FMT_BGR565 and PIX_FMT_YUYV422 are handled by inline asm assembled
 * from the macros defined earlier in this file, and the function returns
 * from inside the switch. The outer if picks the YSCALEYUV2PACKEDX_ACCURATE
 * loop head when c->flags has SWS_ACCURATE_RND set and YSCALEYUV2PACKEDX
 * otherwise; the rest of each case is the same in both branches.
 * NOTE(review): the opening of each asm statement and, via
 * YSCALEYUV2PACKEDX_END, its operand list are not visible in this part of
 * the file; presumably they match the operand list spelled out in the
 * BGR24 cases -- confirm.
 *
 * Any other format (or a build without MMX) falls through to
 * altivec_yuv2packedX() for the formats listed under HAVE_ALTIVEC, else to
 * yuv2packedXinC().
 */
1036 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1037 2da0d70d Diego Biurrun
                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1038
                                       uint8_t *dest, long dstW, long dstY)
1039 c1b0bfb4 Michael Niedermayer
{
1040 bca11e75 Michael Niedermayer
#ifdef HAVE_MMX
1041 f8d61128 Diego Biurrun
    /* placeholder bound to the three "m" operands in the BGR24 operand lists */
    long dummy=0;
1042 2da0d70d Diego Biurrun
    if (c->flags & SWS_ACCURATE_RND){
1043
        switch(c->dstFormat){
1044
        case PIX_FMT_RGB32:
1045
            YSCALEYUV2PACKEDX_ACCURATE
1046
            YSCALEYUV2RGBX
1047
            WRITEBGR32(%4, %5, %%REGa)
1048
1049
            YSCALEYUV2PACKEDX_END
1050
            return;
1051
        case PIX_FMT_BGR24:
1052
            YSCALEYUV2PACKEDX_ACCURATE
1053
            YSCALEYUV2RGBX
1054
            /* REG_c = dest + 3*index: the 24-bit writer takes a plain pointer it advances itself */
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1055
            "add %4, %%"REG_c"                        \n\t"
1056
            WRITEBGR24(%%REGc, %5, %%REGa)
1057
1058
1059
            /* operand list written out here instead of using YSCALEYUV2PACKEDX_END */
            :: "r" (&c->redDither),
1060
               "m" (dummy), "m" (dummy), "m" (dummy),
1061
               "r" (dest), "m" (dstW)
1062
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1063
            );
1064
            return;
1065
        case PIX_FMT_BGR555:
1066
            YSCALEYUV2PACKEDX_ACCURATE
1067
            YSCALEYUV2RGBX
1068
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1069 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1070 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1071
            "paddusb "MANGLE(g5Dither)", %%mm4\n\t"
1072
            "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1073
#endif
1074
1075
            WRITEBGR15(%4, %5, %%REGa)
1076
            YSCALEYUV2PACKEDX_END
1077
            return;
1078
        case PIX_FMT_BGR565:
1079
            YSCALEYUV2PACKEDX_ACCURATE
1080
            YSCALEYUV2RGBX
1081
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1082 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1083 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2\n\t"
1084
            "paddusb "MANGLE(g6Dither)", %%mm4\n\t"
1085
            "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
1086
#endif
1087
1088
            WRITEBGR16(%4, %5, %%REGa)
1089
            YSCALEYUV2PACKEDX_END
1090
            return;
1091
        case PIX_FMT_YUYV422:
1092
            YSCALEYUV2PACKEDX_ACCURATE
1093
            /* NOTE(review): the RGB register comment that stood here was copied
               from the RGB cases; no RGB conversion runs in this case. The
               registers shifted below (mm1/mm7 and mm3/mm4) are the ones
               WRITEYUY2 packs -- presumably luma and chroma; confirm. */
1094
1095
            "psraw $3, %%mm3    \n\t"
1096
            "psraw $3, %%mm4    \n\t"
1097
            "psraw $3, %%mm1    \n\t"
1098
            "psraw $3, %%mm7    \n\t"
1099
            WRITEYUY2(%4, %5, %%REGa)
1100
            YSCALEYUV2PACKEDX_END
1101
            return;
1102
    /* end of switch: formats without a case continue below the #endif */
    }
1103 bca11e75 Michael Niedermayer
    }else{
1104 2da0d70d Diego Biurrun
        switch(c->dstFormat)
1105
        {
1106
        case PIX_FMT_RGB32:
1107
            YSCALEYUV2PACKEDX
1108
            YSCALEYUV2RGBX
1109
            WRITEBGR32(%4, %5, %%REGa)
1110
            YSCALEYUV2PACKEDX_END
1111
            return;
1112
        case PIX_FMT_BGR24:
1113
            YSCALEYUV2PACKEDX
1114
            YSCALEYUV2RGBX
1115
            /* REG_c = dest + 3*index: the 24-bit writer takes a plain pointer it advances itself */
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1116
            "add                        %4, %%"REG_c"   \n\t"
1117
            WRITEBGR24(%%REGc, %5, %%REGa)
1118
1119
            /* operand list written out here instead of using YSCALEYUV2PACKEDX_END */
            :: "r" (&c->redDither),
1120
               "m" (dummy), "m" (dummy), "m" (dummy),
1121
               "r" (dest),  "m" (dstW)
1122
            : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1123
            );
1124
            return;
1125
        case PIX_FMT_BGR555:
1126
            YSCALEYUV2PACKEDX
1127
            YSCALEYUV2RGBX
1128
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1129 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1130 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2  \n\t"
1131
            "paddusb "MANGLE(g5Dither)", %%mm4  \n\t"
1132
            "paddusb "MANGLE(r5Dither)", %%mm5  \n\t"
1133
#endif
1134
1135
            WRITEBGR15(%4, %5, %%REGa)
1136
            YSCALEYUV2PACKEDX_END
1137
            return;
1138
        case PIX_FMT_BGR565:
1139
            YSCALEYUV2PACKEDX
1140
            YSCALEYUV2RGBX
1141
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1142 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1143 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2  \n\t"
1144
            "paddusb "MANGLE(g6Dither)", %%mm4  \n\t"
1145
            "paddusb "MANGLE(r5Dither)", %%mm5  \n\t"
1146
#endif
1147
1148
            WRITEBGR16(%4, %5, %%REGa)
1149
            YSCALEYUV2PACKEDX_END
1150
            return;
1151
        case PIX_FMT_YUYV422:
1152
            YSCALEYUV2PACKEDX
1153
            /* NOTE(review): the RGB register comment that stood here was copied
               from the RGB cases; no RGB conversion runs in this case. The
               registers shifted below (mm1/mm7 and mm3/mm4) are the ones
               WRITEYUY2 packs -- presumably luma and chroma; confirm. */
1154
1155
            "psraw $3, %%mm3    \n\t"
1156
            "psraw $3, %%mm4    \n\t"
1157
            "psraw $3, %%mm1    \n\t"
1158
            "psraw $3, %%mm7    \n\t"
1159
            WRITEYUY2(%4, %5, %%REGa)
1160
            YSCALEYUV2PACKEDX_END
1161
            return;
1162 bca11e75 Michael Niedermayer
        }
1163
    }
1164 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1165 a31de956 Michael Niedermayer
#ifdef HAVE_ALTIVEC
1166 2da0d70d Diego Biurrun
    /* The following list of supported dstFormat values should
1167
       match what's found in the body of altivec_yuv2packedX() */
1168
    if (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1169
        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1170
        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB)
1171
            altivec_yuv2packedX (c, lumFilter, lumSrc, lumFilterSize,
1172
                                 chrFilter, chrSrc, chrFilterSize,
1173
                                 dest, dstW, dstY);
1174
    else
1175
#endif
1176
        /* generic C implementation: reached for every format not handled above */
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1177
                       chrFilter, chrSrc, chrFilterSize,
1178
                       dest, dstW, dstY);
1179 c1b0bfb4 Michael Niedermayer
}
1180
1181
/**
1182
 * vertical bilinear scale YV12 to RGB
1183
 */
1184 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
1185 2da0d70d Diego Biurrun
                          uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1186 d604bab9 Michael Niedermayer
{
1187 2da0d70d Diego Biurrun
    int yalpha1=yalpha^4095;
1188
    int uvalpha1=uvalpha^4095;
1189
    int i;
1190 d604bab9 Michael Niedermayer
1191 77a416e8 Gabucino
#if 0 //isn't used
1192 2da0d70d Diego Biurrun
    if (flags&SWS_FULL_CHR_H_INT)
1193
    {
1194
        switch(dstFormat)
1195
        {
1196 cf7d1c1a Michael Niedermayer
#ifdef HAVE_MMX
1197 2da0d70d Diego Biurrun
        case PIX_FMT_RGB32:
1198
            asm volatile(
1199 d604bab9 Michael Niedermayer

1200

1201
FULL_YSCALEYUV2RGB
1202 2da0d70d Diego Biurrun
            "punpcklbw %%mm1, %%mm3    \n\t" // BGBGBGBG
1203
            "punpcklbw %%mm7, %%mm0    \n\t" // R0R0R0R0
1204 d604bab9 Michael Niedermayer

1205 2da0d70d Diego Biurrun
            "movq      %%mm3, %%mm1    \n\t"
1206
            "punpcklwd %%mm0, %%mm3    \n\t" // BGR0BGR0
1207
            "punpckhwd %%mm0, %%mm1    \n\t" // BGR0BGR0
1208 d604bab9 Michael Niedermayer

1209 2da0d70d Diego Biurrun
            MOVNTQ(%%mm3,  (%4, %%REGa, 4))
1210
            MOVNTQ(%%mm1, 8(%4, %%REGa, 4))
1211 d604bab9 Michael Niedermayer

1212 2da0d70d Diego Biurrun
            "add $4, %%"REG_a"  \n\t"
1213
            "cmp %5, %%"REG_a"  \n\t"
1214
            " jb 1b             \n\t"
1215 d604bab9 Michael Niedermayer

1216 2da0d70d Diego Biurrun
            :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW),
1217
            "m" (yalpha1), "m" (uvalpha1)
1218
            : "%"REG_a
1219
            );
1220
            break;
1221
        case PIX_FMT_BGR24:
1222
            asm volatile(
1223 d604bab9 Michael Niedermayer

1224
FULL_YSCALEYUV2RGB
1225

1226 2da0d70d Diego Biurrun
                                              // lsb ... msb
1227
            "punpcklbw %%mm1, %%mm3     \n\t" // BGBGBGBG
1228
            "punpcklbw %%mm7, %%mm0     \n\t" // R0R0R0R0
1229 d604bab9 Michael Niedermayer

1230 2da0d70d Diego Biurrun
            "movq      %%mm3, %%mm1     \n\t"
1231
            "punpcklwd %%mm0, %%mm3     \n\t" // BGR0BGR0
1232
            "punpckhwd %%mm0, %%mm1     \n\t" // BGR0BGR0
1233 d604bab9 Michael Niedermayer

1234 2da0d70d Diego Biurrun
            "movq      %%mm3, %%mm2     \n\t" // BGR0BGR0
1235
            "psrlq        $8, %%mm3     \n\t" // GR0BGR00
1236
            "pand "MANGLE(bm00000111)", %%mm2   \n\t" // BGR00000
1237
            "pand "MANGLE(bm11111000)", %%mm3   \n\t" // 000BGR00
1238
            "por       %%mm2, %%mm3     \n\t" // BGRBGR00
1239
            "movq      %%mm1, %%mm2     \n\t"
1240
            "psllq       $48, %%mm1     \n\t" // 000000BG
1241
            "por       %%mm1, %%mm3     \n\t" // BGRBGRBG
1242 d604bab9 Michael Niedermayer

1243 2da0d70d Diego Biurrun
            "movq      %%mm2, %%mm1     \n\t" // BGR0BGR0
1244
            "psrld       $16, %%mm2     \n\t" // R000R000
1245
            "psrlq       $24, %%mm1     \n\t" // 0BGR0000
1246
            "por       %%mm2, %%mm1     \n\t" // RBGRR000
1247 d604bab9 Michael Niedermayer

1248 2da0d70d Diego Biurrun
            "mov          %4, %%"REG_b" \n\t"
1249
            "add   %%"REG_a", %%"REG_b" \n\t"
1250 d604bab9 Michael Niedermayer

1251
#ifdef HAVE_MMX2
1252 2da0d70d Diego Biurrun
            //FIXME Alignment
1253
            "movntq %%mm3,  (%%"REG_b", %%"REG_a", 2)   \n\t"
1254
            "movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2)   \n\t"
1255 d604bab9 Michael Niedermayer
#else
1256 2da0d70d Diego Biurrun
            "movd %%mm3,  (%%"REG_b", %%"REG_a", 2)     \n\t"
1257
            "psrlq  $32, %%mm3                          \n\t"
1258
            "movd %%mm3, 4(%%"REG_b", %%"REG_a", 2)     \n\t"
1259
            "movd %%mm1, 8(%%"REG_b", %%"REG_a", 2)     \n\t"
1260
#endif
1261
            "add     $4, %%"REG_a"                      \n\t"
1262
            "cmp     %5, %%"REG_a"                      \n\t"
1263
            " jb     1b                                 \n\t"
1264
1265
            :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
1266
            "m" (yalpha1), "m" (uvalpha1)
1267
            : "%"REG_a, "%"REG_b
1268
            );
1269
            break;
1270
        case PIX_FMT_BGR555:
1271
            asm volatile(
1272 d604bab9 Michael Niedermayer
1273
FULL_YSCALEYUV2RGB
1274
#ifdef DITHER1XBPP
1275 2da0d70d Diego Biurrun
            "paddusb "MANGLE(g5Dither)", %%mm1  \n\t"
1276
            "paddusb "MANGLE(r5Dither)", %%mm0  \n\t"
1277
            "paddusb "MANGLE(b5Dither)", %%mm3  \n\t"
1278 d604bab9 Michael Niedermayer
#endif
1279 2da0d70d Diego Biurrun
            "punpcklbw            %%mm7, %%mm1  \n\t" // 0G0G0G0G
1280
            "punpcklbw            %%mm7, %%mm3  \n\t" // 0B0B0B0B
1281
            "punpcklbw            %%mm7, %%mm0  \n\t" // 0R0R0R0R
1282 d604bab9 Michael Niedermayer
1283 2da0d70d Diego Biurrun
            "psrlw                   $3, %%mm3  \n\t"
1284
            "psllw                   $2, %%mm1  \n\t"
1285
            "psllw                   $7, %%mm0  \n\t"
1286
            "pand     "MANGLE(g15Mask)", %%mm1  \n\t"
1287
            "pand     "MANGLE(r15Mask)", %%mm0  \n\t"
1288 d604bab9 Michael Niedermayer
1289 2da0d70d Diego Biurrun
            "por                  %%mm3, %%mm1  \n\t"
1290
            "por                  %%mm1, %%mm0  \n\t"
1291 d604bab9 Michael Niedermayer
1292 2da0d70d Diego Biurrun
            MOVNTQ(%%mm0, (%4, %%REGa, 2))
1293 d604bab9 Michael Niedermayer
1294 2da0d70d Diego Biurrun
            "add $4, %%"REG_a"  \n\t"
1295
            "cmp %5, %%"REG_a"  \n\t"
1296
            " jb 1b             \n\t"
1297 d604bab9 Michael Niedermayer
1298 2da0d70d Diego Biurrun
            :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1299
            "m" (yalpha1), "m" (uvalpha1)
1300
            : "%"REG_a
1301
            );
1302
            break;
1303
        case PIX_FMT_BGR565:
1304
            asm volatile(
1305 d604bab9 Michael Niedermayer
1306
FULL_YSCALEYUV2RGB
1307
#ifdef DITHER1XBPP
1308 2da0d70d Diego Biurrun
            "paddusb "MANGLE(g6Dither)", %%mm1  \n\t"
1309
            "paddusb "MANGLE(r5Dither)", %%mm0  \n\t"
1310
            "paddusb "MANGLE(b5Dither)", %%mm3  \n\t"
1311 d604bab9 Michael Niedermayer
#endif
1312 2da0d70d Diego Biurrun
            "punpcklbw            %%mm7, %%mm1  \n\t" // 0G0G0G0G
1313
            "punpcklbw            %%mm7, %%mm3  \n\t" // 0B0B0B0B
1314
            "punpcklbw            %%mm7, %%mm0  \n\t" // 0R0R0R0R
1315 d604bab9 Michael Niedermayer
1316 2da0d70d Diego Biurrun
            "psrlw                   $3, %%mm3  \n\t"
1317
            "psllw                   $3, %%mm1  \n\t"
1318
            "psllw                   $8, %%mm0  \n\t"
1319
            "pand     "MANGLE(g16Mask)", %%mm1  \n\t"
1320
            "pand     "MANGLE(r16Mask)", %%mm0  \n\t"
1321 d604bab9 Michael Niedermayer
1322 2da0d70d Diego Biurrun
            "por                  %%mm3, %%mm1  \n\t"
1323
            "por                  %%mm1, %%mm0  \n\t"
1324 d604bab9 Michael Niedermayer
1325 2da0d70d Diego Biurrun
            MOVNTQ(%%mm0, (%4, %%REGa, 2))
1326 d604bab9 Michael Niedermayer
1327 2da0d70d Diego Biurrun
            "add $4, %%"REG_a"  \n\t"
1328
            "cmp %5, %%"REG_a"  \n\t"
1329
            " jb 1b             \n\t"
1330 d604bab9 Michael Niedermayer
1331 2da0d70d Diego Biurrun
            :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
1332
            "m" (yalpha1), "m" (uvalpha1)
1333
            : "%"REG_a
1334
            );
1335
            break;
1336 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1337 2da0d70d Diego Biurrun
        case PIX_FMT_BGR32:
1338 cf7d1c1a Michael Niedermayer
#ifndef HAVE_MMX
1339 2da0d70d Diego Biurrun
        case PIX_FMT_RGB32:
1340 cf7d1c1a Michael Niedermayer
#endif
1341 2da0d70d Diego Biurrun
            if (dstFormat==PIX_FMT_RGB32)
1342
            {
1343
                int i;
1344 df3c183a Michael Niedermayer
#ifdef WORDS_BIGENDIAN
1345 2da0d70d Diego Biurrun
                dest++;
1346
#endif
1347
                for (i=0;i<dstW;i++){
1348
                    // vertical linear interpolation && yuv2rgb in a single step:
1349
                    int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1350
                    int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1351
                    int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1352
                    dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1353
                    dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1354
                    dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1355
                    dest+= 4;
1356
                }
1357
            }
1358
            else if (dstFormat==PIX_FMT_BGR24)
1359
            {
1360
                int i;
1361
                for (i=0;i<dstW;i++){
1362
                    // vertical linear interpolation && yuv2rgb in a single step:
1363
                    int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1364
                    int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1365
                    int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1366
                    dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)];
1367
                    dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)];
1368
                    dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)];
1369
                    dest+= 3;
1370
                }
1371
            }
1372
            else if (dstFormat==PIX_FMT_BGR565)
1373
            {
1374
                int i;
1375
                for (i=0;i<dstW;i++){
1376
                    // vertical linear interpolation && yuv2rgb in a single step:
1377
                    int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1378
                    int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1379
                    int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1380
1381
                    ((uint16_t*)dest)[i] =
1382
                        clip_table16b[(Y + yuvtab_40cf[U]) >>13] |
1383
                        clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1384
                        clip_table16r[(Y + yuvtab_3343[V]) >>13];
1385
                }
1386
            }
1387
            else if (dstFormat==PIX_FMT_BGR555)
1388
            {
1389
                int i;
1390
                for (i=0;i<dstW;i++){
1391
                    // vertical linear interpolation && yuv2rgb in a single step:
1392
                    int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
1393
                    int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
1394
                    int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
1395
1396
                    ((uint16_t*)dest)[i] =
1397
                        clip_table15b[(Y + yuvtab_40cf[U]) >>13] |
1398
                        clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] |
1399
                        clip_table15r[(Y + yuvtab_3343[V]) >>13];
1400
                }
1401
            }
1402
        }//FULL_UV_IPOL
1403
    else
1404
    {
1405 cf7d1c1a Michael Niedermayer
#endif // if 0
1406 d604bab9 Michael Niedermayer
#ifdef HAVE_MMX
1407 2da0d70d Diego Biurrun
        switch(c->dstFormat)
1408
        {
1409
            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1410
            case PIX_FMT_RGB32:
1411
                asm volatile(
1412
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1413
                "mov        %4, %%"REG_b"               \n\t"
1414
                "push %%"REG_BP"                        \n\t"
1415
                YSCALEYUV2RGB(%%REGBP, %5)
1416
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1417
                "pop %%"REG_BP"                         \n\t"
1418
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1419
1420
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1421
                "a" (&c->redDither)
1422
                );
1423
                return;
1424
            case PIX_FMT_BGR24:
1425
                asm volatile(
1426
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1427
                "mov        %4, %%"REG_b"               \n\t"
1428
                "push %%"REG_BP"                        \n\t"
1429
                YSCALEYUV2RGB(%%REGBP, %5)
1430
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1431
                "pop %%"REG_BP"                         \n\t"
1432
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1433
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1434
                "a" (&c->redDither)
1435
                );
1436
                return;
1437
            case PIX_FMT_BGR555:
1438
                asm volatile(
1439
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1440
                "mov        %4, %%"REG_b"               \n\t"
1441
                "push %%"REG_BP"                        \n\t"
1442
                YSCALEYUV2RGB(%%REGBP, %5)
1443
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1444 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1445 2da0d70d Diego Biurrun
                "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1446
                "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1447
                "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1448
#endif
1449
1450
                WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1451
                "pop %%"REG_BP"                         \n\t"
1452
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1453
1454
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1455
                "a" (&c->redDither)
1456
                );
1457
                return;
1458
            case PIX_FMT_BGR565:
1459
                asm volatile(
1460
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1461
                "mov        %4, %%"REG_b"               \n\t"
1462
                "push %%"REG_BP"                        \n\t"
1463
                YSCALEYUV2RGB(%%REGBP, %5)
1464
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1465 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1466 2da0d70d Diego Biurrun
                "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1467
                "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1468
                "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1469
#endif
1470
1471
                WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1472
                "pop %%"REG_BP"                         \n\t"
1473
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1474
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1475
                "a" (&c->redDither)
1476
                );
1477
                return;
1478
            case PIX_FMT_YUYV422:
1479
                asm volatile(
1480
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1481
                "mov %4, %%"REG_b"                        \n\t"
1482
                "push %%"REG_BP"                        \n\t"
1483
                YSCALEYUV2PACKED(%%REGBP, %5)
1484
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1485
                "pop %%"REG_BP"                         \n\t"
1486
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1487
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1488
                "a" (&c->redDither)
1489
                );
1490
                return;
1491
            default: break;
1492
        }
1493 cf7d1c1a Michael Niedermayer
#endif //HAVE_MMX
1494 25593e29 Michael Niedermayer
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C)
1495 d604bab9 Michael Niedermayer
}
1496
1497
/**
1498
 * YV12 to RGB without scaling or interpolating
1499
 */
1500 25593e29 Michael Niedermayer
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1501 2da0d70d Diego Biurrun
                          uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1502 d604bab9 Michael Niedermayer
{
1503 2da0d70d Diego Biurrun
    const int yalpha1=0;
1504
    int i;
1505 6a4970ab Diego Biurrun
1506 2da0d70d Diego Biurrun
    uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
1507
    const int yalpha= 4096; //FIXME ...
1508 96034638 Michael Niedermayer
1509 2da0d70d Diego Biurrun
    if (flags&SWS_FULL_CHR_H_INT)
1510
    {
1511
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1512
        return;
1513
    }
1514 397c035e Michael Niedermayer
1515
#ifdef HAVE_MMX
1516 e5091488 Benoit Fouet
    if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1517 2da0d70d Diego Biurrun
    {
1518
        switch(dstFormat)
1519
        {
1520
        case PIX_FMT_RGB32:
1521
            asm volatile(
1522
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1523
            "mov        %4, %%"REG_b"               \n\t"
1524
            "push %%"REG_BP"                        \n\t"
1525
            YSCALEYUV2RGB1(%%REGBP, %5)
1526
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1527
            "pop %%"REG_BP"                         \n\t"
1528
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1529
1530
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1531
            "a" (&c->redDither)
1532
            );
1533
            return;
1534
        case PIX_FMT_BGR24:
1535
            asm volatile(
1536
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1537
            "mov        %4, %%"REG_b"               \n\t"
1538
            "push %%"REG_BP"                        \n\t"
1539
            YSCALEYUV2RGB1(%%REGBP, %5)
1540
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1541
            "pop %%"REG_BP"                         \n\t"
1542
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1543
1544
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1545
            "a" (&c->redDither)
1546
            );
1547
            return;
1548
        case PIX_FMT_BGR555:
1549
            asm volatile(
1550
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1551
            "mov        %4, %%"REG_b"               \n\t"
1552
            "push %%"REG_BP"                        \n\t"
1553
            YSCALEYUV2RGB1(%%REGBP, %5)
1554
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1555 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1556 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1557
            "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1558
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1559
#endif
1560
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1561
            "pop %%"REG_BP"                         \n\t"
1562
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1563
1564
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1565
            "a" (&c->redDither)
1566
            );
1567
            return;
1568
        case PIX_FMT_BGR565:
1569
            asm volatile(
1570
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1571
            "mov        %4, %%"REG_b"               \n\t"
1572
            "push %%"REG_BP"                        \n\t"
1573
            YSCALEYUV2RGB1(%%REGBP, %5)
1574
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1575 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1576 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1577
            "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1578
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1579
#endif
1580
1581
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1582
            "pop %%"REG_BP"                         \n\t"
1583
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1584
1585
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1586
            "a" (&c->redDither)
1587
            );
1588
            return;
1589
        case PIX_FMT_YUYV422:
1590
            asm volatile(
1591
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1592
            "mov        %4, %%"REG_b"               \n\t"
1593
            "push %%"REG_BP"                        \n\t"
1594
            YSCALEYUV2PACKED1(%%REGBP, %5)
1595
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1596
            "pop %%"REG_BP"                         \n\t"
1597
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1598
1599
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1600
            "a" (&c->redDither)
1601
            );
1602
            return;
1603
        }
1604
    }
1605
    else
1606
    {
1607
        switch(dstFormat)
1608
        {
1609
        case PIX_FMT_RGB32:
1610
            asm volatile(
1611
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1612
            "mov        %4, %%"REG_b"               \n\t"
1613
            "push %%"REG_BP"                        \n\t"
1614
            YSCALEYUV2RGB1b(%%REGBP, %5)
1615
            WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1616
            "pop %%"REG_BP"                         \n\t"
1617
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1618
1619
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1620
            "a" (&c->redDither)
1621
            );
1622
            return;
1623
        case PIX_FMT_BGR24:
1624
            asm volatile(
1625
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1626
            "mov        %4, %%"REG_b"               \n\t"
1627
            "push %%"REG_BP"                        \n\t"
1628
            YSCALEYUV2RGB1b(%%REGBP, %5)
1629
            WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1630
            "pop %%"REG_BP"                         \n\t"
1631
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1632
1633
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1634
            "a" (&c->redDither)
1635
            );
1636
            return;
1637
        case PIX_FMT_BGR555:
1638
            asm volatile(
1639
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1640
            "mov        %4, %%"REG_b"               \n\t"
1641
            "push %%"REG_BP"                        \n\t"
1642
            YSCALEYUV2RGB1b(%%REGBP, %5)
1643
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1644 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1645 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1646
            "paddusb "MANGLE(g5Dither)", %%mm4      \n\t"
1647
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1648
#endif
1649
            WRITEBGR15(%%REGb, 8280(%5), %%REGBP)
1650
            "pop %%"REG_BP"                         \n\t"
1651
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1652
1653
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1654
            "a" (&c->redDither)
1655
            );
1656
            return;
1657
        case PIX_FMT_BGR565:
1658
            asm volatile(
1659
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1660
            "mov        %4, %%"REG_b"               \n\t"
1661
            "push %%"REG_BP"                        \n\t"
1662
            YSCALEYUV2RGB1b(%%REGBP, %5)
1663
            /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1664 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1665 2da0d70d Diego Biurrun
            "paddusb "MANGLE(b5Dither)", %%mm2      \n\t"
1666
            "paddusb "MANGLE(g6Dither)", %%mm4      \n\t"
1667
            "paddusb "MANGLE(r5Dither)", %%mm5      \n\t"
1668
#endif
1669
1670
            WRITEBGR16(%%REGb, 8280(%5), %%REGBP)
1671
            "pop %%"REG_BP"                         \n\t"
1672
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1673
1674
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1675
            "a" (&c->redDither)
1676
            );
1677
            return;
1678
        case PIX_FMT_YUYV422:
1679
            asm volatile(
1680
            "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1681
            "mov        %4, %%"REG_b"               \n\t"
1682
            "push %%"REG_BP"                        \n\t"
1683
            YSCALEYUV2PACKED1b(%%REGBP, %5)
1684
            WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1685
            "pop %%"REG_BP"                         \n\t"
1686
            "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1687
1688
            :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1689
            "a" (&c->redDither)
1690
            );
1691
            return;
1692
        }
1693
    }
1694 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1695 e5091488 Benoit Fouet
    if (uvalpha < 2048)
1696 2da0d70d Diego Biurrun
    {
1697
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
1698
    }else{
1699
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
1700
    }
1701 d604bab9 Michael Niedermayer
}
1702
1703 6ff0ad6b Michael Niedermayer
//FIXME yuy2* can read up to 7 samples too many
1704
1705 7f526efd Reimar Döffinger
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
1706 1e621b18 Michael Niedermayer
{
1707 6ff0ad6b Michael Niedermayer
#ifdef HAVE_MMX
1708 2da0d70d Diego Biurrun
    asm volatile(
1709
    "movq "MANGLE(bm01010101)", %%mm2           \n\t"
1710
    "mov                    %0, %%"REG_a"       \n\t"
1711
    "1:                                         \n\t"
1712
    "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1713
    "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1714
    "pand                %%mm2, %%mm0           \n\t"
1715
    "pand                %%mm2, %%mm1           \n\t"
1716
    "packuswb            %%mm1, %%mm0           \n\t"
1717
    "movq                %%mm0, (%2, %%"REG_a") \n\t"
1718
    "add                    $8, %%"REG_a"       \n\t"
1719
    " js                    1b                  \n\t"
1720
    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1721
    : "%"REG_a
1722
    );
1723 1e621b18 Michael Niedermayer
#else
1724 2da0d70d Diego Biurrun
    int i;
1725
    for (i=0; i<width; i++)
1726
        dst[i]= src[2*i];
1727 1e621b18 Michael Niedermayer
#endif
1728
}
1729
1730 7f526efd Reimar Döffinger
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1731 1e621b18 Michael Niedermayer
{
1732 c2271987 Michael Niedermayer
#ifdef HAVE_MMX
1733 2da0d70d Diego Biurrun
    asm volatile(
1734
    "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1735
    "mov                    %0, %%"REG_a"       \n\t"
1736
    "1:                                         \n\t"
1737
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1738
    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1739
    "psrlw                  $8, %%mm0           \n\t"
1740
    "psrlw                  $8, %%mm1           \n\t"
1741
    "packuswb            %%mm1, %%mm0           \n\t"
1742
    "movq                %%mm0, %%mm1           \n\t"
1743
    "psrlw                  $8, %%mm0           \n\t"
1744
    "pand                %%mm4, %%mm1           \n\t"
1745
    "packuswb            %%mm0, %%mm0           \n\t"
1746
    "packuswb            %%mm1, %%mm1           \n\t"
1747
    "movd                %%mm0, (%3, %%"REG_a") \n\t"
1748
    "movd                %%mm1, (%2, %%"REG_a") \n\t"
1749
    "add                    $4, %%"REG_a"       \n\t"
1750
    " js                    1b                  \n\t"
1751
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1752
    : "%"REG_a
1753
    );
1754 1e621b18 Michael Niedermayer
#else
1755 2da0d70d Diego Biurrun
    int i;
1756
    for (i=0; i<width; i++)
1757
    {
1758
        dstU[i]= src1[4*i + 1];
1759
        dstV[i]= src1[4*i + 3];
1760
    }
1761
#endif
1762
    assert(src1 == src2);
1763 1e621b18 Michael Niedermayer
}
1764
1765 7322a67c Michael Niedermayer
//this is almost identical to the previous, and exists only because yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses
1766 7f526efd Reimar Döffinger
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
1767 7322a67c Michael Niedermayer
{
1768
#ifdef HAVE_MMX
1769 2da0d70d Diego Biurrun
    asm volatile(
1770
    "mov                  %0, %%"REG_a"         \n\t"
1771
    "1:                                         \n\t"
1772
    "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
1773
    "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
1774
    "psrlw                $8, %%mm0             \n\t"
1775
    "psrlw                $8, %%mm1             \n\t"
1776
    "packuswb          %%mm1, %%mm0             \n\t"
1777
    "movq              %%mm0, (%2, %%"REG_a")   \n\t"
1778
    "add                  $8, %%"REG_a"         \n\t"
1779
    " js                  1b                    \n\t"
1780
    : : "g" (-width), "r" (src+width*2), "r" (dst+width)
1781
    : "%"REG_a
1782
    );
1783 7322a67c Michael Niedermayer
#else
1784 2da0d70d Diego Biurrun
    int i;
1785
    for (i=0; i<width; i++)
1786
        dst[i]= src[2*i+1];
1787 7322a67c Michael Niedermayer
#endif
1788
}
1789
1790 7f526efd Reimar Döffinger
/**
 * Extract the chroma samples from one row of packed data laid out as
 * U, Y, V, Y bytes.
 *
 * For every output index i, dstU[i] receives source byte 4*i+0 and dstV[i]
 * receives source byte 4*i+2 (see the C fallback).
 *
 * @param dstU  destination for the U samples (width entries)
 * @param dstV  destination for the V samples (width entries)
 * @param src1  packed source row, 4 bytes consumed per output index
 * @param src2  never read; only asserted to be equal to src1
 * @param width number of U (and of V) samples to produce
 *
 * NOTE(review): the MMX loop produces 4 U and 4 V samples per iteration and
 * loads 16 source bytes each time, so for a width that is not a multiple of 4
 * it appears to touch memory past the nominal row ends -- confirm that the
 * callers' buffers are padded accordingly.
 */
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1791 7322a67c Michael Niedermayer
{
1792 c2271987 Michael Niedermayer
#ifdef HAVE_MMX
1793 2da0d70d Diego Biurrun
    /* REG_a counts from -width up to 0; the pointers passed in are biased by
     * +width (dst) and +width*4 (src), so indexing with the negative counter
     * walks the rows from their start. */
    asm volatile(
1794
    "movq "MANGLE(bm01010101)", %%mm4           \n\t" // presumably a "low byte of every word" mask; confirm against its definition
1795
    "mov                    %0, %%"REG_a"       \n\t"
1796
    "1:                                         \n\t"
1797
    "movq    (%1, %%"REG_a",4), %%mm0           \n\t" // 16 source bytes per iteration
1798
    "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1799
    "pand                %%mm4, %%mm0           \n\t"
1800
    "pand                %%mm4, %%mm1           \n\t"
1801
    "packuswb            %%mm1, %%mm0           \n\t" // masked words packed down to 8 bytes
1802
    "movq                %%mm0, %%mm1           \n\t"
1803
    "psrlw                  $8, %%mm0           \n\t" // mm0: odd bytes of the packed result
1804
    "pand                %%mm4, %%mm1           \n\t" // mm1: masked copy of the packed result
1805
    "packuswb            %%mm0, %%mm0           \n\t"
1806
    "packuswb            %%mm1, %%mm1           \n\t"
1807
    "movd                %%mm0, (%3, %%"REG_a") \n\t" // %3 is dstV+width
1808
    "movd                %%mm1, (%2, %%"REG_a") \n\t" // %2 is dstU+width
1809
    "add                    $4, %%"REG_a"       \n\t"
1810
    " js                    1b                  \n\t" // loop while the counter is still negative
1811
    : : "g" (-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1812
    : "%"REG_a
1813
    );
1814 7322a67c Michael Niedermayer
#else
1815 2da0d70d Diego Biurrun
    int i;
1816
    for (i=0; i<width; i++)
1817
    {
1818
        dstU[i]= src1[4*i + 0];
1819
        dstV[i]= src1[4*i + 2];
1820
    }
1821
#endif
1822
    /* src2 is not used by either path; both pointers must refer to the same row. */
    assert(src1 == src2);
1823 7322a67c Michael Niedermayer
}
1824
1825 1e621b18 Michael Niedermayer
static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
1826
{
1827 2da0d70d Diego Biurrun
    int i;
1828
    for (i=0; i<width; i++)
1829
    {
1830
        int b=  ((uint32_t*)src)[i]&0xFF;
1831
        int g= (((uint32_t*)src)[i]>>8)&0xFF;
1832
        int r= (((uint32_t*)src)[i]>>16)&0xFF;
1833
1834 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1835 2da0d70d Diego Biurrun
    }
1836 1e621b18 Michael Niedermayer
}
1837
1838
static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
1839
{
1840 2da0d70d Diego Biurrun
    int i;
1841
    assert(src1 == src2);
1842
    for (i=0; i<width; i++)
1843
    {
1844
        const int a= ((uint32_t*)src1)[2*i+0];
1845
        const int e= ((uint32_t*)src1)[2*i+1];
1846
        const int l= (a&0xFF00FF) + (e&0xFF00FF);
1847
        const int h= (a&0x00FF00) + (e&0x00FF00);
1848
        const int b=  l&0x3FF;
1849
        const int g=  h>>8;
1850
        const int r=  l>>16;
1851
1852
        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
1853
        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
1854
    }
1855 1e621b18 Michael Niedermayer
}
1856
1857 7f526efd Reimar Döffinger
/**
 * Compute 8-bit luma for one row of packed 24-bit pixels stored as B, G, R
 * bytes.
 *
 * The C fallback evaluates, per pixel,
 *   (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT
 * where 33<<(RGB2YUV_SHIFT-1) is 16.5 in the fixed-point scale (offset 16
 * plus 0.5 for rounding).
 *
 * The MMX path handles 8 pixels (24 source bytes, 8 output bytes) per loop
 * iteration using the tables ff_bgr2YCoeff, ff_w1111 and ff_bgr2YOffset,
 * which are defined outside this function.
 *
 * @param dst   output row, one byte per pixel
 * @param src   input row, three bytes per pixel
 * @param width number of pixels to convert
 *
 * NOTE(review): the MMX loop always stores 8 bytes per iteration and its last
 * movd (offset 21) reads 4 bytes, i.e. one byte past the 24-byte group, so it
 * appears to rely on padded buffers when width is not a multiple of 8 --
 * confirm with the callers.
 */
static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
1858 1e621b18 Michael Niedermayer
{
1859 ac6a2e45 Michael Niedermayer
#ifdef HAVE_MMX
1860 2da0d70d Diego Biurrun
    /* REG_a = -width is the output index (counts up to 0); REG_d = 3*REG_a is
     * the matching byte offset into src. Both base pointers are biased by the
     * row length so the negative indices start at the beginning of the rows. */
    asm volatile(
1861
    "mov                        %2, %%"REG_a"   \n\t"
1862 5802683a Reimar Döffinger
    "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
1863
    "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
1864 2da0d70d Diego Biurrun
    "pxor                    %%mm7, %%mm7       \n\t" // mm7 = 0, used to widen bytes to words
1865
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t" // REG_d = 3 * REG_a
1866
    ASMALIGN(4)
1867
    "1:                                         \n\t"
1868
    PREFETCH" 64(%0, %%"REG_d")                 \n\t"
1869
    // pixels 0..3 of this group: load 4 bytes at each pixel start, widen, multiply-add with mm6
    "movd          (%0, %%"REG_d"), %%mm0       \n\t"
1870
    "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
1871
    "punpcklbw               %%mm7, %%mm0       \n\t"
1872
    "punpcklbw               %%mm7, %%mm1       \n\t"
1873
    "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
1874
    "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
1875
    "punpcklbw               %%mm7, %%mm2       \n\t"
1876
    "punpcklbw               %%mm7, %%mm3       \n\t"
1877
    "pmaddwd                 %%mm6, %%mm0       \n\t"
1878
    "pmaddwd                 %%mm6, %%mm1       \n\t"
1879
    "pmaddwd                 %%mm6, %%mm2       \n\t"
1880
    "pmaddwd                 %%mm6, %%mm3       \n\t"
1881 ac6a2e45 Michael Niedermayer
    /* NOTE(review): the extra >>8 is skipped when FAST_BGR2YV12 is defined;
     * presumably the coefficient tables are scaled differently in that
     * configuration -- confirm against their definitions. */
#ifndef FAST_BGR2YV12
1882 2da0d70d Diego Biurrun
    "psrad                      $8, %%mm0       \n\t"
1883
    "psrad                      $8, %%mm1       \n\t"
1884
    "psrad                      $8, %%mm2       \n\t"
1885
    "psrad                      $8, %%mm3       \n\t"
1886
#endif
1887
    "packssdw                %%mm1, %%mm0       \n\t"
1888
    "packssdw                %%mm3, %%mm2       \n\t"
1889
    "pmaddwd                 %%mm5, %%mm0       \n\t" // presumably sums the two partial products per pixel (ff_w1111 looks like all-ones words; confirm)
1890
    "pmaddwd                 %%mm5, %%mm2       \n\t"
1891
    "packssdw                %%mm2, %%mm0       \n\t"
1892
    "psraw                      $7, %%mm0       \n\t" // mm0: four 16-bit results for pixels 0..3
1893
1894
    // pixels 4..7 of this group, same sequence with mm4 as the accumulator
    "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
1895
    "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
1896
    "punpcklbw               %%mm7, %%mm4       \n\t"
1897
    "punpcklbw               %%mm7, %%mm1       \n\t"
1898
    "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
1899
    "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
1900
    "punpcklbw               %%mm7, %%mm2       \n\t"
1901
    "punpcklbw               %%mm7, %%mm3       \n\t"
1902
    "pmaddwd                 %%mm6, %%mm4       \n\t"
1903
    "pmaddwd                 %%mm6, %%mm1       \n\t"
1904
    "pmaddwd                 %%mm6, %%mm2       \n\t"
1905
    "pmaddwd                 %%mm6, %%mm3       \n\t"
1906 ac6a2e45 Michael Niedermayer
#ifndef FAST_BGR2YV12
1907 2da0d70d Diego Biurrun
    "psrad                      $8, %%mm4       \n\t"
1908
    "psrad                      $8, %%mm1       \n\t"
1909
    "psrad                      $8, %%mm2       \n\t"
1910
    "psrad                      $8, %%mm3       \n\t"
1911
#endif
1912
    "packssdw                %%mm1, %%mm4       \n\t"
1913
    "packssdw                %%mm3, %%mm2       \n\t"
1914
    "pmaddwd                 %%mm5, %%mm4       \n\t"
1915
    "pmaddwd                 %%mm5, %%mm2       \n\t"
1916
    "add                       $24, %%"REG_d"   \n\t" // advance source offset by 8 pixels * 3 bytes
1917
    "packssdw                %%mm2, %%mm4       \n\t"
1918
    "psraw                      $7, %%mm4       \n\t"
1919
1920
    "packuswb                %%mm4, %%mm0       \n\t" // 8 results narrowed to unsigned bytes
1921 5802683a Reimar Döffinger
    "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t" // presumably the luma offset added by the C path; confirm
1922 2da0d70d Diego Biurrun
1923
    "movq                    %%mm0, (%1, %%"REG_a") \n\t"
1924
    "add                        $8, %%"REG_a"   \n\t"
1925
    " js                        1b              \n\t" // loop while the output index is still negative
1926
    : : "r" (src+width*3), "r" (dst+width), "g" (-width)
1927
    : "%"REG_a, "%"REG_d
1928
    );
1929 1e621b18 Michael Niedermayer
#else
1930 2da0d70d Diego Biurrun
    int i;
1931
    for (i=0; i<width; i++)
1932
    {
1933
        int b= src[i*3+0];
1934
        int g= src[i*3+1];
1935
        int r= src[i*3+2];
1936 1e621b18 Michael Niedermayer
1937 e5091488 Benoit Fouet
        /* 33<<(RGB2YUV_SHIFT-1) == 16.5 in fixed point: +16 offset, +0.5 rounding */
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1938 2da0d70d Diego Biurrun
    }
1939 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
1940 1e621b18 Michael Niedermayer
}
1941
1942 7f526efd Reimar Döffinger
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
1943 1e621b18 Michael Niedermayer
{
1944 4342fc14 Michael Niedermayer
#ifdef HAVE_MMX
1945 2da0d70d Diego Biurrun
    asm volatile(
1946
    "mov                        %3, %%"REG_a"   \n\t"
1947 5802683a Reimar Döffinger
    "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
1948
    "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
1949 2da0d70d Diego Biurrun
    "pxor                    %%mm7, %%mm7       \n\t"
1950
    "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
1951
    "add                 %%"REG_d", %%"REG_d"   \n\t"
1952
    ASMALIGN(4)
1953
    "1:                                         \n\t"
1954
    PREFETCH" 64(%0, %%"REG_d")                 \n\t"
1955 4342fc14 Michael Niedermayer
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
1956 2da0d70d Diego Biurrun
    "movq          (%0, %%"REG_d"), %%mm0       \n\t"
1957
    "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
1958
    "movq                    %%mm0, %%mm1       \n\t"
1959
    "movq                    %%mm2, %%mm3       \n\t"
1960
    "psrlq                     $24, %%mm0       \n\t"
1961
    "psrlq                     $24, %%mm2       \n\t"
1962
    PAVGB(%%mm1, %%mm0)
1963
    PAVGB(%%mm3, %%mm2)
1964
    "punpcklbw               %%mm7, %%mm0       \n\t"
1965
    "punpcklbw               %%mm7, %%mm2       \n\t"
1966 4342fc14 Michael Niedermayer
#else
1967 2da0d70d Diego Biurrun
    "movd          (%0, %%"REG_d"), %%mm0       \n\t"
1968
    "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
1969
    "punpcklbw               %%mm7, %%mm0       \n\t"
1970
    "punpcklbw               %%mm7, %%mm2       \n\t"
1971
    "paddw                   %%mm2, %%mm0       \n\t"
1972
    "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
1973
    "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
1974
    "punpcklbw               %%mm7, %%mm4       \n\t"
1975
    "punpcklbw               %%mm7, %%mm2       \n\t"
1976
    "paddw                   %%mm4, %%mm2       \n\t"
1977
    "psrlw                      $1, %%mm0       \n\t"
1978
    "psrlw                      $1, %%mm2       \n\t"
1979
#endif
1980 5802683a Reimar Döffinger
    "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
1981
    "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
1982 2da0d70d Diego Biurrun
1983
    "pmaddwd                 %%mm0, %%mm1       \n\t"
1984
    "pmaddwd                 %%mm2, %%mm3       \n\t"
1985
    "pmaddwd                 %%mm6, %%mm0       \n\t"
1986
    "pmaddwd                 %%mm6, %%mm2       \n\t"
1987 4342fc14 Michael Niedermayer
#ifndef FAST_BGR2YV12
1988 2da0d70d Diego Biurrun
    "psrad                      $8, %%mm0       \n\t"
1989
    "psrad                      $8, %%mm1       \n\t"
1990
    "psrad                      $8, %%mm2       \n\t"
1991
    "psrad                      $8, %%mm3       \n\t"
1992
#endif
1993
    "packssdw                %%mm2, %%mm0       \n\t"
1994
    "packssdw                %%mm3, %%mm1       \n\t"
1995
    "pmaddwd                 %%mm5, %%mm0       \n\t"
1996
    "pmaddwd                 %%mm5, %%mm1       \n\t"
1997
    "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
1998
    "psraw                      $7, %%mm0       \n\t"
1999 4342fc14 Michael Niedermayer
2000
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
2001 2da0d70d Diego Biurrun
    "movq       12(%0, %%"REG_d"), %%mm4       \n\t"
2002
    "movq       18(%0, %%"REG_d"), %%mm2       \n\t"
2003
    "movq                   %%mm4, %%mm1       \n\t"
2004
    "movq                   %%mm2, %%mm3       \n\t"
2005
    "psrlq                    $24, %%mm4       \n\t"
2006
    "psrlq                    $24, %%mm2       \n\t"
2007
    PAVGB(%%mm1, %%mm4)
2008
    PAVGB(%%mm3, %%mm2)
2009
    "punpcklbw              %%mm7, %%mm4       \n\t"
2010
    "punpcklbw              %%mm7, %%mm2       \n\t"
2011 4342fc14 Michael Niedermayer
#else
2012 2da0d70d Diego Biurrun
    "movd       12(%0, %%"REG_d"), %%mm4       \n\t"
2013
    "movd       15(%0, %%"REG_d"), %%mm2       \n\t"
2014
    "punpcklbw              %%mm7, %%mm4       \n\t"
2015
    "punpcklbw              %%mm7, %%mm2       \n\t"
2016
    "paddw                  %%mm2, %%mm4       \n\t"
2017
    "movd       18(%0, %%"REG_d"), %%mm5       \n\t"
2018
    "movd       21(%0, %%"REG_d"), %%mm2       \n\t"
2019
    "punpcklbw              %%mm7, %%mm5       \n\t"
2020
    "punpcklbw              %%mm7, %%mm2       \n\t"
2021
    "paddw                  %%mm5, %%mm2       \n\t"
2022 5802683a Reimar Döffinger
    "movq      "MANGLE(ff_w1111)", %%mm5       \n\t"
2023 2da0d70d Diego Biurrun
    "psrlw                     $2, %%mm4       \n\t"
2024
    "psrlw                     $2, %%mm2       \n\t"
2025
#endif
2026 5802683a Reimar Döffinger
    "movq "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
2027
    "movq "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
2028 2da0d70d Diego Biurrun
2029
    "pmaddwd                %%mm4, %%mm1       \n\t"
2030
    "pmaddwd                %%mm2, %%mm3       \n\t"
2031
    "pmaddwd                %%mm6, %%mm4       \n\t"
2032
    "pmaddwd                %%mm6, %%mm2       \n\t"
2033 4342fc14 Michael Niedermayer
#ifndef FAST_BGR2YV12
2034 2da0d70d Diego Biurrun
    "psrad                     $8, %%mm4       \n\t"
2035
    "psrad                     $8, %%mm1       \n\t"
2036
    "psrad                     $8, %%mm2       \n\t"
2037
    "psrad                     $8, %%mm3       \n\t"
2038
#endif
2039
    "packssdw               %%mm2, %%mm4       \n\t"
2040
    "packssdw               %%mm3, %%mm1       \n\t"
2041
    "pmaddwd                %%mm5, %%mm4       \n\t"
2042
    "pmaddwd                %%mm5, %%mm1       \n\t"
2043
    "add                      $24, %%"REG_d"   \n\t"
2044
    "packssdw               %%mm1, %%mm4       \n\t" // V3 V2 U3 U2
2045
    "psraw                     $7, %%mm4       \n\t"
2046
2047
    "movq                   %%mm0, %%mm1       \n\t"
2048
    "punpckldq              %%mm4, %%mm0       \n\t"
2049
    "punpckhdq              %%mm4, %%mm1       \n\t"
2050
    "packsswb               %%mm1, %%mm0       \n\t"
2051 5802683a Reimar Döffinger
    "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0    \n\t"
2052 2da0d70d Diego Biurrun
2053
    "movd                   %%mm0, (%1, %%"REG_a")  \n\t"
2054
    "punpckhdq              %%mm0, %%mm0            \n\t"
2055
    "movd                   %%mm0, (%2, %%"REG_a")  \n\t"
2056
    "add                       $4, %%"REG_a"        \n\t"
2057
    " js                       1b                   \n\t"
2058
    : : "r" (src1+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width)
2059
    : "%"REG_a, "%"REG_d
2060
    );
2061 1e621b18 Michael Niedermayer
#else
2062 2da0d70d Diego Biurrun
    int i;
2063
    for (i=0; i<width; i++)
2064
    {
2065
        int b= src1[6*i + 0] + src1[6*i + 3];
2066
        int g= src1[6*i + 1] + src1[6*i + 4];
2067
        int r= src1[6*i + 2] + src1[6*i + 5];
2068
2069
        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
2070
        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
2071
    }
2072 bc279024 Diego Biurrun
#endif /* HAVE_MMX */
2073 2da0d70d Diego Biurrun
    assert(src1 == src2);
2074 1e621b18 Michael Niedermayer
}
2075
2076 a680708d Diego Biurrun
static inline void RENAME(rgb16ToY)(uint8_t *dst, uint8_t *src, int width)
2077 6af250ea Michael Niedermayer
{
2078 2da0d70d Diego Biurrun
    int i;
2079
    for (i=0; i<width; i++)
2080
    {
2081
        int d= ((uint16_t*)src)[i];
2082
        int b= d&0x1F;
2083
        int g= (d>>5)&0x3F;
2084
        int r= (d>>11)&0x1F;
2085
2086
        dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16;
2087
    }
2088 6af250ea Michael Niedermayer
}
2089
2090 a680708d Diego Biurrun
static inline void RENAME(rgb16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
2091 6af250ea Michael Niedermayer
{
2092 2da0d70d Diego Biurrun
    int i;
2093
    assert(src1==src2);
2094
    for (i=0; i<width; i++)
2095
    {
2096
        int d0= ((uint32_t*)src1)[i];
2097
2098
        int dl= (d0&0x07E0F81F);
2099
        int dh= ((d0>>5)&0x07C0F83F);
2100
2101
        int dh2= (dh>>11) + (dh<<21);
2102
        int d= dh2 + dl;
2103
2104
        int b= d&0x7F;
2105
        int r= (d>>11)&0x7F;
2106
        int g= d>>21;
2107
        dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
2108
        dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+1-2)) + 128;
2109
    }
2110 6af250ea Michael Niedermayer
}
2111
2112 a680708d Diego Biurrun
static inline void RENAME(rgb15ToY)(uint8_t *dst, uint8_t *src, int width)
2113 b72034dd Michael Niedermayer
{
2114 2da0d70d Diego Biurrun
    int i;
2115
    for (i=0; i<width; i++)
2116
    {
2117
        int d= ((uint16_t*)src)[i];
2118
        int b= d&0x1F;
2119
        int g= (d>>5)&0x1F;
2120
        int r= (d>>10)&0x1F;
2121
2122
        dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16;
2123
    }
2124 b72034dd Michael Niedermayer
}
2125
2126 a680708d Diego Biurrun
static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
2127 b72034dd Michael Niedermayer
{
2128 2da0d70d Diego Biurrun
    int i;
2129
    assert(src1==src2);
2130
    for (i=0; i<width; i++)
2131
    {
2132
        int d0= ((uint32_t*)src1)[i];
2133
2134
        int dl= (d0&0x03E07C1F);
2135
        int dh= ((d0>>5)&0x03E0F81F);
2136
2137
        int dh2= (dh>>11) + (dh<<21);
2138
        int d= dh2 + dl;
2139
2140
        int b= d&0x7F;
2141
        int r= (d>>10)&0x7F;
2142
        int g= d>>21;
2143
        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
2144
        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1-3)) + 128;
2145
    }
2146 b72034dd Michael Niedermayer
}
2147
2148
2149 a861d4d7 Michael Niedermayer
static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width)
2150
{
2151 2da0d70d Diego Biurrun
    int i;
2152
    for (i=0; i<width; i++)
2153
    {
2154
        int r=  ((uint32_t*)src)[i]&0xFF;
2155
        int g= (((uint32_t*)src)[i]>>8)&0xFF;
2156
        int b= (((uint32_t*)src)[i]>>16)&0xFF;
2157
2158 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2159 2da0d70d Diego Biurrun
    }
2160 a861d4d7 Michael Niedermayer
}
2161
2162
static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
2163
{
2164 2da0d70d Diego Biurrun
    int i;
2165
    assert(src1==src2);
2166
    for (i=0; i<width; i++)
2167
    {
2168
        const int a= ((uint32_t*)src1)[2*i+0];
2169
        const int e= ((uint32_t*)src1)[2*i+1];
2170
        const int l= (a&0xFF00FF) + (e&0xFF00FF);
2171
        const int h= (a&0x00FF00) + (e&0x00FF00);
2172
        const int r=  l&0x3FF;
2173
        const int g=  h>>8;
2174
        const int b=  l>>16;
2175
2176
        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
2177
        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
2178
    }
2179 a861d4d7 Michael Niedermayer
}
2180
2181
static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width)
2182
{
2183 2da0d70d Diego Biurrun
    int i;
2184
    for (i=0; i<width; i++)
2185
    {
2186
        int r= src[i*3+0];
2187
        int g= src[i*3+1];
2188
        int b= src[i*3+2];
2189
2190 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2191 2da0d70d Diego Biurrun
    }
2192 a861d4d7 Michael Niedermayer
}
2193
2194
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
2195
{
2196 2da0d70d Diego Biurrun
    int i;
2197
    assert(src1==src2);
2198
    for (i=0; i<width; i++)
2199
    {
2200
        int r= src1[6*i + 0] + src1[6*i + 3];
2201
        int g= src1[6*i + 1] + src1[6*i + 4];
2202
        int b= src1[6*i + 2] + src1[6*i + 5];
2203
2204
        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
2205
        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
2206
    }
2207 a861d4d7 Michael Niedermayer
}
2208