Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale_template.c @ e77ddfa2

History | View | Annotate | Download (139 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 8a322796 Diego Biurrun
 * The C code (not assembly, MMX, ...) of this file can be used
21
 * under the LGPL license.
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 6e1c66bc Aurelien Jacobs
/* Remove any earlier definitions of the helper macros; they are redefined
 * below according to the COMPILE_TEMPLATE_* settings. */
#undef REAL_MOVNTQ
25 541c4eb9 Michael Niedermayer
#undef MOVNTQ
26 7d7f78b5 Michael Niedermayer
#undef PAVGB
27 48a05cec Michael Niedermayer
#undef PREFETCH
28
#undef PREFETCHW
29
30 94daf2e9 Ramiro Polla
/* Choose the prefetch mnemonics spliced into the asm strings:
 * 3DNow! builds use prefetch/prefetchw, MMX2 builds use prefetchnta/prefetcht0.
 * Otherwise both expand to " # nop" (presumably parsed by the assembler as a
 * comment, so no instruction is emitted -- confirm for the target assembler). */
#if COMPILE_TEMPLATE_AMD3DNOW
31 48a05cec Michael Niedermayer
#define PREFETCH  "prefetch"
32
#define PREFETCHW "prefetchw"
33 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_MMX2
34 48a05cec Michael Niedermayer
#define PREFETCH "prefetchnta"
35
#define PREFETCHW "prefetcht0"
36
#else
37 d904b5fc Nigel Pearson
#define PREFETCH  " # nop"
38
#define PREFETCHW " # nop"
39 48a05cec Michael Niedermayer
#endif
40
41 94daf2e9 Ramiro Polla
/* PAVGB(a,b): asm text for a packed byte average of operands a and b, using
 * "pavgb" on MMX2 and "pavgusb" on 3DNow!.  The macro is left undefined when
 * neither variant is being compiled. */
#if COMPILE_TEMPLATE_MMX2
42 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
43 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_AMD3DNOW
44 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
45
#endif
46 d3f41512 Michael Niedermayer
47 94daf2e9 Ramiro Polla
/* REAL_MOVNTQ(a,b): asm text for a 64-bit store of a to b; "movntq" on MMX2,
 * plain "movq" otherwise.
 * MOVNTQ(a,b) adds one level of macro expansion so that macros appearing in
 * the arguments are expanded before REAL_MOVNTQ stringifies them with #. */
#if COMPILE_TEMPLATE_MMX2
48 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
49 d604bab9 Michael Niedermayer
#else
50 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
51 d604bab9 Michael Niedermayer
#endif
52 6e1c66bc Aurelien Jacobs
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
53 d604bab9 Michael Niedermayer
54 94daf2e9 Ramiro Polla
/* Pull in the AltiVec template when COMPILE_TEMPLATE_ALTIVEC is set. */
#if COMPILE_TEMPLATE_ALTIVEC
55 009d2d74 Diego Biurrun
#include "ppc/swscale_altivec_template.c"
56 a2faa401 Romain Dolbeau
#endif
57
58 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Complete asm statement that applies a multi-tap filter and writes 8 output
 * bytes per outer iteration.
 *
 * Operands: %0 = &c->redDither (a variable `c` must be in scope; all
 * context fields are addressed relative to it), %1 = dest, %2 = width.
 *   x      - string spliced in as the displacement of every source load
 *   offset - string displacement (from %0) of the filter list
 *
 * The filter list is walked in 16-byte steps: each entry holds a source
 * pointer at +0 and a coefficient quadword at +8; a null source pointer ends
 * the list.  For every entry two quadwords of 16-bit samples are multiplied
 * (pmulhw) by the coefficient and added to accumulators that start at the
 * value stored at VROUNDER_OFFSET(%0).  The sums are shifted right by 3,
 * packed to unsigned bytes and stored at dest + REG_a; REG_a then advances by
 * 8 until it is no longer below width.
 *
 * Declared clobbers: REG_a, REG_d, REG_S.  mm0 and mm2-mm5 are also
 * overwritten but are not listed.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
59 7ad6469e Diego Pettenò
    __asm__ volatile(\
60 c255994b Ramiro Polla
        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
61
        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
62
        "movq                             %%mm3, %%mm4      \n\t"\
63
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
64
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
65
        ASMALIGN(4) /* FIXME Unroll? */\
66
        "1:                                                 \n\t"\
67
        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
68
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
69
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
70
        "add                                $16, %%"REG_d"  \n\t"\
71
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
72
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
73
        "pmulhw                           %%mm0, %%mm2      \n\t"\
74
        "pmulhw                           %%mm0, %%mm5      \n\t"\
75
        "paddw                            %%mm2, %%mm3      \n\t"\
76
        "paddw                            %%mm5, %%mm4      \n\t"\
77
        " jnz                                1b             \n\t"\
78
        "psraw                               $3, %%mm3      \n\t"\
79
        "psraw                               $3, %%mm4      \n\t"\
80
        "packuswb                         %%mm4, %%mm3      \n\t"\
81
        MOVNTQ(%%mm3, (%1, %%REGa))\
82
        "add                                 $8, %%"REG_a"  \n\t"\
83
        "cmp                                 %2, %%"REG_a"  \n\t"\
84
        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
85
        "movq                             %%mm3, %%mm4      \n\t"\
86
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
87
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
88
        "jb                                  1b             \n\t"\
89
        :: "r" (&c->redDither),\
90
        "r" (dest), "g" (width)\
91
        : "%"REG_a, "%"REG_d, "%"REG_S\
92 2da0d70d Diego Biurrun
    );
93 bca11e75 Michael Niedermayer
94
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same operands and output as YSCALEYUV2YV12X (%0 = &c->redDither, %1 = dest,
 * %2 = width; x and offset are string displacements), but two filter taps are
 * consumed per inner iteration and the products are accumulated in 32 bits.
 *
 * Each filter entry is APCK_SIZE bytes: first source pointer at +0, second
 * source pointer at APCK_PTR2, coefficient quadword at APCK_COEF.  A null
 * pointer at the start of the next entry ends the list.  Words from the two
 * sources are interleaved (punpcklwd/punpckhwd) and fed to pmaddwd; the dword
 * sums live in mm4-mm7.  After the list the sums are shifted right by 16,
 * packed to words, offset by the value at VROUNDER_OFFSET(%0), shifted right
 * by 3, packed to unsigned bytes and stored at dest + REG_a.
 *
 * Declared clobbers: REG_a, REG_d, REG_S.  mm0-mm7 are overwritten but are
 * not listed.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
95 7ad6469e Diego Pettenò
    __asm__ volatile(\
96 c255994b Ramiro Polla
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
97
        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
98
        "pxor                             %%mm4, %%mm4      \n\t"\
99
        "pxor                             %%mm5, %%mm5      \n\t"\
100
        "pxor                             %%mm6, %%mm6      \n\t"\
101
        "pxor                             %%mm7, %%mm7      \n\t"\
102
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
103
        ASMALIGN(4) \
104
        "1:                                                 \n\t"\
105
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
106
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
107
        "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
108
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
109
        "movq                             %%mm0, %%mm3      \n\t"\
110
        "punpcklwd                        %%mm1, %%mm0      \n\t"\
111
        "punpckhwd                        %%mm1, %%mm3      \n\t"\
112
        "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
113
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
114
        "pmaddwd                          %%mm1, %%mm3      \n\t"\
115
        "paddd                            %%mm0, %%mm4      \n\t"\
116
        "paddd                            %%mm3, %%mm5      \n\t"\
117
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
118
        "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
119
        "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
120
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
121
        "movq                             %%mm2, %%mm0      \n\t"\
122
        "punpcklwd                        %%mm3, %%mm2      \n\t"\
123
        "punpckhwd                        %%mm3, %%mm0      \n\t"\
124
        "pmaddwd                          %%mm1, %%mm2      \n\t"\
125
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
126
        "paddd                            %%mm2, %%mm6      \n\t"\
127
        "paddd                            %%mm0, %%mm7      \n\t"\
128
        " jnz                                1b             \n\t"\
129
        "psrad                              $16, %%mm4      \n\t"\
130
        "psrad                              $16, %%mm5      \n\t"\
131
        "psrad                              $16, %%mm6      \n\t"\
132
        "psrad                              $16, %%mm7      \n\t"\
133
        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
134
        "packssdw                         %%mm5, %%mm4      \n\t"\
135
        "packssdw                         %%mm7, %%mm6      \n\t"\
136
        "paddw                            %%mm0, %%mm4      \n\t"\
137
        "paddw                            %%mm0, %%mm6      \n\t"\
138
        "psraw                               $3, %%mm4      \n\t"\
139
        "psraw                               $3, %%mm6      \n\t"\
140
        "packuswb                         %%mm6, %%mm4      \n\t"\
141
        MOVNTQ(%%mm4, (%1, %%REGa))\
142
        "add                                 $8, %%"REG_a"  \n\t"\
143
        "cmp                                 %2, %%"REG_a"  \n\t"\
144
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
145
        "pxor                             %%mm4, %%mm4      \n\t"\
146
        "pxor                             %%mm5, %%mm5      \n\t"\
147
        "pxor                             %%mm6, %%mm6      \n\t"\
148
        "pxor                             %%mm7, %%mm7      \n\t"\
149
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
150
        "jb                                  1b             \n\t"\
151
        :: "r" (&c->redDither),\
152
        "r" (dest), "g" (width)\
153
        : "%"REG_a, "%"REG_d, "%"REG_S\
154 2da0d70d Diego Biurrun
    );
155 c1b0bfb4 Michael Niedermayer
156
/*
 * YSCALEYUV2YV121
 *
 * Asm string fragment (no __asm__ wrapper or operand list; the user supplies
 * them).  Loads operand %2 into REG_a, then per iteration reads two quadwords
 * of 16-bit samples from (%0 + 2*REG_a), shifts each word right by 7, packs
 * to unsigned bytes and stores 8 bytes at (%1 + REG_a).  REG_a advances by 8
 * and the loop repeats while the add does not carry.
 * NOTE(review): the jnc exit suggests %2 is a negative count with %0/%1
 * pointing past the end of the buffers -- confirm at the use site.
 */
#define YSCALEYUV2YV121 \
157 2da0d70d Diego Biurrun
    "mov %2, %%"REG_a"                    \n\t"\
158
    ASMALIGN(4) /* FIXME Unroll? */\
159
    "1:                                   \n\t"\
160
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
161
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
162
    "psraw                 $7, %%mm0      \n\t"\
163
    "psraw                 $7, %%mm1      \n\t"\
164
    "packuswb           %%mm1, %%mm0      \n\t"\
165
    MOVNTQ(%%mm0, (%1, %%REGa))\
166
    "add                   $8, %%"REG_a"  \n\t"\
167
    "jnc                   1b             \n\t"
168 c1b0bfb4 Michael Niedermayer
169 bf2bdde6 Michael Niedermayer
/*
 * YSCALEYUV2YV121_ACCURATE
 *
 * Rounding variant of YSCALEYUV2YV121 (same operands %0, %1, %2, same loop).
 * Before the loop mm7 is set to 64 in every word (all-ones -> >>15 gives 1
 * -> <<6 gives 64); that value is added with signed saturation to each
 * sample before the right shift by 7, i.e. half of the divisor is added
 * first.
 */
#define YSCALEYUV2YV121_ACCURATE \
170
    "mov %2, %%"REG_a"                    \n\t"\
171
    "pcmpeqw %%mm7, %%mm7                 \n\t"\
172
    "psrlw                 $15, %%mm7     \n\t"\
173
    "psllw                  $6, %%mm7     \n\t"\
174
    ASMALIGN(4) /* FIXME Unroll? */\
175
    "1:                                   \n\t"\
176
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
177
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
178 33a67bd6 Michael Niedermayer
    "paddsw             %%mm7, %%mm0      \n\t"\
179
    "paddsw             %%mm7, %%mm1      \n\t"\
180 bf2bdde6 Michael Niedermayer
    "psraw                 $7, %%mm0      \n\t"\
181
    "psraw                 $7, %%mm1      \n\t"\
182
    "packuswb           %%mm1, %%mm0      \n\t"\
183
    MOVNTQ(%%mm0, (%1, %%REGa))\
184
    "add                   $8, %%"REG_a"  \n\t"\
185
    "jnc                   1b             \n\t"
186
187 c1b0bfb4 Michael Niedermayer
/*
188 2da0d70d Diego Biurrun
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
189
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
190
       "r" (dest), "m" (dstW),
191
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
192
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
193 c1b0bfb4 Michael Niedermayer
*/
194 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_UV
 *
 * Opens an `__asm__ volatile(` statement and leaves it unterminated; more asm
 * text and finally YSCALEYUV2PACKEDX_END (operands and closing parenthesis)
 * must follow.  Operand %0 is the base for all context offsets.
 *
 * Zeroes REG_a, defines outer label 1, then walks the filter list at
 * CHR_MMX_FILTER_OFFSET(%0) in 16-byte entries (source pointer at +0,
 * coefficient at +8, null pointer ends the list).  For each entry one
 * quadword at (src + REG_a) and one at VOF(src + REG_a) are multiplied
 * (pmulhw) by the coefficient and accumulated into mm3 and mm4 respectively,
 * both of which start at the value stored at VROUNDER_OFFSET(%0).
 * The branch back to label 1 is not part of this macro.
 */
#define YSCALEYUV2PACKEDX_UV \
195 7ad6469e Diego Pettenò
    __asm__ volatile(\
196 c255994b Ramiro Polla
        "xor                   %%"REG_a", %%"REG_a"     \n\t"\
197
        ASMALIGN(4)\
198
        "nop                                            \n\t"\
199
        "1:                                             \n\t"\
200
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
201
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
202
        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
203
        "movq                      %%mm3, %%mm4         \n\t"\
204
        ASMALIGN(4)\
205
        "2:                                             \n\t"\
206
        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
207
        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
208
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
209
        "add                         $16, %%"REG_d"     \n\t"\
210
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
211
        "pmulhw                    %%mm0, %%mm2         \n\t"\
212
        "pmulhw                    %%mm0, %%mm5         \n\t"\
213
        "paddw                     %%mm2, %%mm3         \n\t"\
214
        "paddw                     %%mm5, %%mm4         \n\t"\
215
        "test                  %%"REG_S", %%"REG_S"     \n\t"\
216
        " jnz                         2b                \n\t"\
217 df57ab14 Cédric Schieli
218 fe91924d Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
 *
 * Asm string fragment: walks the filter list at offset(%0) (16-byte entries:
 * source pointer at +0, coefficient at +8, null pointer terminates) and
 * accumulates two quadwords of 16-bit samples per entry, read at
 * (src + 2*REG_a) and 8(src + 2*REG_a).
 *   offset       - string displacement of the filter list relative to %0
 *   coeff        - MMX register that receives each coefficient
 *   src1, src2   - MMX scratch registers for the two sample quadwords
 *   dst1, dst2   - MMX accumulators, initialised from VROUNDER_OFFSET(%0)
 * Uses REG_d and REG_S as scratch and local label 2; REG_a must already hold
 * the current index.
 */
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
219 df57ab14 Cédric Schieli
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
220 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
221 fe91924d Cédric Schieli
    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
222
    "movq                    "#dst1", "#dst2"       \n\t"\
223 2da0d70d Diego Biurrun
    ASMALIGN(4)\
224
    "2:                                             \n\t"\
225 fe91924d Cédric Schieli
    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
226
    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
227
    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
228 2da0d70d Diego Biurrun
    "add                         $16, %%"REG_d"            \n\t"\
229
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
230 fe91924d Cédric Schieli
    "pmulhw                 "#coeff", "#src1"       \n\t"\
231
    "pmulhw                 "#coeff", "#src2"       \n\t"\
232
    "paddw                   "#src1", "#dst1"       \n\t"\
233
    "paddw                   "#src2", "#dst2"       \n\t"\
234 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
235
    " jnz                         2b                \n\t"\
236
237 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX
 *
 * Chroma pass (YSCALEYUV2PACKEDX_UV) followed by the luma pass over the list
 * at LUM_MMX_FILTER_OFFSET, with mm0 as coefficient, mm2/mm5 as scratch and
 * mm1/mm7 as the two luma accumulators.  The asm statement opened by the
 * chroma pass is still open afterwards.
 */
#define YSCALEYUV2PACKEDX \
238
    YSCALEYUV2PACKEDX_UV \
239 fe91924d Cédric Schieli
    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
240 df57ab14 Cédric Schieli
241 c255994b Ramiro Polla
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Operand/clobber lists and closing ");" for an asm statement that was left
 * open earlier.  Inputs: %0 = &c->redDither, %1-%3 = `dummy` memory operands,
 * %4 = dest, %5 = dstW.  Clobbers REG_a, REG_d, REG_S.  The variables c,
 * dummy, dest and dstW must be in scope where the macro is expanded.
 * NOTE(review): the three dummy operands presumably only keep dest/dstW at
 * operand numbers 4 and 5 -- confirm against the output-writing macros.
 */
#define YSCALEYUV2PACKEDX_END                     \
242
        :: "r" (&c->redDither),                   \
243
            "m" (dummy), "m" (dummy), "m" (dummy),\
244
            "r" (dest), "m" (dstW)                \
245
        : "%"REG_a, "%"REG_d, "%"REG_S            \
246 2da0d70d Diego Biurrun
    );
247 8422aa88 Michael Niedermayer
248 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV
 *
 * Opens an `__asm__ volatile(` statement (left unterminated) and runs the
 * chroma filter with 32-bit accumulation, two taps per inner iteration.
 *
 * The list at CHR_MMX_FILTER_OFFSET(%0) uses APCK_SIZE-byte entries: first
 * source pointer at +0, second at APCK_PTR2, coefficient quadword at
 * APCK_COEF; a null pointer at the start of the next entry ends the list.
 * Samples at (src + REG_a) and VOF(src + REG_a) from both sources are
 * interleaved and multiplied with pmaddwd; dword sums are kept in mm4/mm5
 * and mm6/mm7.  Afterwards the sums are shifted right by 16, packed to words,
 * offset by the value at VROUNDER_OFFSET(%0) and stored to U_TEMP(%0) and
 * V_TEMP(%0).  Defines outer label 1; the branch back to it is not part of
 * this macro.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
249 7ad6469e Diego Pettenò
    __asm__ volatile(\
250 c255994b Ramiro Polla
        "xor %%"REG_a", %%"REG_a"                       \n\t"\
251
        ASMALIGN(4)\
252
        "nop                                            \n\t"\
253
        "1:                                             \n\t"\
254
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
255
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
256
        "pxor                      %%mm4, %%mm4         \n\t"\
257
        "pxor                      %%mm5, %%mm5         \n\t"\
258
        "pxor                      %%mm6, %%mm6         \n\t"\
259
        "pxor                      %%mm7, %%mm7         \n\t"\
260
        ASMALIGN(4)\
261
        "2:                                             \n\t"\
262
        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
263
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
264
        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
265
        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
266
        "movq                      %%mm0, %%mm3         \n\t"\
267
        "punpcklwd                 %%mm1, %%mm0         \n\t"\
268
        "punpckhwd                 %%mm1, %%mm3         \n\t"\
269
        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
270
        "pmaddwd                   %%mm1, %%mm0         \n\t"\
271
        "pmaddwd                   %%mm1, %%mm3         \n\t"\
272
        "paddd                     %%mm0, %%mm4         \n\t"\
273
        "paddd                     %%mm3, %%mm5         \n\t"\
274
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
275
        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
276
        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
277
        "test                  %%"REG_S", %%"REG_S"     \n\t"\
278
        "movq                      %%mm2, %%mm0         \n\t"\
279
        "punpcklwd                 %%mm3, %%mm2         \n\t"\
280
        "punpckhwd                 %%mm3, %%mm0         \n\t"\
281
        "pmaddwd                   %%mm1, %%mm2         \n\t"\
282
        "pmaddwd                   %%mm1, %%mm0         \n\t"\
283
        "paddd                     %%mm2, %%mm6         \n\t"\
284
        "paddd                     %%mm0, %%mm7         \n\t"\
285
        " jnz                         2b                \n\t"\
286
        "psrad                       $16, %%mm4         \n\t"\
287
        "psrad                       $16, %%mm5         \n\t"\
288
        "psrad                       $16, %%mm6         \n\t"\
289
        "psrad                       $16, %%mm7         \n\t"\
290
        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
291
        "packssdw                  %%mm5, %%mm4         \n\t"\
292
        "packssdw                  %%mm7, %%mm6         \n\t"\
293
        "paddw                     %%mm0, %%mm4         \n\t"\
294
        "paddw                     %%mm0, %%mm6         \n\t"\
295
        "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
296
        "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
297 df57ab14 Cédric Schieli
298
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
 *
 * Asm string fragment: 32-bit-accumulating filter over the list at
 * offset(%0), two taps per inner iteration (entry layout as described by
 * APCK_PTR2 / APCK_COEF / APCK_SIZE, null pointer terminates).  Samples are
 * read at (src + 2*REG_a) and 8(src + 2*REG_a).  Dword sums are kept in
 * mm1/mm5 and mm7/mm6; at the end they are shifted right by 16, packed to
 * words in mm1 and mm7 and offset by the value at VROUNDER_OFFSET(%0).
 * Finally mm3 and mm4 are reloaded from U_TEMP(%0) and V_TEMP(%0).
 * Uses REG_d, REG_S and local label 2; REG_a must already hold the index.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
299
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
300 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
301
    "pxor                      %%mm1, %%mm1         \n\t"\
302
    "pxor                      %%mm5, %%mm5         \n\t"\
303
    "pxor                      %%mm7, %%mm7         \n\t"\
304
    "pxor                      %%mm6, %%mm6         \n\t"\
305
    ASMALIGN(4)\
306
    "2:                                             \n\t"\
307
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
308
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
309 1625216e Michael Niedermayer
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
310 2da0d70d Diego Biurrun
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
311
    "movq                      %%mm0, %%mm3         \n\t"\
312
    "punpcklwd                 %%mm4, %%mm0         \n\t"\
313
    "punpckhwd                 %%mm4, %%mm3         \n\t"\
314 1625216e Michael Niedermayer
    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
315 2da0d70d Diego Biurrun
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
316
    "pmaddwd                   %%mm4, %%mm3         \n\t"\
317
    "paddd                     %%mm0, %%mm1         \n\t"\
318
    "paddd                     %%mm3, %%mm5         \n\t"\
319
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
320 1625216e Michael Niedermayer
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
321
    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
322 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
323
    "movq                      %%mm2, %%mm0         \n\t"\
324
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
325
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
326
    "pmaddwd                   %%mm4, %%mm2         \n\t"\
327
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
328
    "paddd                     %%mm2, %%mm7         \n\t"\
329
    "paddd                     %%mm0, %%mm6         \n\t"\
330
    " jnz                         2b                \n\t"\
331
    "psrad                       $16, %%mm1         \n\t"\
332
    "psrad                       $16, %%mm5         \n\t"\
333
    "psrad                       $16, %%mm7         \n\t"\
334
    "psrad                       $16, %%mm6         \n\t"\
335
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
336
    "packssdw                  %%mm5, %%mm1         \n\t"\
337
    "packssdw                  %%mm6, %%mm7         \n\t"\
338
    "paddw                     %%mm0, %%mm1         \n\t"\
339
    "paddw                     %%mm0, %%mm7         \n\t"\
340
    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
341
    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
342 bca11e75 Michael Niedermayer
343 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * 32-bit-accumulating chroma pass followed by the matching luma pass over
 * LUM_MMX_FILTER_OFFSET.  On completion mm1/mm7 hold the luma sums and
 * mm3/mm4 the chroma sums reloaded from U_TEMP/V_TEMP; the asm statement
 * opened by the chroma pass is still open.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
344
    YSCALEYUV2PACKEDX_ACCURATE_UV \
345
    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
346
347 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2RGBX
 *
 * Asm string fragment converting filtered samples to packed colour bytes,
 * with all constants addressed relative to operand %0.
 * Inputs: mm1 and mm7 (luma), mm3 (U) and mm4 (V).
 * Steps: subtract U_OFFSET/V_OFFSET/Y_OFFSET, multiply (pmulhw) by the
 * UG/VG/UB/VR/Y coefficients, duplicate each chroma term with
 * punpcklwd/punpckhwd so it can be added to two luma words, then pack with
 * unsigned saturation.
 * Outputs: mm2, mm4 and mm5 each hold 8 packed bytes (blue, green and red
 * respectively, going by the register comments in the body).
 * mm0, mm3 and mm6 are used as scratch.
 */
#define YSCALEYUV2RGBX \
348 2da0d70d Diego Biurrun
    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
349
    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
350
    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
351
    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
352
    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
353
    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
354 c255994b Ramiro Polla
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
355 2da0d70d Diego Biurrun
    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
356
    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
357
    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
358
    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
359
    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
360
    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
361 c255994b Ramiro Polla
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
362 2da0d70d Diego Biurrun
    "paddw           %%mm3, %%mm4       \n\t"\
363
    "movq            %%mm2, %%mm0       \n\t"\
364
    "movq            %%mm5, %%mm6       \n\t"\
365
    "movq            %%mm4, %%mm3       \n\t"\
366
    "punpcklwd       %%mm2, %%mm2       \n\t"\
367
    "punpcklwd       %%mm5, %%mm5       \n\t"\
368
    "punpcklwd       %%mm4, %%mm4       \n\t"\
369
    "paddw           %%mm1, %%mm2       \n\t"\
370
    "paddw           %%mm1, %%mm5       \n\t"\
371
    "paddw           %%mm1, %%mm4       \n\t"\
372
    "punpckhwd       %%mm0, %%mm0       \n\t"\
373
    "punpckhwd       %%mm6, %%mm6       \n\t"\
374
    "punpckhwd       %%mm3, %%mm3       \n\t"\
375
    "paddw           %%mm7, %%mm0       \n\t"\
376
    "paddw           %%mm7, %%mm6       \n\t"\
377
    "paddw           %%mm7, %%mm3       \n\t"\
378
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
379
    "packuswb        %%mm0, %%mm2       \n\t"\
380
    "packuswb        %%mm6, %%mm5       \n\t"\
381
    "packuswb        %%mm3, %%mm4       \n\t"\
382 d604bab9 Michael Niedermayer
383 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED(index, c)
 *
 * Asm string fragment that blends two source lines for packed output.
 *   index - register used as the loop index (zeroed here)
 *   c     - base register for the context offsets
 * Operands used: %0/%1 = the two luma buffers, %2/%3 = the two chroma
 * buffers.
 *
 * Side effect: before the loop the coefficient quadwords at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted right
 * by 3 and written back to memory.
 *
 * Per iteration (label 1) each result is
 *     ((buf0 - buf1) * coeff >> 16) + (buf1 >> 7)
 * giving chroma in mm3 (from %2/%3) and mm4 (same, at displacement VOF) and
 * luma in mm1 and mm7.  The index increment and the branch back to label 1
 * are not part of this macro.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
384 2da0d70d Diego Biurrun
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
385
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
386
    "psraw                $3, %%mm0                           \n\t"\
387
    "psraw                $3, %%mm1                           \n\t"\
388
    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
389
    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
390
    "xor            "#index", "#index"                        \n\t"\
391
    ASMALIGN(4)\
392
    "1:                                 \n\t"\
393
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
394
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
395 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
396
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
397 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
398
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
399
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
400
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
401
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
402
    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
403
    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
404
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
405
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
406
    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
407
    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
408
    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
409
    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
410
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
411
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
412
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
413
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
414
    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
415
    "psraw                $7, %%mm7     \n\t" /* buf1[eax] >>7*/\
416
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
417
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
418 6a4970ab Diego Biurrun
419 6e1c66bc Aurelien Jacobs
/* Extra expansion level: macro arguments are expanded before
 * REAL_YSCALEYUV2PACKED stringifies them with #. */
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
420 6a4970ab Diego Biurrun
421 df57ab14 Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_UV(index, c)
 *
 * Asm string fragment: chroma half of the two-line blend for RGB output.
 *   index - register used as the loop index (zeroed here)
 *   c     - base register for the context offsets
 * Defines label 1, loads chroma from operands %2 and %3 (second plane at
 * displacement VOF) and computes
 *     ((uvbuf0 - uvbuf1) * coeff >> 16) + (uvbuf1 >> 4)
 * with the coefficient read from CHR_MMX_FILTER_OFFSET+8(c).  It then
 * subtracts U_OFFSET/V_OFFSET, keeps copies in mm2/mm5 and multiplies mm3/mm4
 * by UG_COEFF/VG_COEFF.
 * On exit: mm2 = U term, mm5 = V term, mm3 = ug, mm4 = vg (see the register
 * comment at the end of the body).
 */
#define REAL_YSCALEYUV2RGB_UV(index, c) \
422 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
423
    ASMALIGN(4)\
424
    "1:                                 \n\t"\
425
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
426
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
427 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
428
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
429 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
430
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
431
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
432
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
433
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
434
    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
435
    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
436
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
437
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
438
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
439
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
440
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
441
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
442
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
443
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
444
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
445 df57ab14 Cédric Schieli
446 786dcfef Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 *
 * Asm string fragment: blends two lines of 16-bit samples.
 *   index  - loop index register (scaled by 2 in the addressing)
 *   c      - base register for the context offsets
 *   b1, b2 - base registers/operands of the two source buffers
 * Computes ((b1 - b2) * coeff >> 16) + (b2 >> 4), with the coefficient read
 * from LUM_MMX_FILTER_OFFSET+8(c), for the quadwords at +0 and +8.
 * Results are left in mm1 and mm7; mm0 and mm6 are scratch.
 */
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
447
    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
448
    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
449
    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
450
    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
451 2da0d70d Diego Biurrun
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
452
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
453
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
454
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
455
    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
456
    "psraw                $4, %%mm7     \n\t" /* buf1[eax] >>4*/\
457
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
458
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
459 df57ab14 Cédric Schieli
460
/*
 * REAL_YSCALEYUV2RGB_COEFF(c)
 *
 * Asm string fragment: final colour-matrix step, with constants addressed
 * relative to base register c.
 * Inputs: mm1/mm7 = luma, mm2 = U term, mm5 = V term, mm3 = ug, mm4 = vg.
 * Multiplies mm2/mm5 by UB_COEFF/VR_COEFF, subtracts Y_OFFSET from the luma
 * and scales it by Y_COEFF, sums ug+vg, duplicates each chroma term with
 * punpcklwd/punpckhwd so it can be added to two luma words, then packs with
 * unsigned saturation.
 * Outputs: mm2, mm4 and mm5 each hold 8 packed bytes (blue, green and red
 * respectively, going by the register comments in the body).
 */
#define REAL_YSCALEYUV2RGB_COEFF(c) \
461 2da0d70d Diego Biurrun
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
462
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
463
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
464
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
465
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
466
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
467
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
468
    "paddw             %%mm3, %%mm4     \n\t"\
469
    "movq              %%mm2, %%mm0     \n\t"\
470
    "movq              %%mm5, %%mm6     \n\t"\
471
    "movq              %%mm4, %%mm3     \n\t"\
472
    "punpcklwd         %%mm2, %%mm2     \n\t"\
473
    "punpcklwd         %%mm5, %%mm5     \n\t"\
474
    "punpcklwd         %%mm4, %%mm4     \n\t"\
475
    "paddw             %%mm1, %%mm2     \n\t"\
476
    "paddw             %%mm1, %%mm5     \n\t"\
477
    "paddw             %%mm1, %%mm4     \n\t"\
478
    "punpckhwd         %%mm0, %%mm0     \n\t"\
479
    "punpckhwd         %%mm6, %%mm6     \n\t"\
480
    "punpckhwd         %%mm3, %%mm3     \n\t"\
481
    "paddw             %%mm7, %%mm0     \n\t"\
482
    "paddw             %%mm7, %%mm6     \n\t"\
483
    "paddw             %%mm7, %%mm3     \n\t"\
484
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
485
    "packuswb          %%mm0, %%mm2     \n\t"\
486
    "packuswb          %%mm6, %%mm5     \n\t"\
487
    "packuswb          %%mm3, %%mm4     \n\t"\
488 40494418 Cédric Schieli
489 786dcfef Cédric Schieli
/* Extra expansion level: macro arguments are expanded before
 * REAL_YSCALEYUV2RGB_YA stringifies them with #. */
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
490 df57ab14 Cédric Schieli
491
/*
 * YSCALEYUV2RGB(index, c)
 *
 * Full two-line blend plus colour conversion: chroma blend, luma blend with
 * operands %0 and %1 as the two buffers, then the coefficient/pack step.
 * index is the loop index register, c the base register for context offsets.
 */
#define YSCALEYUV2RGB(index, c) \
492
    REAL_YSCALEYUV2RGB_UV(index, c) \
493 786dcfef Cédric Schieli
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
494 df57ab14 Cédric Schieli
    REAL_YSCALEYUV2RGB_COEFF(c)
495 6a4970ab Diego Biurrun
496 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED1(index, c)
 *
 * Asm string fragment: single-source-line variant with no blending.
 * Zeroes index, defines label 1, loads chroma from operand %2 (+0 and +VOF)
 * into mm3/mm4 and luma from operand %0 (+0 and +8, index scaled by 2) into
 * mm1/mm7, and shifts every word right by 7.
 * The parameter c is not referenced in the body.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
497 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
498
    ASMALIGN(4)\
499
    "1:                                 \n\t"\
500
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
501 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
502 2da0d70d Diego Biurrun
    "psraw                $7, %%mm3     \n\t" \
503
    "psraw                $7, %%mm4     \n\t" \
504
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
505
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
506
    "psraw                $7, %%mm1     \n\t" \
507
    "psraw                $7, %%mm7     \n\t" \
508 6a4970ab Diego Biurrun
509 6e1c66bc Aurelien Jacobs
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
510 6a4970ab Diego Biurrun
511 6e1c66bc Aurelien Jacobs
/*
 * Loop head + YUV->RGB conversion for 8 pixels taken from a single source
 * line (chroma read only from operand %2, luma only from operand %0).
 *
 * Zeroes the index register and emits the aligned local label "1:". Chroma and
 * luma words are shifted right by 4, offset by U_OFFSET / V_OFFSET / Y_OFFSET
 * and multiplied (pmulhw) by the UG/VG/UB/VR/Y coefficients, all read relative
 * to the pointer in register c.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes (packuswb, i.e.
 * unsigned-saturated). mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 * The loop is left open; a following WRITE* macro closes it with "jb 1b".
 */
#define REAL_YSCALEYUV2RGB1(index, c) \
512 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
513
    ASMALIGN(4)\
514
    "1:                                 \n\t"\
515
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
516 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+VOF]*/\
517 2da0d70d Diego Biurrun
    "psraw                $4, %%mm3     \n\t" /* uvbuf0[eax] >>4*/\
518
    "psraw                $4, %%mm4     \n\t" /* uvbuf0[eax+VOF] >>4*/\
519
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
520
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
521
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
522
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
523
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
524
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
525
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
526
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
527
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
528
    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
529
    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
530
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
531
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
532
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
533
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
534
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
535
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
536
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
537
    "paddw             %%mm3, %%mm4     \n\t"\
538
    "movq              %%mm2, %%mm0     \n\t"\
539
    "movq              %%mm5, %%mm6     \n\t"\
540
    "movq              %%mm4, %%mm3     \n\t"\
541
    "punpcklwd         %%mm2, %%mm2     \n\t"\
542
    "punpcklwd         %%mm5, %%mm5     \n\t"\
543
    "punpcklwd         %%mm4, %%mm4     \n\t"\
544
    "paddw             %%mm1, %%mm2     \n\t"\
545
    "paddw             %%mm1, %%mm5     \n\t"\
546
    "paddw             %%mm1, %%mm4     \n\t"\
547
    "punpckhwd         %%mm0, %%mm0     \n\t"\
548
    "punpckhwd         %%mm6, %%mm6     \n\t"\
549
    "punpckhwd         %%mm3, %%mm3     \n\t"\
550
    "paddw             %%mm7, %%mm0     \n\t"\
551
    "paddw             %%mm7, %%mm6     \n\t"\
552
    "paddw             %%mm7, %%mm3     \n\t"\
553
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
554
    "packuswb          %%mm0, %%mm2     \n\t"\
555
    "packuswb          %%mm6, %%mm5     \n\t"\
556
    "packuswb          %%mm3, %%mm4     \n\t"\
557 40494418 Cédric Schieli
558 6e1c66bc Aurelien Jacobs
/* Wrapper that expands its arguments before REAL_YSCALEYUV2RGB1 stringizes them. */
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
559 497d4f99 Michael Niedermayer
560 6e1c66bc Aurelien Jacobs
/*
 * Loop head for packed output with chroma averaged from two source lines.
 *
 * Zeroes the index register, emits the aligned local label "1:", then
 *   mm3 <- (words at (%2 + index)       + words at (%3 + index))       >> 8
 *   mm4 <- (words at (%2 + index + VOF) + words at (%3 + index + VOF)) >> 8
 * (logical shift of the sum), and loads 8 luma words from operand %0 into
 * mm1 / mm7, arithmetic-shifted right by 7. mm2 and mm5 are scratch.
 *
 * The c argument is not used by this macro. The loop is left open; a
 * following WRITE* macro closes it with "jb 1b".
 */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
561 2da0d70d Diego Biurrun
    "xor "#index", "#index"             \n\t"\
562
    ASMALIGN(4)\
563
    "1:                                 \n\t"\
564
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
565
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
566 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+VOF]*/\
567
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+VOF]*/\
568 2da0d70d Diego Biurrun
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
569
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+VOF] + uvbuf1[eax+VOF]*/\
570
    "psrlw                $8, %%mm3     \n\t" \
571
    "psrlw                $8, %%mm4     \n\t" \
572
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
573
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
574
    "psraw                $7, %%mm1     \n\t" \
575
    "psraw                $7, %%mm7     \n\t"
576 6e1c66bc Aurelien Jacobs
/* Wrapper that expands its arguments before REAL_YSCALEYUV2PACKED1b stringizes them. */
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
577 6a4970ab Diego Biurrun
578 497d4f99 Michael Niedermayer
/*
 * Same conversion as REAL_YSCALEYUV2RGB1, but with vertical chrominance
 * interpolation: each chroma word is the sum of the two chroma lines
 * (operands %2 and %3) logically shifted right by 5. Luma is still read from
 * the single line in operand %0.
 *
 * Zeroes the index register and emits the aligned local label "1:".
 * Offsets and coefficients are read relative to the pointer in register c.
 *
 * On exit: mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes (packuswb, i.e.
 * unsigned-saturated). mm0, mm1, mm3, mm6 and mm7 are used as scratch.
 * The loop is left open; a following WRITE* macro closes it with "jb 1b".
 */
579 6e1c66bc Aurelien Jacobs
#define REAL_YSCALEYUV2RGB1b(index, c) \
580 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
581
    ASMALIGN(4)\
582
    "1:                                 \n\t"\
583
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
584
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
585 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+VOF]*/\
586
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+VOF]*/\
587 2da0d70d Diego Biurrun
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
588
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+VOF] + uvbuf1[eax+VOF]*/\
589
    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
590
    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
591
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
592
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
593
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
594
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
595
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
596
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
597
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
598
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
599
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax+4]*/\
600
    "psraw                $4, %%mm1     \n\t" /* buf0[eax] >>4*/\
601
    "psraw                $4, %%mm7     \n\t" /* buf0[eax+4] >>4*/\
602
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
603
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
604
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
605
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
606
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
607
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
608
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
609
    "paddw             %%mm3, %%mm4     \n\t"\
610
    "movq              %%mm2, %%mm0     \n\t"\
611
    "movq              %%mm5, %%mm6     \n\t"\
612
    "movq              %%mm4, %%mm3     \n\t"\
613
    "punpcklwd         %%mm2, %%mm2     \n\t"\
614
    "punpcklwd         %%mm5, %%mm5     \n\t"\
615
    "punpcklwd         %%mm4, %%mm4     \n\t"\
616
    "paddw             %%mm1, %%mm2     \n\t"\
617
    "paddw             %%mm1, %%mm5     \n\t"\
618
    "paddw             %%mm1, %%mm4     \n\t"\
619
    "punpckhwd         %%mm0, %%mm0     \n\t"\
620
    "punpckhwd         %%mm6, %%mm6     \n\t"\
621
    "punpckhwd         %%mm3, %%mm3     \n\t"\
622
    "paddw             %%mm7, %%mm0     \n\t"\
623
    "paddw             %%mm7, %%mm6     \n\t"\
624
    "paddw             %%mm7, %%mm3     \n\t"\
625
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
626
    "packuswb          %%mm0, %%mm2     \n\t"\
627
    "packuswb          %%mm6, %%mm5     \n\t"\
628
    "packuswb          %%mm3, %%mm4     \n\t"\
629 40494418 Cédric Schieli
630 6e1c66bc Aurelien Jacobs
/* Wrapper that expands its arguments before REAL_YSCALEYUV2RGB1b stringizes them. */
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
631 d604bab9 Michael Niedermayer
632 6858492e Cédric Schieli
/*
 * Loads 8 alpha words from asm operand %1 at (%1 + 2*index), arithmetic-shifts
 * them right by 7 and packs them with unsigned saturation into 8 bytes in mm7.
 * mm1 is used as scratch. Contains no loop label or branch of its own.
 */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
633
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
634
    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
635
    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
636
    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
637
    "packuswb          %%mm1, %%mm7     \n\t"
638
/* Wrapper that expands its argument before REAL_YSCALEYUV2RGB1_ALPHA stringizes it. */
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
639
640 9c77b26b Cédric Schieli
/*
 * Interleaves four registers of 8 bytes each into 8 four-byte pixels, stores
 * them and closes the pixel loop.
 *
 *   dst, dstw, index : destination base, width operand and index register
 *   b, g, r, a       : MMX registers holding 8 bytes each; in memory every
 *                      pixel is written as the bytes b, g, r, a in that order
 *   q0, q2, q3, t    : scratch MMX registers
 *
 * b and r are overwritten as well. Four quadwords (32 bytes) are stored with
 * MOVNTQ at dst + 4*index, then index is advanced by 8 and control jumps back
 * to local label 1 while index is below dstw (unsigned compare, "jb").
 */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
641
    "movq       "#b", "#q2"     \n\t" /* B */\
642
    "movq       "#r", "#t"      \n\t" /* R */\
643
    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
644
    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
645
    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
646
    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
647
    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
648
    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
649
    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
650
    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
651
    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
652
    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
653 d604bab9 Michael Niedermayer
\
654 9c77b26b Cédric Schieli
    MOVNTQ(   q0,   (dst, index, 4))\
655
    MOVNTQ(    b,  8(dst, index, 4))\
656
    MOVNTQ(   q2, 16(dst, index, 4))\
657
    MOVNTQ(   q3, 24(dst, index, 4))\
658 d604bab9 Michael Niedermayer
\
659 2da0d70d Diego Biurrun
    "add      $8, "#index"      \n\t"\
660
    "cmp "#dstw", "#index"      \n\t"\
661
    " jb      1b                \n\t"
662 9c77b26b Cédric Schieli
/* Wrapper that expands its arguments before REAL_WRITEBGR32 stringizes them. */
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
663 d604bab9 Michael Niedermayer
664 27a90b04 Michael Niedermayer
/*
 * Packs 8 pixels into 16-bit words (R in bits 15..11, G in bits 10..5, B in
 * bits 4..0), stores them and closes the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0 (mm7
 * is interleaved with G as the zero high byte). B and R are masked with bF8,
 * G with bFC. mm1 and mm3 are scratch; mm2, mm4 and mm5 are overwritten.
 *
 * Two quadwords (16 bytes) are stored with MOVNTQ at dst + 2*index, then
 * index is advanced by 8 and control jumps back to local label 1 while index
 * is below dstw (unsigned compare, "jb").
 */
#define REAL_WRITERGB16(dst, dstw, index) \
665 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
666
    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
667
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
668
    "psrlq           $3, %%mm2  \n\t"\
669 d604bab9 Michael Niedermayer
\
670 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
671
    "movq         %%mm4, %%mm3  \n\t"\
672 d604bab9 Michael Niedermayer
\
673 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
674
    "punpcklbw    %%mm5, %%mm2  \n\t"\
675
    "punpckhbw    %%mm7, %%mm4  \n\t"\
676
    "punpckhbw    %%mm5, %%mm1  \n\t"\
677 d604bab9 Michael Niedermayer
\
678 2da0d70d Diego Biurrun
    "psllq           $3, %%mm3  \n\t"\
679
    "psllq           $3, %%mm4  \n\t"\
680 d604bab9 Michael Niedermayer
\
681 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
682
    "por          %%mm4, %%mm1  \n\t"\
683 d604bab9 Michael Niedermayer
\
684 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
685
    MOVNTQ(%%mm1, 8(dst, index, 2))\
686 d604bab9 Michael Niedermayer
\
687 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
688
    "cmp        "#dstw", "#index"   \n\t"\
689
    " jb             1b             \n\t"
690 27a90b04 Michael Niedermayer
/* Wrapper that expands its arguments before REAL_WRITERGB16 stringizes them. */
#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
691 d604bab9 Michael Niedermayer
692 27a90b04 Michael Niedermayer
/*
 * Packs 8 pixels into 16-bit words (R in bits 14..10, G in bits 9..5, B in
 * bits 4..0), stores them and closes the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0 (mm7
 * is interleaved with G as the zero high byte). All three components are
 * masked with bF8. mm1 and mm3 are scratch; mm2, mm4 and mm5 are overwritten.
 *
 * Two quadwords (16 bytes) are stored with MOVNTQ at dst + 2*index, then
 * index is advanced by 8 and control jumps back to local label 1 while index
 * is below dstw (unsigned compare, "jb").
 */
#define REAL_WRITERGB15(dst, dstw, index) \
693 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
694
    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
695
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
696
    "psrlq           $3, %%mm2  \n\t"\
697
    "psrlq           $1, %%mm5  \n\t"\
698 d604bab9 Michael Niedermayer
\
699 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
700
    "movq         %%mm4, %%mm3  \n\t"\
701 d604bab9 Michael Niedermayer
\
702 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
703
    "punpcklbw    %%mm5, %%mm2  \n\t"\
704
    "punpckhbw    %%mm7, %%mm4  \n\t"\
705
    "punpckhbw    %%mm5, %%mm1  \n\t"\
706 d604bab9 Michael Niedermayer
\
707 2da0d70d Diego Biurrun
    "psllq           $2, %%mm3  \n\t"\
708
    "psllq           $2, %%mm4  \n\t"\
709 d604bab9 Michael Niedermayer
\
710 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
711
    "por          %%mm4, %%mm1  \n\t"\
712 d604bab9 Michael Niedermayer
\
713 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
714
    MOVNTQ(%%mm1, 8(dst, index, 2))\
715 d604bab9 Michael Niedermayer
\
716 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
717
    "cmp        "#dstw", "#index"   \n\t"\
718
    " jb             1b             \n\t"
719 27a90b04 Michael Niedermayer
/* Wrapper that expands its arguments before REAL_WRITERGB15 stringizes them. */
#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
720 f62255fb Michael Niedermayer
721 6542b44e Michael Niedermayer
/*
 * Older 24-bit writer: packs 8 pixels into 24 bytes using shift/mask/or
 * sequences, stores them and closes the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0.
 * Uses the byte masks bm00000111, bm11111000 and bm00001111 and overwrites
 * mm0..mm6.
 *
 * Three quadwords are stored with MOVNTQ at dst, dst+8 and dst+16. Unlike the
 * 16/32-bit writers, dst itself is advanced by 24 per iteration rather than
 * being indexed; index is advanced by 8 and control jumps back to local
 * label 1 while index is below dstw (unsigned compare, "jb").
 */
#define WRITEBGR24OLD(dst, dstw, index) \
722 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
723
    "movq      %%mm2, %%mm1             \n\t" /* B */\
724
    "movq      %%mm5, %%mm6             \n\t" /* R */\
725
    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
726
    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
727
    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
728
    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
729
    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
730
    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
731
    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
732
    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
733
    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
734
    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
735 d604bab9 Michael Niedermayer
\
736 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
737
    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
738
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
739
    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
740
    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
741
    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
742
    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
743
    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
744 d604bab9 Michael Niedermayer
\
745 2da0d70d Diego Biurrun
    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
746
    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
747
    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
748
    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
749
    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
750
    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
751
    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
752
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
753
    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
754
    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
755
    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
756
    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
757
    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
758 d604bab9 Michael Niedermayer
\
759 2da0d70d Diego Biurrun
    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
760
    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
761
    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
762
    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
763
    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
764
    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
765
    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
766
    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
767 d604bab9 Michael Niedermayer
\
768 2da0d70d Diego Biurrun
    MOVNTQ(%%mm0,   (dst))\
769
    MOVNTQ(%%mm2,  8(dst))\
770
    MOVNTQ(%%mm3, 16(dst))\
771
    "add         $24, "#dst"            \n\t"\
772 d604bab9 Michael Niedermayer
\
773 2da0d70d Diego Biurrun
    "add          $8, "#index"          \n\t"\
774
    "cmp     "#dstw", "#index"          \n\t"\
775
    " jb          1b                    \n\t"
776 d604bab9 Michael Niedermayer
777 6542b44e Michael Niedermayer
/*
 * Plain-MMX 24-bit writer: packs 8 pixels into 24 bytes using only
 * unpack/shift/or instructions (no memory masks), stores them and closes the
 * pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes and mm7 = 0.
 * All of mm0..mm7 are overwritten; in particular mm7 is no longer zero
 * afterwards, so it has to be cleared again before the next iteration
 * reaches this macro.
 *
 * Three quadwords are stored with MOVNTQ at dst, dst+8 and dst+16; dst is
 * advanced by 24, index by 8, and control jumps back to local label 1 while
 * index is below dstw (unsigned compare, "jb").
 */
#define WRITEBGR24MMX(dst, dstw, index) \
778 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
779
    "movq      %%mm2, %%mm1     \n\t" /* B */\
780
    "movq      %%mm5, %%mm6     \n\t" /* R */\
781
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
782
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
783
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
784
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
785
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
786
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
787
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
788
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
789
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
790
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
791 99d2cb72 Michael Niedermayer
\
792 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
793
    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
794
    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
795
    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
796 99d2cb72 Michael Niedermayer
\
797 2da0d70d Diego Biurrun
    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
798
    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
799
    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
800
    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
801 99d2cb72 Michael Niedermayer
\
802 2da0d70d Diego Biurrun
    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
803
    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
804
    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
805
    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
806 99d2cb72 Michael Niedermayer
\
807 2da0d70d Diego Biurrun
    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
808
    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
809
    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
810
    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
811
    MOVNTQ(%%mm0, (dst))\
812 99d2cb72 Michael Niedermayer
\
813 2da0d70d Diego Biurrun
    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
814
    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
815
    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
816
    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
817
    MOVNTQ(%%mm6, 8(dst))\
818 99d2cb72 Michael Niedermayer
\
819 2da0d70d Diego Biurrun
    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
820
    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
821
    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
822
    MOVNTQ(%%mm5, 16(dst))\
823 99d2cb72 Michael Niedermayer
\
824 2da0d70d Diego Biurrun
    "add         $24, "#dst"    \n\t"\
825 99d2cb72 Michael Niedermayer
\
826 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
827
    "cmp     "#dstw", "#index"  \n\t"\
828
    " jb          1b            \n\t"
829 99d2cb72 Michael Niedermayer
830 6542b44e Michael Niedermayer
/*
 * 24-bit writer built on pshufw: packs 8 pixels into 24 bytes by shuffling
 * each colour register into position and masking with ff_M24A / ff_M24B /
 * ff_M24C, stores them and closes the pixel loop.
 *
 * Expects mm2 = 8 B bytes, mm4 = 8 G bytes, mm5 = 8 R bytes. mm0 and mm7 are
 * loaded with ff_M24A and ff_M24C at the top, so their incoming values are
 * not used. mm1, mm3 and mm6 are scratch and mm4 is shifted in place; mm2 and
 * mm5 are only read.
 *
 * Three quadwords are stored with MOVNTQ at dst, dst+8 and dst+16; dst is
 * advanced by 24, index by 8, and control jumps back to local label 1 while
 * index is below dstw (unsigned compare, "jb").
 */
#define WRITEBGR24MMX2(dst, dstw, index) \
831 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
832 5802683a Reimar Döffinger
    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
833
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
834 2da0d70d Diego Biurrun
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
835
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
836
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
837 99d2cb72 Michael Niedermayer
\
838 2da0d70d Diego Biurrun
    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
839
    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
840
    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
841 99d2cb72 Michael Niedermayer
\
842 2da0d70d Diego Biurrun
    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
843
    "por    %%mm1, %%mm6        \n\t"\
844
    "por    %%mm3, %%mm6        \n\t"\
845
    MOVNTQ(%%mm6, (dst))\
846 99d2cb72 Michael Niedermayer
\
847 2da0d70d Diego Biurrun
    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
848
    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
849
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
850
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
851 99d2cb72 Michael Niedermayer
\
852 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
853 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
854
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
855 99d2cb72 Michael Niedermayer
\
856 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
857
    "por    %%mm3, %%mm6        \n\t"\
858
    MOVNTQ(%%mm6, 8(dst))\
859 99d2cb72 Michael Niedermayer
\
860 2da0d70d Diego Biurrun
    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
861
    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
862
    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
863 99d2cb72 Michael Niedermayer
\
864 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
865
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
866 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
867 99d2cb72 Michael Niedermayer
\
868 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t"\
869
    "por    %%mm3, %%mm6        \n\t"\
870
    MOVNTQ(%%mm6, 16(dst))\
871 99d2cb72 Michael Niedermayer
\
872 2da0d70d Diego Biurrun
    "add      $24, "#dst"       \n\t"\
873 99d2cb72 Michael Niedermayer
\
874 2da0d70d Diego Biurrun
    "add       $8, "#index"     \n\t"\
875
    "cmp  "#dstw", "#index"     \n\t"\
876
    " jb       1b               \n\t"
877 99d2cb72 Michael Niedermayer
878 94daf2e9 Ramiro Polla
/* Select the 24-bit writer for this template instantiation: the pshufw-based
 * WRITEBGR24MMX2 when COMPILE_TEMPLATE_MMX2 is set, the plain-MMX
 * WRITEBGR24MMX otherwise. Any earlier WRITEBGR24 definition is dropped
 * first. */
#if COMPILE_TEMPLATE_MMX2
879 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
880 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
881 99d2cb72 Michael Niedermayer
#else
882 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
883 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
884 99d2cb72 Michael Niedermayer
#endif
885
886 6e1c66bc Aurelien Jacobs
/*
 * Packs 8 luma and 4+4 chroma words into 16 interleaved bytes, stores them
 * and closes the pixel loop.
 *
 * Expects mm1 / mm7 = 8 luma words and mm3 / mm4 = 4 words each of the two
 * chroma planes. Everything is packed to bytes with unsigned saturation; the
 * two chroma registers are byte-interleaved (mm3 byte first, then mm4 byte)
 * and that result is in turn interleaved with the luma bytes, giving
 * luma, mm3-chroma, luma, mm4-chroma, ... in memory.
 *
 * Two quadwords (16 bytes) are stored with MOVNTQ at dst + 2*index, then
 * index is advanced by 8 and control jumps back to local label 1 while index
 * is below dstw (unsigned compare, "jb").
 */
#define REAL_WRITEYUY2(dst, dstw, index) \
887 2da0d70d Diego Biurrun
    "packuswb  %%mm3, %%mm3     \n\t"\
888
    "packuswb  %%mm4, %%mm4     \n\t"\
889
    "packuswb  %%mm7, %%mm1     \n\t"\
890
    "punpcklbw %%mm4, %%mm3     \n\t"\
891
    "movq      %%mm1, %%mm7     \n\t"\
892
    "punpcklbw %%mm3, %%mm1     \n\t"\
893
    "punpckhbw %%mm3, %%mm7     \n\t"\
894 25593e29 Michael Niedermayer
\
895 2da0d70d Diego Biurrun
    MOVNTQ(%%mm1, (dst, index, 2))\
896
    MOVNTQ(%%mm7, 8(dst, index, 2))\
897 25593e29 Michael Niedermayer
\
898 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
899
    "cmp     "#dstw", "#index"  \n\t"\
900
    " jb          1b            \n\t"
901 6e1c66bc Aurelien Jacobs
/* Wrapper that expands its arguments before REAL_WRITEYUY2 stringizes them. */
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
902 25593e29 Michael Niedermayer
903
904 7ac40327 Ramiro Polla
/**
 * Vertical filter pass producing planar output (luma, optional chroma,
 * optional alpha).
 *
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set in c->flags, the planes
 * are written by the YSCALEYUV2YV12X asm macros (the _ACCURATE variants when
 * SWS_ACCURATE_RND is set) and the function returns. Planes are processed in
 * the order U, V (both only if uDest is non-NULL), alpha (only if
 * CONFIG_SWSCALE_ALPHA and aDest is non-NULL), then luma.
 *
 * Otherwise the work is delegated to yuv2yuvX_altivec_real() when
 * COMPILE_TEMPLATE_ALTIVEC is set (that call receives neither alpSrc nor
 * aDest), or to yuv2yuvXinC().
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
#if COMPILE_TEMPLATE_MMX
    if (!(c->flags & SWS_BITEXACT)) {
        if (!(c->flags & SWS_ACCURATE_RND)) {
            /* fast rounding */
            if (uDest) {
                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
            }
            if (CONFIG_SWSCALE_ALPHA && aDest) {
                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
            }

            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
        } else {
            /* accurate rounding requested */
            if (uDest) {
                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
            }
            if (CONFIG_SWSCALE_ALPHA && aDest) {
                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
            }

            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
        }
        return;
    }
#endif
#if !COMPILE_TEMPLATE_ALTIVEC
    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
                chrFilter, chrSrc, chrFilterSize,
                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
#else //COMPILE_TEMPLATE_ALTIVEC
    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
                          chrFilter, chrSrc, chrFilterSize,
                          dest, uDest, vDest, dstW, chrDstW);
#endif //!COMPILE_TEMPLATE_ALTIVEC
}
944 2add307d Michael Niedermayer
945 7ac40327 Ramiro Polla
/**
 * Vertical filter pass for the format given by dstFormat; this template
 * variant has no specialised code and forwards every argument except the
 * context to yuv2nv12XinC().
 *
 * @param c unused here; kept so all template variants share one signature
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
{
    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize,
                 dest, uDest, dstW, chrDstW, dstFormat);
}
953
954 7ac40327 Ramiro Polla
/**
 * Vertical pass for the unscaled case: converts one line of 16-bit
 * intermediate samples to 8-bit planar output by rounding ((x + 64) >> 7)
 * and clipping to 0..255.
 *
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set in c->flags, each
 * non-NULL destination plane is written by the YSCALEYUV2YV121 asm macro
 * (the _ACCURATE variant when SWS_ACCURATE_RND is set). Otherwise the plain C
 * loops below are used.
 *
 * @param lumSrc  luma samples, dstW entries
 * @param chrSrc  chroma samples: U at [0, chrDstW), V at [VOFW, VOFW+chrDstW)
 * @param alpSrc  alpha samples, dstW entries; only read when aDest is set
 * @param dest    luma output
 * @param uDest   U output, or NULL to skip both chroma planes in the C path
 * @param vDest   V output
 * @param aDest   alpha output, or NULL to skip alpha
 */
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
    int i;
#if COMPILE_TEMPLATE_MMX
    if(!(c->flags & SWS_BITEXACT)) {
        long p= 4;
        /* The sources are const int16_t planes; keep that type instead of
         * silently converting to uint8_t * (incompatible pointer type that
         * also dropped the const qualifier). The asm only needs the address.
         * Each pointer is biased to the end of its plane because the asm is
         * handed the negated count as its third operand. */
        const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};

        if (c->flags & SWS_ACCURATE_RND) {
            while(p--) {
                if (dst[p]) {
                    __asm__ volatile(
                        YSCALEYUV2YV121_ACCURATE
                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a
                    );
                }
            }
        } else {
            while(p--) {
                if (dst[p]) {
                    __asm__ volatile(
                        YSCALEYUV2YV121
                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a
                    );
                }
            }
        }
        return;
    }
#endif
    /* C path. All three planes use av_clip_uint8(), as the alpha loop always
     * did; for int16_t input the result is the same as the former
     * hand-written "& 256" clamps. */
    for (i=0; i<dstW; i++) {
        int val= (lumSrc[i]+64)>>7;

        dest[i]= av_clip_uint8(val);
    }

    if (uDest)
        for (i=0; i<chrDstW; i++) {
            int u=(chrSrc[i       ]+64)>>7;
            int v=(chrSrc[i + VOFW]+64)>>7;

            uDest[i]= av_clip_uint8(u);
            vDest[i]= av_clip_uint8(v);
        }

    if (CONFIG_SWSCALE_ALPHA && aDest)
        for (i=0; i<dstW; i++) {
            int val= (alpSrc[i]+64)>>7;
            aDest[i]= av_clip_uint8(val);
        }
}
1024
1025 c1b0bfb4 Michael Niedermayer
1026 d604bab9 Michael Niedermayer
/**
1027
 * vertical scale YV12 to RGB
1028
 */
1029 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1030
                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1031
                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
1032 c1b0bfb4 Michael Niedermayer
{
1033 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1034 d0ce212a Ramiro Polla
    x86_reg dummy=0;
1035 dd68318c Ramiro Polla
    if(!(c->flags & SWS_BITEXACT)) {
1036
        if (c->flags & SWS_ACCURATE_RND) {
1037
            switch(c->dstFormat) {
1038 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1039 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1040 6858492e Cédric Schieli
                    YSCALEYUV2PACKEDX_ACCURATE
1041
                    YSCALEYUV2RGBX
1042
                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
1043
                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
1044
                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
1045
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1046
                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
1047
                    "psraw                        $3, %%mm1         \n\t"
1048
                    "psraw                        $3, %%mm7         \n\t"
1049
                    "packuswb                  %%mm7, %%mm1         \n\t"
1050
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1051
1052
                    YSCALEYUV2PACKEDX_END
1053 dd68318c Ramiro Polla
                } else {
1054 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_ACCURATE
1055
                    YSCALEYUV2RGBX
1056
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1057
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1058 2da0d70d Diego Biurrun
1059 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_END
1060 6858492e Cédric Schieli
                }
1061 14014d47 Michael Niedermayer
                return;
1062
            case PIX_FMT_BGR24:
1063
                YSCALEYUV2PACKEDX_ACCURATE
1064
                YSCALEYUV2RGBX
1065 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1066 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1067
                "add %4, %%"REG_c"                        \n\t"
1068
                WRITEBGR24(%%REGc, %5, %%REGa)
1069 2da0d70d Diego Biurrun
1070
1071 14014d47 Michael Niedermayer
                :: "r" (&c->redDither),
1072
                "m" (dummy), "m" (dummy), "m" (dummy),
1073
                "r" (dest), "m" (dstW)
1074
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1075
                );
1076
                return;
1077
            case PIX_FMT_RGB555:
1078
                YSCALEYUV2PACKEDX_ACCURATE
1079
                YSCALEYUV2RGBX
1080 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1081 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1082 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1083 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1084
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1085
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1086 2da0d70d Diego Biurrun
#endif
1087
1088 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1089
                YSCALEYUV2PACKEDX_END
1090
                return;
1091
            case PIX_FMT_RGB565:
1092
                YSCALEYUV2PACKEDX_ACCURATE
1093
                YSCALEYUV2RGBX
1094 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1095 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1096 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1097 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1098
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1099
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1100 2da0d70d Diego Biurrun
#endif
1101
1102 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1103
                YSCALEYUV2PACKEDX_END
1104
                return;
1105
            case PIX_FMT_YUYV422:
1106
                YSCALEYUV2PACKEDX_ACCURATE
1107
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1108
1109
                "psraw $3, %%mm3    \n\t"
1110
                "psraw $3, %%mm4    \n\t"
1111
                "psraw $3, %%mm1    \n\t"
1112
                "psraw $3, %%mm7    \n\t"
1113
                WRITEYUY2(%4, %5, %%REGa)
1114
                YSCALEYUV2PACKEDX_END
1115
                return;
1116
            }
1117 dd68318c Ramiro Polla
        } else {
1118
            switch(c->dstFormat) {
1119 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1120 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1121 6858492e Cédric Schieli
                    YSCALEYUV2PACKEDX
1122
                    YSCALEYUV2RGBX
1123
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1124
                    "psraw                        $3, %%mm1         \n\t"
1125
                    "psraw                        $3, %%mm7         \n\t"
1126
                    "packuswb                  %%mm7, %%mm1         \n\t"
1127
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1128
                    YSCALEYUV2PACKEDX_END
1129 dd68318c Ramiro Polla
                } else {
1130 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX
1131
                    YSCALEYUV2RGBX
1132
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1133
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1134
                    YSCALEYUV2PACKEDX_END
1135 6858492e Cédric Schieli
                }
1136 14014d47 Michael Niedermayer
                return;
1137
            case PIX_FMT_BGR24:
1138
                YSCALEYUV2PACKEDX
1139
                YSCALEYUV2RGBX
1140 40494418 Cédric Schieli
                "pxor                    %%mm7, %%mm7       \n\t"
1141 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1142
                "add                        %4, %%"REG_c"   \n\t"
1143
                WRITEBGR24(%%REGc, %5, %%REGa)
1144
1145
                :: "r" (&c->redDither),
1146
                "m" (dummy), "m" (dummy), "m" (dummy),
1147
                "r" (dest),  "m" (dstW)
1148
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1149
                );
1150
                return;
1151
            case PIX_FMT_RGB555:
1152
                YSCALEYUV2PACKEDX
1153
                YSCALEYUV2RGBX
1154 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1155 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1156 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1157 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1158
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1159
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1160 2da0d70d Diego Biurrun
#endif
1161
1162 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1163
                YSCALEYUV2PACKEDX_END
1164
                return;
1165
            case PIX_FMT_RGB565:
1166
                YSCALEYUV2PACKEDX
1167
                YSCALEYUV2RGBX
1168 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1169 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1170 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1171 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1172
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1173
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1174 2da0d70d Diego Biurrun
#endif
1175
1176 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1177
                YSCALEYUV2PACKEDX_END
1178
                return;
1179
            case PIX_FMT_YUYV422:
1180
                YSCALEYUV2PACKEDX
1181
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1182
1183
                "psraw $3, %%mm3    \n\t"
1184
                "psraw $3, %%mm4    \n\t"
1185
                "psraw $3, %%mm1    \n\t"
1186
                "psraw $3, %%mm7    \n\t"
1187
                WRITEYUY2(%4, %5, %%REGa)
1188
                YSCALEYUV2PACKEDX_END
1189
                return;
1190
            }
1191 bca11e75 Michael Niedermayer
        }
1192
    }
1193 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1194
#if COMPILE_TEMPLATE_ALTIVEC
1195 2da0d70d Diego Biurrun
    /* The following list of supported dstFormat values should
1196 780daf2b Diego Biurrun
       match what's found in the body of ff_yuv2packedX_altivec() */
1197 d55ef636 Reimar Döffinger
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
1198 9b734d44 Ramiro Polla
         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1199
          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1200
          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
1201 780daf2b Diego Biurrun
            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
1202
                                   chrFilter, chrSrc, chrFilterSize,
1203
                                   dest, dstW, dstY);
1204 2da0d70d Diego Biurrun
    else
1205
#endif
1206
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1207
                       chrFilter, chrSrc, chrFilterSize,
1208 6858492e Cédric Schieli
                       alpSrc, dest, dstW, dstY);
1209 c1b0bfb4 Michael Niedermayer
}
1210
1211
/**
1212
 * vertical bilinear scale YV12 to RGB
1213
 */
1214 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1215
                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1216 d604bab9 Michael Niedermayer
{
1217 ac0ad729 Michael Niedermayer
    int  yalpha1=4095- yalpha;
1218
    int uvalpha1=4095-uvalpha;
1219 2da0d70d Diego Biurrun
    int i;
1220 d604bab9 Michael Niedermayer
1221 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1222 dd68318c Ramiro Polla
    if(!(c->flags & SWS_BITEXACT)) {
1223
        switch(c->dstFormat) {
1224 c255994b Ramiro Polla
        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1225
        case PIX_FMT_RGB32:
1226
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1227 6858492e Cédric Schieli
#if ARCH_X86_64
1228 c255994b Ramiro Polla
                __asm__ volatile(
1229 6858492e Cédric Schieli
                    YSCALEYUV2RGB(%%REGBP, %5)
1230
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1231
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1232
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1233
                    "packuswb            %%mm7, %%mm1       \n\t"
1234 04ef1d3f Reimar Döffinger
                    WRITEBGR32(%4, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1235 6858492e Cédric Schieli
1236 04ef1d3f Reimar Döffinger
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
1237 6858492e Cédric Schieli
                    "a" (&c->redDither)
1238
                    ,"r" (abuf0), "r" (abuf1)
1239 04ef1d3f Reimar Döffinger
                    : "%"REG_BP
1240 c255994b Ramiro Polla
                );
1241 6858492e Cédric Schieli
#else
1242 c255994b Ramiro Polla
                *(uint16_t **)(&c->u_temp)=abuf0;
1243
                *(uint16_t **)(&c->v_temp)=abuf1;
1244
                __asm__ volatile(
1245 6858492e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1246
                    "mov        %4, %%"REG_b"               \n\t"
1247
                    "push %%"REG_BP"                        \n\t"
1248
                    YSCALEYUV2RGB(%%REGBP, %5)
1249
                    "push                   %0              \n\t"
1250
                    "push                   %1              \n\t"
1251
                    "mov          "U_TEMP"(%5), %0          \n\t"
1252
                    "mov          "V_TEMP"(%5), %1          \n\t"
1253
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1254
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1255
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1256
                    "packuswb            %%mm7, %%mm1       \n\t"
1257
                    "pop                    %1              \n\t"
1258
                    "pop                    %0              \n\t"
1259
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1260
                    "pop %%"REG_BP"                         \n\t"
1261
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1262
1263
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1264
                    "a" (&c->redDither)
1265 c255994b Ramiro Polla
                );
1266 6858492e Cédric Schieli
#endif
1267 c255994b Ramiro Polla
            } else {
1268
                __asm__ volatile(
1269 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1270
                    "mov        %4, %%"REG_b"               \n\t"
1271
                    "push %%"REG_BP"                        \n\t"
1272
                    YSCALEYUV2RGB(%%REGBP, %5)
1273
                    "pcmpeqd %%mm7, %%mm7                   \n\t"
1274
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1275
                    "pop %%"REG_BP"                         \n\t"
1276
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1277 2da0d70d Diego Biurrun
1278 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1279
                    "a" (&c->redDither)
1280 c255994b Ramiro Polla
                );
1281
            }
1282
            return;
1283
        case PIX_FMT_BGR24:
1284
            __asm__ volatile(
1285 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1286
                "mov        %4, %%"REG_b"               \n\t"
1287
                "push %%"REG_BP"                        \n\t"
1288
                YSCALEYUV2RGB(%%REGBP, %5)
1289 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1290 2da0d70d Diego Biurrun
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1291
                "pop %%"REG_BP"                         \n\t"
1292
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1293
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1294
                "a" (&c->redDither)
1295 c255994b Ramiro Polla
            );
1296
            return;
1297
        case PIX_FMT_RGB555:
1298
            __asm__ volatile(
1299 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1300
                "mov        %4, %%"REG_b"               \n\t"
1301
                "push %%"REG_BP"                        \n\t"
1302
                YSCALEYUV2RGB(%%REGBP, %5)
1303 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1304 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1305 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1306 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1307
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1308
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1309 2da0d70d Diego Biurrun
#endif
1310
1311 27a90b04 Michael Niedermayer
                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1312 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1313
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1314
1315
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1316
                "a" (&c->redDither)
1317 c255994b Ramiro Polla
            );
1318
            return;
1319
        case PIX_FMT_RGB565:
1320
            __asm__ volatile(
1321 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1322
                "mov        %4, %%"REG_b"               \n\t"
1323
                "push %%"REG_BP"                        \n\t"
1324
                YSCALEYUV2RGB(%%REGBP, %5)
1325 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1326 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1327 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1328 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1329
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1330
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1331 2da0d70d Diego Biurrun
#endif
1332
1333 27a90b04 Michael Niedermayer
                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1334 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1335
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1336
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1337
                "a" (&c->redDither)
1338 c255994b Ramiro Polla
            );
1339
            return;
1340
        case PIX_FMT_YUYV422:
1341
            __asm__ volatile(
1342 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1343
                "mov %4, %%"REG_b"                        \n\t"
1344
                "push %%"REG_BP"                        \n\t"
1345
                YSCALEYUV2PACKED(%%REGBP, %5)
1346
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1347
                "pop %%"REG_BP"                         \n\t"
1348
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1349
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1350
                "a" (&c->redDither)
1351 c255994b Ramiro Polla
            );
1352
            return;
1353
        default: break;
1354 2da0d70d Diego Biurrun
        }
1355 f433c8ab Michael Niedermayer
    }
1356 94daf2e9 Ramiro Polla
#endif //COMPILE_TEMPLATE_MMX
1357 9b734d44 Ramiro Polla
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1358 d604bab9 Michael Niedermayer
}
1359
1360
/**
1361
 * YV12 to RGB without scaling or interpolating
1362
 */
1363 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1364 b411dfff Carl Eugen Hoyos
                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
1365 d604bab9 Michael Niedermayer
{
1366 2da0d70d Diego Biurrun
    const int yalpha1=0;
1367
    int i;
1368 6a4970ab Diego Biurrun
1369 7ac40327 Ramiro Polla
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1370 2da0d70d Diego Biurrun
    const int yalpha= 4096; //FIXME ...
1371 96034638 Michael Niedermayer
1372 dd68318c Ramiro Polla
    if (flags&SWS_FULL_CHR_H_INT) {
1373 40fa5140 Ramiro Polla
        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
1374 2da0d70d Diego Biurrun
        return;
1375
    }
1376 397c035e Michael Niedermayer
1377 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1378 dd68318c Ramiro Polla
    if(!(flags & SWS_BITEXACT)) {
1379
        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1380
            switch(dstFormat) {
1381 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1382 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1383 6858492e Cédric Schieli
                    __asm__ volatile(
1384 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1385
                        "mov        %4, %%"REG_b"               \n\t"
1386
                        "push %%"REG_BP"                        \n\t"
1387
                        YSCALEYUV2RGB1(%%REGBP, %5)
1388
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
1389
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1390
                        "pop %%"REG_BP"                         \n\t"
1391
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1392
1393
                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1394
                        "a" (&c->redDither)
1395 6858492e Cédric Schieli
                    );
1396 dd68318c Ramiro Polla
                } else {
1397 3164d25e Cédric Schieli
                    __asm__ volatile(
1398 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1399
                        "mov        %4, %%"REG_b"               \n\t"
1400
                        "push %%"REG_BP"                        \n\t"
1401
                        YSCALEYUV2RGB1(%%REGBP, %5)
1402
                        "pcmpeqd %%mm7, %%mm7                   \n\t"
1403
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1404
                        "pop %%"REG_BP"                         \n\t"
1405
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1406
1407
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1408
                        "a" (&c->redDither)
1409
                    );
1410
                }
1411
                return;
1412
            case PIX_FMT_BGR24:
1413
                __asm__ volatile(
1414 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1415
                    "mov        %4, %%"REG_b"               \n\t"
1416
                    "push %%"REG_BP"                        \n\t"
1417
                    YSCALEYUV2RGB1(%%REGBP, %5)
1418 c255994b Ramiro Polla
                    "pxor    %%mm7, %%mm7                   \n\t"
1419
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1420 3164d25e Cédric Schieli
                    "pop %%"REG_BP"                         \n\t"
1421
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1422 14014d47 Michael Niedermayer
1423 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1424
                    "a" (&c->redDither)
1425 14014d47 Michael Niedermayer
                );
1426
                return;
1427
            case PIX_FMT_RGB555:
1428 7ad6469e Diego Pettenò
                __asm__ volatile(
1429 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1430
                    "mov        %4, %%"REG_b"               \n\t"
1431
                    "push %%"REG_BP"                        \n\t"
1432
                    YSCALEYUV2RGB1(%%REGBP, %5)
1433
                    "pxor    %%mm7, %%mm7                   \n\t"
1434
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1435 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1436 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1437
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1438
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1439 2da0d70d Diego Biurrun
#endif
1440 c255994b Ramiro Polla
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1441
                    "pop %%"REG_BP"                         \n\t"
1442
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1443 2da0d70d Diego Biurrun
1444 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1445
                    "a" (&c->redDither)
1446 14014d47 Michael Niedermayer
                );
1447
                return;
1448
            case PIX_FMT_RGB565:
1449 7ad6469e Diego Pettenò
                __asm__ volatile(
1450 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1451
                    "mov        %4, %%"REG_b"               \n\t"
1452
                    "push %%"REG_BP"                        \n\t"
1453
                    YSCALEYUV2RGB1(%%REGBP, %5)
1454
                    "pxor    %%mm7, %%mm7                   \n\t"
1455
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1456 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1457 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1458
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1459
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1460 2da0d70d Diego Biurrun
#endif
1461
1462 c255994b Ramiro Polla
                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1463
                    "pop %%"REG_BP"                         \n\t"
1464
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1465 2da0d70d Diego Biurrun
1466 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1467
                    "a" (&c->redDither)
1468 14014d47 Michael Niedermayer
                );
1469
                return;
1470
            case PIX_FMT_YUYV422:
1471 7ad6469e Diego Pettenò
                __asm__ volatile(
1472 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1473
                    "mov        %4, %%"REG_b"               \n\t"
1474
                    "push %%"REG_BP"                        \n\t"
1475
                    YSCALEYUV2PACKED1(%%REGBP, %5)
1476
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1477
                    "pop %%"REG_BP"                         \n\t"
1478
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1479 14014d47 Michael Niedermayer
1480 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1481
                    "a" (&c->redDither)
1482 14014d47 Michael Niedermayer
                );
1483
                return;
1484
            }
1485 dd68318c Ramiro Polla
        } else {
1486
            switch(dstFormat) {
1487 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1488 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1489 6858492e Cédric Schieli
                    __asm__ volatile(
1490 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1491
                        "mov        %4, %%"REG_b"               \n\t"
1492
                        "push %%"REG_BP"                        \n\t"
1493
                        YSCALEYUV2RGB1b(%%REGBP, %5)
1494
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
1495
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1496
                        "pop %%"REG_BP"                         \n\t"
1497
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1498
1499
                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1500
                        "a" (&c->redDither)
1501 6858492e Cédric Schieli
                    );
1502 dd68318c Ramiro Polla
                } else {
1503 3164d25e Cédric Schieli
                    __asm__ volatile(
1504 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1505
                        "mov        %4, %%"REG_b"               \n\t"
1506
                        "push %%"REG_BP"                        \n\t"
1507
                        YSCALEYUV2RGB1b(%%REGBP, %5)
1508
                        "pcmpeqd %%mm7, %%mm7                   \n\t"
1509
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1510
                        "pop %%"REG_BP"                         \n\t"
1511
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1512
1513
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1514
                        "a" (&c->redDither)
1515
                    );
1516
                }
1517
                return;
1518
            case PIX_FMT_BGR24:
1519
                __asm__ volatile(
1520 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1521
                    "mov        %4, %%"REG_b"               \n\t"
1522
                    "push %%"REG_BP"                        \n\t"
1523
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1524 c255994b Ramiro Polla
                    "pxor    %%mm7, %%mm7                   \n\t"
1525
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1526 3164d25e Cédric Schieli
                    "pop %%"REG_BP"                         \n\t"
1527
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1528 14014d47 Michael Niedermayer
1529 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1530
                    "a" (&c->redDither)
1531 14014d47 Michael Niedermayer
                );
1532
                return;
1533
            case PIX_FMT_RGB555:
1534 7ad6469e Diego Pettenò
                __asm__ volatile(
1535 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1536
                    "mov        %4, %%"REG_b"               \n\t"
1537
                    "push %%"REG_BP"                        \n\t"
1538
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1539
                    "pxor    %%mm7, %%mm7                   \n\t"
1540
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1541 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1542 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1543
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1544
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1545 2da0d70d Diego Biurrun
#endif
1546 c255994b Ramiro Polla
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1547
                    "pop %%"REG_BP"                         \n\t"
1548
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1549 2da0d70d Diego Biurrun
1550 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1551
                    "a" (&c->redDither)
1552 14014d47 Michael Niedermayer
                );
1553
                return;
1554
            case PIX_FMT_RGB565:
1555 7ad6469e Diego Pettenò
                __asm__ volatile(
1556 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1557
                    "mov        %4, %%"REG_b"               \n\t"
1558
                    "push %%"REG_BP"                        \n\t"
1559
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1560
                    "pxor    %%mm7, %%mm7                   \n\t"
1561
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1562 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1563 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1564
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1565
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1566 2da0d70d Diego Biurrun
#endif
1567
1568 c255994b Ramiro Polla
                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1569
                    "pop %%"REG_BP"                         \n\t"
1570
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1571 2da0d70d Diego Biurrun
1572 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1573
                    "a" (&c->redDither)
1574 14014d47 Michael Niedermayer
                );
1575
                return;
1576
            case PIX_FMT_YUYV422:
1577 7ad6469e Diego Pettenò
                __asm__ volatile(
1578 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1579
                    "mov        %4, %%"REG_b"               \n\t"
1580
                    "push %%"REG_BP"                        \n\t"
1581
                    YSCALEYUV2PACKED1b(%%REGBP, %5)
1582
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1583
                    "pop %%"REG_BP"                         \n\t"
1584
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1585 14014d47 Michael Niedermayer
1586 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1587
                    "a" (&c->redDither)
1588 14014d47 Michael Niedermayer
                );
1589
                return;
1590
            }
1591 2da0d70d Diego Biurrun
        }
1592
    }
1593 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1594 dd68318c Ramiro Polla
    if (uvalpha < 2048) {
1595 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1596 dd68318c Ramiro Polla
    } else {
1597 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1598 2da0d70d Diego Biurrun
    }
1599 d604bab9 Michael Niedermayer
}
1600
1601 8a322796 Diego Biurrun
//FIXME yuy2* can read up to 7 samples too many (past the end of the input line)
1602 6ff0ad6b Michael Niedermayer
1603 7ac40327 Ramiro Polla
static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1604 1e621b18 Michael Niedermayer
{
1605 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1606 7ad6469e Diego Pettenò
    __asm__ volatile(
1607 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm2           \n\t"
1608
        "mov                    %0, %%"REG_a"       \n\t"
1609
        "1:                                         \n\t"
1610
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1611
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1612
        "pand                %%mm2, %%mm0           \n\t"
1613
        "pand                %%mm2, %%mm1           \n\t"
1614
        "packuswb            %%mm1, %%mm0           \n\t"
1615
        "movq                %%mm0, (%2, %%"REG_a") \n\t"
1616
        "add                    $8, %%"REG_a"       \n\t"
1617
        " js                    1b                  \n\t"
1618
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1619
        : "%"REG_a
1620 2da0d70d Diego Biurrun
    );
1621 1e621b18 Michael Niedermayer
#else
1622 2da0d70d Diego Biurrun
    int i;
1623
    for (i=0; i<width; i++)
1624
        dst[i]= src[2*i];
1625 1e621b18 Michael Niedermayer
#endif
1626
}
1627
1628 7ac40327 Ramiro Polla
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1629 1e621b18 Michael Niedermayer
{
1630 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1631 7ad6469e Diego Pettenò
    __asm__ volatile(
1632 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1633
        "mov                    %0, %%"REG_a"       \n\t"
1634
        "1:                                         \n\t"
1635
        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1636
        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1637
        "psrlw                  $8, %%mm0           \n\t"
1638
        "psrlw                  $8, %%mm1           \n\t"
1639
        "packuswb            %%mm1, %%mm0           \n\t"
1640
        "movq                %%mm0, %%mm1           \n\t"
1641
        "psrlw                  $8, %%mm0           \n\t"
1642
        "pand                %%mm4, %%mm1           \n\t"
1643
        "packuswb            %%mm0, %%mm0           \n\t"
1644
        "packuswb            %%mm1, %%mm1           \n\t"
1645
        "movd                %%mm0, (%3, %%"REG_a") \n\t"
1646
        "movd                %%mm1, (%2, %%"REG_a") \n\t"
1647
        "add                    $4, %%"REG_a"       \n\t"
1648
        " js                    1b                  \n\t"
1649
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1650
        : "%"REG_a
1651 2da0d70d Diego Biurrun
    );
1652 1e621b18 Michael Niedermayer
#else
1653 2da0d70d Diego Biurrun
    int i;
1654 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1655 2da0d70d Diego Biurrun
        dstU[i]= src1[4*i + 1];
1656
        dstV[i]= src1[4*i + 3];
1657
    }
1658
#endif
1659
    assert(src1 == src2);
1660 1e621b18 Michael Niedermayer
}
1661
1662 de1275d5 Michael Niedermayer
static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1663
{
1664 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1665 de1275d5 Michael Niedermayer
    __asm__ volatile(
1666 c255994b Ramiro Polla
        "mov                    %0, %%"REG_a"       \n\t"
1667
        "1:                                         \n\t"
1668
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1669
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1670
        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
1671
        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
1672
        "psrlw                  $8, %%mm0           \n\t"
1673
        "psrlw                  $8, %%mm1           \n\t"
1674
        "psrlw                  $8, %%mm2           \n\t"
1675
        "psrlw                  $8, %%mm3           \n\t"
1676
        "packuswb            %%mm1, %%mm0           \n\t"
1677
        "packuswb            %%mm3, %%mm2           \n\t"
1678
        "movq                %%mm0, (%3, %%"REG_a") \n\t"
1679
        "movq                %%mm2, (%4, %%"REG_a") \n\t"
1680
        "add                    $8, %%"REG_a"       \n\t"
1681
        " js                    1b                  \n\t"
1682
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1683
        : "%"REG_a
1684 de1275d5 Michael Niedermayer
    );
1685
#else
1686
    int i;
1687 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1688 de1275d5 Michael Niedermayer
        dstU[i]= src1[2*i + 1];
1689
        dstV[i]= src2[2*i + 1];
1690
    }
1691
#endif
1692
}
1693
1694 4cf16bbe Diego Biurrun
/* This is almost identical to the previous, and exists only because
 * RENAME(yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses. */
1696 7ac40327 Ramiro Polla
static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1697 7322a67c Michael Niedermayer
{
1698 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1699 7ad6469e Diego Pettenò
    __asm__ volatile(
1700 c255994b Ramiro Polla
        "mov                  %0, %%"REG_a"         \n\t"
1701
        "1:                                         \n\t"
1702
        "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
1703
        "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
1704
        "psrlw                $8, %%mm0             \n\t"
1705
        "psrlw                $8, %%mm1             \n\t"
1706
        "packuswb          %%mm1, %%mm0             \n\t"
1707
        "movq              %%mm0, (%2, %%"REG_a")   \n\t"
1708
        "add                  $8, %%"REG_a"         \n\t"
1709
        " js                  1b                    \n\t"
1710
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1711
        : "%"REG_a
1712 2da0d70d Diego Biurrun
    );
1713 7322a67c Michael Niedermayer
#else
1714 2da0d70d Diego Biurrun
    int i;
1715
    for (i=0; i<width; i++)
1716
        dst[i]= src[2*i+1];
1717 7322a67c Michael Niedermayer
#endif
1718
}
1719
1720 7ac40327 Ramiro Polla
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1721 7322a67c Michael Niedermayer
{
1722 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1723 7ad6469e Diego Pettenò
    __asm__ volatile(
1724 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1725
        "mov                    %0, %%"REG_a"       \n\t"
1726
        "1:                                         \n\t"
1727
        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1728
        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1729
        "pand                %%mm4, %%mm0           \n\t"
1730
        "pand                %%mm4, %%mm1           \n\t"
1731
        "packuswb            %%mm1, %%mm0           \n\t"
1732
        "movq                %%mm0, %%mm1           \n\t"
1733
        "psrlw                  $8, %%mm0           \n\t"
1734
        "pand                %%mm4, %%mm1           \n\t"
1735
        "packuswb            %%mm0, %%mm0           \n\t"
1736
        "packuswb            %%mm1, %%mm1           \n\t"
1737
        "movd                %%mm0, (%3, %%"REG_a") \n\t"
1738
        "movd                %%mm1, (%2, %%"REG_a") \n\t"
1739
        "add                    $4, %%"REG_a"       \n\t"
1740
        " js                    1b                  \n\t"
1741
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1742
        : "%"REG_a
1743 2da0d70d Diego Biurrun
    );
1744 7322a67c Michael Niedermayer
#else
1745 2da0d70d Diego Biurrun
    int i;
1746 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1747 2da0d70d Diego Biurrun
        dstU[i]= src1[4*i + 0];
1748
        dstV[i]= src1[4*i + 2];
1749
    }
1750
#endif
1751
    assert(src1 == src2);
1752 7322a67c Michael Niedermayer
}
1753
1754 de1275d5 Michael Niedermayer
static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1755
{
1756 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1757 de1275d5 Michael Niedermayer
    __asm__ volatile(
1758 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1759
        "mov                    %0, %%"REG_a"       \n\t"
1760
        "1:                                         \n\t"
1761
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1762
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1763
        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
1764
        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
1765
        "pand                %%mm4, %%mm0           \n\t"
1766
        "pand                %%mm4, %%mm1           \n\t"
1767
        "pand                %%mm4, %%mm2           \n\t"
1768
        "pand                %%mm4, %%mm3           \n\t"
1769
        "packuswb            %%mm1, %%mm0           \n\t"
1770
        "packuswb            %%mm3, %%mm2           \n\t"
1771
        "movq                %%mm0, (%3, %%"REG_a") \n\t"
1772
        "movq                %%mm2, (%4, %%"REG_a") \n\t"
1773
        "add                    $8, %%"REG_a"       \n\t"
1774
        " js                    1b                  \n\t"
1775
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1776
        : "%"REG_a
1777 de1275d5 Michael Niedermayer
    );
1778
#else
1779
    int i;
1780 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1781 de1275d5 Michael Niedermayer
        dstU[i]= src1[2*i];
1782
        dstV[i]= src2[2*i];
1783
    }
1784
#endif
1785
}
1786
1787 f415be68 Ramiro Polla
static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
1788
                                    const uint8_t *src, long width)
1789
{
1790
#if COMPILE_TEMPLATE_MMX
1791
    __asm__ volatile(
1792
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1793
        "mov                    %0, %%"REG_a"       \n\t"
1794
        "1:                                         \n\t"
1795
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1796
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1797
        "movq                %%mm0, %%mm2           \n\t"
1798
        "movq                %%mm1, %%mm3           \n\t"
1799
        "pand                %%mm4, %%mm0           \n\t"
1800
        "pand                %%mm4, %%mm1           \n\t"
1801
        "psrlw                  $8, %%mm2           \n\t"
1802
        "psrlw                  $8, %%mm3           \n\t"
1803
        "packuswb            %%mm1, %%mm0           \n\t"
1804
        "packuswb            %%mm3, %%mm2           \n\t"
1805
        "movq                %%mm0, (%2, %%"REG_a") \n\t"
1806
        "movq                %%mm2, (%3, %%"REG_a") \n\t"
1807
        "add                    $8, %%"REG_a"       \n\t"
1808
        " js                    1b                  \n\t"
1809
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
1810
        : "%"REG_a
1811
    );
1812
#else
1813
    int i;
1814
    for (i = 0; i < width; i++) {
1815
        dst1[i] = src[2*i+0];
1816
        dst2[i] = src[2*i+1];
1817
    }
1818
#endif
1819
}
1820
1821 e470691a Ramiro Polla
static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
1822
                                    const uint8_t *src1, const uint8_t *src2,
1823
                                    long width, uint32_t *unused)
1824 f415be68 Ramiro Polla
{
1825
    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
1826
}
1827
1828 e470691a Ramiro Polla
static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
1829
                                    const uint8_t *src1, const uint8_t *src2,
1830
                                    long width, uint32_t *unused)
1831 f415be68 Ramiro Polla
{
1832
    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
1833
}
1834
1835 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1836 b411dfff Carl Eugen Hoyos
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
1837 dfb09bd1 Michael Niedermayer
{
1838
1839 dd68318c Ramiro Polla
    if(srcFormat == PIX_FMT_BGR24) {
1840 7ad6469e Diego Pettenò
        __asm__ volatile(
1841 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
1842
            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
1843
            :
1844 dfb09bd1 Michael Niedermayer
        );
1845 dd68318c Ramiro Polla
    } else {
1846 7ad6469e Diego Pettenò
        __asm__ volatile(
1847 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
1848
            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
1849
            :
1850 dfb09bd1 Michael Niedermayer
        );
1851
    }
1852
1853 7ad6469e Diego Pettenò
    __asm__ volatile(
1854 dfb09bd1 Michael Niedermayer
        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
1855
        "mov                        %2, %%"REG_a"   \n\t"
1856
        "pxor                    %%mm7, %%mm7       \n\t"
1857
        "1:                                         \n\t"
1858
        PREFETCH"               64(%0)              \n\t"
1859
        "movd                     (%0), %%mm0       \n\t"
1860
        "movd                    2(%0), %%mm1       \n\t"
1861
        "movd                    6(%0), %%mm2       \n\t"
1862
        "movd                    8(%0), %%mm3       \n\t"
1863
        "add                       $12, %0          \n\t"
1864
        "punpcklbw               %%mm7, %%mm0       \n\t"
1865
        "punpcklbw               %%mm7, %%mm1       \n\t"
1866
        "punpcklbw               %%mm7, %%mm2       \n\t"
1867
        "punpcklbw               %%mm7, %%mm3       \n\t"
1868
        "pmaddwd                 %%mm5, %%mm0       \n\t"
1869
        "pmaddwd                 %%mm6, %%mm1       \n\t"
1870
        "pmaddwd                 %%mm5, %%mm2       \n\t"
1871
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1872
        "paddd                   %%mm1, %%mm0       \n\t"
1873
        "paddd                   %%mm3, %%mm2       \n\t"
1874
        "paddd                   %%mm4, %%mm0       \n\t"
1875
        "paddd                   %%mm4, %%mm2       \n\t"
1876
        "psrad                     $15, %%mm0       \n\t"
1877
        "psrad                     $15, %%mm2       \n\t"
1878
        "packssdw                %%mm2, %%mm0       \n\t"
1879
        "packuswb                %%mm0, %%mm0       \n\t"
1880
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1881
        "add                        $4, %%"REG_a"   \n\t"
1882
        " js                        1b              \n\t"
1883
    : "+r" (src)
1884 d0ce212a Ramiro Polla
    : "r" (dst+width), "g" ((x86_reg)-width)
1885 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1886 2da0d70d Diego Biurrun
    );
1887 dfb09bd1 Michael Niedermayer
}
1888
1889 b411dfff Carl Eugen Hoyos
static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
1890 dfb09bd1 Michael Niedermayer
{
1891 7ad6469e Diego Pettenò
    __asm__ volatile(
1892 dfb09bd1 Michael Niedermayer
        "movq                    24+%4, %%mm6       \n\t"
1893
        "mov                        %3, %%"REG_a"   \n\t"
1894
        "pxor                    %%mm7, %%mm7       \n\t"
1895
        "1:                                         \n\t"
1896
        PREFETCH"               64(%0)              \n\t"
1897
        "movd                     (%0), %%mm0       \n\t"
1898
        "movd                    2(%0), %%mm1       \n\t"
1899
        "punpcklbw               %%mm7, %%mm0       \n\t"
1900
        "punpcklbw               %%mm7, %%mm1       \n\t"
1901
        "movq                    %%mm0, %%mm2       \n\t"
1902
        "movq                    %%mm1, %%mm3       \n\t"
1903
        "pmaddwd                    %4, %%mm0       \n\t"
1904
        "pmaddwd                  8+%4, %%mm1       \n\t"
1905
        "pmaddwd                 16+%4, %%mm2       \n\t"
1906
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1907
        "paddd                   %%mm1, %%mm0       \n\t"
1908
        "paddd                   %%mm3, %%mm2       \n\t"
1909
1910
        "movd                    6(%0), %%mm1       \n\t"
1911
        "movd                    8(%0), %%mm3       \n\t"
1912
        "add                       $12, %0          \n\t"
1913
        "punpcklbw               %%mm7, %%mm1       \n\t"
1914
        "punpcklbw               %%mm7, %%mm3       \n\t"
1915
        "movq                    %%mm1, %%mm4       \n\t"
1916
        "movq                    %%mm3, %%mm5       \n\t"
1917
        "pmaddwd                    %4, %%mm1       \n\t"
1918
        "pmaddwd                  8+%4, %%mm3       \n\t"
1919
        "pmaddwd                 16+%4, %%mm4       \n\t"
1920
        "pmaddwd                 %%mm6, %%mm5       \n\t"
1921
        "paddd                   %%mm3, %%mm1       \n\t"
1922
        "paddd                   %%mm5, %%mm4       \n\t"
1923
1924
        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
1925
        "paddd                   %%mm3, %%mm0       \n\t"
1926
        "paddd                   %%mm3, %%mm2       \n\t"
1927
        "paddd                   %%mm3, %%mm1       \n\t"
1928
        "paddd                   %%mm3, %%mm4       \n\t"
1929
        "psrad                     $15, %%mm0       \n\t"
1930
        "psrad                     $15, %%mm2       \n\t"
1931
        "psrad                     $15, %%mm1       \n\t"
1932
        "psrad                     $15, %%mm4       \n\t"
1933
        "packssdw                %%mm1, %%mm0       \n\t"
1934
        "packssdw                %%mm4, %%mm2       \n\t"
1935
        "packuswb                %%mm0, %%mm0       \n\t"
1936
        "packuswb                %%mm2, %%mm2       \n\t"
1937
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1938
        "movd                %%mm2, (%2, %%"REG_a") \n\t"
1939
        "add                        $4, %%"REG_a"   \n\t"
1940
        " js                        1b              \n\t"
1941
    : "+r" (src)
1942 d0ce212a Ramiro Polla
    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
1943 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1944
    );
1945
}
1946
#endif
1947
1948 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1949 dfb09bd1 Michael Niedermayer
{
1950 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1951 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
1952 1e621b18 Michael Niedermayer
#else
1953 2da0d70d Diego Biurrun
    int i;
1954 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1955 2da0d70d Diego Biurrun
        int b= src[i*3+0];
1956
        int g= src[i*3+1];
1957
        int r= src[i*3+2];
1958 1e621b18 Michael Niedermayer
1959 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1960 2da0d70d Diego Biurrun
    }
1961 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1962 1e621b18 Michael Niedermayer
}
1963
1964 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1965 1e621b18 Michael Niedermayer
{
1966 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1967 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
1968 1e621b18 Michael Niedermayer
#else
1969 2da0d70d Diego Biurrun
    int i;
1970 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1971 dfb09bd1 Michael Niedermayer
        int b= src1[3*i + 0];
1972
        int g= src1[3*i + 1];
1973
        int r= src1[3*i + 2];
1974 2da0d70d Diego Biurrun
1975 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1976
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1977 2da0d70d Diego Biurrun
    }
1978 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1979 2da0d70d Diego Biurrun
    assert(src1 == src2);
1980 1e621b18 Michael Niedermayer
}
1981
1982 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1983 2f60f629 Michael Niedermayer
{
1984
    int i;
1985 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1986 2f60f629 Michael Niedermayer
        int b= src1[6*i + 0] + src1[6*i + 3];
1987
        int g= src1[6*i + 1] + src1[6*i + 4];
1988
        int r= src1[6*i + 2] + src1[6*i + 5];
1989
1990
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1991
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1992
    }
1993
    assert(src1 == src2);
1994
}
1995
1996 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1997 a861d4d7 Michael Niedermayer
{
1998 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1999 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
2000 dfb09bd1 Michael Niedermayer
#else
2001 2da0d70d Diego Biurrun
    int i;
2002 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
2003 2da0d70d Diego Biurrun
        int r= src[i*3+0];
2004
        int g= src[i*3+1];
2005
        int b= src[i*3+2];
2006
2007 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2008 2da0d70d Diego Biurrun
    }
2009 dfb09bd1 Michael Niedermayer
#endif
2010 a861d4d7 Michael Niedermayer
}
2011
2012 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2013 a861d4d7 Michael Niedermayer
{
2014 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
2015 5155b839 Diego Biurrun
    assert(src1==src2);
2016 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
2017 dfb09bd1 Michael Niedermayer
#else
2018 5155b839 Diego Biurrun
    int i;
2019
    assert(src1==src2);
2020 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
2021 dfb09bd1 Michael Niedermayer
        int r= src1[3*i + 0];
2022
        int g= src1[3*i + 1];
2023
        int b= src1[3*i + 2];
2024 2da0d70d Diego Biurrun
2025 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2026
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2027 2da0d70d Diego Biurrun
    }
2028 dfb09bd1 Michael Niedermayer
#endif
2029 a861d4d7 Michael Niedermayer
}
2030
2031 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2032 2f60f629 Michael Niedermayer
{