Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale_template.c @ 22b6a24c

History | View | Annotate | Download (137 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 8a322796 Diego Biurrun
 * The C code (not assembly, MMX, ...) of this file can be used
21
 * under the LGPL license.
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 6e1c66bc Aurelien Jacobs
/* Drop any earlier definitions of the helper macros that are (re)defined
 * below. Presumably needed because this template is included more than once
 * with different COMPILE_TEMPLATE_* settings -- confirm against the includer. */
#undef REAL_MOVNTQ
25 541c4eb9 Michael Niedermayer
#undef MOVNTQ
26 7d7f78b5 Michael Niedermayer
#undef PAVGB
27 48a05cec Michael Niedermayer
#undef PREFETCH
28
29 94daf2e9 Ramiro Polla
/* PREFETCH: mnemonic (as a string literal) used for prefetching.
 *   3DNow! build -> "prefetch"
 *   MMX2 build   -> "prefetchnta"
 *   otherwise    -> " # nop", i.e. no instruction mnemonic at all
 *                   (presumably parsed as an assembler comment -- confirm
 *                   for the assemblers in use). */
#if COMPILE_TEMPLATE_AMD3DNOW
30 48a05cec Michael Niedermayer
#define PREFETCH  "prefetch"
31 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_MMX2
32 48a05cec Michael Niedermayer
#define PREFETCH "prefetchnta"
33
#else
34 d904b5fc Nigel Pearson
#define PREFETCH  " # nop"
35 48a05cec Michael Niedermayer
#endif
36
37 94daf2e9 Ramiro Polla
/* PAVGB(a,b): emits one asm line "<insn> a, b" where <insn> is "pavgb" for
 * MMX2 builds and "pavgusb" for 3DNow! builds. Both arguments are
 * stringified as written. When neither flavour is enabled the macro is
 * left undefined, so it must only be used in code compiled for one of them. */
#if COMPILE_TEMPLATE_MMX2
38 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
39 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_AMD3DNOW
40 d604bab9 Michael Niedermayer
#define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
41
#endif
42 d3f41512 Michael Niedermayer
43 94daf2e9 Ramiro Polla
/* MOVNTQ(a,b): emits one asm line storing a to b, using "movntq" on MMX2
 * builds and plain "movq" otherwise.
 * REAL_MOVNTQ stringifies its arguments with #; MOVNTQ is the extra level of
 * indirection that lets the preprocessor macro-expand the arguments first
 * (the users below pass "%%REGa", where REGa is presumably a macro defined
 * elsewhere). */
#if COMPILE_TEMPLATE_MMX2
44 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
45 d604bab9 Michael Niedermayer
#else
46 6e1c66bc Aurelien Jacobs
#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
47 d604bab9 Michael Niedermayer
#endif
48 6e1c66bc Aurelien Jacobs
#define MOVNTQ(a,b)  REAL_MOVNTQ(a,b)
49 d604bab9 Michael Niedermayer
50 94daf2e9 Ramiro Polla
/* Textually include the AltiVec template when COMPILE_TEMPLATE_ALTIVEC is
 * set for this instantiation. */
#if COMPILE_TEMPLATE_ALTIVEC
51 009d2d74 Diego Biurrun
#include "ppc/swscale_altivec_template.c"
52 a2faa401 Romain Dolbeau
#endif
53
54 bca11e75 Michael Niedermayer
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Complete __asm__ statement: multi-tap filter over several source lines,
 * producing 8 output bytes per outer iteration.
 *
 *   x      string literal spliced in as an extra displacement on the
 *          source address (the second load uses "8+" x).
 *   offset string literal; displacement from %0 to the filter list.
 *   dest   output pointer (operand %1); 8 bytes are stored at dest + REG_a.
 *   width  loop bound (operand %2); the outer loop repeats while
 *          REG_a < width (unsigned compare, "jb"), REG_a advancing by 8.
 *
 * Operand %0 is &c->redDither, so a variable named c must be in scope;
 * VROUNDER_OFFSET and offset are displacements relative to that address.
 *
 * Filter list layout as used here: 16-byte entries, source pointer at +0,
 * coefficient vector at +8; the list ends at an entry whose pointer is 0.
 * Source data is read as 16-bit words (index scaled by 2).
 *
 * Per output group: mm3/mm4 start from the rounder value, accumulate
 * pmulhw(src, coeff) for every tap, are shifted right by 3 and packed to
 * unsigned bytes with saturation.
 *
 * Both the inner (per-tap) and the outer (per-8-pixels) loop branch to the
 * same label "1"; the outer loop re-initialises the accumulators and list
 * pointer between "cmp" and "jb", using only instructions that leave the
 * flags untouched.
 *
 * Clobbers REG_a, REG_d, REG_S; also overwrites mm0, mm2-mm5.
 * NOTE(review): the statement stores through dest but declares neither an
 * output operand nor a "memory" clobber -- confirm this is safe for callers.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
55 7ad6469e Diego Pettenò
    __asm__ volatile(\
56 c255994b Ramiro Polla
        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
57
        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
58
        "movq                             %%mm3, %%mm4      \n\t"\
59
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
60
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
61
        ASMALIGN(4) /* FIXME Unroll? */\
62
        "1:                                                 \n\t"\
63
        "movq                      8(%%"REG_d"), %%mm0      \n\t" /* filterCoeff */\
64
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
65
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm5      \n\t" /* srcData */\
66
        "add                                $16, %%"REG_d"  \n\t"\
67
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
68
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
69
        "pmulhw                           %%mm0, %%mm2      \n\t"\
70
        "pmulhw                           %%mm0, %%mm5      \n\t"\
71
        "paddw                            %%mm2, %%mm3      \n\t"\
72
        "paddw                            %%mm5, %%mm4      \n\t"\
73
        " jnz                                1b             \n\t"\
74
        "psraw                               $3, %%mm3      \n\t"\
75
        "psraw                               $3, %%mm4      \n\t"\
76
        "packuswb                         %%mm4, %%mm3      \n\t"\
77
        MOVNTQ(%%mm3, (%1, %%REGa))\
78
        "add                                 $8, %%"REG_a"  \n\t"\
79
        "cmp                                 %2, %%"REG_a"  \n\t"\
80
        "movq             "VROUNDER_OFFSET"(%0), %%mm3      \n\t"\
81
        "movq                             %%mm3, %%mm4      \n\t"\
82
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
83
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
84
        "jb                                  1b             \n\t"\
85
        :: "r" (&c->redDither),\
86
        "r" (dest), "g" (width)\
87
        : "%"REG_a, "%"REG_d, "%"REG_S\
88 2da0d70d Diego Biurrun
    );
89 bca11e75 Michael Niedermayer
90
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same interface and operands as YSCALEYUV2YV12X (x/offset are string
 * literals, %0 = &c->redDither, %1 = dest, %2 = width), but the taps are
 * accumulated in 32 bits.
 *
 * Each inner iteration consumes one packed filter entry covering two source
 * lines: first pointer at +0, second pointer at APCK_PTR2, coefficient
 * vector at APCK_COEF, entry size APCK_SIZE. Words from the two lines are
 * interleaved (punpcklwd/punpckhwd) and multiplied-and-added with pmaddwd;
 * the dword sums live in mm4..mm7. The list ends when the pointer at the
 * start of the next entry is 0.
 *
 * After the last tap the sums are shifted right by 16, packed to words with
 * signed saturation, the rounder at VROUNDER_OFFSET(%0) is added, the result
 * is shifted right by 3, packed to unsigned bytes and 8 bytes are stored at
 * dest + REG_a. The outer loop then advances REG_a by 8 and repeats while
 * REG_a < width (unsigned); inner and outer loops share label "1".
 *
 * Clobbers REG_a, REG_d, REG_S; overwrites mm0..mm7.
 * NOTE(review): as with YSCALEYUV2YV12X, memory is written without a
 * "memory" clobber or output operand.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
91 7ad6469e Diego Pettenò
    __asm__ volatile(\
92 c255994b Ramiro Polla
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
93
        "xor                          %%"REG_a", %%"REG_a"  \n\t"\
94
        "pxor                             %%mm4, %%mm4      \n\t"\
95
        "pxor                             %%mm5, %%mm5      \n\t"\
96
        "pxor                             %%mm6, %%mm6      \n\t"\
97
        "pxor                             %%mm7, %%mm7      \n\t"\
98
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
99
        ASMALIGN(4) \
100
        "1:                                                 \n\t"\
101
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm0      \n\t" /* srcData */\
102
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm2      \n\t" /* srcData */\
103
        "mov        "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"  \n\t"\
104
        "movq   "  x "(%%"REG_S", %%"REG_a", 2), %%mm1      \n\t" /* srcData */\
105
        "movq                             %%mm0, %%mm3      \n\t"\
106
        "punpcklwd                        %%mm1, %%mm0      \n\t"\
107
        "punpckhwd                        %%mm1, %%mm3      \n\t"\
108
        "movq       "STR(APCK_COEF)"(%%"REG_d"), %%mm1      \n\t" /* filterCoeff */\
109
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
110
        "pmaddwd                          %%mm1, %%mm3      \n\t"\
111
        "paddd                            %%mm0, %%mm4      \n\t"\
112
        "paddd                            %%mm3, %%mm5      \n\t"\
113
        "movq 8+"  x "(%%"REG_S", %%"REG_a", 2), %%mm3      \n\t" /* srcData */\
114
        "mov        "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"  \n\t"\
115
        "add                  $"STR(APCK_SIZE)", %%"REG_d"  \n\t"\
116
        "test                         %%"REG_S", %%"REG_S"  \n\t"\
117
        "movq                             %%mm2, %%mm0      \n\t"\
118
        "punpcklwd                        %%mm3, %%mm2      \n\t"\
119
        "punpckhwd                        %%mm3, %%mm0      \n\t"\
120
        "pmaddwd                          %%mm1, %%mm2      \n\t"\
121
        "pmaddwd                          %%mm1, %%mm0      \n\t"\
122
        "paddd                            %%mm2, %%mm6      \n\t"\
123
        "paddd                            %%mm0, %%mm7      \n\t"\
124
        " jnz                                1b             \n\t"\
125
        "psrad                              $16, %%mm4      \n\t"\
126
        "psrad                              $16, %%mm5      \n\t"\
127
        "psrad                              $16, %%mm6      \n\t"\
128
        "psrad                              $16, %%mm7      \n\t"\
129
        "movq             "VROUNDER_OFFSET"(%0), %%mm0      \n\t"\
130
        "packssdw                         %%mm5, %%mm4      \n\t"\
131
        "packssdw                         %%mm7, %%mm6      \n\t"\
132
        "paddw                            %%mm0, %%mm4      \n\t"\
133
        "paddw                            %%mm0, %%mm6      \n\t"\
134
        "psraw                               $3, %%mm4      \n\t"\
135
        "psraw                               $3, %%mm6      \n\t"\
136
        "packuswb                         %%mm6, %%mm4      \n\t"\
137
        MOVNTQ(%%mm4, (%1, %%REGa))\
138
        "add                                 $8, %%"REG_a"  \n\t"\
139
        "cmp                                 %2, %%"REG_a"  \n\t"\
140
        "lea                     " offset "(%0), %%"REG_d"  \n\t"\
141
        "pxor                             %%mm4, %%mm4      \n\t"\
142
        "pxor                             %%mm5, %%mm5      \n\t"\
143
        "pxor                             %%mm6, %%mm6      \n\t"\
144
        "pxor                             %%mm7, %%mm7      \n\t"\
145
        "mov                        (%%"REG_d"), %%"REG_S"  \n\t"\
146
        "jb                                  1b             \n\t"\
147
        :: "r" (&c->redDither),\
148
        "r" (dest), "g" (width)\
149
        : "%"REG_a, "%"REG_d, "%"REG_S\
150 2da0d70d Diego Biurrun
    );
151 c1b0bfb4 Michael Niedermayer
152
/*
 * YSCALEYUV2YV121
 *
 * Asm string fragment (no __asm__ wrapper, no operand list): the user must
 * supply operands %0 = source base, %1 = destination base, %2 = initial
 * index, and account for REG_a, mm0 and mm1 being overwritten.
 *
 * Loop body: load 8 words from %0 + REG_a*2, shift each right by 7
 * (arithmetic), pack to 8 unsigned bytes with saturation, store at
 * %1 + REG_a, then add 8 to REG_a.
 * The loop continues while that add produces no carry ("jnc"), so %2 is
 * presumably a negative count with %0/%1 pointing past the end of the
 * data -- confirm at the call site.
 */
#define YSCALEYUV2YV121 \
153 2da0d70d Diego Biurrun
    "mov %2, %%"REG_a"                    \n\t"\
154
    ASMALIGN(4) /* FIXME Unroll? */\
155
    "1:                                   \n\t"\
156
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
157
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
158
    "psraw                 $7, %%mm0      \n\t"\
159
    "psraw                 $7, %%mm1      \n\t"\
160
    "packuswb           %%mm1, %%mm0      \n\t"\
161
    MOVNTQ(%%mm0, (%1, %%REGa))\
162
    "add                   $8, %%"REG_a"  \n\t"\
163
    "jnc                   1b             \n\t"
164 c1b0bfb4 Michael Niedermayer
165 bf2bdde6 Michael Niedermayer
/*
 * YSCALEYUV2YV121_ACCURATE
 *
 * Rounding variant of YSCALEYUV2YV121 with the same operand convention
 * (%0 source base, %1 destination base, %2 initial index; asm string
 * fragment only).
 *
 * mm7 is set up once with 64 in every word (all-ones >> 15 = 1, << 6 = 64),
 * i.e. half of the 1 << 7 divisor. It is added with signed saturation
 * (paddsw) before the >> 7, so the result is rounded instead of truncated.
 * Overwrites REG_a, mm0, mm1 and mm7.
 */
#define YSCALEYUV2YV121_ACCURATE \
166
    "mov %2, %%"REG_a"                    \n\t"\
167
    "pcmpeqw %%mm7, %%mm7                 \n\t"\
168
    "psrlw                 $15, %%mm7     \n\t"\
169
    "psllw                  $6, %%mm7     \n\t"\
170
    ASMALIGN(4) /* FIXME Unroll? */\
171
    "1:                                   \n\t"\
172
    "movq  (%0, %%"REG_a", 2), %%mm0      \n\t"\
173
    "movq 8(%0, %%"REG_a", 2), %%mm1      \n\t"\
174 33a67bd6 Michael Niedermayer
    "paddsw             %%mm7, %%mm0      \n\t"\
175
    "paddsw             %%mm7, %%mm1      \n\t"\
176 bf2bdde6 Michael Niedermayer
    "psraw                 $7, %%mm0      \n\t"\
177
    "psraw                 $7, %%mm1      \n\t"\
178
    "packuswb           %%mm1, %%mm0      \n\t"\
179
    MOVNTQ(%%mm0, (%1, %%REGa))\
180
    "add                   $8, %%"REG_a"  \n\t"\
181
    "jnc                   1b             \n\t"
182
183 c1b0bfb4 Michael Niedermayer
/*
184 2da0d70d Diego Biurrun
    :: "m" (-lumFilterSize), "m" (-chrFilterSize),
185
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
186
       "r" (dest), "m" (dstW),
187
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
188
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
189 c1b0bfb4 Michael Niedermayer
*/
190 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_UV
 *
 * Opens an __asm__ volatile( statement and emits the chroma part of the
 * multi-tap vertical filter. The statement is deliberately left open: more
 * asm text is appended by the user and the operand/clobber lists come from
 * YSCALEYUV2PACKEDX_END.
 *
 * REG_a is zeroed once before label "1" (the per-pixel-group loop head).
 * The inner loop (label "2") walks the filter list at
 * CHR_MMX_FILTER_OFFSET(%0): 16-byte entries, source pointer at +0,
 * coefficients at +8, terminated by a null pointer. For each tap it adds
 * pmulhw(src, coeff) to mm3 (data at ptr + REG_a) and to mm4 (data at
 * ptr + REG_a + VOF); both accumulators start from VROUNDER_OFFSET(%0).
 * The inline comments identify the two planes as U and V.
 *
 * On exit: mm3 = filtered U, mm4 = filtered V. Overwrites mm0, mm2, mm5,
 * REG_d and REG_S.
 */
#define YSCALEYUV2PACKEDX_UV \
191 7ad6469e Diego Pettenò
    __asm__ volatile(\
192 c255994b Ramiro Polla
        "xor                   %%"REG_a", %%"REG_a"     \n\t"\
193
        ASMALIGN(4)\
194
        "nop                                            \n\t"\
195
        "1:                                             \n\t"\
196
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
197
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
198
        "movq      "VROUNDER_OFFSET"(%0), %%mm3         \n\t"\
199
        "movq                      %%mm3, %%mm4         \n\t"\
200
        ASMALIGN(4)\
201
        "2:                                             \n\t"\
202
        "movq               8(%%"REG_d"), %%mm0         \n\t" /* filterCoeff */\
203
        "movq     (%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* UsrcData */\
204
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5         \n\t" /* VsrcData */\
205
        "add                         $16, %%"REG_d"     \n\t"\
206
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
207
        "pmulhw                    %%mm0, %%mm2         \n\t"\
208
        "pmulhw                    %%mm0, %%mm5         \n\t"\
209
        "paddw                     %%mm2, %%mm3         \n\t"\
210
        "paddw                     %%mm5, %%mm4         \n\t"\
211
        "test                  %%"REG_S", %%"REG_S"     \n\t"\
212
        " jnz                         2b                \n\t"\
213 df57ab14 Cédric Schieli
214 fe91924d Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_YA(offset, coeff, src1, src2, dst1, dst2)
 *
 * Asm string fragment: multi-tap vertical filter for one full-resolution
 * plane (Y per the inline comments; the name suggests it is also used for
 * alpha), with the MMX registers chosen by the caller.
 *
 *   offset       string literal; displacement from %0 to the filter list
 *   coeff        register that receives each coefficient vector
 *   src1, src2   scratch registers for the two groups of 4 source words
 *   dst1, dst2   accumulators; initialised from VROUNDER_OFFSET(%0) and
 *                holding the two filtered groups of 4 words on exit
 *
 * Register arguments are stringified, so they are passed as e.g. %%mm1.
 * The list format matches YSCALEYUV2PACKEDX_UV (16-byte entries, pointer at
 * +0, coefficients at +8, null-pointer terminator); data is read at
 * ptr + REG_a*2 and ptr + REG_a*2 + 8. Uses local label "2" and overwrites
 * REG_d and REG_S. REG_a is read but not modified.
 */
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
215 df57ab14 Cédric Schieli
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
216 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
217 fe91924d Cédric Schieli
    "movq      "VROUNDER_OFFSET"(%0), "#dst1"       \n\t"\
218
    "movq                    "#dst1", "#dst2"       \n\t"\
219 2da0d70d Diego Biurrun
    ASMALIGN(4)\
220
    "2:                                             \n\t"\
221 fe91924d Cédric Schieli
    "movq               8(%%"REG_d"), "#coeff"      \n\t" /* filterCoeff */\
222
    "movq  (%%"REG_S", %%"REG_a", 2), "#src1"       \n\t" /* Y1srcData */\
223
    "movq 8(%%"REG_S", %%"REG_a", 2), "#src2"       \n\t" /* Y2srcData */\
224 2da0d70d Diego Biurrun
    "add                         $16, %%"REG_d"            \n\t"\
225
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
226 fe91924d Cédric Schieli
    "pmulhw                 "#coeff", "#src1"       \n\t"\
227
    "pmulhw                 "#coeff", "#src2"       \n\t"\
228
    "paddw                   "#src1", "#dst1"       \n\t"\
229
    "paddw                   "#src2", "#dst2"       \n\t"\
230 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
231
    " jnz                         2b                \n\t"\
232
233 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX
 *
 * Chroma filter followed by the luma filter (list at LUM_MMX_FILTER_OFFSET).
 * Opens the asm statement via YSCALEYUV2PACKEDX_UV; it is closed by
 * YSCALEYUV2PACKEDX_END.
 * Register state afterwards: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2
 * (mm0, mm2, mm5 are scratch).
 */
#define YSCALEYUV2PACKEDX \
234
    YSCALEYUV2PACKEDX_UV \
235 fe91924d Cédric Schieli
    YSCALEYUV2PACKEDX_YA(LUM_MMX_FILTER_OFFSET,%%mm0,%%mm2,%%mm5,%%mm1,%%mm7) \
236 df57ab14 Cédric Schieli
237 c255994b Ramiro Polla
/*
 * YSCALEYUV2PACKEDX_END
 *
 * Closes the asm statement opened by the YSCALEYUV2PACKEDX* macros.
 * No outputs; inputs are
 *   %0 = &c->redDither (register)
 *   %1, %2, %3 = dummy (memory; placeholders that keep the numbering)
 *   %4 = dest (register)
 *   %5 = dstW (memory)
 * Clobbers REG_a, REG_d, REG_S. Requires c, dummy, dest and dstW to be
 * visible at the expansion site.
 */
#define YSCALEYUV2PACKEDX_END                     \
238
        :: "r" (&c->redDither),                   \
239
            "m" (dummy), "m" (dummy), "m" (dummy),\
240
            "r" (dest), "m" (dstW)                \
241
        : "%"REG_a, "%"REG_d, "%"REG_S            \
242 2da0d70d Diego Biurrun
    );
243 8422aa88 Michael Niedermayer
244 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE_UV
 *
 * Higher-precision counterpart of YSCALEYUV2PACKEDX_UV. Opens an
 * __asm__ volatile( statement (closed later by YSCALEYUV2PACKEDX_END),
 * zeroes REG_a and defines loop head "1".
 *
 * The inner loop (label "2") walks packed filter entries at
 * CHR_MMX_FILTER_OFFSET(%0) that cover two source lines each: first pointer
 * at +0, second pointer at APCK_PTR2, coefficient vector at APCK_COEF,
 * entry size APCK_SIZE, terminated by a null pointer. Words of the two lines
 * are interleaved and accumulated with pmaddwd into 32-bit sums
 * (mm4/mm5 for U, mm6/mm7 for V; V data sits VOF bytes after U).
 *
 * Afterwards the sums are shifted right by 16, packed to words with signed
 * saturation and the rounder at VROUNDER_OFFSET(%0) is added. The results
 * are written to U_TEMP(%0) and V_TEMP(%0), i.e. into memory relative to
 * operand %0, from where YSCALEYUV2PACKEDX_ACCURATE_YA reloads them.
 * Overwrites mm0..mm7, REG_d and REG_S.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
245 7ad6469e Diego Pettenò
    __asm__ volatile(\
246 c255994b Ramiro Polla
        "xor %%"REG_a", %%"REG_a"                       \n\t"\
247
        ASMALIGN(4)\
248
        "nop                                            \n\t"\
249
        "1:                                             \n\t"\
250
        "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d"     \n\t"\
251
        "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
252
        "pxor                      %%mm4, %%mm4         \n\t"\
253
        "pxor                      %%mm5, %%mm5         \n\t"\
254
        "pxor                      %%mm6, %%mm6         \n\t"\
255
        "pxor                      %%mm7, %%mm7         \n\t"\
256
        ASMALIGN(4)\
257
        "2:                                             \n\t"\
258
        "movq     (%%"REG_S", %%"REG_a"), %%mm0         \n\t" /* UsrcData */\
259
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2         \n\t" /* VsrcData */\
260
        "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
261
        "movq     (%%"REG_S", %%"REG_a"), %%mm1         \n\t" /* UsrcData */\
262
        "movq                      %%mm0, %%mm3         \n\t"\
263
        "punpcklwd                 %%mm1, %%mm0         \n\t"\
264
        "punpckhwd                 %%mm1, %%mm3         \n\t"\
265
        "movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1         \n\t" /* filterCoeff */\
266
        "pmaddwd                   %%mm1, %%mm0         \n\t"\
267
        "pmaddwd                   %%mm1, %%mm3         \n\t"\
268
        "paddd                     %%mm0, %%mm4         \n\t"\
269
        "paddd                     %%mm3, %%mm5         \n\t"\
270
        "movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3         \n\t" /* VsrcData */\
271
        "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
272
        "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
273
        "test                  %%"REG_S", %%"REG_S"     \n\t"\
274
        "movq                      %%mm2, %%mm0         \n\t"\
275
        "punpcklwd                 %%mm3, %%mm2         \n\t"\
276
        "punpckhwd                 %%mm3, %%mm0         \n\t"\
277
        "pmaddwd                   %%mm1, %%mm2         \n\t"\
278
        "pmaddwd                   %%mm1, %%mm0         \n\t"\
279
        "paddd                     %%mm2, %%mm6         \n\t"\
280
        "paddd                     %%mm0, %%mm7         \n\t"\
281
        " jnz                         2b                \n\t"\
282
        "psrad                       $16, %%mm4         \n\t"\
283
        "psrad                       $16, %%mm5         \n\t"\
284
        "psrad                       $16, %%mm6         \n\t"\
285
        "psrad                       $16, %%mm7         \n\t"\
286
        "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
287
        "packssdw                  %%mm5, %%mm4         \n\t"\
288
        "packssdw                  %%mm7, %%mm6         \n\t"\
289
        "paddw                     %%mm0, %%mm4         \n\t"\
290
        "paddw                     %%mm0, %%mm6         \n\t"\
291
        "movq                      %%mm4, "U_TEMP"(%0)  \n\t"\
292
        "movq                      %%mm6, "V_TEMP"(%0)  \n\t"\
293 df57ab14 Cédric Schieli
294
/*
 * YSCALEYUV2PACKEDX_ACCURATE_YA(offset)
 *
 * Asm string fragment: higher-precision vertical filter for a
 * full-resolution plane (Y per the inline comments), using the packed
 * two-lines-per-entry filter list at offset(%0) (offset is a string literal;
 * entry fields APCK_PTR2 / APCK_COEF / APCK_SIZE, null-pointer terminator).
 *
 * 32-bit sums are kept in mm1/mm5 (first 4 pixels) and mm7/mm6 (next 4),
 * then shifted right by 16, packed to words with signed saturation and
 * offset by the rounder at VROUNDER_OFFSET(%0).
 *
 * On exit: mm1 = Y1, mm7 = Y2, and the values saved at U_TEMP(%0) /
 * V_TEMP(%0) are reloaded into mm3 / mm4. Overwrites mm0..mm7, REG_d and
 * REG_S; uses local label "2". REG_a is read but not modified.
 */
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
295
    "lea                "offset"(%0), %%"REG_d"     \n\t"\
296 2da0d70d Diego Biurrun
    "mov                 (%%"REG_d"), %%"REG_S"     \n\t"\
297
    "pxor                      %%mm1, %%mm1         \n\t"\
298
    "pxor                      %%mm5, %%mm5         \n\t"\
299
    "pxor                      %%mm7, %%mm7         \n\t"\
300
    "pxor                      %%mm6, %%mm6         \n\t"\
301
    ASMALIGN(4)\
302
    "2:                                             \n\t"\
303
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm0         \n\t" /* Y1srcData */\
304
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2         \n\t" /* Y2srcData */\
305 1625216e Michael Niedermayer
    "mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S"     \n\t"\
306 2da0d70d Diego Biurrun
    "movq  (%%"REG_S", %%"REG_a", 2), %%mm4         \n\t" /* Y1srcData */\
307
    "movq                      %%mm0, %%mm3         \n\t"\
308
    "punpcklwd                 %%mm4, %%mm0         \n\t"\
309
    "punpckhwd                 %%mm4, %%mm3         \n\t"\
310 1625216e Michael Niedermayer
    "movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4         \n\t" /* filterCoeff */\
311 2da0d70d Diego Biurrun
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
312
    "pmaddwd                   %%mm4, %%mm3         \n\t"\
313
    "paddd                     %%mm0, %%mm1         \n\t"\
314
    "paddd                     %%mm3, %%mm5         \n\t"\
315
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3         \n\t" /* Y2srcData */\
316 1625216e Michael Niedermayer
    "mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S"     \n\t"\
317
    "add           $"STR(APCK_SIZE)", %%"REG_d"     \n\t"\
318 2da0d70d Diego Biurrun
    "test                  %%"REG_S", %%"REG_S"     \n\t"\
319
    "movq                      %%mm2, %%mm0         \n\t"\
320
    "punpcklwd                 %%mm3, %%mm2         \n\t"\
321
    "punpckhwd                 %%mm3, %%mm0         \n\t"\
322
    "pmaddwd                   %%mm4, %%mm2         \n\t"\
323
    "pmaddwd                   %%mm4, %%mm0         \n\t"\
324
    "paddd                     %%mm2, %%mm7         \n\t"\
325
    "paddd                     %%mm0, %%mm6         \n\t"\
326
    " jnz                         2b                \n\t"\
327
    "psrad                       $16, %%mm1         \n\t"\
328
    "psrad                       $16, %%mm5         \n\t"\
329
    "psrad                       $16, %%mm7         \n\t"\
330
    "psrad                       $16, %%mm6         \n\t"\
331
    "movq      "VROUNDER_OFFSET"(%0), %%mm0         \n\t"\
332
    "packssdw                  %%mm5, %%mm1         \n\t"\
333
    "packssdw                  %%mm6, %%mm7         \n\t"\
334
    "paddw                     %%mm0, %%mm1         \n\t"\
335
    "paddw                     %%mm0, %%mm7         \n\t"\
336
    "movq               "U_TEMP"(%0), %%mm3         \n\t"\
337
    "movq               "V_TEMP"(%0), %%mm4         \n\t"\
338 bca11e75 Michael Niedermayer
339 df57ab14 Cédric Schieli
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * Higher-precision chroma filter followed by the higher-precision luma
 * filter (list at LUM_MMX_FILTER_OFFSET). Leaves the same register layout
 * as YSCALEYUV2PACKEDX: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2. The asm
 * statement it opens is closed by YSCALEYUV2PACKEDX_END.
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
340
    YSCALEYUV2PACKEDX_ACCURATE_UV \
341
    YSCALEYUV2PACKEDX_ACCURATE_YA(LUM_MMX_FILTER_OFFSET)
342
343 8422aa88 Michael Niedermayer
/*
 * YSCALEYUV2RGBX
 *
 * Asm string fragment: YUV -> RGB conversion for 8 pixels, with all
 * constants read relative to operand %0.
 *
 * Expects mm3 = U, mm4 = V (4 words each), mm1 = Y1, mm7 = Y2 (4 words
 * each), as left by YSCALEYUV2PACKEDX / YSCALEYUV2PACKEDX_ACCURATE.
 *
 * Steps: subtract U_OFFSET / V_OFFSET / Y_OFFSET, multiply (pmulhw) by
 * UG/VG/UB/VR/Y coefficients, sum the two green contributions, duplicate
 * every chroma term so one chroma sample serves two adjacent pixels
 * (punpcklwd/punpckhwd of a register with itself), add luma and pack to
 * unsigned bytes with saturation.
 *
 * On exit mm2, mm4 and mm5 each hold 8 packed bytes: blue, green and red
 * respectively. mm0, mm3 and mm6 are scratch.
 */
#define YSCALEYUV2RGBX \
344 2da0d70d Diego Biurrun
    "psubw  "U_OFFSET"(%0), %%mm3       \n\t" /* (U-128)8*/\
345
    "psubw  "V_OFFSET"(%0), %%mm4       \n\t" /* (V-128)8*/\
346
    "movq            %%mm3, %%mm2       \n\t" /* (U-128)8*/\
347
    "movq            %%mm4, %%mm5       \n\t" /* (V-128)8*/\
348
    "pmulhw "UG_COEFF"(%0), %%mm3       \n\t"\
349
    "pmulhw "VG_COEFF"(%0), %%mm4       \n\t"\
350 c255994b Ramiro Polla
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
351 2da0d70d Diego Biurrun
    "pmulhw "UB_COEFF"(%0), %%mm2       \n\t"\
352
    "pmulhw "VR_COEFF"(%0), %%mm5       \n\t"\
353
    "psubw  "Y_OFFSET"(%0), %%mm1       \n\t" /* 8(Y-16)*/\
354
    "psubw  "Y_OFFSET"(%0), %%mm7       \n\t" /* 8(Y-16)*/\
355
    "pmulhw  "Y_COEFF"(%0), %%mm1       \n\t"\
356
    "pmulhw  "Y_COEFF"(%0), %%mm7       \n\t"\
357 c255994b Ramiro Polla
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
358 2da0d70d Diego Biurrun
    "paddw           %%mm3, %%mm4       \n\t"\
359
    "movq            %%mm2, %%mm0       \n\t"\
360
    "movq            %%mm5, %%mm6       \n\t"\
361
    "movq            %%mm4, %%mm3       \n\t"\
362
    "punpcklwd       %%mm2, %%mm2       \n\t"\
363
    "punpcklwd       %%mm5, %%mm5       \n\t"\
364
    "punpcklwd       %%mm4, %%mm4       \n\t"\
365
    "paddw           %%mm1, %%mm2       \n\t"\
366
    "paddw           %%mm1, %%mm5       \n\t"\
367
    "paddw           %%mm1, %%mm4       \n\t"\
368
    "punpckhwd       %%mm0, %%mm0       \n\t"\
369
    "punpckhwd       %%mm6, %%mm6       \n\t"\
370
    "punpckhwd       %%mm3, %%mm3       \n\t"\
371
    "paddw           %%mm7, %%mm0       \n\t"\
372
    "paddw           %%mm7, %%mm6       \n\t"\
373
    "paddw           %%mm7, %%mm3       \n\t"\
374
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
375
    "packuswb        %%mm0, %%mm2       \n\t"\
376
    "packuswb        %%mm6, %%mm5       \n\t"\
377
    "packuswb        %%mm3, %%mm4       \n\t"\
378 d604bab9 Michael Niedermayer
379 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED(index, c)
 *
 * Asm string fragment: two-line vertical blend (luma buffers %0/%1, chroma
 * buffers %2/%3) for packed output without colour conversion.
 *
 *   index  register used as loop index (stringified); zeroed here
 *   c      register holding the base address for the *_OFFSET displacements
 *
 * Before the loop, the coefficient vectors stored at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted
 * right by 3 and WRITTEN BACK, i.e. the memory behind c is modified.
 *
 * Loop head is label "1"; each pass computes, per word,
 *     ((buf0 - buf1) * coeff >> 16) + (buf1 >> 7)
 * and leaves mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2.
 * The macro does not advance index nor branch back to "1"; that is left to
 * the code appended after it. mm0, mm2, mm5 and mm6 are scratch.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
380 2da0d70d Diego Biurrun
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0              \n\t"\
381
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1              \n\t"\
382
    "psraw                $3, %%mm0                           \n\t"\
383
    "psraw                $3, %%mm1                           \n\t"\
384
    "movq              %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
385
    "movq              %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
386
    "xor            "#index", "#index"                        \n\t"\
387
    ASMALIGN(4)\
388
    "1:                                 \n\t"\
389
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
390
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
391 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
392
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
393 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
394
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
395
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
396
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
397
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
398
    "psraw                $7, %%mm3     \n\t" /* uvbuf1[eax] >>7*/\
399
    "psraw                $7, %%mm4     \n\t" /* uvbuf1[eax+2048] >>7*/\
400
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
401
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
402
    "movq  (%0, "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
403
    "movq  (%1, "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
404
    "movq 8(%0, "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
405
    "movq 8(%1, "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
406
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
407
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
408
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
409
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
410
    "psraw                $7, %%mm1     \n\t" /* buf1[eax] >>7*/\
411
    "psraw                $7, %%mm7     \n\t" /* buf1[eax] >>7*/\
412
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
413
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
414 6a4970ab Diego Biurrun
415 6e1c66bc Aurelien Jacobs
/* Indirection so that index and c are macro-expanded before
 * REAL_YSCALEYUV2PACKED stringifies them with #. */
#define YSCALEYUV2PACKED(index, c)  REAL_YSCALEYUV2PACKED(index, c)
416 6a4970ab Diego Biurrun
417 df57ab14 Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_UV(index, c)
 *
 * Asm string fragment: chroma half of the two-line blend used for RGB
 * output. Zeroes the index register, defines loop head "1", then blends the
 * chroma buffers %2 and %3 per word as
 *     ((uvbuf0 - uvbuf1) * coeff >> 16) + (uvbuf1 >> 4)
 * using the coefficient at CHR_MMX_FILTER_OFFSET+8(c), subtracts
 * U_OFFSET(c) / V_OFFSET(c) and starts the green computation.
 *
 * On exit (see the trailing inline comment): mm2 = offset-corrected U,
 * mm5 = offset-corrected V, mm3 = U * UG_COEFF, mm4 = V * VG_COEFF.
 * mm0 is scratch. The loop is not closed here.
 */
#define REAL_YSCALEYUV2RGB_UV(index, c) \
418 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
419
    ASMALIGN(4)\
420
    "1:                                 \n\t"\
421
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[eax]*/\
422
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[eax]*/\
423 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[eax+2048]*/\
424
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[eax+2048]*/\
425 2da0d70d Diego Biurrun
    "psubw             %%mm3, %%mm2     \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
426
    "psubw             %%mm4, %%mm5     \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
427
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0    \n\t"\
428
    "pmulhw            %%mm0, %%mm2     \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
429
    "pmulhw            %%mm0, %%mm5     \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
430
    "psraw                $4, %%mm3     \n\t" /* uvbuf1[eax] >>4*/\
431
    "psraw                $4, %%mm4     \n\t" /* uvbuf1[eax+2048] >>4*/\
432
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\
433
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\
434
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
435
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
436
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
437
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
438
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
439
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
440
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
441 df57ab14 Cédric Schieli
442 786dcfef Cédric Schieli
/*
 * REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
 *
 * Asm string fragment: two-line blend of a full-resolution plane.
 *
 *   index   index register (stringified); read, not modified
 *   c       base register for LUM_MMX_FILTER_OFFSET
 *   b1, b2  address operands of the two input lines (e.g. %0 and %1)
 *
 * Per word: ((b1 - b2) * coeff >> 16) + (b2 >> 4), with the coefficient
 * taken from LUM_MMX_FILTER_OFFSET+8(c). Eight words are processed:
 * results in mm1 (first four) and mm7 (next four); mm0 and mm6 are scratch.
 */
#define REAL_YSCALEYUV2RGB_YA(index, c, b1, b2) \
443
    "movq  ("#b1", "#index", 2), %%mm0     \n\t" /*buf0[eax]*/\
444
    "movq  ("#b2", "#index", 2), %%mm1     \n\t" /*buf1[eax]*/\
445
    "movq 8("#b1", "#index", 2), %%mm6     \n\t" /*buf0[eax]*/\
446
    "movq 8("#b2", "#index", 2), %%mm7     \n\t" /*buf1[eax]*/\
447 2da0d70d Diego Biurrun
    "psubw             %%mm1, %%mm0     \n\t" /* buf0[eax] - buf1[eax]*/\
448
    "psubw             %%mm7, %%mm6     \n\t" /* buf0[eax] - buf1[eax]*/\
449
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
450
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6  \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
451
    "psraw                $4, %%mm1     \n\t" /* buf1[eax] >>4*/\
452
    "psraw                $4, %%mm7     \n\t" /* buf1[eax] >>4*/\
453
    "paddw             %%mm0, %%mm1     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
454
    "paddw             %%mm6, %%mm7     \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
455 df57ab14 Cédric Schieli
456
/*
 * REAL_YSCALEYUV2RGB_COEFF(c)
 *
 * Asm string fragment: final stage of the YUV -> RGB conversion, with
 * constants read relative to register c.
 *
 * Expects the state left by REAL_YSCALEYUV2RGB_UV and
 * REAL_YSCALEYUV2RGB_YA: mm2/mm5 = offset-corrected U/V, mm3 = ug,
 * mm4 = vg, mm1 = Y1, mm7 = Y2.
 *
 * Multiplies U by UB_COEFF and V by VR_COEFF, applies Y_OFFSET and Y_COEFF
 * to luma, sums ug + vg, duplicates each chroma term for two adjacent
 * pixels, adds luma and packs with unsigned saturation.
 *
 * On exit mm2, mm4 and mm5 each hold 8 packed bytes: blue, green and red.
 * mm0, mm3 and mm6 are scratch.
 */
#define REAL_YSCALEYUV2RGB_COEFF(c) \
457 2da0d70d Diego Biurrun
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
458
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
459
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
460
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
461
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
462
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
463
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
464
    "paddw             %%mm3, %%mm4     \n\t"\
465
    "movq              %%mm2, %%mm0     \n\t"\
466
    "movq              %%mm5, %%mm6     \n\t"\
467
    "movq              %%mm4, %%mm3     \n\t"\
468
    "punpcklwd         %%mm2, %%mm2     \n\t"\
469
    "punpcklwd         %%mm5, %%mm5     \n\t"\
470
    "punpcklwd         %%mm4, %%mm4     \n\t"\
471
    "paddw             %%mm1, %%mm2     \n\t"\
472
    "paddw             %%mm1, %%mm5     \n\t"\
473
    "paddw             %%mm1, %%mm4     \n\t"\
474
    "punpckhwd         %%mm0, %%mm0     \n\t"\
475
    "punpckhwd         %%mm6, %%mm6     \n\t"\
476
    "punpckhwd         %%mm3, %%mm3     \n\t"\
477
    "paddw             %%mm7, %%mm0     \n\t"\
478
    "paddw             %%mm7, %%mm6     \n\t"\
479
    "paddw             %%mm7, %%mm3     \n\t"\
480
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
481
    "packuswb          %%mm0, %%mm2     \n\t"\
482
    "packuswb          %%mm6, %%mm5     \n\t"\
483
    "packuswb          %%mm3, %%mm4     \n\t"\
484 40494418 Cédric Schieli
485 786dcfef Cédric Schieli
/* Indirection so that the arguments are macro-expanded before
 * REAL_YSCALEYUV2RGB_YA stringifies them with #. */
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
486 df57ab14 Cédric Schieli
487
/*
 * YSCALEYUV2RGB(index, c)
 *
 * Full two-line-blend YUV -> RGB pipeline as one asm string: chroma blend,
 * luma blend of operands %0 and %1, then coefficient application.
 * Leaves packed blue, green and red bytes in mm2, mm4 and mm5, inside the
 * loop opened at label "1" (closing the loop is left to the user).
 * Note that the REAL_* macros are invoked directly here, so index and c are
 * stringified exactly as written by the caller.
 */
#define YSCALEYUV2RGB(index, c) \
488
    REAL_YSCALEYUV2RGB_UV(index, c) \
489 786dcfef Cédric Schieli
    REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
490 df57ab14 Cédric Schieli
    REAL_YSCALEYUV2RGB_COEFF(c)
491 6a4970ab Diego Biurrun
492 6e1c66bc Aurelien Jacobs
/*
 * REAL_YSCALEYUV2PACKED1(index, c)
 *
 * Asm string fragment: single-line variant (no blending) for packed output.
 * Zeroes the index register, defines loop head "1", loads chroma from
 * operand %2 (second plane VOF bytes further on) and luma from operand %0,
 * and shifts every word right by 7.
 *
 * On exit: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2. The parameter c is not
 * referenced by the body. The loop is not closed here.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
493 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t"\
494
    ASMALIGN(4)\
495
    "1:                                 \n\t"\
496
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[eax]*/\
497 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[eax+2048]*/\
498 2da0d70d Diego Biurrun
    "psraw                $7, %%mm3     \n\t" \
499
    "psraw                $7, %%mm4     \n\t" \
500
    "movq  (%0, "#index", 2), %%mm1     \n\t" /*buf0[eax]*/\
501
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /*buf0[eax]*/\
502
    "psraw                $7, %%mm1     \n\t" \
503
    "psraw                $7, %%mm7     \n\t" \
504 6a4970ab Diego Biurrun
505 6e1c66bc Aurelien Jacobs
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_YSCALEYUV2PACKED1 stringifies them with '#'. */
#define YSCALEYUV2PACKED1(index, c)  REAL_YSCALEYUV2PACKED1(index, c)
506 6a4970ab Diego Biurrun
507 6e1c66bc Aurelien Jacobs
/* Loop head converting 8 pixels from a single luma line (%0) and a single
 * chroma line (%2) to RGB, with no vertical interpolation.
 * Offsets and coefficients are read from the context pointer passed as c
 * (U_OFFSET, V_OFFSET, Y_OFFSET, *_COEFF). Each chroma sample is duplicated
 * for two neighbouring pixels via punpck{l,h}wd.
 * On exit: mm2 = 8 packed B bytes, mm4 = 8 packed G bytes, mm5 = 8 packed R
 * bytes. The loop opened at "1:" is left open for a following WRITE* macro. */
#define REAL_YSCALEYUV2RGB1(index, c) \
508 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t" /* index = 0 */\
509
    ASMALIGN(4)\
510
    "1:                                 \n\t"\
511
    "movq     (%2, "#index"), %%mm3     \n\t" /* uvbuf0[index] */\
512 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4     \n\t" /* uvbuf0[index] at byte offset VOF */\
513 2da0d70d Diego Biurrun
    "psraw                $4, %%mm3     \n\t" /* uvbuf0[index] >>4 */\
514
    "psraw                $4, %%mm4     \n\t" /* uvbuf0[index] at byte offset VOF >>4 */\
515
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
516
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
517
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
518
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
519
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
520
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
521
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
522
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index] */\
523
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
524
    "psraw                $4, %%mm1     \n\t" /* buf0[index] >>4 */\
525
    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4 */\
526
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
527
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
528
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
529
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
530
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
531
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
532
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
533
    "paddw             %%mm3, %%mm4     \n\t" /* mm4 = ug + vg */\
534
    "movq              %%mm2, %%mm0     \n\t"\
535
    "movq              %%mm5, %%mm6     \n\t"\
536
    "movq              %%mm4, %%mm3     \n\t"\
537
    "punpcklwd         %%mm2, %%mm2     \n\t"\
538
    "punpcklwd         %%mm5, %%mm5     \n\t"\
539
    "punpcklwd         %%mm4, %%mm4     \n\t"\
540
    "paddw             %%mm1, %%mm2     \n\t"\
541
    "paddw             %%mm1, %%mm5     \n\t"\
542
    "paddw             %%mm1, %%mm4     \n\t"\
543
    "punpckhwd         %%mm0, %%mm0     \n\t"\
544
    "punpckhwd         %%mm6, %%mm6     \n\t"\
545
    "punpckhwd         %%mm3, %%mm3     \n\t"\
546
    "paddw             %%mm7, %%mm0     \n\t"\
547
    "paddw             %%mm7, %%mm6     \n\t"\
548
    "paddw             %%mm7, %%mm3     \n\t"\
549
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = pixels 0-3, 2 = pixels 4-7) */\
550
    "packuswb          %%mm0, %%mm2     \n\t"\
551
    "packuswb          %%mm6, %%mm5     \n\t"\
552
    "packuswb          %%mm3, %%mm4     \n\t"\
553 40494418 Cédric Schieli
554 6e1c66bc Aurelien Jacobs
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_YSCALEYUV2RGB1 stringifies them with '#'. */
#define YSCALEYUV2RGB1(index, c)  REAL_YSCALEYUV2RGB1(index, c)
555 497d4f99 Michael Niedermayer
556 6e1c66bc Aurelien Jacobs
/* Like REAL_YSCALEYUV2PACKED1, but chroma is taken from two lines:
 *   mm3 = (uvbuf0[index] + uvbuf1[index]) >> 8                 (%2 and %3)
 *   mm4 = same for the samples at byte offset VOF
 *   mm1 = buf0 (%0) words [index .. index+3]   >> 7
 *   mm7 = buf0 (%0) words [index+4 .. index+7] >> 7
 * Clears the index register and opens label "1:"; a WRITE* macro must follow
 * to close the loop. The c argument is unused here. */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
557 2da0d70d Diego Biurrun
    "xor "#index", "#index"             \n\t" /* index = 0 */\
558
    ASMALIGN(4)\
559
    "1:                                 \n\t"\
560
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index] */\
561
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index] */\
562 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index] at byte offset VOF */\
563
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index] at byte offset VOF */\
564 2da0d70d Diego Biurrun
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0 + uvbuf1 */\
565
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0 + uvbuf1, VOF half */\
566
    "psrlw                $8, %%mm3     \n\t" \
567
    "psrlw                $8, %%mm4     \n\t" \
568
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index] */\
569
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
570
    "psraw                $7, %%mm1     \n\t" \
571
    "psraw                $7, %%mm7     \n\t"
572 6e1c66bc Aurelien Jacobs
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_YSCALEYUV2PACKED1b stringifies them with '#'. */
#define YSCALEYUV2PACKED1b(index, c)  REAL_YSCALEYUV2PACKED1b(index, c)
573 6a4970ab Diego Biurrun
574 497d4f99 Michael Niedermayer
// do vertical chrominance interpolation
575 6e1c66bc Aurelien Jacobs
/* Loop head converting 8 pixels to RGB from one luma line (%0) and the sum of
 * two chroma lines (%2 and %3): chroma = (uvbuf0 + uvbuf1) >> 5, luma =
 * buf0 >> 4. Apart from the chroma load the arithmetic matches
 * REAL_YSCALEYUV2RGB1.
 * On exit: mm2 = 8 packed B bytes, mm4 = 8 packed G bytes, mm5 = 8 packed R
 * bytes. The loop opened at "1:" is left open for a following WRITE* macro. */
#define REAL_YSCALEYUV2RGB1b(index, c) \
576 2da0d70d Diego Biurrun
    "xor            "#index", "#index"  \n\t" /* index = 0 */\
577
    ASMALIGN(4)\
578
    "1:                                 \n\t"\
579
    "movq     (%2, "#index"), %%mm2     \n\t" /* uvbuf0[index] */\
580
    "movq     (%3, "#index"), %%mm3     \n\t" /* uvbuf1[index] */\
581 8b2fce0d Michael Niedermayer
    "movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5     \n\t" /* uvbuf0[index] at byte offset VOF */\
582
    "movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4     \n\t" /* uvbuf1[index] at byte offset VOF */\
583 2da0d70d Diego Biurrun
    "paddw             %%mm2, %%mm3     \n\t" /* uvbuf0 + uvbuf1 */\
584
    "paddw             %%mm5, %%mm4     \n\t" /* uvbuf0 + uvbuf1, VOF half */\
585
    "psrlw                $5, %%mm3     \n\t" /*FIXME might overflow*/\
586
    "psrlw                $5, %%mm4     \n\t" /*FIXME might overflow*/\
587
    "psubw  "U_OFFSET"("#c"), %%mm3     \n\t" /* (U-128)8*/\
588
    "psubw  "V_OFFSET"("#c"), %%mm4     \n\t" /* (V-128)8*/\
589
    "movq              %%mm3, %%mm2     \n\t" /* (U-128)8*/\
590
    "movq              %%mm4, %%mm5     \n\t" /* (V-128)8*/\
591
    "pmulhw "UG_COEFF"("#c"), %%mm3     \n\t"\
592
    "pmulhw "VG_COEFF"("#c"), %%mm4     \n\t"\
593
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
594
    "movq  (%0, "#index", 2), %%mm1     \n\t" /* buf0[index] */\
595
    "movq 8(%0, "#index", 2), %%mm7     \n\t" /* buf0[index+4] */\
596
    "psraw                $4, %%mm1     \n\t" /* buf0[index] >>4 */\
597
    "psraw                $4, %%mm7     \n\t" /* buf0[index+4] >>4 */\
598
    "pmulhw "UB_COEFF"("#c"), %%mm2     \n\t"\
599
    "pmulhw "VR_COEFF"("#c"), %%mm5     \n\t"\
600
    "psubw  "Y_OFFSET"("#c"), %%mm1     \n\t" /* 8(Y-16)*/\
601
    "psubw  "Y_OFFSET"("#c"), %%mm7     \n\t" /* 8(Y-16)*/\
602
    "pmulhw  "Y_COEFF"("#c"), %%mm1     \n\t"\
603
    "pmulhw  "Y_COEFF"("#c"), %%mm7     \n\t"\
604
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
605
    "paddw             %%mm3, %%mm4     \n\t" /* mm4 = ug + vg */\
606
    "movq              %%mm2, %%mm0     \n\t"\
607
    "movq              %%mm5, %%mm6     \n\t"\
608
    "movq              %%mm4, %%mm3     \n\t"\
609
    "punpcklwd         %%mm2, %%mm2     \n\t"\
610
    "punpcklwd         %%mm5, %%mm5     \n\t"\
611
    "punpcklwd         %%mm4, %%mm4     \n\t"\
612
    "paddw             %%mm1, %%mm2     \n\t"\
613
    "paddw             %%mm1, %%mm5     \n\t"\
614
    "paddw             %%mm1, %%mm4     \n\t"\
615
    "punpckhwd         %%mm0, %%mm0     \n\t"\
616
    "punpckhwd         %%mm6, %%mm6     \n\t"\
617
    "punpckhwd         %%mm3, %%mm3     \n\t"\
618
    "paddw             %%mm7, %%mm0     \n\t"\
619
    "paddw             %%mm7, %%mm6     \n\t"\
620
    "paddw             %%mm7, %%mm3     \n\t"\
621
    /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = pixels 0-3, 2 = pixels 4-7) */\
622
    "packuswb          %%mm0, %%mm2     \n\t"\
623
    "packuswb          %%mm6, %%mm5     \n\t"\
624
    "packuswb          %%mm3, %%mm4     \n\t"\
625 40494418 Cédric Schieli
626 6e1c66bc Aurelien Jacobs
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_YSCALEYUV2RGB1b stringifies them with '#'. */
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
627 d604bab9 Michael Niedermayer
628 6858492e Cédric Schieli
/* Loads 8 alpha words from the buffer in asm operand %1 at the current index,
 * shifts each right by 7 and packs them with unsigned saturation into the
 * 8 bytes of mm7. mm1 is used as scratch. */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
629
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
630
    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
631
    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
632
    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
633
    "packuswb          %%mm1, %%mm7     \n\t"
634
/* Expansion wrapper: the argument is macro-expanded here before
 * REAL_YSCALEYUV2RGB1_ALPHA stringifies it with '#'. */
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
635
636 9c77b26b Cédric Schieli
/* Loop tail: interleaves 8 packed bytes each of B, G, R and A into eight
 * 32-bit pixels, stores the 32 bytes at dst + index*4, adds 8 to index and
 * jumps back to label "1:" while index is (unsigned) below dstw.
 *   b, g, r, a      MMX registers holding the packed channel bytes;
 *                   b and r are overwritten
 *   q0, q2, q3, t   MMX scratch registers, all overwritten
 * Stores go through MOVNTQ. */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
637
    "movq       "#b", "#q2"     \n\t" /* B */\
638
    "movq       "#r", "#t"      \n\t" /* R */\
639
    "punpcklbw  "#g", "#b"      \n\t" /* GBGBGBGB 0 */\
640
    "punpcklbw  "#a", "#r"      \n\t" /* ARARARAR 0 */\
641
    "punpckhbw  "#g", "#q2"     \n\t" /* GBGBGBGB 2 */\
642
    "punpckhbw  "#a", "#t"      \n\t" /* ARARARAR 2 */\
643
    "movq       "#b", "#q0"     \n\t" /* GBGBGBGB 0 */\
644
    "movq      "#q2", "#q3"     \n\t" /* GBGBGBGB 2 */\
645
    "punpcklwd  "#r", "#q0"     \n\t" /* ARGBARGB 0 */\
646
    "punpckhwd  "#r", "#b"      \n\t" /* ARGBARGB 1 */\
647
    "punpcklwd  "#t", "#q2"     \n\t" /* ARGBARGB 2 */\
648
    "punpckhwd  "#t", "#q3"     \n\t" /* ARGBARGB 3 */\
649 d604bab9 Michael Niedermayer
\
650 9c77b26b Cédric Schieli
    MOVNTQ(   q0,   (dst, index, 4))\
651
    MOVNTQ(    b,  8(dst, index, 4))\
652
    MOVNTQ(   q2, 16(dst, index, 4))\
653
    MOVNTQ(   q3, 24(dst, index, 4))\
654 d604bab9 Michael Niedermayer
\
655 2da0d70d Diego Biurrun
    "add      $8, "#index"      \n\t"\
656
    "cmp "#dstw", "#index"      \n\t"\
657
    " jb      1b                \n\t"
658 9c77b26b Cédric Schieli
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_WRITEBGR32 stringifies them with '#'. */
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
659 d604bab9 Michael Niedermayer
660 27a90b04 Michael Niedermayer
/* Loop tail writing 8 pixels as 16-bit 5-6-5 words.
 * Expects mm2 = B, mm4 = G, mm5 = R (8 packed bytes each) and mm7 = 0.
 * B and R are masked with bF8 and G with bFC (presumably 0xF8 / 0xFC in every
 * byte - defined elsewhere). Each output word is then
 *   (R & mask) << 8 | (G & mask) << 3 | (B & mask) >> 3.
 * Stores 16 bytes at dst + index*2, adds 8 to index and jumps back to "1:"
 * while index is (unsigned) below dstw. Overwrites mm1-mm5. */
#define REAL_WRITERGB16(dst, dstw, index) \
661 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
662
    "pand "MANGLE(bFC)", %%mm4  \n\t" /* G */\
663
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
664
    "psrlq           $3, %%mm2  \n\t"\
665 d604bab9 Michael Niedermayer
\
666 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
667
    "movq         %%mm4, %%mm3  \n\t"\
668 d604bab9 Michael Niedermayer
\
669 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
670
    "punpcklbw    %%mm5, %%mm2  \n\t"\
671
    "punpckhbw    %%mm7, %%mm4  \n\t"\
672
    "punpckhbw    %%mm5, %%mm1  \n\t"\
673 d604bab9 Michael Niedermayer
\
674 2da0d70d Diego Biurrun
    "psllq           $3, %%mm3  \n\t"\
675
    "psllq           $3, %%mm4  \n\t"\
676 d604bab9 Michael Niedermayer
\
677 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
678
    "por          %%mm4, %%mm1  \n\t"\
679 d604bab9 Michael Niedermayer
\
680 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
681
    MOVNTQ(%%mm1, 8(dst, index, 2))\
682 d604bab9 Michael Niedermayer
\
683 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
684
    "cmp        "#dstw", "#index"   \n\t"\
685
    " jb             1b             \n\t"
686 27a90b04 Michael Niedermayer
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_WRITERGB16 stringifies them with '#'. */
#define WRITERGB16(dst, dstw, index)  REAL_WRITERGB16(dst, dstw, index)
687 d604bab9 Michael Niedermayer
688 27a90b04 Michael Niedermayer
/* Loop tail writing 8 pixels as 16-bit 5-5-5 words.
 * Expects mm2 = B, mm4 = G, mm5 = R (8 packed bytes each) and mm7 = 0.
 * All three channels are masked with bF8 (presumably 0xF8 in every byte -
 * defined elsewhere). Each output word is then
 *   (R & mask) << 7 | (G & mask) << 2 | (B & mask) >> 3.
 * Stores 16 bytes at dst + index*2, adds 8 to index and jumps back to "1:"
 * while index is (unsigned) below dstw. Overwrites mm1-mm5. */
#define REAL_WRITERGB15(dst, dstw, index) \
689 2da0d70d Diego Biurrun
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
690
    "pand "MANGLE(bF8)", %%mm4  \n\t" /* G */\
691
    "pand "MANGLE(bF8)", %%mm5  \n\t" /* R */\
692
    "psrlq           $3, %%mm2  \n\t"\
693
    "psrlq           $1, %%mm5  \n\t"\
694 d604bab9 Michael Niedermayer
\
695 2da0d70d Diego Biurrun
    "movq         %%mm2, %%mm1  \n\t"\
696
    "movq         %%mm4, %%mm3  \n\t"\
697 d604bab9 Michael Niedermayer
\
698 2da0d70d Diego Biurrun
    "punpcklbw    %%mm7, %%mm3  \n\t"\
699
    "punpcklbw    %%mm5, %%mm2  \n\t"\
700
    "punpckhbw    %%mm7, %%mm4  \n\t"\
701
    "punpckhbw    %%mm5, %%mm1  \n\t"\
702 d604bab9 Michael Niedermayer
\
703 2da0d70d Diego Biurrun
    "psllq           $2, %%mm3  \n\t"\
704
    "psllq           $2, %%mm4  \n\t"\
705 d604bab9 Michael Niedermayer
\
706 2da0d70d Diego Biurrun
    "por          %%mm3, %%mm2  \n\t"\
707
    "por          %%mm4, %%mm1  \n\t"\
708 d604bab9 Michael Niedermayer
\
709 2da0d70d Diego Biurrun
    MOVNTQ(%%mm2,  (dst, index, 2))\
710
    MOVNTQ(%%mm1, 8(dst, index, 2))\
711 d604bab9 Michael Niedermayer
\
712 2da0d70d Diego Biurrun
    "add             $8, "#index"   \n\t"\
713
    "cmp        "#dstw", "#index"   \n\t"\
714
    " jb             1b             \n\t"
715 27a90b04 Michael Niedermayer
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_WRITERGB15 stringifies them with '#'. */
#define WRITERGB15(dst, dstw, index)  REAL_WRITERGB15(dst, dstw, index)
716 f62255fb Michael Niedermayer
717 6542b44e Michael Niedermayer
/* Loop tail writing 8 pixels as packed 24-bit B,G,R (24 bytes per iteration),
 * using mask constants bm00000111 / bm11111000 / bm00001111.
 * Unlike the 16/32-bit writers, dst is not indexed: the three quadwords are
 * stored at dst, dst+8 and dst+16 and the dst register itself is advanced by
 * 24, so dst must be a writable register. index is advanced by 8 and the loop
 * jumps back to "1:" while index is (unsigned) below dstw.
 * Overwrites mm0-mm6. WRITEBGR24 is not mapped to this variant within the
 * visible part of the file. */
#define WRITEBGR24OLD(dst, dstw, index) \
718 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
719
    "movq      %%mm2, %%mm1             \n\t" /* B */\
720
    "movq      %%mm5, %%mm6             \n\t" /* R */\
721
    "punpcklbw %%mm4, %%mm2             \n\t" /* GBGBGBGB 0 */\
722
    "punpcklbw %%mm7, %%mm5             \n\t" /* 0R0R0R0R 0 */\
723
    "punpckhbw %%mm4, %%mm1             \n\t" /* GBGBGBGB 2 */\
724
    "punpckhbw %%mm7, %%mm6             \n\t" /* 0R0R0R0R 2 */\
725
    "movq      %%mm2, %%mm0             \n\t" /* GBGBGBGB 0 */\
726
    "movq      %%mm1, %%mm3             \n\t" /* GBGBGBGB 2 */\
727
    "punpcklwd %%mm5, %%mm0             \n\t" /* 0RGB0RGB 0 */\
728
    "punpckhwd %%mm5, %%mm2             \n\t" /* 0RGB0RGB 1 */\
729
    "punpcklwd %%mm6, %%mm1             \n\t" /* 0RGB0RGB 2 */\
730
    "punpckhwd %%mm6, %%mm3             \n\t" /* 0RGB0RGB 3 */\
731 d604bab9 Michael Niedermayer
\
732 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4             \n\t" /* 0RGB0RGB 0 */\
733
    "psrlq        $8, %%mm0             \n\t" /* 00RGB0RG 0 */\
734
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 0 */\
735
    "pand "MANGLE(bm11111000)", %%mm0   \n\t" /* 00RGB000 0.5 */\
736
    "por       %%mm4, %%mm0             \n\t" /* 00RGBRGB 0 */\
737
    "movq      %%mm2, %%mm4             \n\t" /* 0RGB0RGB 1 */\
738
    "psllq       $48, %%mm2             \n\t" /* GB000000 1 */\
739
    "por       %%mm2, %%mm0             \n\t" /* GBRGBRGB 0 */\
740 d604bab9 Michael Niedermayer
\
741 2da0d70d Diego Biurrun
    "movq      %%mm4, %%mm2             \n\t" /* 0RGB0RGB 1 */\
742
    "psrld       $16, %%mm4             \n\t" /* 000R000R 1 */\
743
    "psrlq       $24, %%mm2             \n\t" /* 0000RGB0 1.5 */\
744
    "por       %%mm4, %%mm2             \n\t" /* 000RRGBR 1 */\
745
    "pand "MANGLE(bm00001111)", %%mm2   \n\t" /* 0000RGBR 1 */\
746
    "movq      %%mm1, %%mm4             \n\t" /* 0RGB0RGB 2 */\
747
    "psrlq        $8, %%mm1             \n\t" /* 00RGB0RG 2 */\
748
    "pand "MANGLE(bm00000111)", %%mm4   \n\t" /* 00000RGB 2 */\
749
    "pand "MANGLE(bm11111000)", %%mm1   \n\t" /* 00RGB000 2.5 */\
750
    "por       %%mm4, %%mm1             \n\t" /* 00RGBRGB 2 */\
751
    "movq      %%mm1, %%mm4             \n\t" /* 00RGBRGB 2 */\
752
    "psllq       $32, %%mm1             \n\t" /* BRGB0000 2 */\
753
    "por       %%mm1, %%mm2             \n\t" /* BRGBRGBR 1 */\
754 d604bab9 Michael Niedermayer
\
755 2da0d70d Diego Biurrun
    "psrlq       $32, %%mm4             \n\t" /* 000000RG 2.5 */\
756
    "movq      %%mm3, %%mm5             \n\t" /* 0RGB0RGB 3 */\
757
    "psrlq        $8, %%mm3             \n\t" /* 00RGB0RG 3 */\
758
    "pand "MANGLE(bm00000111)", %%mm5   \n\t" /* 00000RGB 3 */\
759
    "pand "MANGLE(bm11111000)", %%mm3   \n\t" /* 00RGB000 3.5 */\
760
    "por       %%mm5, %%mm3             \n\t" /* 00RGBRGB 3 */\
761
    "psllq       $16, %%mm3             \n\t" /* RGBRGB00 3 */\
762
    "por       %%mm4, %%mm3             \n\t" /* RGBRGBRG 2.5 */\
763 d604bab9 Michael Niedermayer
\
764 2da0d70d Diego Biurrun
    MOVNTQ(%%mm0,   (dst))\
765
    MOVNTQ(%%mm2,  8(dst))\
766
    MOVNTQ(%%mm3, 16(dst))\
767
    "add         $24, "#dst"            \n\t"\
768 d604bab9 Michael Niedermayer
\
769 2da0d70d Diego Biurrun
    "add          $8, "#index"          \n\t"\
770
    "cmp     "#dstw", "#index"          \n\t"\
771
    " jb          1b                    \n\t"
772 d604bab9 Michael Niedermayer
773 6542b44e Michael Niedermayer
/* Loop tail writing 8 pixels as packed 24-bit B,G,R (24 bytes per iteration)
 * using only shift, unpack and OR operations (no mask constants, no pshufw).
 * Expects mm2 = B, mm4 = G, mm5 = R (8 packed bytes each) and mm7 = 0.
 * The three quadwords are stored at dst, dst+8 and dst+16 and the dst
 * register itself is advanced by 24, so dst must be a writable register.
 * index is advanced by 8 and the loop jumps back to "1:" while index is
 * (unsigned) below dstw. Overwrites mm0-mm7. */
#define WRITEBGR24MMX(dst, dstw, index) \
774 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
775
    "movq      %%mm2, %%mm1     \n\t" /* B */\
776
    "movq      %%mm5, %%mm6     \n\t" /* R */\
777
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
778
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
779
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
780
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
781
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
782
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
783
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
784
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
785
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
786
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
787 99d2cb72 Michael Niedermayer
\
788 2da0d70d Diego Biurrun
    "movq      %%mm0, %%mm4     \n\t" /* 0RGB0RGB 0 */\
789
    "movq      %%mm2, %%mm6     \n\t" /* 0RGB0RGB 1 */\
790
    "movq      %%mm1, %%mm5     \n\t" /* 0RGB0RGB 2 */\
791
    "movq      %%mm3, %%mm7     \n\t" /* 0RGB0RGB 3 */\
792 99d2cb72 Michael Niedermayer
\
793 2da0d70d Diego Biurrun
    "psllq       $40, %%mm0     \n\t" /* RGB00000 0 */\
794
    "psllq       $40, %%mm2     \n\t" /* RGB00000 1 */\
795
    "psllq       $40, %%mm1     \n\t" /* RGB00000 2 */\
796
    "psllq       $40, %%mm3     \n\t" /* RGB00000 3 */\
797 99d2cb72 Michael Niedermayer
\
798 2da0d70d Diego Biurrun
    "punpckhdq %%mm4, %%mm0     \n\t" /* 0RGBRGB0 0 */\
799
    "punpckhdq %%mm6, %%mm2     \n\t" /* 0RGBRGB0 1 */\
800
    "punpckhdq %%mm5, %%mm1     \n\t" /* 0RGBRGB0 2 */\
801
    "punpckhdq %%mm7, %%mm3     \n\t" /* 0RGBRGB0 3 */\
802 99d2cb72 Michael Niedermayer
\
803 2da0d70d Diego Biurrun
    "psrlq        $8, %%mm0     \n\t" /* 00RGBRGB 0 */\
804
    "movq      %%mm2, %%mm6     \n\t" /* 0RGBRGB0 1 */\
805
    "psllq       $40, %%mm2     \n\t" /* GB000000 1 */\
806
    "por       %%mm2, %%mm0     \n\t" /* GBRGBRGB 0 */\
807
    MOVNTQ(%%mm0, (dst))\
808 99d2cb72 Michael Niedermayer
\
809 2da0d70d Diego Biurrun
    "psrlq       $24, %%mm6     \n\t" /* 0000RGBR 1 */\
810
    "movq      %%mm1, %%mm5     \n\t" /* 0RGBRGB0 2 */\
811
    "psllq       $24, %%mm1     \n\t" /* BRGB0000 2 */\
812
    "por       %%mm1, %%mm6     \n\t" /* BRGBRGBR 1 */\
813
    MOVNTQ(%%mm6, 8(dst))\
814 99d2cb72 Michael Niedermayer
\
815 2da0d70d Diego Biurrun
    "psrlq       $40, %%mm5     \n\t" /* 000000RG 2 */\
816
    "psllq        $8, %%mm3     \n\t" /* RGBRGB00 3 */\
817
    "por       %%mm3, %%mm5     \n\t" /* RGBRGBRG 2 */\
818
    MOVNTQ(%%mm5, 16(dst))\
819 99d2cb72 Michael Niedermayer
\
820 2da0d70d Diego Biurrun
    "add         $24, "#dst"    \n\t"\
821 99d2cb72 Michael Niedermayer
\
822 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
823
    "cmp     "#dstw", "#index"  \n\t"\
824
    " jb          1b            \n\t"
825 99d2cb72 Michael Niedermayer
826 6542b44e Michael Niedermayer
/* Loop tail writing 8 pixels as packed 24-bit B,G,R (24 bytes per iteration)
 * using pshufw plus the byte masks ff_M24A / ff_M24B / ff_M24C.
 * Expects mm2 = B, mm4 = G, mm5 = R (8 packed bytes each). mm0 and mm7 are
 * loaded with masks at the top, so no incoming value of mm7 survives.
 * The three quadwords are stored at dst, dst+8 and dst+16 and the dst
 * register itself is advanced by 24, so dst must be a writable register.
 * index is advanced by 8 and the loop jumps back to "1:" while index is
 * (unsigned) below dstw. Overwrites mm0, mm1, mm3, mm4, mm6 and mm7. */
#define WRITEBGR24MMX2(dst, dstw, index) \
827 2da0d70d Diego Biurrun
    /* mm2=B, %%mm4=G, %%mm5=R; mm0 and mm7 are overwritten with masks below */\
828 5802683a Reimar Döffinger
    "movq "MANGLE(ff_M24A)", %%mm0 \n\t"\
829
    "movq "MANGLE(ff_M24C)", %%mm7 \n\t"\
830 2da0d70d Diego Biurrun
    "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
831
    "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
832
    "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
833 99d2cb72 Michael Niedermayer
\
834 2da0d70d Diego Biurrun
    "pand   %%mm0, %%mm1        \n\t" /*    B2        B1       B0 */\
835
    "pand   %%mm0, %%mm3        \n\t" /*    G2        G1       G0 */\
836
    "pand   %%mm7, %%mm6        \n\t" /*       R1        R0       */\
837 99d2cb72 Michael Niedermayer
\
838 2da0d70d Diego Biurrun
    "psllq     $8, %%mm3        \n\t" /* G2        G1       G0    */\
839
    "por    %%mm1, %%mm6        \n\t"\
840
    "por    %%mm3, %%mm6        \n\t"\
841
    MOVNTQ(%%mm6, (dst))\
842 99d2cb72 Michael Niedermayer
\
843 2da0d70d Diego Biurrun
    "psrlq     $8, %%mm4        \n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
844
    "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
845
    "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
846
    "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
847 99d2cb72 Michael Niedermayer
\
848 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm1 \n\t" /* B5       B4        B3    */\
849 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm3        \n\t" /*       G4        G3       */\
850
    "pand   %%mm0, %%mm6        \n\t" /*    R4        R3       R2 */\
851 99d2cb72 Michael Niedermayer
\
852 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t" /* B5    G4 B4     G3 B3    */\
853
    "por    %%mm3, %%mm6        \n\t"\
854
    MOVNTQ(%%mm6, 8(dst))\
855 99d2cb72 Michael Niedermayer
\
856 2da0d70d Diego Biurrun
    "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6  B7 B6 B7 B6 */\
857
    "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
858
    "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
859 99d2cb72 Michael Niedermayer
\
860 2da0d70d Diego Biurrun
    "pand   %%mm7, %%mm1        \n\t" /*       B7        B6       */\
861
    "pand   %%mm0, %%mm3        \n\t" /*    G7        G6       G5 */\
862 5802683a Reimar Döffinger
    "pand "MANGLE(ff_M24B)", %%mm6 \n\t" /* R7       R6        R5    */\
863 99d2cb72 Michael Niedermayer
\
864 2da0d70d Diego Biurrun
    "por    %%mm1, %%mm3        \n\t"\
865
    "por    %%mm3, %%mm6        \n\t"\
866
    MOVNTQ(%%mm6, 16(dst))\
867 99d2cb72 Michael Niedermayer
\
868 2da0d70d Diego Biurrun
    "add      $24, "#dst"       \n\t"\
869 99d2cb72 Michael Niedermayer
\
870 2da0d70d Diego Biurrun
    "add       $8, "#index"     \n\t"\
871
    "cmp  "#dstw", "#index"     \n\t"\
872
    " jb       1b               \n\t"
873 99d2cb72 Michael Niedermayer
874 94daf2e9 Ramiro Polla
/* Select the 24-bit writer for this template instantiation: the pshufw-based
 * WRITEBGR24MMX2 when compiling the MMX2 variant, otherwise WRITEBGR24MMX.
 * The #undef first drops any definition left by an earlier inclusion of this
 * template. */
#if COMPILE_TEMPLATE_MMX2
875 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
876 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX2(dst, dstw, index)
877 99d2cb72 Michael Niedermayer
#else
878 7630f2e0 Michael Niedermayer
#undef WRITEBGR24
879 6e1c66bc Aurelien Jacobs
#define WRITEBGR24(dst, dstw, index)  WRITEBGR24MMX(dst, dstw, index)
880 99d2cb72 Michael Niedermayer
#endif
881
882 6e1c66bc Aurelien Jacobs
/* Loop tail writing 8 pixels as packed Y U Y V bytes (16 bytes per iteration).
 * Expects words in mm3 = U, mm4 = V, mm1 = Y for pixels 0-3 and mm7 = Y for
 * pixels 4-7. Everything is packed to bytes with unsigned saturation, U and V
 * are interleaved, and the result is interleaved with Y.
 * Stores at dst + index*2, adds 8 to index and jumps back to "1:" while index
 * is (unsigned) below dstw. Overwrites mm1, mm3, mm4 and mm7. */
#define REAL_WRITEYUY2(dst, dstw, index) \
883 2da0d70d Diego Biurrun
    "packuswb  %%mm3, %%mm3     \n\t" /* U words -> bytes */\
884
    "packuswb  %%mm4, %%mm4     \n\t" /* V words -> bytes */\
885
    "packuswb  %%mm7, %%mm1     \n\t" /* 8 Y bytes */\
886
    "punpcklbw %%mm4, %%mm3     \n\t" /* U0 V0 U1 V1 ... */\
887
    "movq      %%mm1, %%mm7     \n\t"\
888
    "punpcklbw %%mm3, %%mm1     \n\t" /* Y0 U0 Y1 V0 ... (pixels 0-3) */\
889
    "punpckhbw %%mm3, %%mm7     \n\t" /* pixels 4-7 */\
890 25593e29 Michael Niedermayer
\
891 2da0d70d Diego Biurrun
    MOVNTQ(%%mm1, (dst, index, 2))\
892
    MOVNTQ(%%mm7, 8(dst, index, 2))\
893 25593e29 Michael Niedermayer
\
894 2da0d70d Diego Biurrun
    "add          $8, "#index"  \n\t"\
895
    "cmp     "#dstw", "#index"  \n\t"\
896
    " jb          1b            \n\t"
897 6e1c66bc Aurelien Jacobs
/* Expansion wrapper: arguments are macro-expanded here before
 * REAL_WRITEYUY2 stringifies them with '#'. */
#define WRITEYUY2(dst, dstw, index)  REAL_WRITEYUY2(dst, dstw, index)
898 25593e29 Michael Niedermayer
899
900 7ac40327 Ramiro Polla
/**
 * Vertical filtering to planar output (luma, optional chroma, optional alpha).
 *
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set, every plane is produced
 * by a YSCALEYUV2YV12X or YSCALEYUV2YV12X_ACCURATE invocation (chosen by
 * SWS_ACCURATE_RND) and the function returns. The macros are defined outside
 * this function; they are used as complete statements and reference the
 * context rather than the filter/source arguments passed here.
 * Otherwise the work is delegated to yuv2yuvX_altivec_real() or
 * yuv2yuvXinC().
 *
 * NOTE(review): the AltiVec call receives neither alpSrc nor aDest, so alpha
 * is not written on that path.
 */
static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
901
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc,
902 6858492e Cédric Schieli
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
903 38858470 Michael Niedermayer
{
904 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
905 dd68318c Ramiro Polla
    if(!(c->flags & SWS_BITEXACT)) {
906
        if (c->flags & SWS_ACCURATE_RND) {
907
            if (uDest) {
                /* U at source byte offset 0, V at byte offset VOF */
908 14014d47 Michael Niedermayer
                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
909
                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
910
            }
911 dd68318c Ramiro Polla
            if (CONFIG_SWSCALE_ALPHA && aDest) {
912 6858492e Cédric Schieli
                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
913
            }
914 bca11e75 Michael Niedermayer
915 14014d47 Michael Niedermayer
            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
916 dd68318c Ramiro Polla
        } else {
917
            if (uDest) {
                /* U at source byte offset 0, V at byte offset VOF */
918 14014d47 Michael Niedermayer
                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
919
                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
920
            }
921 dd68318c Ramiro Polla
            if (CONFIG_SWSCALE_ALPHA && aDest) {
922 6858492e Cédric Schieli
                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
923
            }
924 2da0d70d Diego Biurrun
925 14014d47 Michael Niedermayer
            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
926
        }
927 f433c8ab Michael Niedermayer
        return;
928
    }
929
#endif
    /* Non-MMX build, or bit-exact output requested */
930 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_ALTIVEC
931 9b734d44 Ramiro Polla
    yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize,
932
                          chrFilter, chrSrc, chrFilterSize,
933
                          dest, uDest, vDest, dstW, chrDstW);
934 94daf2e9 Ramiro Polla
#else //COMPILE_TEMPLATE_ALTIVEC
935 9b734d44 Ramiro Polla
    yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
936
                chrFilter, chrSrc, chrFilterSize,
937
                alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
938 94daf2e9 Ramiro Polla
#endif //!COMPILE_TEMPLATE_ALTIVEC
939 c1b0bfb4 Michael Niedermayer
}
940 2add307d Michael Niedermayer
941 7ac40327 Ramiro Polla
/**
 * Vertical filtering to NV12-style output (one luma plane plus a single
 * chroma destination). There is no SIMD variant here: every argument except
 * the context c, which is unused, is forwarded unchanged to yuv2nv12XinC().
 */
static inline void RENAME(yuv2nv12X)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
942
                                     const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
943 b411dfff Carl Eugen Hoyos
                                     uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, enum PixelFormat dstFormat)
944 6118e52e Ville Syrjälä
{
945 9b734d44 Ramiro Polla
    yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize,
946
                 chrFilter, chrSrc, chrFilterSize,
947
                 dest, uDest, dstW, chrDstW, dstFormat);
948 6118e52e Ville Syrjälä
}
949
950 7ac40327 Ramiro Polla
/**
 * Unfiltered vertical output: converts one line of 16-bit intermediate
 * samples to 8-bit planar output.
 *
 * The C path computes dst[i] = clip_uint8((src[i] + 64) >> 7) for every plane.
 * With COMPILE_TEMPLATE_MMX and SWS_BITEXACT not set, each plane is instead
 * handed to the YSCALEYUV2YV121 / YSCALEYUV2YV121_ACCURATE asm template
 * (selected by SWS_ACCURATE_RND).
 *
 * @param c        scaler context; only c->flags is read here
 * @param lumSrc   luma samples, dstW entries
 * @param chrSrc   chroma samples: U at [0, chrDstW), V at [VOFW, VOFW + chrDstW)
 * @param alpSrc   alpha samples, dstW entries; only used when aDest is set
 * @param dest     luma output, dstW bytes
 * @param uDest    U output; in the C path a NULL uDest skips both chroma
 *                 planes, in the MMX path every NULL plane is skipped on its own
 * @param vDest    V output
 * @param aDest    alpha output, or NULL for none
 * @param dstW     width of the luma and alpha planes
 * @param chrDstW  width of the chroma planes
 */
static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{
    int i;
#if COMPILE_TEMPLATE_MMX
    if(!(c->flags & SWS_BITEXACT)) {
        long p= 4;
        /* Per-plane tables, walked from index 3 down to 0 (V, U, luma, alpha).
         * Source and destination point at the END of each line; the asm is
         * given the negated width as its third operand.
         * src[] holds int16_t pointers: the asm only consumes the address, and
         * keeping the element type avoids an incompatible-pointer
         * initialisation. */
        const int16_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};

        if (c->flags & SWS_ACCURATE_RND) {
            while(p--) {
                if (dst[p]) {
                    /* "memory": the template reads src and writes dst through
                     * register pointers that are not declared as memory
                     * operands, so the compiler must not cache or reorder
                     * accesses to those buffers across the statement. */
                    __asm__ volatile(
                        YSCALEYUV2YV121_ACCURATE
                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a, "memory"
                    );
                }
            }
        } else {
            while(p--) {
                if (dst[p]) {
                    /* see the "memory" note above */
                    __asm__ volatile(
                        YSCALEYUV2YV121
                        :: "r" (src[p]), "r" (dst[p] + counter[p]),
                        "g" (-counter[p])
                        : "%"REG_a, "memory"
                    );
                }
            }
        }
        return;
    }
#endif
    /* C fallback. For int16_t input (x + 64) >> 7 lies in [-256, 256], so
     * av_clip_uint8() yields exactly what the former "&256" range test did. */
    for (i=0; i<dstW; i++)
        dest[i]= av_clip_uint8((lumSrc[i]+64)>>7);

    if (uDest)
        for (i=0; i<chrDstW; i++) {
            uDest[i]= av_clip_uint8((chrSrc[i       ]+64)>>7);
            vDest[i]= av_clip_uint8((chrSrc[i + VOFW]+64)>>7);
        }

    if (CONFIG_SWSCALE_ALPHA && aDest)
        for (i=0; i<dstW; i++)
            aDest[i]= av_clip_uint8((alpSrc[i]+64)>>7);
}
1020
1021 c1b0bfb4 Michael Niedermayer
1022 d604bab9 Michael Niedermayer
/**
1023
 * vertical scale YV12 to RGB
1024
 */
1025 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1026
                                       const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1027
                                       const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
1028 c1b0bfb4 Michael Niedermayer
{
1029 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1030 d0ce212a Ramiro Polla
    x86_reg dummy=0;
1031 dd68318c Ramiro Polla
    if(!(c->flags & SWS_BITEXACT)) {
1032
        if (c->flags & SWS_ACCURATE_RND) {
1033
            switch(c->dstFormat) {
1034 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1035 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1036 6858492e Cédric Schieli
                    YSCALEYUV2PACKEDX_ACCURATE
1037
                    YSCALEYUV2RGBX
1038
                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
1039
                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
1040
                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
1041
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1042
                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
1043
                    "psraw                        $3, %%mm1         \n\t"
1044
                    "psraw                        $3, %%mm7         \n\t"
1045
                    "packuswb                  %%mm7, %%mm1         \n\t"
1046
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1047
1048
                    YSCALEYUV2PACKEDX_END
1049 dd68318c Ramiro Polla
                } else {
1050 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_ACCURATE
1051
                    YSCALEYUV2RGBX
1052
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1053
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1054 2da0d70d Diego Biurrun
1055 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX_END
1056 6858492e Cédric Schieli
                }
1057 14014d47 Michael Niedermayer
                return;
1058
            case PIX_FMT_BGR24:
1059
                YSCALEYUV2PACKEDX_ACCURATE
1060
                YSCALEYUV2RGBX
1061 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1062 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize
1063
                "add %4, %%"REG_c"                        \n\t"
1064
                WRITEBGR24(%%REGc, %5, %%REGa)
1065 2da0d70d Diego Biurrun
1066
1067 14014d47 Michael Niedermayer
                :: "r" (&c->redDither),
1068
                "m" (dummy), "m" (dummy), "m" (dummy),
1069
                "r" (dest), "m" (dstW)
1070
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1071
                );
1072
                return;
1073
            case PIX_FMT_RGB555:
1074
                YSCALEYUV2PACKEDX_ACCURATE
1075
                YSCALEYUV2RGBX
1076 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1077 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1078 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1079 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1080
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1081
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1082 2da0d70d Diego Biurrun
#endif
1083
1084 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1085
                YSCALEYUV2PACKEDX_END
1086
                return;
1087
            case PIX_FMT_RGB565:
1088
                YSCALEYUV2PACKEDX_ACCURATE
1089
                YSCALEYUV2RGBX
1090 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1091 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1092 bca11e75 Michael Niedermayer
#ifdef DITHER1XBPP
1093 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2\n\t"
1094
                "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
1095
                "paddusb "RED_DITHER"(%0), %%mm5\n\t"
1096 2da0d70d Diego Biurrun
#endif
1097
1098 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1099
                YSCALEYUV2PACKEDX_END
1100
                return;
1101
            case PIX_FMT_YUYV422:
1102
                YSCALEYUV2PACKEDX_ACCURATE
1103
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1104
1105
                "psraw $3, %%mm3    \n\t"
1106
                "psraw $3, %%mm4    \n\t"
1107
                "psraw $3, %%mm1    \n\t"
1108
                "psraw $3, %%mm7    \n\t"
1109
                WRITEYUY2(%4, %5, %%REGa)
1110
                YSCALEYUV2PACKEDX_END
1111
                return;
1112
            }
1113 dd68318c Ramiro Polla
        } else {
1114
            switch(c->dstFormat) {
1115 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1116 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1117 6858492e Cédric Schieli
                    YSCALEYUV2PACKEDX
1118
                    YSCALEYUV2RGBX
1119
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1120
                    "psraw                        $3, %%mm1         \n\t"
1121
                    "psraw                        $3, %%mm7         \n\t"
1122
                    "packuswb                  %%mm7, %%mm1         \n\t"
1123
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1124
                    YSCALEYUV2PACKEDX_END
1125 dd68318c Ramiro Polla
                } else {
1126 3164d25e Cédric Schieli
                    YSCALEYUV2PACKEDX
1127
                    YSCALEYUV2RGBX
1128
                    "pcmpeqd %%mm7, %%mm7 \n\t"
1129
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1130
                    YSCALEYUV2PACKEDX_END
1131 6858492e Cédric Schieli
                }
1132 14014d47 Michael Niedermayer
                return;
1133
            case PIX_FMT_BGR24:
1134
                YSCALEYUV2PACKEDX
1135
                YSCALEYUV2RGBX
1136 40494418 Cédric Schieli
                "pxor                    %%mm7, %%mm7       \n\t"
1137 14014d47 Michael Niedermayer
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"   \n\t" //FIXME optimize
1138
                "add                        %4, %%"REG_c"   \n\t"
1139
                WRITEBGR24(%%REGc, %5, %%REGa)
1140
1141
                :: "r" (&c->redDither),
1142
                "m" (dummy), "m" (dummy), "m" (dummy),
1143
                "r" (dest),  "m" (dstW)
1144
                : "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S
1145
                );
1146
                return;
1147
            case PIX_FMT_RGB555:
1148
                YSCALEYUV2PACKEDX
1149
                YSCALEYUV2RGBX
1150 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1151 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1152 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1153 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1154
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1155
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1156 2da0d70d Diego Biurrun
#endif
1157
1158 14014d47 Michael Niedermayer
                WRITERGB15(%4, %5, %%REGa)
1159
                YSCALEYUV2PACKEDX_END
1160
                return;
1161
            case PIX_FMT_RGB565:
1162
                YSCALEYUV2PACKEDX
1163
                YSCALEYUV2RGBX
1164 40494418 Cédric Schieli
                "pxor %%mm7, %%mm7 \n\t"
1165 14014d47 Michael Niedermayer
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1166 c1b0bfb4 Michael Niedermayer
#ifdef DITHER1XBPP
1167 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%0), %%mm2  \n\t"
1168
                "paddusb "GREEN_DITHER"(%0), %%mm4  \n\t"
1169
                "paddusb "RED_DITHER"(%0), %%mm5  \n\t"
1170 2da0d70d Diego Biurrun
#endif
1171
1172 14014d47 Michael Niedermayer
                WRITERGB16(%4, %5, %%REGa)
1173
                YSCALEYUV2PACKEDX_END
1174
                return;
1175
            case PIX_FMT_YUYV422:
1176
                YSCALEYUV2PACKEDX
1177
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1178
1179
                "psraw $3, %%mm3    \n\t"
1180
                "psraw $3, %%mm4    \n\t"
1181
                "psraw $3, %%mm1    \n\t"
1182
                "psraw $3, %%mm7    \n\t"
1183
                WRITEYUY2(%4, %5, %%REGa)
1184
                YSCALEYUV2PACKEDX_END
1185
                return;
1186
            }
1187 bca11e75 Michael Niedermayer
        }
1188
    }
1189 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1190
#if COMPILE_TEMPLATE_ALTIVEC
1191 2da0d70d Diego Biurrun
    /* The following list of supported dstFormat values should
1192 780daf2b Diego Biurrun
       match what's found in the body of ff_yuv2packedX_altivec() */
1193 d55ef636 Reimar Döffinger
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
1194 9b734d44 Ramiro Polla
         (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1195
          c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1196
          c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
1197 780daf2b Diego Biurrun
            ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize,
1198
                                   chrFilter, chrSrc, chrFilterSize,
1199
                                   dest, dstW, dstY);
1200 2da0d70d Diego Biurrun
    else
1201
#endif
1202
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1203
                       chrFilter, chrSrc, chrFilterSize,
1204 6858492e Cédric Schieli
                       alpSrc, dest, dstW, dstY);
1205 c1b0bfb4 Michael Niedermayer
}
1206
1207
/**
1208
 * vertical bilinear scale YV12 to RGB
1209
 */
1210 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, const uint16_t *buf1, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1211
                          const uint16_t *abuf0, const uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1212 d604bab9 Michael Niedermayer
{
1213 ac0ad729 Michael Niedermayer
    int  yalpha1=4095- yalpha;
1214
    int uvalpha1=4095-uvalpha;
1215 2da0d70d Diego Biurrun
    int i;
1216 d604bab9 Michael Niedermayer
1217 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1218 dd68318c Ramiro Polla
    if(!(c->flags & SWS_BITEXACT)) {
1219
        switch(c->dstFormat) {
1220 c255994b Ramiro Polla
        //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1221
        case PIX_FMT_RGB32:
1222
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1223 6858492e Cédric Schieli
#if ARCH_X86_64
1224 c255994b Ramiro Polla
                __asm__ volatile(
1225 f514b4f9 Reimar Döffinger
                    YSCALEYUV2RGB(%%r8, %5)
1226
                    YSCALEYUV2RGB_YA(%%r8, %5, %6, %7)
1227 6858492e Cédric Schieli
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1228
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1229
                    "packuswb            %%mm7, %%mm1       \n\t"
1230 f514b4f9 Reimar Döffinger
                    WRITEBGR32(%4, 8280(%5), %%r8, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1231 6858492e Cédric Schieli
1232 04ef1d3f Reimar Döffinger
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "r" (dest),
1233 6858492e Cédric Schieli
                    "a" (&c->redDither)
1234
                    ,"r" (abuf0), "r" (abuf1)
1235 f514b4f9 Reimar Döffinger
                    : "%r8"
1236 c255994b Ramiro Polla
                );
1237 6858492e Cédric Schieli
#else
1238 a959e247 Zuxy Meng
                *(const uint16_t **)(&c->u_temp)=abuf0;
1239
                *(const uint16_t **)(&c->v_temp)=abuf1;
1240 c255994b Ramiro Polla
                __asm__ volatile(
1241 6858492e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1242
                    "mov        %4, %%"REG_b"               \n\t"
1243
                    "push %%"REG_BP"                        \n\t"
1244
                    YSCALEYUV2RGB(%%REGBP, %5)
1245
                    "push                   %0              \n\t"
1246
                    "push                   %1              \n\t"
1247
                    "mov          "U_TEMP"(%5), %0          \n\t"
1248
                    "mov          "V_TEMP"(%5), %1          \n\t"
1249
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1250
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1251
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1252
                    "packuswb            %%mm7, %%mm1       \n\t"
1253
                    "pop                    %1              \n\t"
1254
                    "pop                    %0              \n\t"
1255
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1256
                    "pop %%"REG_BP"                         \n\t"
1257
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1258
1259
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1260
                    "a" (&c->redDither)
1261 c255994b Ramiro Polla
                );
1262 6858492e Cédric Schieli
#endif
1263 c255994b Ramiro Polla
            } else {
1264
                __asm__ volatile(
1265 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1266
                    "mov        %4, %%"REG_b"               \n\t"
1267
                    "push %%"REG_BP"                        \n\t"
1268
                    YSCALEYUV2RGB(%%REGBP, %5)
1269
                    "pcmpeqd %%mm7, %%mm7                   \n\t"
1270
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1271
                    "pop %%"REG_BP"                         \n\t"
1272
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1273 2da0d70d Diego Biurrun
1274 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1275
                    "a" (&c->redDither)
1276 c255994b Ramiro Polla
                );
1277
            }
1278
            return;
1279
        case PIX_FMT_BGR24:
1280
            __asm__ volatile(
1281 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1282
                "mov        %4, %%"REG_b"               \n\t"
1283
                "push %%"REG_BP"                        \n\t"
1284
                YSCALEYUV2RGB(%%REGBP, %5)
1285 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1286 2da0d70d Diego Biurrun
                WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1287
                "pop %%"REG_BP"                         \n\t"
1288
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1289
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1290
                "a" (&c->redDither)
1291 c255994b Ramiro Polla
            );
1292
            return;
1293
        case PIX_FMT_RGB555:
1294
            __asm__ volatile(
1295 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1296
                "mov        %4, %%"REG_b"               \n\t"
1297
                "push %%"REG_BP"                        \n\t"
1298
                YSCALEYUV2RGB(%%REGBP, %5)
1299 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1300 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1301 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1302 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1303
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1304
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1305 2da0d70d Diego Biurrun
#endif
1306
1307 27a90b04 Michael Niedermayer
                WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1308 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1309
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1310
1311
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1312
                "a" (&c->redDither)
1313 c255994b Ramiro Polla
            );
1314
            return;
1315
        case PIX_FMT_RGB565:
1316
            __asm__ volatile(
1317 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1318
                "mov        %4, %%"REG_b"               \n\t"
1319
                "push %%"REG_BP"                        \n\t"
1320
                YSCALEYUV2RGB(%%REGBP, %5)
1321 40494418 Cédric Schieli
                "pxor    %%mm7, %%mm7                   \n\t"
1322 2da0d70d Diego Biurrun
                /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1323 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1324 88e2a9ae Carl Eugen Hoyos
                "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1325
                "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1326
                "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1327 2da0d70d Diego Biurrun
#endif
1328
1329 27a90b04 Michael Niedermayer
                WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1330 2da0d70d Diego Biurrun
                "pop %%"REG_BP"                         \n\t"
1331
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1332
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1333
                "a" (&c->redDither)
1334 c255994b Ramiro Polla
            );
1335
            return;
1336
        case PIX_FMT_YUYV422:
1337
            __asm__ volatile(
1338 2da0d70d Diego Biurrun
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1339
                "mov %4, %%"REG_b"                        \n\t"
1340
                "push %%"REG_BP"                        \n\t"
1341
                YSCALEYUV2PACKED(%%REGBP, %5)
1342
                WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1343
                "pop %%"REG_BP"                         \n\t"
1344
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1345
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1346
                "a" (&c->redDither)
1347 c255994b Ramiro Polla
            );
1348
            return;
1349
        default: break;
1350 2da0d70d Diego Biurrun
        }
1351 f433c8ab Michael Niedermayer
    }
1352 94daf2e9 Ramiro Polla
#endif //COMPILE_TEMPLATE_MMX
1353 9b734d44 Ramiro Polla
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1354 d604bab9 Michael Niedermayer
}
1355
1356
/**
1357
 * YV12 to RGB without scaling or interpolating
1358
 */
1359 7ac40327 Ramiro Polla
static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, const uint16_t *uvbuf0, const uint16_t *uvbuf1,
1360 b411dfff Carl Eugen Hoyos
                          const uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, enum PixelFormat dstFormat, int flags, int y)
1361 d604bab9 Michael Niedermayer
{
1362 2da0d70d Diego Biurrun
    const int yalpha1=0;
1363
    int i;
1364 6a4970ab Diego Biurrun
1365 7ac40327 Ramiro Polla
    const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
1366 2da0d70d Diego Biurrun
    const int yalpha= 4096; //FIXME ...
1367 96034638 Michael Niedermayer
1368 dd68318c Ramiro Polla
    if (flags&SWS_FULL_CHR_H_INT) {
1369 40fa5140 Ramiro Polla
        c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
1370 2da0d70d Diego Biurrun
        return;
1371
    }
1372 397c035e Michael Niedermayer
1373 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1374 dd68318c Ramiro Polla
    if(!(flags & SWS_BITEXACT)) {
1375
        if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
1376
            switch(dstFormat) {
1377 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1378 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1379 6858492e Cédric Schieli
                    __asm__ volatile(
1380 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1381
                        "mov        %4, %%"REG_b"               \n\t"
1382
                        "push %%"REG_BP"                        \n\t"
1383
                        YSCALEYUV2RGB1(%%REGBP, %5)
1384
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
1385
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1386
                        "pop %%"REG_BP"                         \n\t"
1387
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1388
1389
                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1390
                        "a" (&c->redDither)
1391 6858492e Cédric Schieli
                    );
1392 dd68318c Ramiro Polla
                } else {
1393 3164d25e Cédric Schieli
                    __asm__ volatile(
1394 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1395
                        "mov        %4, %%"REG_b"               \n\t"
1396
                        "push %%"REG_BP"                        \n\t"
1397
                        YSCALEYUV2RGB1(%%REGBP, %5)
1398
                        "pcmpeqd %%mm7, %%mm7                   \n\t"
1399
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1400
                        "pop %%"REG_BP"                         \n\t"
1401
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1402
1403
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1404
                        "a" (&c->redDither)
1405
                    );
1406
                }
1407
                return;
1408
            case PIX_FMT_BGR24:
1409
                __asm__ volatile(
1410 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1411
                    "mov        %4, %%"REG_b"               \n\t"
1412
                    "push %%"REG_BP"                        \n\t"
1413
                    YSCALEYUV2RGB1(%%REGBP, %5)
1414 c255994b Ramiro Polla
                    "pxor    %%mm7, %%mm7                   \n\t"
1415
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1416 3164d25e Cédric Schieli
                    "pop %%"REG_BP"                         \n\t"
1417
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1418 14014d47 Michael Niedermayer
1419 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1420
                    "a" (&c->redDither)
1421 14014d47 Michael Niedermayer
                );
1422
                return;
1423
            case PIX_FMT_RGB555:
1424 7ad6469e Diego Pettenò
                __asm__ volatile(
1425 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1426
                    "mov        %4, %%"REG_b"               \n\t"
1427
                    "push %%"REG_BP"                        \n\t"
1428
                    YSCALEYUV2RGB1(%%REGBP, %5)
1429
                    "pxor    %%mm7, %%mm7                   \n\t"
1430
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1431 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1432 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1433
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1434
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1435 2da0d70d Diego Biurrun
#endif
1436 c255994b Ramiro Polla
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1437
                    "pop %%"REG_BP"                         \n\t"
1438
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1439 2da0d70d Diego Biurrun
1440 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1441
                    "a" (&c->redDither)
1442 14014d47 Michael Niedermayer
                );
1443
                return;
1444
            case PIX_FMT_RGB565:
1445 7ad6469e Diego Pettenò
                __asm__ volatile(
1446 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1447
                    "mov        %4, %%"REG_b"               \n\t"
1448
                    "push %%"REG_BP"                        \n\t"
1449
                    YSCALEYUV2RGB1(%%REGBP, %5)
1450
                    "pxor    %%mm7, %%mm7                   \n\t"
1451
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1452 d604bab9 Michael Niedermayer
#ifdef DITHER1XBPP
1453 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1454
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1455
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1456 2da0d70d Diego Biurrun
#endif
1457
1458 c255994b Ramiro Polla
                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1459
                    "pop %%"REG_BP"                         \n\t"
1460
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1461 2da0d70d Diego Biurrun
1462 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1463
                    "a" (&c->redDither)
1464 14014d47 Michael Niedermayer
                );
1465
                return;
1466
            case PIX_FMT_YUYV422:
1467 7ad6469e Diego Pettenò
                __asm__ volatile(
1468 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1469
                    "mov        %4, %%"REG_b"               \n\t"
1470
                    "push %%"REG_BP"                        \n\t"
1471
                    YSCALEYUV2PACKED1(%%REGBP, %5)
1472
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1473
                    "pop %%"REG_BP"                         \n\t"
1474
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1475 14014d47 Michael Niedermayer
1476 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1477
                    "a" (&c->redDither)
1478 14014d47 Michael Niedermayer
                );
1479
                return;
1480
            }
1481 dd68318c Ramiro Polla
        } else {
1482
            switch(dstFormat) {
1483 14014d47 Michael Niedermayer
            case PIX_FMT_RGB32:
1484 dd68318c Ramiro Polla
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1485 6858492e Cédric Schieli
                    __asm__ volatile(
1486 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1487
                        "mov        %4, %%"REG_b"               \n\t"
1488
                        "push %%"REG_BP"                        \n\t"
1489
                        YSCALEYUV2RGB1b(%%REGBP, %5)
1490
                        YSCALEYUV2RGB1_ALPHA(%%REGBP)
1491
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1492
                        "pop %%"REG_BP"                         \n\t"
1493
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1494
1495
                        :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1496
                        "a" (&c->redDither)
1497 6858492e Cédric Schieli
                    );
1498 dd68318c Ramiro Polla
                } else {
1499 3164d25e Cédric Schieli
                    __asm__ volatile(
1500 c255994b Ramiro Polla
                        "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1501
                        "mov        %4, %%"REG_b"               \n\t"
1502
                        "push %%"REG_BP"                        \n\t"
1503
                        YSCALEYUV2RGB1b(%%REGBP, %5)
1504
                        "pcmpeqd %%mm7, %%mm7                   \n\t"
1505
                        WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1506
                        "pop %%"REG_BP"                         \n\t"
1507
                        "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1508
1509
                        :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1510
                        "a" (&c->redDither)
1511
                    );
1512
                }
1513
                return;
1514
            case PIX_FMT_BGR24:
1515
                __asm__ volatile(
1516 3164d25e Cédric Schieli
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1517
                    "mov        %4, %%"REG_b"               \n\t"
1518
                    "push %%"REG_BP"                        \n\t"
1519
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1520 c255994b Ramiro Polla
                    "pxor    %%mm7, %%mm7                   \n\t"
1521
                    WRITEBGR24(%%REGb, 8280(%5), %%REGBP)
1522 3164d25e Cédric Schieli
                    "pop %%"REG_BP"                         \n\t"
1523
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1524 14014d47 Michael Niedermayer
1525 3164d25e Cédric Schieli
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1526
                    "a" (&c->redDither)
1527 14014d47 Michael Niedermayer
                );
1528
                return;
1529
            case PIX_FMT_RGB555:
1530 7ad6469e Diego Pettenò
                __asm__ volatile(
1531 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1532
                    "mov        %4, %%"REG_b"               \n\t"
1533
                    "push %%"REG_BP"                        \n\t"
1534
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1535
                    "pxor    %%mm7, %%mm7                   \n\t"
1536
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1537 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1538 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1539
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1540
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1541 2da0d70d Diego Biurrun
#endif
1542 c255994b Ramiro Polla
                    WRITERGB15(%%REGb, 8280(%5), %%REGBP)
1543
                    "pop %%"REG_BP"                         \n\t"
1544
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1545 2da0d70d Diego Biurrun
1546 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1547
                    "a" (&c->redDither)
1548 14014d47 Michael Niedermayer
                );
1549
                return;
1550
            case PIX_FMT_RGB565:
1551 7ad6469e Diego Pettenò
                __asm__ volatile(
1552 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1553
                    "mov        %4, %%"REG_b"               \n\t"
1554
                    "push %%"REG_BP"                        \n\t"
1555
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1556
                    "pxor    %%mm7, %%mm7                   \n\t"
1557
                    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
1558 497d4f99 Michael Niedermayer
#ifdef DITHER1XBPP
1559 c255994b Ramiro Polla
                    "paddusb "BLUE_DITHER"(%5), %%mm2      \n\t"
1560
                    "paddusb "GREEN_DITHER"(%5), %%mm4      \n\t"
1561
                    "paddusb "RED_DITHER"(%5), %%mm5      \n\t"
1562 2da0d70d Diego Biurrun
#endif
1563
1564 c255994b Ramiro Polla
                    WRITERGB16(%%REGb, 8280(%5), %%REGBP)
1565
                    "pop %%"REG_BP"                         \n\t"
1566
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1567 2da0d70d Diego Biurrun
1568 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1569
                    "a" (&c->redDither)
1570 14014d47 Michael Niedermayer
                );
1571
                return;
1572
            case PIX_FMT_YUYV422:
1573 7ad6469e Diego Pettenò
                __asm__ volatile(
1574 c255994b Ramiro Polla
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1575
                    "mov        %4, %%"REG_b"               \n\t"
1576
                    "push %%"REG_BP"                        \n\t"
1577
                    YSCALEYUV2PACKED1b(%%REGBP, %5)
1578
                    WRITEYUY2(%%REGb, 8280(%5), %%REGBP)
1579
                    "pop %%"REG_BP"                         \n\t"
1580
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1581 14014d47 Michael Niedermayer
1582 c255994b Ramiro Polla
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1583
                    "a" (&c->redDither)
1584 14014d47 Michael Niedermayer
                );
1585
                return;
1586
            }
1587 2da0d70d Diego Biurrun
        }
1588
    }
1589 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1590 dd68318c Ramiro Polla
    if (uvalpha < 2048) {
1591 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1592 dd68318c Ramiro Polla
    } else {
1593 6858492e Cédric Schieli
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1594 2da0d70d Diego Biurrun
    }
1595 d604bab9 Michael Niedermayer
}
1596
1597 8a322796 Diego Biurrun
//FIXME the yuy2* functions can read up to 7 samples past the end of their input
1598 6ff0ad6b Michael Niedermayer
1599 7ac40327 Ramiro Polla
static inline void RENAME(yuy2ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1600 1e621b18 Michael Niedermayer
{
1601 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1602 7ad6469e Diego Pettenò
    __asm__ volatile(
1603 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm2           \n\t"
1604
        "mov                    %0, %%"REG_a"       \n\t"
1605
        "1:                                         \n\t"
1606
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1607
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1608
        "pand                %%mm2, %%mm0           \n\t"
1609
        "pand                %%mm2, %%mm1           \n\t"
1610
        "packuswb            %%mm1, %%mm0           \n\t"
1611
        "movq                %%mm0, (%2, %%"REG_a") \n\t"
1612
        "add                    $8, %%"REG_a"       \n\t"
1613
        " js                    1b                  \n\t"
1614
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1615
        : "%"REG_a
1616 2da0d70d Diego Biurrun
    );
1617 1e621b18 Michael Niedermayer
#else
1618 2da0d70d Diego Biurrun
    int i;
1619
    for (i=0; i<width; i++)
1620
        dst[i]= src[2*i];
1621 1e621b18 Michael Niedermayer
#endif
1622
}
1623
1624 7ac40327 Ramiro Polla
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1625 1e621b18 Michael Niedermayer
{
1626 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1627 7ad6469e Diego Pettenò
    __asm__ volatile(
1628 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1629
        "mov                    %0, %%"REG_a"       \n\t"
1630
        "1:                                         \n\t"
1631
        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1632
        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1633
        "psrlw                  $8, %%mm0           \n\t"
1634
        "psrlw                  $8, %%mm1           \n\t"
1635
        "packuswb            %%mm1, %%mm0           \n\t"
1636
        "movq                %%mm0, %%mm1           \n\t"
1637
        "psrlw                  $8, %%mm0           \n\t"
1638
        "pand                %%mm4, %%mm1           \n\t"
1639
        "packuswb            %%mm0, %%mm0           \n\t"
1640
        "packuswb            %%mm1, %%mm1           \n\t"
1641
        "movd                %%mm0, (%3, %%"REG_a") \n\t"
1642
        "movd                %%mm1, (%2, %%"REG_a") \n\t"
1643
        "add                    $4, %%"REG_a"       \n\t"
1644
        " js                    1b                  \n\t"
1645
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1646
        : "%"REG_a
1647 2da0d70d Diego Biurrun
    );
1648 1e621b18 Michael Niedermayer
#else
1649 2da0d70d Diego Biurrun
    int i;
1650 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1651 2da0d70d Diego Biurrun
        dstU[i]= src1[4*i + 1];
1652
        dstV[i]= src1[4*i + 3];
1653
    }
1654
#endif
1655
    assert(src1 == src2);
1656 1e621b18 Michael Niedermayer
}
1657
1658 de1275d5 Michael Niedermayer
static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1659
{
1660 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1661 de1275d5 Michael Niedermayer
    __asm__ volatile(
1662 c255994b Ramiro Polla
        "mov                    %0, %%"REG_a"       \n\t"
1663
        "1:                                         \n\t"
1664
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1665
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1666
        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
1667
        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
1668
        "psrlw                  $8, %%mm0           \n\t"
1669
        "psrlw                  $8, %%mm1           \n\t"
1670
        "psrlw                  $8, %%mm2           \n\t"
1671
        "psrlw                  $8, %%mm3           \n\t"
1672
        "packuswb            %%mm1, %%mm0           \n\t"
1673
        "packuswb            %%mm3, %%mm2           \n\t"
1674
        "movq                %%mm0, (%3, %%"REG_a") \n\t"
1675
        "movq                %%mm2, (%4, %%"REG_a") \n\t"
1676
        "add                    $8, %%"REG_a"       \n\t"
1677
        " js                    1b                  \n\t"
1678
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1679
        : "%"REG_a
1680 de1275d5 Michael Niedermayer
    );
1681
#else
1682
    int i;
1683 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1684 de1275d5 Michael Niedermayer
        dstU[i]= src1[2*i + 1];
1685
        dstV[i]= src2[2*i + 1];
1686
    }
1687
#endif
1688
}
1689
1690 4cf16bbe Diego Biurrun
/* This is almost identical to the previous, and exists only because
1691
 * yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
1692 7ac40327 Ramiro Polla
static inline void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1693 7322a67c Michael Niedermayer
{
1694 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1695 7ad6469e Diego Pettenò
    __asm__ volatile(
1696 c255994b Ramiro Polla
        "mov                  %0, %%"REG_a"         \n\t"
1697
        "1:                                         \n\t"
1698
        "movq  (%1, %%"REG_a",2), %%mm0             \n\t"
1699
        "movq 8(%1, %%"REG_a",2), %%mm1             \n\t"
1700
        "psrlw                $8, %%mm0             \n\t"
1701
        "psrlw                $8, %%mm1             \n\t"
1702
        "packuswb          %%mm1, %%mm0             \n\t"
1703
        "movq              %%mm0, (%2, %%"REG_a")   \n\t"
1704
        "add                  $8, %%"REG_a"         \n\t"
1705
        " js                  1b                    \n\t"
1706
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst+width)
1707
        : "%"REG_a
1708 2da0d70d Diego Biurrun
    );
1709 7322a67c Michael Niedermayer
#else
1710 2da0d70d Diego Biurrun
    int i;
1711
    for (i=0; i<width; i++)
1712
        dst[i]= src[2*i+1];
1713 7322a67c Michael Niedermayer
#endif
1714
}
1715
1716 7ac40327 Ramiro Polla
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1717 7322a67c Michael Niedermayer
{
1718 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1719 7ad6469e Diego Pettenò
    __asm__ volatile(
1720 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1721
        "mov                    %0, %%"REG_a"       \n\t"
1722
        "1:                                         \n\t"
1723
        "movq    (%1, %%"REG_a",4), %%mm0           \n\t"
1724
        "movq   8(%1, %%"REG_a",4), %%mm1           \n\t"
1725
        "pand                %%mm4, %%mm0           \n\t"
1726
        "pand                %%mm4, %%mm1           \n\t"
1727
        "packuswb            %%mm1, %%mm0           \n\t"
1728
        "movq                %%mm0, %%mm1           \n\t"
1729
        "psrlw                  $8, %%mm0           \n\t"
1730
        "pand                %%mm4, %%mm1           \n\t"
1731
        "packuswb            %%mm0, %%mm0           \n\t"
1732
        "packuswb            %%mm1, %%mm1           \n\t"
1733
        "movd                %%mm0, (%3, %%"REG_a") \n\t"
1734
        "movd                %%mm1, (%2, %%"REG_a") \n\t"
1735
        "add                    $4, %%"REG_a"       \n\t"
1736
        " js                    1b                  \n\t"
1737
        : : "g" ((x86_reg)-width), "r" (src1+width*4), "r" (dstU+width), "r" (dstV+width)
1738
        : "%"REG_a
1739 2da0d70d Diego Biurrun
    );
1740 7322a67c Michael Niedermayer
#else
1741 2da0d70d Diego Biurrun
    int i;
1742 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1743 2da0d70d Diego Biurrun
        dstU[i]= src1[4*i + 0];
1744
        dstV[i]= src1[4*i + 2];
1745
    }
1746
#endif
1747
    assert(src1 == src2);
1748 7322a67c Michael Niedermayer
}
1749
1750 de1275d5 Michael Niedermayer
static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1751
{
1752 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1753 de1275d5 Michael Niedermayer
    __asm__ volatile(
1754 c255994b Ramiro Polla
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1755
        "mov                    %0, %%"REG_a"       \n\t"
1756
        "1:                                         \n\t"
1757
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1758
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1759
        "movq    (%2, %%"REG_a",2), %%mm2           \n\t"
1760
        "movq   8(%2, %%"REG_a",2), %%mm3           \n\t"
1761
        "pand                %%mm4, %%mm0           \n\t"
1762
        "pand                %%mm4, %%mm1           \n\t"
1763
        "pand                %%mm4, %%mm2           \n\t"
1764
        "pand                %%mm4, %%mm3           \n\t"
1765
        "packuswb            %%mm1, %%mm0           \n\t"
1766
        "packuswb            %%mm3, %%mm2           \n\t"
1767
        "movq                %%mm0, (%3, %%"REG_a") \n\t"
1768
        "movq                %%mm2, (%4, %%"REG_a") \n\t"
1769
        "add                    $8, %%"REG_a"       \n\t"
1770
        " js                    1b                  \n\t"
1771
        : : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
1772
        : "%"REG_a
1773 de1275d5 Michael Niedermayer
    );
1774
#else
1775
    int i;
1776 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1777 de1275d5 Michael Niedermayer
        dstU[i]= src1[2*i];
1778
        dstV[i]= src2[2*i];
1779
    }
1780
#endif
1781
}
1782
1783 f415be68 Ramiro Polla
static inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
1784
                                    const uint8_t *src, long width)
1785
{
1786
#if COMPILE_TEMPLATE_MMX
1787
    __asm__ volatile(
1788
        "movq "MANGLE(bm01010101)", %%mm4           \n\t"
1789
        "mov                    %0, %%"REG_a"       \n\t"
1790
        "1:                                         \n\t"
1791
        "movq    (%1, %%"REG_a",2), %%mm0           \n\t"
1792
        "movq   8(%1, %%"REG_a",2), %%mm1           \n\t"
1793
        "movq                %%mm0, %%mm2           \n\t"
1794
        "movq                %%mm1, %%mm3           \n\t"
1795
        "pand                %%mm4, %%mm0           \n\t"
1796
        "pand                %%mm4, %%mm1           \n\t"
1797
        "psrlw                  $8, %%mm2           \n\t"
1798
        "psrlw                  $8, %%mm3           \n\t"
1799
        "packuswb            %%mm1, %%mm0           \n\t"
1800
        "packuswb            %%mm3, %%mm2           \n\t"
1801
        "movq                %%mm0, (%2, %%"REG_a") \n\t"
1802
        "movq                %%mm2, (%3, %%"REG_a") \n\t"
1803
        "add                    $8, %%"REG_a"       \n\t"
1804
        " js                    1b                  \n\t"
1805
        : : "g" ((x86_reg)-width), "r" (src+width*2), "r" (dst1+width), "r" (dst2+width)
1806
        : "%"REG_a
1807
    );
1808
#else
1809
    int i;
1810
    for (i = 0; i < width; i++) {
1811
        dst1[i] = src[2*i+0];
1812
        dst2[i] = src[2*i+1];
1813
    }
1814
#endif
1815
}
1816
1817 e470691a Ramiro Polla
static inline void RENAME(nv12ToUV)(uint8_t *dstU, uint8_t *dstV,
1818
                                    const uint8_t *src1, const uint8_t *src2,
1819
                                    long width, uint32_t *unused)
1820 f415be68 Ramiro Polla
{
1821
    RENAME(nvXXtoUV)(dstU, dstV, src1, width);
1822
}
1823
1824 e470691a Ramiro Polla
static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
1825
                                    const uint8_t *src1, const uint8_t *src2,
1826
                                    long width, uint32_t *unused)
1827 f415be68 Ramiro Polla
{
1828
    RENAME(nvXXtoUV)(dstV, dstU, src1, width);
1829
}
1830
1831 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1832 b411dfff Carl Eugen Hoyos
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
1833 dfb09bd1 Michael Niedermayer
{
1834
1835 dd68318c Ramiro Polla
    if(srcFormat == PIX_FMT_BGR24) {
1836 7ad6469e Diego Pettenò
        __asm__ volatile(
1837 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_bgr24toY1Coeff)", %%mm5       \n\t"
1838
            "movq  "MANGLE(ff_bgr24toY2Coeff)", %%mm6       \n\t"
1839
            :
1840 dfb09bd1 Michael Niedermayer
        );
1841 dd68318c Ramiro Polla
    } else {
1842 7ad6469e Diego Pettenò
        __asm__ volatile(
1843 ff9a056d Michael Niedermayer
            "movq  "MANGLE(ff_rgb24toY1Coeff)", %%mm5       \n\t"
1844
            "movq  "MANGLE(ff_rgb24toY2Coeff)", %%mm6       \n\t"
1845
            :
1846 dfb09bd1 Michael Niedermayer
        );
1847
    }
1848
1849 7ad6469e Diego Pettenò
    __asm__ volatile(
1850 dfb09bd1 Michael Niedermayer
        "movq  "MANGLE(ff_bgr24toYOffset)", %%mm4   \n\t"
1851
        "mov                        %2, %%"REG_a"   \n\t"
1852
        "pxor                    %%mm7, %%mm7       \n\t"
1853
        "1:                                         \n\t"
1854
        PREFETCH"               64(%0)              \n\t"
1855
        "movd                     (%0), %%mm0       \n\t"
1856
        "movd                    2(%0), %%mm1       \n\t"
1857
        "movd                    6(%0), %%mm2       \n\t"
1858
        "movd                    8(%0), %%mm3       \n\t"
1859
        "add                       $12, %0          \n\t"
1860
        "punpcklbw               %%mm7, %%mm0       \n\t"
1861
        "punpcklbw               %%mm7, %%mm1       \n\t"
1862
        "punpcklbw               %%mm7, %%mm2       \n\t"
1863
        "punpcklbw               %%mm7, %%mm3       \n\t"
1864
        "pmaddwd                 %%mm5, %%mm0       \n\t"
1865
        "pmaddwd                 %%mm6, %%mm1       \n\t"
1866
        "pmaddwd                 %%mm5, %%mm2       \n\t"
1867
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1868
        "paddd                   %%mm1, %%mm0       \n\t"
1869
        "paddd                   %%mm3, %%mm2       \n\t"
1870
        "paddd                   %%mm4, %%mm0       \n\t"
1871
        "paddd                   %%mm4, %%mm2       \n\t"
1872
        "psrad                     $15, %%mm0       \n\t"
1873
        "psrad                     $15, %%mm2       \n\t"
1874
        "packssdw                %%mm2, %%mm0       \n\t"
1875
        "packuswb                %%mm0, %%mm0       \n\t"
1876
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1877
        "add                        $4, %%"REG_a"   \n\t"
1878
        " js                        1b              \n\t"
1879
    : "+r" (src)
1880 d0ce212a Ramiro Polla
    : "r" (dst+width), "g" ((x86_reg)-width)
1881 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1882 2da0d70d Diego Biurrun
    );
1883 dfb09bd1 Michael Niedermayer
}
1884
1885 b411dfff Carl Eugen Hoyos
static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
1886 dfb09bd1 Michael Niedermayer
{
1887 7ad6469e Diego Pettenò
    __asm__ volatile(
1888 dfb09bd1 Michael Niedermayer
        "movq                    24+%4, %%mm6       \n\t"
1889
        "mov                        %3, %%"REG_a"   \n\t"
1890
        "pxor                    %%mm7, %%mm7       \n\t"
1891
        "1:                                         \n\t"
1892
        PREFETCH"               64(%0)              \n\t"
1893
        "movd                     (%0), %%mm0       \n\t"
1894
        "movd                    2(%0), %%mm1       \n\t"
1895
        "punpcklbw               %%mm7, %%mm0       \n\t"
1896
        "punpcklbw               %%mm7, %%mm1       \n\t"
1897
        "movq                    %%mm0, %%mm2       \n\t"
1898
        "movq                    %%mm1, %%mm3       \n\t"
1899
        "pmaddwd                    %4, %%mm0       \n\t"
1900
        "pmaddwd                  8+%4, %%mm1       \n\t"
1901
        "pmaddwd                 16+%4, %%mm2       \n\t"
1902
        "pmaddwd                 %%mm6, %%mm3       \n\t"
1903
        "paddd                   %%mm1, %%mm0       \n\t"
1904
        "paddd                   %%mm3, %%mm2       \n\t"
1905
1906
        "movd                    6(%0), %%mm1       \n\t"
1907
        "movd                    8(%0), %%mm3       \n\t"
1908
        "add                       $12, %0          \n\t"
1909
        "punpcklbw               %%mm7, %%mm1       \n\t"
1910
        "punpcklbw               %%mm7, %%mm3       \n\t"
1911
        "movq                    %%mm1, %%mm4       \n\t"
1912
        "movq                    %%mm3, %%mm5       \n\t"
1913
        "pmaddwd                    %4, %%mm1       \n\t"
1914
        "pmaddwd                  8+%4, %%mm3       \n\t"
1915
        "pmaddwd                 16+%4, %%mm4       \n\t"
1916
        "pmaddwd                 %%mm6, %%mm5       \n\t"
1917
        "paddd                   %%mm3, %%mm1       \n\t"
1918
        "paddd                   %%mm5, %%mm4       \n\t"
1919
1920
        "movq "MANGLE(ff_bgr24toUVOffset)", %%mm3       \n\t"
1921
        "paddd                   %%mm3, %%mm0       \n\t"
1922
        "paddd                   %%mm3, %%mm2       \n\t"
1923
        "paddd                   %%mm3, %%mm1       \n\t"
1924
        "paddd                   %%mm3, %%mm4       \n\t"
1925
        "psrad                     $15, %%mm0       \n\t"
1926
        "psrad                     $15, %%mm2       \n\t"
1927
        "psrad                     $15, %%mm1       \n\t"
1928
        "psrad                     $15, %%mm4       \n\t"
1929
        "packssdw                %%mm1, %%mm0       \n\t"
1930
        "packssdw                %%mm4, %%mm2       \n\t"
1931
        "packuswb                %%mm0, %%mm0       \n\t"
1932
        "packuswb                %%mm2, %%mm2       \n\t"
1933
        "movd                %%mm0, (%1, %%"REG_a") \n\t"
1934
        "movd                %%mm2, (%2, %%"REG_a") \n\t"
1935
        "add                        $4, %%"REG_a"   \n\t"
1936
        " js                        1b              \n\t"
1937
    : "+r" (src)
1938 d0ce212a Ramiro Polla
    : "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "m"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24][0])
1939 dfb09bd1 Michael Niedermayer
    : "%"REG_a
1940
    );
1941
}
1942
#endif
1943
1944 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1945 dfb09bd1 Michael Niedermayer
{
1946 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1947 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
1948 1e621b18 Michael Niedermayer
#else
1949 2da0d70d Diego Biurrun
    int i;
1950 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1951 2da0d70d Diego Biurrun
        int b= src[i*3+0];
1952
        int g= src[i*3+1];
1953
        int r= src[i*3+2];
1954 1e621b18 Michael Niedermayer
1955 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
1956 2da0d70d Diego Biurrun
    }
1957 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1958 1e621b18 Michael Niedermayer
}
1959
1960 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1961 1e621b18 Michael Niedermayer
{
1962 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1963 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
1964 1e621b18 Michael Niedermayer
#else
1965 2da0d70d Diego Biurrun
    int i;
1966 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1967 dfb09bd1 Michael Niedermayer
        int b= src1[3*i + 0];
1968
        int g= src1[3*i + 1];
1969
        int r= src1[3*i + 2];
1970 2da0d70d Diego Biurrun
1971 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1972
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
1973 2da0d70d Diego Biurrun
    }
1974 94daf2e9 Ramiro Polla
#endif /* COMPILE_TEMPLATE_MMX */
1975 2da0d70d Diego Biurrun
    assert(src1 == src2);
1976 1e621b18 Michael Niedermayer
}
1977
1978 7ac40327 Ramiro Polla
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
1979 2f60f629 Michael Niedermayer
{
1980
    int i;
1981 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1982 2f60f629 Michael Niedermayer
        int b= src1[6*i + 0] + src1[6*i + 3];
1983
        int g= src1[6*i + 1] + src1[6*i + 4];
1984
        int r= src1[6*i + 2] + src1[6*i + 5];
1985
1986
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1987
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
1988
    }
1989
    assert(src1 == src2);
1990
}
1991
1992 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
1993 a861d4d7 Michael Niedermayer
{
1994 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
1995 a35acd7f Benjamin Zores
    RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
1996 dfb09bd1 Michael Niedermayer
#else
1997 2da0d70d Diego Biurrun
    int i;
1998 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
1999 2da0d70d Diego Biurrun
        int r= src[i*3+0];
2000
        int g= src[i*3+1];
2001
        int b= src[i*3+2];
2002
2003 e5091488 Benoit Fouet
        dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
2004 2da0d70d Diego Biurrun
    }
2005 dfb09bd1 Michael Niedermayer
#endif
2006 a861d4d7 Michael Niedermayer
}
2007
2008 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2009 a861d4d7 Michael Niedermayer
{
2010 94daf2e9 Ramiro Polla
#if COMPILE_TEMPLATE_MMX
2011 5155b839 Diego Biurrun
    assert(src1==src2);
2012 a35acd7f Benjamin Zores
    RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
2013 dfb09bd1 Michael Niedermayer
#else
2014 5155b839 Diego Biurrun
    int i;
2015
    assert(src1==src2);
2016 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {
2017 dfb09bd1 Michael Niedermayer
        int r= src1[3*i + 0];
2018
        int g= src1[3*i + 1];
2019
        int b= src1[3*i + 2];
2020 2da0d70d Diego Biurrun
2021 dfb09bd1 Michael Niedermayer
        dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2022
        dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
2023 2da0d70d Diego Biurrun
    }
2024 dfb09bd1 Michael Niedermayer
#endif
2025 a861d4d7 Michael Niedermayer
}
2026
2027 7ac40327 Ramiro Polla
static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
2028 2f60f629 Michael Niedermayer
{
2029
    int i;
2030
    assert(src1==src2);
2031