Statistics
| Branch: | Revision:

ffmpeg / libswscale / rgb2rgb_template.c @ aa2ead82

History | View | Annotate | Download (115 KB)

1 fcfbc150 Michael Niedermayer
/*
2 8a322796 Diego Biurrun
 * software RGB to RGB converter
3
 * pluralize by software PAL8 to RGB converter
4
 *              software YUV to YUV converter
5
 *              software YUV to RGB converter
6
 * Written by Nick Kurshev.
7
 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8
 * lot of big-endian byte order fixes by Alex Beregszaszi
9 4fadc2b4 Diego Biurrun
 *
10 d026b45e Diego Biurrun
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or modify
13 4fadc2b4 Diego Biurrun
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17 d026b45e Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
18 4fadc2b4 Diego Biurrun
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23 d026b45e Diego Biurrun
 * along with FFmpeg; if not, write to the Free Software
24 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 6a4970ab Diego Biurrun
 *
26 8a322796 Diego Biurrun
 * The C code (not assembly, MMX, ...) of this file can be used
27 594ff7cc Diego Biurrun
 * under the LGPL license.
28 a3aece93 Nick Kurshev
 */
29
30 0d9f3d85 Arpi
#include <stddef.h>
31
32 1de97d84 Michael Niedermayer
/*
 * Instruction-name strings pasted into the inline assembly below.
 * Any earlier definitions are dropped first so the set defined here is the
 * one in effect for the code that follows.
 */
#undef MMREG_SIZE
#undef PREFETCH
#undef PAVGB
#undef EMMS
#undef MOVNTQ
#undef SFENCE

/* MMREG_SIZE is 16 when HAVE_SSE2 is set and 8 otherwise. */
#if !HAVE_SSE2
#define MMREG_SIZE 8
#else
#define MMREG_SIZE 16
#endif

/*
 * PREFETCH / PAVGB / EMMS selection.
 * HAVE_AMD3DNOW takes precedence over HAVE_MMX2 for PREFETCH and PAVGB.
 * PAVGB is intentionally left undefined when neither is available.
 */
#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PAVGB     "pavgusb"
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#if HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PAVGB     "pavgb"
#else
/* No prefetch instruction available: expand to an assembler comment. */
#define PREFETCH  " # nop"
#endif
#endif

/* Without HAVE_MMX2, MOVNTQ falls back to "movq" and SFENCE to an assembler comment. */
#if !HAVE_MMX2
#define MOVNTQ "movq"
#define SFENCE " # nop"
#else
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#endif
69 1de97d84 Michael Niedermayer
70 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
71 b234ae81 Nick Kurshev
{
72 6e42e6c4 Diego Biurrun
    uint8_t *dest = dst;
73
    const uint8_t *s = src;
74
    const uint8_t *end;
75 7d73d1c3 Ramiro Polla
#if HAVE_MMX
76 9b734d44 Ramiro Polla
    const uint8_t *mm_end;
77 7d73d1c3 Ramiro Polla
#endif
78 6e42e6c4 Diego Biurrun
    end = s + src_size;
79 7d73d1c3 Ramiro Polla
#if HAVE_MMX
80 9b734d44 Ramiro Polla
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
81
    mm_end = end - 23;
82
    __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
83 dd68318c Ramiro Polla
    while (s < mm_end) {
84 9b734d44 Ramiro Polla
        __asm__ volatile(
85 6e42e6c4 Diego Biurrun
            PREFETCH"    32%1           \n\t"
86
            "movd          %1, %%mm0    \n\t"
87
            "punpckldq    3%1, %%mm0    \n\t"
88
            "movd         6%1, %%mm1    \n\t"
89
            "punpckldq    9%1, %%mm1    \n\t"
90
            "movd        12%1, %%mm2    \n\t"
91
            "punpckldq   15%1, %%mm2    \n\t"
92
            "movd        18%1, %%mm3    \n\t"
93
            "punpckldq   21%1, %%mm3    \n\t"
94 f8a138be Cédric Schieli
            "por        %%mm7, %%mm0    \n\t"
95
            "por        %%mm7, %%mm1    \n\t"
96
            "por        %%mm7, %%mm2    \n\t"
97
            "por        %%mm7, %%mm3    \n\t"
98 6e42e6c4 Diego Biurrun
            MOVNTQ"     %%mm0,   %0     \n\t"
99
            MOVNTQ"     %%mm1,  8%0     \n\t"
100
            MOVNTQ"     %%mm2, 16%0     \n\t"
101
            MOVNTQ"     %%mm3, 24%0"
102
            :"=m"(*dest)
103
            :"m"(*s)
104
            :"memory");
105 9b734d44 Ramiro Polla
        dest += 32;
106
        s += 24;
107
    }
108
    __asm__ volatile(SFENCE:::"memory");
109
    __asm__ volatile(EMMS:::"memory");
110 7d73d1c3 Ramiro Polla
#endif
111 dd68318c Ramiro Polla
    while (s < end) {
112 7d73d1c3 Ramiro Polla
#if HAVE_BIGENDIAN
113 6e42e6c4 Diego Biurrun
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
114 f8a138be Cédric Schieli
        *dest++ = 255;
115 6e42e6c4 Diego Biurrun
        *dest++ = s[2];
116
        *dest++ = s[1];
117
        *dest++ = s[0];
118
        s+=3;
119 7d73d1c3 Ramiro Polla
#else
120 6e42e6c4 Diego Biurrun
        *dest++ = *s++;
121
        *dest++ = *s++;
122
        *dest++ = *s++;
123 f8a138be Cédric Schieli
        *dest++ = 255;
124 7d73d1c3 Ramiro Polla
#endif
125 6e42e6c4 Diego Biurrun
    }
126 b234ae81 Nick Kurshev
}
127 59ac5a93 Nick Kurshev
128 6107059c Michael Niedermayer
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
129 59ac5a93 Nick Kurshev
{
130 6e42e6c4 Diego Biurrun
    uint8_t *dest = dst;
131
    const uint8_t *s = src;
132
    const uint8_t *end;
133 b63f641e Aurelien Jacobs
#if HAVE_MMX
134 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
135 494a6294 Nick Kurshev
#endif
136 6e42e6c4 Diego Biurrun
    end = s + src_size;
137 b63f641e Aurelien Jacobs
#if HAVE_MMX
138 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
139 6e42e6c4 Diego Biurrun
    mm_end = end - 31;
140 dd68318c Ramiro Polla
    while (s < mm_end) {
141 7ad6469e Diego Pettenò
        __asm__ volatile(
142 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
143
            "movq          %1, %%mm0    \n\t"
144
            "movq         8%1, %%mm1    \n\t"
145
            "movq        16%1, %%mm4    \n\t"
146
            "movq        24%1, %%mm5    \n\t"
147
            "movq       %%mm0, %%mm2    \n\t"
148
            "movq       %%mm1, %%mm3    \n\t"
149
            "movq       %%mm4, %%mm6    \n\t"
150
            "movq       %%mm5, %%mm7    \n\t"
151
            "psrlq         $8, %%mm2    \n\t"
152
            "psrlq         $8, %%mm3    \n\t"
153
            "psrlq         $8, %%mm6    \n\t"
154
            "psrlq         $8, %%mm7    \n\t"
155
            "pand          %2, %%mm0    \n\t"
156
            "pand          %2, %%mm1    \n\t"
157
            "pand          %2, %%mm4    \n\t"
158
            "pand          %2, %%mm5    \n\t"
159
            "pand          %3, %%mm2    \n\t"
160
            "pand          %3, %%mm3    \n\t"
161
            "pand          %3, %%mm6    \n\t"
162
            "pand          %3, %%mm7    \n\t"
163
            "por        %%mm2, %%mm0    \n\t"
164
            "por        %%mm3, %%mm1    \n\t"
165
            "por        %%mm6, %%mm4    \n\t"
166
            "por        %%mm7, %%mm5    \n\t"
167
168
            "movq       %%mm1, %%mm2    \n\t"
169
            "movq       %%mm4, %%mm3    \n\t"
170
            "psllq        $48, %%mm2    \n\t"
171
            "psllq        $32, %%mm3    \n\t"
172
            "pand          %4, %%mm2    \n\t"
173
            "pand          %5, %%mm3    \n\t"
174
            "por        %%mm2, %%mm0    \n\t"
175
            "psrlq        $16, %%mm1    \n\t"
176
            "psrlq        $32, %%mm4    \n\t"
177
            "psllq        $16, %%mm5    \n\t"
178
            "por        %%mm3, %%mm1    \n\t"
179
            "pand          %6, %%mm5    \n\t"
180
            "por        %%mm5, %%mm4    \n\t"
181
182
            MOVNTQ"     %%mm0,   %0     \n\t"
183
            MOVNTQ"     %%mm1,  8%0     \n\t"
184
            MOVNTQ"     %%mm4, 16%0"
185
            :"=m"(*dest)
186
            :"m"(*s),"m"(mask24l),
187
            "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
188
            :"memory");
189 6e42e6c4 Diego Biurrun
        dest += 24;
190
        s += 32;
191
    }
192 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
193
    __asm__ volatile(EMMS:::"memory");
194 6e42e6c4 Diego Biurrun
#endif
195 dd68318c Ramiro Polla
    while (s < end) {
196 a898cdc9 Måns Rullgård
#if HAVE_BIGENDIAN
197 6e42e6c4 Diego Biurrun
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
198
        s++;
199
        dest[2] = *s++;
200
        dest[1] = *s++;
201
        dest[0] = *s++;
202
        dest += 3;
203 6cb38650 Alex Beregszaszi
#else
204 6e42e6c4 Diego Biurrun
        *dest++ = *s++;
205
        *dest++ = *s++;
206
        *dest++ = *s++;
207
        s++;
208 6cb38650 Alex Beregszaszi
#endif
209 6e42e6c4 Diego Biurrun
    }
210 59ac5a93 Nick Kurshev
}
211 b238eb2e Nick Kurshev
212 a3aece93 Nick Kurshev
/*
213 8a322796 Diego Biurrun
 original by Strepto/Astral
214
 ported to gcc & bugfixed: A'rpi
215 51da31f1 Nick Kurshev
 MMX2, 3DNOW optimization by Nick Kurshev
216 8a322796 Diego Biurrun
 32-bit C version, and and&add trick by Michael Niedermayer
217 a3aece93 Nick Kurshev
*/
218 30c48a0a Benoit Fouet
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
219 b238eb2e Nick Kurshev
{
220 6e42e6c4 Diego Biurrun
    register const uint8_t* s=src;
221
    register uint8_t* d=dst;
222
    register const uint8_t *end;
223
    const uint8_t *mm_end;
224
    end = s + src_size;
225 b63f641e Aurelien Jacobs
#if HAVE_MMX
226 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
227
    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
228 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
229 dd68318c Ramiro Polla
    while (s<mm_end) {
230 7ad6469e Diego Pettenò
        __asm__ volatile(
231 9b734d44 Ramiro Polla
            PREFETCH"  32%1         \n\t"
232
            "movq        %1, %%mm0  \n\t"
233
            "movq       8%1, %%mm2  \n\t"
234
            "movq     %%mm0, %%mm1  \n\t"
235
            "movq     %%mm2, %%mm3  \n\t"
236
            "pand     %%mm4, %%mm0  \n\t"
237
            "pand     %%mm4, %%mm2  \n\t"
238
            "paddw    %%mm1, %%mm0  \n\t"
239
            "paddw    %%mm3, %%mm2  \n\t"
240
            MOVNTQ"   %%mm0,  %0    \n\t"
241
            MOVNTQ"   %%mm2, 8%0"
242
            :"=m"(*d)
243
            :"m"(*s)
244 6e42e6c4 Diego Biurrun
        );
245
        d+=16;
246
        s+=16;
247
    }
248 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
249
    __asm__ volatile(EMMS:::"memory");
250 b238eb2e Nick Kurshev
#endif
251 d8dad2a5 Michael Niedermayer
    mm_end = end - 3;
252 dd68318c Ramiro Polla
    while (s < mm_end) {
253 994c1ef0 Baptiste Coudurier
        register unsigned x= *((const uint32_t *)s);
254 6e42e6c4 Diego Biurrun
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
255
        d+=4;
256
        s+=4;
257 0d9f3d85 Arpi
    }
258 dd68318c Ramiro Polla
    if (s < end) {
259 994c1ef0 Baptiste Coudurier
        register unsigned short x= *((const uint16_t *)s);
260 6e42e6c4 Diego Biurrun
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
261 0d9f3d85 Arpi
    }
262 b238eb2e Nick Kurshev
}
263 fcfbc150 Michael Niedermayer
264 30c48a0a Benoit Fouet
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
265 ac4d0aea Michael Niedermayer
{
266 6e42e6c4 Diego Biurrun
    register const uint8_t* s=src;
267
    register uint8_t* d=dst;
268
    register const uint8_t *end;
269
    const uint8_t *mm_end;
270
    end = s + src_size;
271 b63f641e Aurelien Jacobs
#if HAVE_MMX
272 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
273
    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
274
    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
275 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
276 dd68318c Ramiro Polla
    while (s<mm_end) {
277 7ad6469e Diego Pettenò
        __asm__ volatile(
278 9b734d44 Ramiro Polla
            PREFETCH"  32%1         \n\t"
279
            "movq        %1, %%mm0  \n\t"
280
            "movq       8%1, %%mm2  \n\t"
281
            "movq     %%mm0, %%mm1  \n\t"
282
            "movq     %%mm2, %%mm3  \n\t"
283
            "psrlq       $1, %%mm0  \n\t"
284
            "psrlq       $1, %%mm2  \n\t"
285
            "pand     %%mm7, %%mm0  \n\t"
286
            "pand     %%mm7, %%mm2  \n\t"
287
            "pand     %%mm6, %%mm1  \n\t"
288
            "pand     %%mm6, %%mm3  \n\t"
289
            "por      %%mm1, %%mm0  \n\t"
290
            "por      %%mm3, %%mm2  \n\t"
291
            MOVNTQ"   %%mm0,  %0    \n\t"
292
            MOVNTQ"   %%mm2, 8%0"
293
            :"=m"(*d)
294
            :"m"(*s)
295 6e42e6c4 Diego Biurrun
        );
296
        d+=16;
297
        s+=16;
298
    }
299 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
300
    __asm__ volatile(EMMS:::"memory");
301 ac4d0aea Michael Niedermayer
#endif
302 0598bcbb Michael Niedermayer
    mm_end = end - 3;
303 dd68318c Ramiro Polla
    while (s < mm_end) {
304 ce3d365f Baptiste Coudurier
        register uint32_t x= *((const uint32_t*)s);
305 6e42e6c4 Diego Biurrun
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
306
        s+=4;
307
        d+=4;
308 ac4d0aea Michael Niedermayer
    }
309 dd68318c Ramiro Polla
    if (s < end) {
310 ce3d365f Baptiste Coudurier
        register uint16_t x= *((const uint16_t*)s);
311 6e42e6c4 Diego Biurrun
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
312 ac4d0aea Michael Niedermayer
    }
313
}
314
315 7f526efd Reimar Döffinger
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
316 fcfbc150 Michael Niedermayer
{
317 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
318
    const uint8_t *end;
319 b63f641e Aurelien Jacobs
#if HAVE_MMX
320 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
321 0d9f3d85 Arpi
#endif
322 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
323
    end = s + src_size;
324 b63f641e Aurelien Jacobs
#if HAVE_MMX
325 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
326 594ff7cc Diego Biurrun
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
327 7ad6469e Diego Pettenò
    __asm__ volatile(
328 9b734d44 Ramiro Polla
        "movq           %3, %%mm5   \n\t"
329
        "movq           %4, %%mm6   \n\t"
330
        "movq           %5, %%mm7   \n\t"
331
        "jmp 2f                     \n\t"
332
        ASMALIGN(4)
333
        "1:                         \n\t"
334
        PREFETCH"   32(%1)          \n\t"
335
        "movd         (%1), %%mm0   \n\t"
336
        "movd        4(%1), %%mm3   \n\t"
337
        "punpckldq   8(%1), %%mm0   \n\t"
338
        "punpckldq  12(%1), %%mm3   \n\t"
339
        "movq        %%mm0, %%mm1   \n\t"
340
        "movq        %%mm3, %%mm4   \n\t"
341
        "pand        %%mm6, %%mm0   \n\t"
342
        "pand        %%mm6, %%mm3   \n\t"
343
        "pmaddwd     %%mm7, %%mm0   \n\t"
344
        "pmaddwd     %%mm7, %%mm3   \n\t"
345
        "pand        %%mm5, %%mm1   \n\t"
346
        "pand        %%mm5, %%mm4   \n\t"
347
        "por         %%mm1, %%mm0   \n\t"
348
        "por         %%mm4, %%mm3   \n\t"
349
        "psrld          $5, %%mm0   \n\t"
350
        "pslld         $11, %%mm3   \n\t"
351
        "por         %%mm3, %%mm0   \n\t"
352
        MOVNTQ"      %%mm0, (%0)    \n\t"
353
        "add           $16,  %1     \n\t"
354
        "add            $8,  %0     \n\t"
355
        "2:                         \n\t"
356
        "cmp            %2,  %1     \n\t"
357
        " jb            1b          \n\t"
358
        : "+r" (d), "+r"(s)
359
        : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
360 6e42e6c4 Diego Biurrun
    );
361 aeae5d53 Michael Niedermayer
#else
362 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
363
    __asm__ volatile(
364 6e42e6c4 Diego Biurrun
        "movq    %0, %%mm7    \n\t"
365
        "movq    %1, %%mm6    \n\t"
366
        ::"m"(red_16mask),"m"(green_16mask));
367 dd68318c Ramiro Polla
    while (s < mm_end) {
368 7ad6469e Diego Pettenò
        __asm__ volatile(
369 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
370
            "movd          %1, %%mm0    \n\t"
371
            "movd         4%1, %%mm3    \n\t"
372
            "punpckldq    8%1, %%mm0    \n\t"
373
            "punpckldq   12%1, %%mm3    \n\t"
374
            "movq       %%mm0, %%mm1    \n\t"
375
            "movq       %%mm0, %%mm2    \n\t"
376
            "movq       %%mm3, %%mm4    \n\t"
377
            "movq       %%mm3, %%mm5    \n\t"
378
            "psrlq         $3, %%mm0    \n\t"
379
            "psrlq         $3, %%mm3    \n\t"
380
            "pand          %2, %%mm0    \n\t"
381
            "pand          %2, %%mm3    \n\t"
382
            "psrlq         $5, %%mm1    \n\t"
383
            "psrlq         $5, %%mm4    \n\t"
384
            "pand       %%mm6, %%mm1    \n\t"
385
            "pand       %%mm6, %%mm4    \n\t"
386
            "psrlq         $8, %%mm2    \n\t"
387
            "psrlq         $8, %%mm5    \n\t"
388
            "pand       %%mm7, %%mm2    \n\t"
389
            "pand       %%mm7, %%mm5    \n\t"
390
            "por        %%mm1, %%mm0    \n\t"
391
            "por        %%mm4, %%mm3    \n\t"
392
            "por        %%mm2, %%mm0    \n\t"
393
            "por        %%mm5, %%mm3    \n\t"
394
            "psllq        $16, %%mm3    \n\t"
395
            "por        %%mm3, %%mm0    \n\t"
396
            MOVNTQ"     %%mm0, %0       \n\t"
397
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
398 6e42e6c4 Diego Biurrun
        d += 4;
399
        s += 16;
400
    }
401
#endif
402 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
403
    __asm__ volatile(EMMS:::"memory");
404 6e42e6c4 Diego Biurrun
#endif
405 dd68318c Ramiro Polla
    while (s < end) {
406 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
407 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
408
    }
409 fcfbc150 Michael Niedermayer
}
410
411 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
412 ac4d0aea Michael Niedermayer
{
413 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
414
    const uint8_t *end;
415 b63f641e Aurelien Jacobs
#if HAVE_MMX
416 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
417 ac4d0aea Michael Niedermayer
#endif
418 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
419
    end = s + src_size;
420 b63f641e Aurelien Jacobs
#if HAVE_MMX
421 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
422
    __asm__ volatile(
423 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
424
        "movq          %1, %%mm6    \n\t"
425
        ::"m"(red_16mask),"m"(green_16mask));
426
    mm_end = end - 15;
427 dd68318c Ramiro Polla
    while (s < mm_end) {
428 7ad6469e Diego Pettenò
        __asm__ volatile(
429 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
430
            "movd          %1, %%mm0    \n\t"
431
            "movd         4%1, %%mm3    \n\t"
432
            "punpckldq    8%1, %%mm0    \n\t"
433
            "punpckldq   12%1, %%mm3    \n\t"
434
            "movq       %%mm0, %%mm1    \n\t"
435
            "movq       %%mm0, %%mm2    \n\t"
436
            "movq       %%mm3, %%mm4    \n\t"
437
            "movq       %%mm3, %%mm5    \n\t"
438
            "psllq         $8, %%mm0    \n\t"
439
            "psllq         $8, %%mm3    \n\t"
440
            "pand       %%mm7, %%mm0    \n\t"
441
            "pand       %%mm7, %%mm3    \n\t"
442
            "psrlq         $5, %%mm1    \n\t"
443
            "psrlq         $5, %%mm4    \n\t"
444
            "pand       %%mm6, %%mm1    \n\t"
445
            "pand       %%mm6, %%mm4    \n\t"
446
            "psrlq        $19, %%mm2    \n\t"
447
            "psrlq        $19, %%mm5    \n\t"
448
            "pand          %2, %%mm2    \n\t"
449
            "pand          %2, %%mm5    \n\t"
450
            "por        %%mm1, %%mm0    \n\t"
451
            "por        %%mm4, %%mm3    \n\t"
452
            "por        %%mm2, %%mm0    \n\t"
453
            "por        %%mm5, %%mm3    \n\t"
454
            "psllq        $16, %%mm3    \n\t"
455
            "por        %%mm3, %%mm0    \n\t"
456
            MOVNTQ"     %%mm0, %0       \n\t"
457
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
458 6e42e6c4 Diego Biurrun
        d += 4;
459
        s += 16;
460
    }
461 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
462
    __asm__ volatile(EMMS:::"memory");
463 6e42e6c4 Diego Biurrun
#endif
464 dd68318c Ramiro Polla
    while (s < end) {
465 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
466 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
467
    }
468 ac4d0aea Michael Niedermayer
}
469
470 7f526efd Reimar Döffinger
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
471 fcfbc150 Michael Niedermayer
{
472 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
473
    const uint8_t *end;
474 b63f641e Aurelien Jacobs
#if HAVE_MMX
475 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
476 0d9f3d85 Arpi
#endif
477 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
478
    end = s + src_size;
479 b63f641e Aurelien Jacobs
#if HAVE_MMX
480 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
481 594ff7cc Diego Biurrun
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
482 7ad6469e Diego Pettenò
    __asm__ volatile(
483 9b734d44 Ramiro Polla
        "movq           %3, %%mm5   \n\t"
484
        "movq           %4, %%mm6   \n\t"
485
        "movq           %5, %%mm7   \n\t"
486
        "jmp            2f          \n\t"
487
        ASMALIGN(4)
488
        "1:                         \n\t"
489
        PREFETCH"   32(%1)          \n\t"
490
        "movd         (%1), %%mm0   \n\t"
491
        "movd        4(%1), %%mm3   \n\t"
492
        "punpckldq   8(%1), %%mm0   \n\t"
493
        "punpckldq  12(%1), %%mm3   \n\t"
494
        "movq        %%mm0, %%mm1   \n\t"
495
        "movq        %%mm3, %%mm4   \n\t"
496
        "pand        %%mm6, %%mm0   \n\t"
497
        "pand        %%mm6, %%mm3   \n\t"
498
        "pmaddwd     %%mm7, %%mm0   \n\t"
499
        "pmaddwd     %%mm7, %%mm3   \n\t"
500
        "pand        %%mm5, %%mm1   \n\t"
501
        "pand        %%mm5, %%mm4   \n\t"
502
        "por         %%mm1, %%mm0   \n\t"
503
        "por         %%mm4, %%mm3   \n\t"
504
        "psrld          $6, %%mm0   \n\t"
505
        "pslld         $10, %%mm3   \n\t"
506
        "por         %%mm3, %%mm0   \n\t"
507
        MOVNTQ"      %%mm0, (%0)    \n\t"
508
        "add           $16,  %1     \n\t"
509
        "add            $8,  %0     \n\t"
510
        "2:                         \n\t"
511
        "cmp            %2,  %1     \n\t"
512
        " jb            1b          \n\t"
513
        : "+r" (d), "+r"(s)
514
        : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
515 6e42e6c4 Diego Biurrun
    );
516 aeae5d53 Michael Niedermayer
#else
517 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
518
    __asm__ volatile(
519 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
520
        "movq          %1, %%mm6    \n\t"
521
        ::"m"(red_15mask),"m"(green_15mask));
522 dd68318c Ramiro Polla
    while (s < mm_end) {
523 7ad6469e Diego Pettenò
        __asm__ volatile(
524 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
525
            "movd          %1, %%mm0    \n\t"
526
            "movd         4%1, %%mm3    \n\t"
527
            "punpckldq    8%1, %%mm0    \n\t"
528
            "punpckldq   12%1, %%mm3    \n\t"
529
            "movq       %%mm0, %%mm1    \n\t"
530
            "movq       %%mm0, %%mm2    \n\t"
531
            "movq       %%mm3, %%mm4    \n\t"
532
            "movq       %%mm3, %%mm5    \n\t"
533
            "psrlq         $3, %%mm0    \n\t"
534
            "psrlq         $3, %%mm3    \n\t"
535
            "pand          %2, %%mm0    \n\t"
536
            "pand          %2, %%mm3    \n\t"
537
            "psrlq         $6, %%mm1    \n\t"
538
            "psrlq         $6, %%mm4    \n\t"
539
            "pand       %%mm6, %%mm1    \n\t"
540
            "pand       %%mm6, %%mm4    \n\t"
541
            "psrlq         $9, %%mm2    \n\t"
542
            "psrlq         $9, %%mm5    \n\t"
543
            "pand       %%mm7, %%mm2    \n\t"
544
            "pand       %%mm7, %%mm5    \n\t"
545
            "por        %%mm1, %%mm0    \n\t"
546
            "por        %%mm4, %%mm3    \n\t"
547
            "por        %%mm2, %%mm0    \n\t"
548
            "por        %%mm5, %%mm3    \n\t"
549
            "psllq        $16, %%mm3    \n\t"
550
            "por        %%mm3, %%mm0    \n\t"
551
            MOVNTQ"     %%mm0, %0       \n\t"
552
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
553 6e42e6c4 Diego Biurrun
        d += 4;
554
        s += 16;
555
    }
556
#endif
557 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
558
    __asm__ volatile(EMMS:::"memory");
559 6e42e6c4 Diego Biurrun
#endif
560 dd68318c Ramiro Polla
    while (s < end) {
561 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
562 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
563
    }
564 fcfbc150 Michael Niedermayer
}
565
566 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
567 ac4d0aea Michael Niedermayer
{
568 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
569
    const uint8_t *end;
570 b63f641e Aurelien Jacobs
#if HAVE_MMX
571 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
572 ac4d0aea Michael Niedermayer
#endif
573 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
574
    end = s + src_size;
575 b63f641e Aurelien Jacobs
#if HAVE_MMX
576 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
577
    __asm__ volatile(
578 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
579
        "movq          %1, %%mm6    \n\t"
580
        ::"m"(red_15mask),"m"(green_15mask));
581
    mm_end = end - 15;
582 dd68318c Ramiro Polla
    while (s < mm_end) {
583 7ad6469e Diego Pettenò
        __asm__ volatile(
584 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
585
            "movd          %1, %%mm0    \n\t"
586
            "movd         4%1, %%mm3    \n\t"
587
            "punpckldq    8%1, %%mm0    \n\t"
588
            "punpckldq   12%1, %%mm3    \n\t"
589
            "movq       %%mm0, %%mm1    \n\t"
590
            "movq       %%mm0, %%mm2    \n\t"
591
            "movq       %%mm3, %%mm4    \n\t"
592
            "movq       %%mm3, %%mm5    \n\t"
593
            "psllq         $7, %%mm0    \n\t"
594
            "psllq         $7, %%mm3    \n\t"
595
            "pand       %%mm7, %%mm0    \n\t"
596
            "pand       %%mm7, %%mm3    \n\t"
597
            "psrlq         $6, %%mm1    \n\t"
598
            "psrlq         $6, %%mm4    \n\t"
599
            "pand       %%mm6, %%mm1    \n\t"
600
            "pand       %%mm6, %%mm4    \n\t"
601
            "psrlq        $19, %%mm2    \n\t"
602
            "psrlq        $19, %%mm5    \n\t"
603
            "pand          %2, %%mm2    \n\t"
604
            "pand          %2, %%mm5    \n\t"
605
            "por        %%mm1, %%mm0    \n\t"
606
            "por        %%mm4, %%mm3    \n\t"
607
            "por        %%mm2, %%mm0    \n\t"
608
            "por        %%mm5, %%mm3    \n\t"
609
            "psllq        $16, %%mm3    \n\t"
610
            "por        %%mm3, %%mm0    \n\t"
611
            MOVNTQ"     %%mm0, %0       \n\t"
612
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
613 6e42e6c4 Diego Biurrun
        d += 4;
614
        s += 16;
615
    }
616 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
617
    __asm__ volatile(EMMS:::"memory");
618 6e42e6c4 Diego Biurrun
#endif
619 dd68318c Ramiro Polla
    while (s < end) {
620 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
621 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
622
    }
623 ac4d0aea Michael Niedermayer
}
624
625 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
626 996e1a7c Nick Kurshev
{
627 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
628
    const uint8_t *end;
629 b63f641e Aurelien Jacobs
#if HAVE_MMX
630 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
631 0d9f3d85 Arpi
#endif
632 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
633
    end = s + src_size;
634 b63f641e Aurelien Jacobs
#if HAVE_MMX
635 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
636
    __asm__ volatile(
637 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
638
        "movq         %1, %%mm6     \n\t"
639
        ::"m"(red_16mask),"m"(green_16mask));
640
    mm_end = end - 11;
641 dd68318c Ramiro Polla
    while (s < mm_end) {
642 7ad6469e Diego Pettenò
        __asm__ volatile(
643 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
644
            "movd          %1, %%mm0    \n\t"
645
            "movd         3%1, %%mm3    \n\t"
646
            "punpckldq    6%1, %%mm0    \n\t"
647
            "punpckldq    9%1, %%mm3    \n\t"
648
            "movq       %%mm0, %%mm1    \n\t"
649
            "movq       %%mm0, %%mm2    \n\t"
650
            "movq       %%mm3, %%mm4    \n\t"
651
            "movq       %%mm3, %%mm5    \n\t"
652
            "psrlq         $3, %%mm0    \n\t"
653
            "psrlq         $3, %%mm3    \n\t"
654
            "pand          %2, %%mm0    \n\t"
655
            "pand          %2, %%mm3    \n\t"
656
            "psrlq         $5, %%mm1    \n\t"
657
            "psrlq         $5, %%mm4    \n\t"
658
            "pand       %%mm6, %%mm1    \n\t"
659
            "pand       %%mm6, %%mm4    \n\t"
660
            "psrlq         $8, %%mm2    \n\t"
661
            "psrlq         $8, %%mm5    \n\t"
662
            "pand       %%mm7, %%mm2    \n\t"
663
            "pand       %%mm7, %%mm5    \n\t"
664
            "por        %%mm1, %%mm0    \n\t"
665
            "por        %%mm4, %%mm3    \n\t"
666
            "por        %%mm2, %%mm0    \n\t"
667
            "por        %%mm5, %%mm3    \n\t"
668
            "psllq        $16, %%mm3    \n\t"
669
            "por        %%mm3, %%mm0    \n\t"
670
            MOVNTQ"     %%mm0, %0       \n\t"
671
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
672 6e42e6c4 Diego Biurrun
        d += 4;
673
        s += 12;
674
    }
675 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
676
    __asm__ volatile(EMMS:::"memory");
677 6e42e6c4 Diego Biurrun
#endif
678 dd68318c Ramiro Polla
    while (s < end) {
679 6e42e6c4 Diego Biurrun
        const int b = *s++;
680
        const int g = *s++;
681
        const int r = *s++;
682
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
683
    }
684 996e1a7c Nick Kurshev
}
685
686 6107059c Michael Niedermayer
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
687 ac4d0aea Michael Niedermayer
{
688 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
689
    const uint8_t *end;
690 b63f641e Aurelien Jacobs
#if HAVE_MMX
691 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
692 ac4d0aea Michael Niedermayer
#endif
693 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
694
    end = s + src_size;
695 b63f641e Aurelien Jacobs
#if HAVE_MMX
696 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
697
    __asm__ volatile(
698 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
699
        "movq         %1, %%mm6     \n\t"
700
        ::"m"(red_16mask),"m"(green_16mask));
701
    mm_end = end - 15;
702 dd68318c Ramiro Polla
    while (s < mm_end) {
703 7ad6469e Diego Pettenò
        __asm__ volatile(
704 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
705
            "movd          %1, %%mm0    \n\t"
706
            "movd         3%1, %%mm3    \n\t"
707
            "punpckldq    6%1, %%mm0    \n\t"
708
            "punpckldq    9%1, %%mm3    \n\t"
709
            "movq       %%mm0, %%mm1    \n\t"
710
            "movq       %%mm0, %%mm2    \n\t"
711
            "movq       %%mm3, %%mm4    \n\t"
712
            "movq       %%mm3, %%mm5    \n\t"
713
            "psllq         $8, %%mm0    \n\t"
714
            "psllq         $8, %%mm3    \n\t"
715
            "pand       %%mm7, %%mm0    \n\t"
716
            "pand       %%mm7, %%mm3    \n\t"
717
            "psrlq         $5, %%mm1    \n\t"
718
            "psrlq         $5, %%mm4    \n\t"
719
            "pand       %%mm6, %%mm1    \n\t"
720
            "pand       %%mm6, %%mm4    \n\t"
721
            "psrlq        $19, %%mm2    \n\t"
722
            "psrlq        $19, %%mm5    \n\t"
723
            "pand          %2, %%mm2    \n\t"
724
            "pand          %2, %%mm5    \n\t"
725
            "por        %%mm1, %%mm0    \n\t"
726
            "por        %%mm4, %%mm3    \n\t"
727
            "por        %%mm2, %%mm0    \n\t"
728
            "por        %%mm5, %%mm3    \n\t"
729
            "psllq        $16, %%mm3    \n\t"
730
            "por        %%mm3, %%mm0    \n\t"
731
            MOVNTQ"     %%mm0, %0       \n\t"
732
            :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
733 6e42e6c4 Diego Biurrun
        d += 4;
734
        s += 12;
735
    }
736 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
737
    __asm__ volatile(EMMS:::"memory");
738 6e42e6c4 Diego Biurrun
#endif
739 dd68318c Ramiro Polla
    while (s < end) {
740 6e42e6c4 Diego Biurrun
        const int r = *s++;
741
        const int g = *s++;
742
        const int b = *s++;
743
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
744
    }
745 ac4d0aea Michael Niedermayer
}
746
747 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
748 996e1a7c Nick Kurshev
{
749 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
750
    const uint8_t *end;
751 b63f641e Aurelien Jacobs
#if HAVE_MMX
752 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
753 0d9f3d85 Arpi
#endif
754 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
755
    end = s + src_size;
756 b63f641e Aurelien Jacobs
#if HAVE_MMX
757 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
758
    __asm__ volatile(
759 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
760
        "movq          %1, %%mm6    \n\t"
761
        ::"m"(red_15mask),"m"(green_15mask));
762
    mm_end = end - 11;
763 dd68318c Ramiro Polla
    while (s < mm_end) {
764 7ad6469e Diego Pettenò
        __asm__ volatile(
765 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
766
            "movd          %1, %%mm0    \n\t"
767
            "movd         3%1, %%mm3    \n\t"
768
            "punpckldq    6%1, %%mm0    \n\t"
769
            "punpckldq    9%1, %%mm3    \n\t"
770
            "movq       %%mm0, %%mm1    \n\t"
771
            "movq       %%mm0, %%mm2    \n\t"
772
            "movq       %%mm3, %%mm4    \n\t"
773
            "movq       %%mm3, %%mm5    \n\t"
774
            "psrlq         $3, %%mm0    \n\t"
775
            "psrlq         $3, %%mm3    \n\t"
776
            "pand          %2, %%mm0    \n\t"
777
            "pand          %2, %%mm3    \n\t"
778
            "psrlq         $6, %%mm1    \n\t"
779
            "psrlq         $6, %%mm4    \n\t"
780
            "pand       %%mm6, %%mm1    \n\t"
781
            "pand       %%mm6, %%mm4    \n\t"
782
            "psrlq         $9, %%mm2    \n\t"
783
            "psrlq         $9, %%mm5    \n\t"
784
            "pand       %%mm7, %%mm2    \n\t"
785
            "pand       %%mm7, %%mm5    \n\t"
786
            "por        %%mm1, %%mm0    \n\t"
787
            "por        %%mm4, %%mm3    \n\t"
788
            "por        %%mm2, %%mm0    \n\t"
789
            "por        %%mm5, %%mm3    \n\t"
790
            "psllq        $16, %%mm3    \n\t"
791
            "por        %%mm3, %%mm0    \n\t"
792
            MOVNTQ"     %%mm0, %0       \n\t"
793
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
794 6e42e6c4 Diego Biurrun
        d += 4;
795
        s += 12;
796
    }
797 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
798
    __asm__ volatile(EMMS:::"memory");
799 6e42e6c4 Diego Biurrun
#endif
800 dd68318c Ramiro Polla
    while (s < end) {
801 6e42e6c4 Diego Biurrun
        const int b = *s++;
802
        const int g = *s++;
803
        const int r = *s++;
804
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
805
    }
806 0d9f3d85 Arpi
}
807
808 6107059c Michael Niedermayer
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
809 ac4d0aea Michael Niedermayer
{
810 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
811
    const uint8_t *end;
812 b63f641e Aurelien Jacobs
#if HAVE_MMX
813 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
814 ac4d0aea Michael Niedermayer
#endif
815 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
816
    end = s + src_size;
817 b63f641e Aurelien Jacobs
#if HAVE_MMX
818 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
819
    __asm__ volatile(
820 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
821
        "movq         %1, %%mm6     \n\t"
822
        ::"m"(red_15mask),"m"(green_15mask));
823
    mm_end = end - 15;
824 dd68318c Ramiro Polla
    while (s < mm_end) {
825 7ad6469e Diego Pettenò
        __asm__ volatile(
826 9b734d44 Ramiro Polla
            PREFETCH"   32%1            \n\t"
827
            "movd         %1, %%mm0     \n\t"
828
            "movd        3%1, %%mm3     \n\t"
829
            "punpckldq   6%1, %%mm0     \n\t"
830
            "punpckldq   9%1, %%mm3     \n\t"
831
            "movq      %%mm0, %%mm1     \n\t"
832
            "movq      %%mm0, %%mm2     \n\t"
833
            "movq      %%mm3, %%mm4     \n\t"
834
            "movq      %%mm3, %%mm5     \n\t"
835
            "psllq        $7, %%mm0     \n\t"
836
            "psllq        $7, %%mm3     \n\t"
837
            "pand      %%mm7, %%mm0     \n\t"
838
            "pand      %%mm7, %%mm3     \n\t"
839
            "psrlq        $6, %%mm1     \n\t"
840
            "psrlq        $6, %%mm4     \n\t"
841
            "pand      %%mm6, %%mm1     \n\t"
842
            "pand      %%mm6, %%mm4     \n\t"
843
            "psrlq       $19, %%mm2     \n\t"
844
            "psrlq       $19, %%mm5     \n\t"
845
            "pand         %2, %%mm2     \n\t"
846
            "pand         %2, %%mm5     \n\t"
847
            "por       %%mm1, %%mm0     \n\t"
848
            "por       %%mm4, %%mm3     \n\t"
849
            "por       %%mm2, %%mm0     \n\t"
850
            "por       %%mm5, %%mm3     \n\t"
851
            "psllq       $16, %%mm3     \n\t"
852
            "por       %%mm3, %%mm0     \n\t"
853
            MOVNTQ"    %%mm0, %0        \n\t"
854
            :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
855 6e42e6c4 Diego Biurrun
        d += 4;
856
        s += 12;
857
    }
858 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
859
    __asm__ volatile(EMMS:::"memory");
860 6e42e6c4 Diego Biurrun
#endif
861 dd68318c Ramiro Polla
    while (s < end) {
862 6e42e6c4 Diego Biurrun
        const int r = *s++;
863
        const int g = *s++;
864
        const int b = *s++;
865
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
866
    }
867 ac4d0aea Michael Niedermayer
}
868
869 0d9f3d85 Arpi
/*
870 594ff7cc Diego Biurrun
  I use a less accurate approximation here by simply left-shifting the input
871
  value and filling the low order bits with zeroes. This method improves PNG
872
  compression but this scheme cannot reproduce white exactly, since it does
873
  not generate an all-ones maximum value; the net effect is to darken the
874 0d9f3d85 Arpi
  image slightly.
875

876
  The better method should be "left bit replication":
877

878
   4 3 2 1 0
879
   ---------
880
   1 1 0 1 1
881

882
   7 6 5 4 3  2 1 0
883
   ----------------
884
   1 1 0 1 1  1 1 0
885
   |=======|  |===|
886 8a322796 Diego Biurrun
       |      leftmost bits repeated to fill open bits
887 0d9f3d85 Arpi
       |
888 8a322796 Diego Biurrun
   original bits
889 0d9f3d85 Arpi
*/
890 6107059c Michael Niedermayer
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
891 0d9f3d85 Arpi
{
892 6e42e6c4 Diego Biurrun
    const uint16_t *end;
893 b63f641e Aurelien Jacobs
#if HAVE_MMX
894 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
895 0d9f3d85 Arpi
#endif
896 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
897 994c1ef0 Baptiste Coudurier
    const uint16_t *s = (const uint16_t*)src;
898 6e42e6c4 Diego Biurrun
    end = s + src_size/2;
899 b63f641e Aurelien Jacobs
#if HAVE_MMX
900 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
901 6e42e6c4 Diego Biurrun
    mm_end = end - 7;
902 dd68318c Ramiro Polla
    while (s < mm_end) {
903 7ad6469e Diego Pettenò
        __asm__ volatile(
904 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
905
            "movq          %1, %%mm0    \n\t"
906
            "movq          %1, %%mm1    \n\t"
907
            "movq          %1, %%mm2    \n\t"
908
            "pand          %2, %%mm0    \n\t"
909
            "pand          %3, %%mm1    \n\t"
910
            "pand          %4, %%mm2    \n\t"
911
            "psllq         $3, %%mm0    \n\t"
912
            "psrlq         $2, %%mm1    \n\t"
913
            "psrlq         $7, %%mm2    \n\t"
914
            "movq       %%mm0, %%mm3    \n\t"
915
            "movq       %%mm1, %%mm4    \n\t"
916
            "movq       %%mm2, %%mm5    \n\t"
917
            "punpcklwd     %5, %%mm0    \n\t"
918
            "punpcklwd     %5, %%mm1    \n\t"
919
            "punpcklwd     %5, %%mm2    \n\t"
920
            "punpckhwd     %5, %%mm3    \n\t"
921
            "punpckhwd     %5, %%mm4    \n\t"
922
            "punpckhwd     %5, %%mm5    \n\t"
923
            "psllq         $8, %%mm1    \n\t"
924
            "psllq        $16, %%mm2    \n\t"
925
            "por        %%mm1, %%mm0    \n\t"
926
            "por        %%mm2, %%mm0    \n\t"
927
            "psllq         $8, %%mm4    \n\t"
928
            "psllq        $16, %%mm5    \n\t"
929
            "por        %%mm4, %%mm3    \n\t"
930
            "por        %%mm5, %%mm3    \n\t"
931
932
            "movq       %%mm0, %%mm6    \n\t"
933
            "movq       %%mm3, %%mm7    \n\t"
934
935
            "movq         8%1, %%mm0    \n\t"
936
            "movq         8%1, %%mm1    \n\t"
937
            "movq         8%1, %%mm2    \n\t"
938
            "pand          %2, %%mm0    \n\t"
939
            "pand          %3, %%mm1    \n\t"
940
            "pand          %4, %%mm2    \n\t"
941
            "psllq         $3, %%mm0    \n\t"
942
            "psrlq         $2, %%mm1    \n\t"
943
            "psrlq         $7, %%mm2    \n\t"
944
            "movq       %%mm0, %%mm3    \n\t"
945
            "movq       %%mm1, %%mm4    \n\t"
946
            "movq       %%mm2, %%mm5    \n\t"
947
            "punpcklwd     %5, %%mm0    \n\t"
948
            "punpcklwd     %5, %%mm1    \n\t"
949
            "punpcklwd     %5, %%mm2    \n\t"
950
            "punpckhwd     %5, %%mm3    \n\t"
951
            "punpckhwd     %5, %%mm4    \n\t"
952
            "punpckhwd     %5, %%mm5    \n\t"
953
            "psllq         $8, %%mm1    \n\t"
954
            "psllq        $16, %%mm2    \n\t"
955
            "por        %%mm1, %%mm0    \n\t"
956
            "por        %%mm2, %%mm0    \n\t"
957
            "psllq         $8, %%mm4    \n\t"
958
            "psllq        $16, %%mm5    \n\t"
959
            "por        %%mm4, %%mm3    \n\t"
960
            "por        %%mm5, %%mm3    \n\t"
961
962
            :"=m"(*d)
963
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
964
            :"memory");
965 8a322796 Diego Biurrun
        /* borrowed 32 to 24 */
966 7ad6469e Diego Pettenò
        __asm__ volatile(
967 9b734d44 Ramiro Polla
            "movq       %%mm0, %%mm4    \n\t"
968
            "movq       %%mm3, %%mm5    \n\t"
969
            "movq       %%mm6, %%mm0    \n\t"
970
            "movq       %%mm7, %%mm1    \n\t"
971
972
            "movq       %%mm4, %%mm6    \n\t"
973
            "movq       %%mm5, %%mm7    \n\t"
974
            "movq       %%mm0, %%mm2    \n\t"
975
            "movq       %%mm1, %%mm3    \n\t"
976
977
            "psrlq         $8, %%mm2    \n\t"
978
            "psrlq         $8, %%mm3    \n\t"
979
            "psrlq         $8, %%mm6    \n\t"
980
            "psrlq         $8, %%mm7    \n\t"
981
            "pand          %2, %%mm0    \n\t"
982
            "pand          %2, %%mm1    \n\t"
983
            "pand          %2, %%mm4    \n\t"
984
            "pand          %2, %%mm5    \n\t"
985
            "pand          %3, %%mm2    \n\t"
986
            "pand          %3, %%mm3    \n\t"
987
            "pand          %3, %%mm6    \n\t"
988
            "pand          %3, %%mm7    \n\t"
989
            "por        %%mm2, %%mm0    \n\t"
990
            "por        %%mm3, %%mm1    \n\t"
991
            "por        %%mm6, %%mm4    \n\t"
992
            "por        %%mm7, %%mm5    \n\t"
993
994
            "movq       %%mm1, %%mm2    \n\t"
995
            "movq       %%mm4, %%mm3    \n\t"
996
            "psllq        $48, %%mm2    \n\t"
997
            "psllq        $32, %%mm3    \n\t"
998
            "pand          %4, %%mm2    \n\t"
999
            "pand          %5, %%mm3    \n\t"
1000
            "por        %%mm2, %%mm0    \n\t"
1001
            "psrlq        $16, %%mm1    \n\t"
1002
            "psrlq        $32, %%mm4    \n\t"
1003
            "psllq        $16, %%mm5    \n\t"
1004
            "por        %%mm3, %%mm1    \n\t"
1005
            "pand          %6, %%mm5    \n\t"
1006
            "por        %%mm5, %%mm4    \n\t"
1007
1008
            MOVNTQ"     %%mm0,   %0     \n\t"
1009
            MOVNTQ"     %%mm1,  8%0     \n\t"
1010
            MOVNTQ"     %%mm4, 16%0"
1011
1012
            :"=m"(*d)
1013
            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
1014
            :"memory");
1015 6e42e6c4 Diego Biurrun
        d += 24;
1016
        s += 8;
1017
    }
1018 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1019
    __asm__ volatile(EMMS:::"memory");
1020 6e42e6c4 Diego Biurrun
#endif
1021 dd68318c Ramiro Polla
    while (s < end) {
1022 6e42e6c4 Diego Biurrun
        register uint16_t bgr;
1023
        bgr = *s++;
1024
        *d++ = (bgr&0x1F)<<3;
1025
        *d++ = (bgr&0x3E0)>>2;
1026
        *d++ = (bgr&0x7C00)>>7;
1027
    }
1028 0d9f3d85 Arpi
}
1029
1030 6107059c Michael Niedermayer
static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1031 0d9f3d85 Arpi
{
1032 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1033 b63f641e Aurelien Jacobs
#if HAVE_MMX
1034 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1035 0d9f3d85 Arpi
#endif
1036 6e42e6c4 Diego Biurrun
    uint8_t *d = (uint8_t *)dst;
1037
    const uint16_t *s = (const uint16_t *)src;
1038
    end = s + src_size/2;
1039 b63f641e Aurelien Jacobs
#if HAVE_MMX
1040 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1041 6e42e6c4 Diego Biurrun
    mm_end = end - 7;
1042 dd68318c Ramiro Polla
    while (s < mm_end) {
1043 7ad6469e Diego Pettenò
        __asm__ volatile(
1044 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
1045
            "movq          %1, %%mm0    \n\t"
1046
            "movq          %1, %%mm1    \n\t"
1047
            "movq          %1, %%mm2    \n\t"
1048
            "pand          %2, %%mm0    \n\t"
1049
            "pand          %3, %%mm1    \n\t"
1050
            "pand          %4, %%mm2    \n\t"
1051
            "psllq         $3, %%mm0    \n\t"
1052
            "psrlq         $3, %%mm1    \n\t"
1053
            "psrlq         $8, %%mm2    \n\t"
1054
            "movq       %%mm0, %%mm3    \n\t"
1055
            "movq       %%mm1, %%mm4    \n\t"
1056
            "movq       %%mm2, %%mm5    \n\t"
1057
            "punpcklwd     %5, %%mm0    \n\t"
1058
            "punpcklwd     %5, %%mm1    \n\t"
1059
            "punpcklwd     %5, %%mm2    \n\t"
1060
            "punpckhwd     %5, %%mm3    \n\t"
1061
            "punpckhwd     %5, %%mm4    \n\t"
1062
            "punpckhwd     %5, %%mm5    \n\t"
1063
            "psllq         $8, %%mm1    \n\t"
1064
            "psllq        $16, %%mm2    \n\t"
1065
            "por        %%mm1, %%mm0    \n\t"
1066
            "por        %%mm2, %%mm0    \n\t"
1067
            "psllq         $8, %%mm4    \n\t"
1068
            "psllq        $16, %%mm5    \n\t"
1069
            "por        %%mm4, %%mm3    \n\t"
1070
            "por        %%mm5, %%mm3    \n\t"
1071
1072
            "movq       %%mm0, %%mm6    \n\t"
1073
            "movq       %%mm3, %%mm7    \n\t"
1074
1075
            "movq         8%1, %%mm0    \n\t"
1076
            "movq         8%1, %%mm1    \n\t"
1077
            "movq         8%1, %%mm2    \n\t"
1078
            "pand          %2, %%mm0    \n\t"
1079
            "pand          %3, %%mm1    \n\t"
1080
            "pand          %4, %%mm2    \n\t"
1081
            "psllq         $3, %%mm0    \n\t"
1082
            "psrlq         $3, %%mm1    \n\t"
1083
            "psrlq         $8, %%mm2    \n\t"
1084
            "movq       %%mm0, %%mm3    \n\t"
1085
            "movq       %%mm1, %%mm4    \n\t"
1086
            "movq       %%mm2, %%mm5    \n\t"
1087
            "punpcklwd     %5, %%mm0    \n\t"
1088
            "punpcklwd     %5, %%mm1    \n\t"
1089
            "punpcklwd     %5, %%mm2    \n\t"
1090
            "punpckhwd     %5, %%mm3    \n\t"
1091
            "punpckhwd     %5, %%mm4    \n\t"
1092
            "punpckhwd     %5, %%mm5    \n\t"
1093
            "psllq         $8, %%mm1    \n\t"
1094
            "psllq        $16, %%mm2    \n\t"
1095
            "por        %%mm1, %%mm0    \n\t"
1096
            "por        %%mm2, %%mm0    \n\t"
1097
            "psllq         $8, %%mm4    \n\t"
1098
            "psllq        $16, %%mm5    \n\t"
1099
            "por        %%mm4, %%mm3    \n\t"
1100
            "por        %%mm5, %%mm3    \n\t"
1101
            :"=m"(*d)
1102
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
1103
            :"memory");
1104 8a322796 Diego Biurrun
        /* borrowed 32 to 24 */
1105 7ad6469e Diego Pettenò
        __asm__ volatile(
1106 9b734d44 Ramiro Polla
            "movq       %%mm0, %%mm4    \n\t"
1107
            "movq       %%mm3, %%mm5    \n\t"
1108
            "movq       %%mm6, %%mm0    \n\t"
1109
            "movq       %%mm7, %%mm1    \n\t"
1110
1111
            "movq       %%mm4, %%mm6    \n\t"
1112
            "movq       %%mm5, %%mm7    \n\t"
1113
            "movq       %%mm0, %%mm2    \n\t"
1114
            "movq       %%mm1, %%mm3    \n\t"
1115
1116
            "psrlq         $8, %%mm2    \n\t"
1117
            "psrlq         $8, %%mm3    \n\t"
1118
            "psrlq         $8, %%mm6    \n\t"
1119
            "psrlq         $8, %%mm7    \n\t"
1120
            "pand          %2, %%mm0    \n\t"
1121
            "pand          %2, %%mm1    \n\t"
1122
            "pand          %2, %%mm4    \n\t"
1123
            "pand          %2, %%mm5    \n\t"
1124
            "pand          %3, %%mm2    \n\t"
1125
            "pand          %3, %%mm3    \n\t"
1126
            "pand          %3, %%mm6    \n\t"
1127
            "pand          %3, %%mm7    \n\t"
1128
            "por        %%mm2, %%mm0    \n\t"
1129
            "por        %%mm3, %%mm1    \n\t"
1130
            "por        %%mm6, %%mm4    \n\t"
1131
            "por        %%mm7, %%mm5    \n\t"
1132
1133
            "movq       %%mm1, %%mm2    \n\t"
1134
            "movq       %%mm4, %%mm3    \n\t"
1135
            "psllq        $48, %%mm2    \n\t"
1136
            "psllq        $32, %%mm3    \n\t"
1137
            "pand          %4, %%mm2    \n\t"
1138
            "pand          %5, %%mm3    \n\t"
1139
            "por        %%mm2, %%mm0    \n\t"
1140
            "psrlq        $16, %%mm1    \n\t"
1141
            "psrlq        $32, %%mm4    \n\t"
1142
            "psllq        $16, %%mm5    \n\t"
1143
            "por        %%mm3, %%mm1    \n\t"
1144
            "pand          %6, %%mm5    \n\t"
1145
            "por        %%mm5, %%mm4    \n\t"
1146
1147
            MOVNTQ"     %%mm0,   %0     \n\t"
1148
            MOVNTQ"     %%mm1,  8%0     \n\t"
1149
            MOVNTQ"     %%mm4, 16%0"
1150
1151
            :"=m"(*d)
1152
            :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
1153
            :"memory");
1154 6e42e6c4 Diego Biurrun
        d += 24;
1155
        s += 8;
1156
    }
1157 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1158
    __asm__ volatile(EMMS:::"memory");
1159 6e42e6c4 Diego Biurrun
#endif
1160 dd68318c Ramiro Polla
    while (s < end) {
1161 6e42e6c4 Diego Biurrun
        register uint16_t bgr;
1162
        bgr = *s++;
1163
        *d++ = (bgr&0x1F)<<3;
1164
        *d++ = (bgr&0x7E0)>>3;
1165
        *d++ = (bgr&0xF800)>>8;
1166
    }
1167 0d9f3d85 Arpi
}
1168
1169 a284d030 Cédric Schieli
/*
 * PACK_RGB32: inline-asm fragment shared by the 15/16-bit to 32-bit
 * converters in this file. It packs three registers holding one channel
 * each (four values, one per 16-bit lane) together with a constant byte
 * taken from mm6 into four 4-byte pixels, and stores the resulting 16 bytes
 * to asm operand %0 (two MOVNTQ stores, at %0 and 8%0). mm3 is overwritten
 * as scratch. The per-instruction comments list bytes from most to least
 * significant.
 *
 * Register contents expected on entry:
 *
1170
 * mm0 = 00 B3 00 B2 00 B1 00 B0
1171
 * mm1 = 00 G3 00 G2 00 G1 00 G0
1172
 * mm2 = 00 R3 00 R2 00 R1 00 R0
1173
 * mm6 = FF FF FF FF FF FF FF FF
1174
 * mm7 = 00 00 00 00 00 00 00 00
1175
 */
1176
#define PACK_RGB32 \
1177
    "packuswb   %%mm7, %%mm0    \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
1178
    "packuswb   %%mm7, %%mm1    \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
1179
    "packuswb   %%mm7, %%mm2    \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
1180
    "punpcklbw  %%mm1, %%mm0    \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
1181
    "punpcklbw  %%mm6, %%mm2    \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
1182
    "movq       %%mm0, %%mm3    \n\t"                               \
1183
    "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
1184
    "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
1185
    MOVNTQ"     %%mm0,  %0      \n\t"                               \
1186
    MOVNTQ"     %%mm3, 8%0      \n\t"                               \
1187
1188 7f526efd Reimar Döffinger
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
1189 0d9f3d85 Arpi
{
1190 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1191 b63f641e Aurelien Jacobs
#if HAVE_MMX
1192 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1193 0d9f3d85 Arpi
#endif
1194 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
1195 6e42e6c4 Diego Biurrun
    const uint16_t *s = (const uint16_t *)src;
1196
    end = s + src_size/2;
1197 b63f641e Aurelien Jacobs
#if HAVE_MMX
1198 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1199
    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
1200 a284d030 Cédric Schieli
    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
1201 6e42e6c4 Diego Biurrun
    mm_end = end - 3;
1202 dd68318c Ramiro Polla
    while (s < mm_end) {
1203 7ad6469e Diego Pettenò
        __asm__ volatile(
1204 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
1205
            "movq          %1, %%mm0    \n\t"
1206
            "movq          %1, %%mm1    \n\t"
1207
            "movq          %1, %%mm2    \n\t"
1208
            "pand          %2, %%mm0    \n\t"
1209
            "pand          %3, %%mm1    \n\t"
1210
            "pand          %4, %%mm2    \n\t"
1211
            "psllq         $3, %%mm0    \n\t"
1212
            "psrlq         $2, %%mm1    \n\t"
1213
            "psrlq         $7, %%mm2    \n\t"
1214
            PACK_RGB32
1215
            :"=m"(*d)
1216
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
1217
            :"memory");
1218 6e42e6c4 Diego Biurrun
        d += 16;
1219
        s += 4;
1220
    }
1221 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1222
    __asm__ volatile(EMMS:::"memory");
1223 6e42e6c4 Diego Biurrun
#endif
1224 dd68318c Ramiro Polla
    while (s < end) {
1225 6e42e6c4 Diego Biurrun
        register uint16_t bgr;
1226
        bgr = *s++;
1227 a898cdc9 Måns Rullgård
#if HAVE_BIGENDIAN
1228 f8a138be Cédric Schieli
        *d++ = 255;
1229 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x7C00)>>7;
1230
        *d++ = (bgr&0x3E0)>>2;
1231
        *d++ = (bgr&0x1F)<<3;
1232 6cb38650 Alex Beregszaszi
#else
1233 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x1F)<<3;
1234
        *d++ = (bgr&0x3E0)>>2;
1235
        *d++ = (bgr&0x7C00)>>7;
1236 f8a138be Cédric Schieli
        *d++ = 255;
1237 deb2277c Michael Niedermayer
#endif
1238 6e42e6c4 Diego Biurrun
    }
1239 0d9f3d85 Arpi
}
1240 996e1a7c Nick Kurshev
1241 7f526efd Reimar Döffinger
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
1242 0d9f3d85 Arpi
{
1243 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1244 b63f641e Aurelien Jacobs
#if HAVE_MMX
1245 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1246 0d9f3d85 Arpi
#endif
1247 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
1248 994c1ef0 Baptiste Coudurier
    const uint16_t *s = (const uint16_t*)src;
1249 6e42e6c4 Diego Biurrun
    end = s + src_size/2;
1250 b63f641e Aurelien Jacobs
#if HAVE_MMX
1251 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1252
    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
1253 a284d030 Cédric Schieli
    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
1254 6e42e6c4 Diego Biurrun
    mm_end = end - 3;
1255 dd68318c Ramiro Polla
    while (s < mm_end) {
1256 7ad6469e Diego Pettenò
        __asm__ volatile(
1257 9b734d44 Ramiro Polla
            PREFETCH"    32%1           \n\t"
1258
            "movq          %1, %%mm0    \n\t"
1259
            "movq          %1, %%mm1    \n\t"
1260
            "movq          %1, %%mm2    \n\t"
1261
            "pand          %2, %%mm0    \n\t"
1262
            "pand          %3, %%mm1    \n\t"
1263
            "pand          %4, %%mm2    \n\t"
1264
            "psllq         $3, %%mm0    \n\t"
1265
            "psrlq         $3, %%mm1    \n\t"
1266
            "psrlq         $8, %%mm2    \n\t"
1267
            PACK_RGB32
1268
            :"=m"(*d)
1269
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
1270
            :"memory");
1271 6e42e6c4 Diego Biurrun
        d += 16;
1272
        s += 4;
1273
    }
1274 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1275
    __asm__ volatile(EMMS:::"memory");
1276 6e42e6c4 Diego Biurrun
#endif
1277 dd68318c Ramiro Polla
    while (s < end) {
1278 6e42e6c4 Diego Biurrun
        register uint16_t bgr;
1279
        bgr = *s++;
1280 a898cdc9 Måns Rullgård
#if HAVE_BIGENDIAN
1281 f8a138be Cédric Schieli
        *d++ = 255;
1282 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0xF800)>>8;
1283
        *d++ = (bgr&0x7E0)>>3;
1284
        *d++ = (bgr&0x1F)<<3;
1285 6cb38650 Alex Beregszaszi
#else
1286 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x1F)<<3;
1287
        *d++ = (bgr&0x7E0)>>3;
1288
        *d++ = (bgr&0xF800)>>8;
1289 f8a138be Cédric Schieli
        *d++ = 255;
1290 6cb38650 Alex Beregszaszi
#endif
1291 6e42e6c4 Diego Biurrun
    }
1292 996e1a7c Nick Kurshev
}
1293 fcfbc150 Michael Niedermayer
1294 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
1295 99969243 Michael Niedermayer
{
1296 9326d3f3 Michael Niedermayer
    x86_reg idx = 15 - src_size;
1297 994c1ef0 Baptiste Coudurier
    const uint8_t *s = src-idx;
1298
    uint8_t *d = dst-idx;
1299 b63f641e Aurelien Jacobs
#if HAVE_MMX
1300 7ad6469e Diego Pettenò
    __asm__ volatile(
1301 9b734d44 Ramiro Polla
        "test          %0, %0           \n\t"
1302
        "jns           2f               \n\t"
1303
        PREFETCH"       (%1, %0)        \n\t"
1304
        "movq          %3, %%mm7        \n\t"
1305
        "pxor          %4, %%mm7        \n\t"
1306
        "movq       %%mm7, %%mm6        \n\t"
1307
        "pxor          %5, %%mm7        \n\t"
1308
        ASMALIGN(4)
1309
        "1:                             \n\t"
1310
        PREFETCH"     32(%1, %0)        \n\t"
1311
        "movq           (%1, %0), %%mm0 \n\t"
1312
        "movq          8(%1, %0), %%mm1 \n\t"
1313 b63f641e Aurelien Jacobs
# if HAVE_MMX2
1314 9b734d44 Ramiro Polla
        "pshufw      $177, %%mm0, %%mm3 \n\t"
1315
        "pshufw      $177, %%mm1, %%mm5 \n\t"
1316
        "pand       %%mm7, %%mm0        \n\t"
1317
        "pand       %%mm6, %%mm3        \n\t"
1318
        "pand       %%mm7, %%mm1        \n\t"
1319
        "pand       %%mm6, %%mm5        \n\t"
1320
        "por        %%mm3, %%mm0        \n\t"
1321
        "por        %%mm5, %%mm1        \n\t"
1322 b38d4874 Ivo van Poorten
# else
1323 9b734d44 Ramiro Polla
        "movq       %%mm0, %%mm2        \n\t"
1324
        "movq       %%mm1, %%mm4        \n\t"
1325
        "pand       %%mm7, %%mm0        \n\t"
1326
        "pand       %%mm6, %%mm2        \n\t"
1327
        "pand       %%mm7, %%mm1        \n\t"
1328
        "pand       %%mm6, %%mm4        \n\t"
1329
        "movq       %%mm2, %%mm3        \n\t"
1330
        "movq       %%mm4, %%mm5        \n\t"
1331
        "pslld        $16, %%mm2        \n\t"
1332
        "psrld        $16, %%mm3        \n\t"
1333
        "pslld        $16, %%mm4        \n\t"
1334
        "psrld        $16, %%mm5        \n\t"
1335
        "por        %%mm2, %%mm0        \n\t"
1336
        "por        %%mm4, %%mm1        \n\t"
1337
        "por        %%mm3, %%mm0        \n\t"
1338
        "por        %%mm5, %%mm1        \n\t"
1339 b38d4874 Ivo van Poorten
# endif
1340 9b734d44 Ramiro Polla
        MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
1341
        MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
1342
        "add          $16, %0           \n\t"
1343
        "js            1b               \n\t"
1344
        SFENCE"                         \n\t"
1345
        EMMS"                           \n\t"
1346
        "2:                             \n\t"
1347
        : "+&r"(idx)
1348
        : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
1349
        : "memory");
1350 6e42e6c4 Diego Biurrun
#endif
1351
    for (; idx<15; idx+=4) {
1352 994c1ef0 Baptiste Coudurier
        register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
1353 6e42e6c4 Diego Biurrun
        v &= 0xff00ff;
1354
        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
1355
    }
1356 99969243 Michael Niedermayer
}
1357
1358 7f526efd Reimar Döffinger
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1359 74d35835 Michael Niedermayer
{
1360 6e42e6c4 Diego Biurrun
    unsigned i;
1361 b63f641e Aurelien Jacobs
#if HAVE_MMX
1362 d0ce212a Ramiro Polla
    x86_reg mmx_size= 23 - src_size;
1363 7ad6469e Diego Pettenò
    __asm__ volatile (
1364 9b734d44 Ramiro Polla
        "test             %%"REG_a", %%"REG_a"          \n\t"
1365
        "jns                     2f                     \n\t"
1366
        "movq     "MANGLE(mask24r)", %%mm5              \n\t"
1367
        "movq     "MANGLE(mask24g)", %%mm6              \n\t"
1368
        "movq     "MANGLE(mask24b)", %%mm7              \n\t"
1369
        ASMALIGN(4)
1370
        "1:                                             \n\t"
1371
        PREFETCH" 32(%1, %%"REG_a")                     \n\t"
1372
        "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
1373
        "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
1374
        "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
1375
        "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
1376
        "pand                 %%mm5, %%mm0              \n\t"
1377
        "pand                 %%mm6, %%mm1              \n\t"
1378
        "pand                 %%mm7, %%mm2              \n\t"
1379
        "por                  %%mm0, %%mm1              \n\t"
1380
        "por                  %%mm2, %%mm1              \n\t"
1381
        "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
1382
        MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
1383
        "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
1384
        "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
1385
        "pand                 %%mm7, %%mm0              \n\t"
1386
        "pand                 %%mm5, %%mm1              \n\t"
1387
        "pand                 %%mm6, %%mm2              \n\t"
1388
        "por                  %%mm0, %%mm1              \n\t"
1389
        "por                  %%mm2, %%mm1              \n\t"
1390
        "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
1391
        MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
1392
        "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
1393
        "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
1394
        "pand                 %%mm6, %%mm0              \n\t"
1395
        "pand                 %%mm7, %%mm1              \n\t"
1396
        "pand                 %%mm5, %%mm2              \n\t"
1397
        "por                  %%mm0, %%mm1              \n\t"
1398
        "por                  %%mm2, %%mm1              \n\t"
1399
        MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
1400
        "add                    $24, %%"REG_a"          \n\t"
1401
        " js                     1b                     \n\t"
1402
        "2:                                             \n\t"
1403
        : "+a" (mmx_size)
1404
        : "r" (src-mmx_size), "r"(dst-mmx_size)
1405 6e42e6c4 Diego Biurrun
    );
1406
1407 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1408
    __asm__ volatile(EMMS:::"memory");
1409 6e42e6c4 Diego Biurrun
1410 8a322796 Diego Biurrun
    if (mmx_size==23) return; //finished, was multiple of 8
1411 6e42e6c4 Diego Biurrun
1412
    src+= src_size;
1413
    dst+= src_size;
1414
    src_size= 23-mmx_size;
1415
    src-= src_size;
1416
    dst-= src_size;
1417
#endif
1418 dd68318c Ramiro Polla
    for (i=0; i<src_size; i+=3) {
1419 6e42e6c4 Diego Biurrun
        register uint8_t x;
1420
        x          = src[i + 2];
1421
        dst[i + 1] = src[i + 1];
1422
        dst[i + 2] = src[i + 0];
1423
        dst[i + 0] = x;
1424
    }
1425 74d35835 Michael Niedermayer
}
1426
1427 b1ec5875 Michael Niedermayer
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1428 6e42e6c4 Diego Biurrun
                                           long width, long height,
1429
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1430 d9d58d17 Michael Niedermayer
{
1431 6e42e6c4 Diego Biurrun
    long y;
1432 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1433 dd68318c Ramiro Polla
    for (y=0; y<height; y++) {
1434 b63f641e Aurelien Jacobs
#if HAVE_MMX
1435 7d73d1c3 Ramiro Polla
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1436 7ad6469e Diego Pettenò
        __asm__ volatile(
1437 9b734d44 Ramiro Polla
            "xor                 %%"REG_a", %%"REG_a"   \n\t"
1438
            ASMALIGN(4)
1439
            "1:                                         \n\t"
1440
            PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
1441
            PREFETCH"    32(%2, %%"REG_a")              \n\t"
1442
            PREFETCH"    32(%3, %%"REG_a")              \n\t"
1443
            "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
1444
            "movq                    %%mm0, %%mm2       \n\t" // U(0)
1445
            "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
1446
            "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
1447
            "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
1448
1449
            "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
1450
            "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
1451
            "movq                    %%mm3, %%mm4       \n\t" // Y(0)
1452
            "movq                    %%mm5, %%mm6       \n\t" // Y(8)
1453
            "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
1454
            "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
1455
            "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
1456
            "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
1457
1458
            MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
1459
            MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
1460
            MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
1461
            MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
1462
1463
            "add                        $8, %%"REG_a"   \n\t"
1464
            "cmp                        %4, %%"REG_a"   \n\t"
1465
            " jb                        1b              \n\t"
1466
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1467
            : "%"REG_a
1468 6e42e6c4 Diego Biurrun
        );
1469 4060205b Michael Niedermayer
#else
1470 b3b8bf64 Michael Niedermayer
1471 b63f641e Aurelien Jacobs
#if ARCH_ALPHA && HAVE_MVI
1472 6e42e6c4 Diego Biurrun
#define pl2yuy2(n)                  \
1473
    y1 = yc[n];                     \
1474
    y2 = yc2[n];                    \
1475
    u = uc[n];                      \
1476
    v = vc[n];                      \
1477 7ad6469e Diego Pettenò
    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
1478
    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
1479
    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
1480
    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
1481 6e42e6c4 Diego Biurrun
    yuv1 = (u << 8) + (v << 24);                \
1482
    yuv2 = yuv1 + y2;               \
1483
    yuv1 += y1;                     \
1484
    qdst[n]  = yuv1;                \
1485
    qdst2[n] = yuv2;
1486
1487
        int i;
1488
        uint64_t *qdst = (uint64_t *) dst;
1489
        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
1490
        const uint32_t *yc = (uint32_t *) ysrc;
1491
        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
1492
        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
1493 dd68318c Ramiro Polla
        for (i = 0; i < chromWidth; i += 8) {
1494 6e42e6c4 Diego Biurrun
            uint64_t y1, y2, yuv1, yuv2;
1495
            uint64_t u, v;
1496
            /* Prefetch */
1497 7ad6469e Diego Pettenò
            __asm__("ldq $31,64(%0)" :: "r"(yc));
1498
            __asm__("ldq $31,64(%0)" :: "r"(yc2));
1499
            __asm__("ldq $31,64(%0)" :: "r"(uc));
1500
            __asm__("ldq $31,64(%0)" :: "r"(vc));
1501 6e42e6c4 Diego Biurrun
1502
            pl2yuy2(0);
1503
            pl2yuy2(1);
1504
            pl2yuy2(2);
1505
            pl2yuy2(3);
1506
1507
            yc    += 4;
1508
            yc2   += 4;
1509
            uc    += 4;
1510
            vc    += 4;
1511
            qdst  += 4;
1512
            qdst2 += 4;
1513
        }
1514
        y++;
1515
        ysrc += lumStride;
1516
        dst += dstStride;
1517 b3b8bf64 Michael Niedermayer
1518 02a6a6ee Diego Biurrun
#elif HAVE_FAST_64BIT
1519 6e42e6c4 Diego Biurrun
        int i;
1520
        uint64_t *ldst = (uint64_t *) dst;
1521
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1522 dd68318c Ramiro Polla
        for (i = 0; i < chromWidth; i += 2) {
1523 6e42e6c4 Diego Biurrun
            uint64_t k, l;
1524
            k = yc[0] + (uc[0] << 8) +
1525
                (yc[1] << 16) + (vc[0] << 24);
1526
            l = yc[2] + (uc[1] << 8) +
1527
                (yc[3] << 16) + (vc[1] << 24);
1528
            *ldst++ = k + (l << 32);
1529
            yc += 4;
1530
            uc += 2;
1531
            vc += 2;
1532
        }
1533 0d9f3d85 Arpi
1534
#else
1535 6e42e6c4 Diego Biurrun
        int i, *idst = (int32_t *) dst;
1536
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1537 dd68318c Ramiro Polla
        for (i = 0; i < chromWidth; i++) {
1538 a898cdc9 Måns Rullgård
#if HAVE_BIGENDIAN
1539 6e42e6c4 Diego Biurrun
            *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
1540
                (yc[1] << 8) + (vc[0] << 0);
1541 da7f8893 Michael Niedermayer
#else
1542 6e42e6c4 Diego Biurrun
            *idst++ = yc[0] + (uc[0] << 8) +
1543
                (yc[1] << 16) + (vc[0] << 24);
1544
#endif
1545
            yc += 2;
1546
            uc++;
1547
            vc++;
1548
        }
1549
#endif
1550
#endif
1551 dd68318c Ramiro Polla
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
1552 6e42e6c4 Diego Biurrun
            usrc += chromStride;
1553
            vsrc += chromStride;
1554
        }
1555
        ysrc += lumStride;
1556
        dst  += dstStride;
1557
    }
1558 b63f641e Aurelien Jacobs
#if HAVE_MMX
1559 9b734d44 Ramiro Polla
    __asm__(EMMS"       \n\t"
1560
            SFENCE"     \n\t"
1561
            :::"memory");
1562 4060205b Michael Niedermayer
#endif
1563 d9d58d17 Michael Niedermayer
}
1564
1565 dabcdbc4 Michael Niedermayer
/**
1566 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1567
 * (If this is a problem for anyone then tell me, and I will fix it.)
1568 dabcdbc4 Michael Niedermayer
 */
1569 b1ec5875 Michael Niedermayer
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1570 6e42e6c4 Diego Biurrun
                                      long width, long height,
1571
                                      long lumStride, long chromStride, long dstStride)
1572 b1ec5875 Michael Niedermayer
{
1573 6e42e6c4 Diego Biurrun
    //FIXME interpolate chroma
1574
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1575 caeaabe7 Alex Beregszaszi
}
1576
1577
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1578 6e42e6c4 Diego Biurrun
                                           long width, long height,
1579
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1580 caeaabe7 Alex Beregszaszi
{
1581 6e42e6c4 Diego Biurrun
    long y;
1582 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1583 dd68318c Ramiro Polla
    for (y=0; y<height; y++) {
1584 b63f641e Aurelien Jacobs
#if HAVE_MMX
1585 7d73d1c3 Ramiro Polla
        //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1586 7ad6469e Diego Pettenò
        __asm__ volatile(
1587 9b734d44 Ramiro Polla
            "xor                %%"REG_a", %%"REG_a"    \n\t"
1588
            ASMALIGN(4)
1589
            "1:                                         \n\t"
1590
            PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
1591
            PREFETCH"   32(%2, %%"REG_a")               \n\t"
1592
            PREFETCH"   32(%3, %%"REG_a")               \n\t"
1593
            "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
1594
            "movq                   %%mm0, %%mm2        \n\t" // U(0)
1595
            "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
1596
            "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
1597
            "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
1598
1599
            "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
1600
            "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
1601
            "movq                   %%mm0, %%mm4        \n\t" // Y(0)
1602
            "movq                   %%mm2, %%mm6        \n\t" // Y(8)
1603
            "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
1604
            "punpckhbw              %%mm3, %%mm4        \n\t" // YUYV YUYV(4)
1605
            "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
1606
            "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
1607
1608
            MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
1609
            MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
1610
            MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
1611
            MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
1612
1613
            "add                       $8, %%"REG_a"    \n\t"
1614
            "cmp                       %4, %%"REG_a"    \n\t"
1615
            " jb                       1b               \n\t"
1616
            ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1617
            : "%"REG_a
1618 6e42e6c4 Diego Biurrun
        );
1619 7ac25f2d Michael Niedermayer
#else
1620 594ff7cc Diego Biurrun
//FIXME adapt the Alpha ASM code from yv12->yuy2
1621 7ac25f2d Michael Niedermayer
1622 02a6a6ee Diego Biurrun
#if HAVE_FAST_64BIT
1623 6e42e6c4 Diego Biurrun
        int i;
1624
        uint64_t *ldst = (uint64_t *) dst;
1625
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1626 dd68318c Ramiro Polla
        for (i = 0; i < chromWidth; i += 2) {
1627 6e42e6c4 Diego Biurrun
            uint64_t k, l;
1628
            k = uc[0] + (yc[0] << 8) +
1629
                (vc[0] << 16) + (yc[1] << 24);
1630
            l = uc[1] + (yc[2] << 8) +
1631
                (vc[1] << 16) + (yc[3] << 24);
1632
            *ldst++ = k + (l << 32);
1633
            yc += 4;
1634
            uc += 2;
1635
            vc += 2;
1636
        }
1637 caeaabe7 Alex Beregszaszi
1638
#else
1639 6e42e6c4 Diego Biurrun
        int i, *idst = (int32_t *) dst;
1640
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1641 dd68318c Ramiro Polla
        for (i = 0; i < chromWidth; i++) {
1642 a898cdc9 Måns Rullgård
#if HAVE_BIGENDIAN
1643 6e42e6c4 Diego Biurrun
            *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
1644
                (vc[0] << 8) + (yc[1] << 0);
1645 da7f8893 Michael Niedermayer
#else
1646 6e42e6c4 Diego Biurrun
            *idst++ = uc[0] + (yc[0] << 8) +
1647 8a322796 Diego Biurrun
               (vc[0] << 16) + (yc[1] << 24);
1648 6e42e6c4 Diego Biurrun
#endif
1649
            yc += 2;
1650
            uc++;
1651
            vc++;
1652
        }
1653
#endif
1654
#endif
1655 dd68318c Ramiro Polla
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
1656 6e42e6c4 Diego Biurrun
            usrc += chromStride;
1657
            vsrc += chromStride;
1658
        }
1659
        ysrc += lumStride;
1660
        dst += dstStride;
1661
    }
1662 b63f641e Aurelien Jacobs
#if HAVE_MMX
1663 9b734d44 Ramiro Polla
    __asm__(EMMS"       \n\t"
1664
            SFENCE"     \n\t"
1665
            :::"memory");
1666 7ac25f2d Michael Niedermayer
#endif
1667 caeaabe7 Alex Beregszaszi
}
1668
1669
/**
1670 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16
1671
 * (If this is a problem for anyone then tell me, and I will fix it.)
1672 caeaabe7 Alex Beregszaszi
 */
1673
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1674 6e42e6c4 Diego Biurrun
                                      long width, long height,
1675
                                      long lumStride, long chromStride, long dstStride)
1676 caeaabe7 Alex Beregszaszi
{
1677 6e42e6c4 Diego Biurrun
    //FIXME interpolate chroma
1678
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1679 b1ec5875 Michael Niedermayer
}
1680
1681
/**
1682 594ff7cc Diego Biurrun
 * Width should be a multiple of 16.
1683 b1ec5875 Michael Niedermayer
 */
1684 a6100f39 Baptiste Coudurier
static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1685
                                         long width, long height,
1686
                                         long lumStride, long chromStride, long dstStride)
1687
{
1688
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1689
}
1690
1691
/**
1692
 * Width should be a multiple of 16.
1693
 */
1694 b1ec5875 Michael Niedermayer
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1695 6e42e6c4 Diego Biurrun
                                         long width, long height,
1696
                                         long lumStride, long chromStride, long dstStride)
1697 b1ec5875 Michael Niedermayer
{
1698 6e42e6c4 Diego Biurrun
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1699 b1ec5875 Michael Niedermayer
}
1700
1701
/**
1702 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1703
 * (If this is a problem for anyone then tell me, and I will fix it.)
1704 b1ec5875 Michael Niedermayer
 */
1705 1de97d84 Michael Niedermayer
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1706 6e42e6c4 Diego Biurrun
                                      long width, long height,
1707
                                      long lumStride, long chromStride, long srcStride)
1708 d9d58d17 Michael Niedermayer
{
1709 6e42e6c4 Diego Biurrun
    long y;
1710 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1711 dd68318c Ramiro Polla
    for (y=0; y<height; y+=2) {
1712 b63f641e Aurelien Jacobs
#if HAVE_MMX
1713 7ad6469e Diego Pettenò
        __asm__ volatile(
1714 9b734d44 Ramiro Polla
            "xor                 %%"REG_a", %%"REG_a"   \n\t"
1715
            "pcmpeqw                 %%mm7, %%mm7       \n\t"
1716
            "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
1717
            ASMALIGN(4)
1718
            "1:                \n\t"
1719
            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
1720
            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
1721
            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
1722
            "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
1723
            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
1724
            "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
1725
            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
1726
            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
1727
            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
1728
            "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
1729
            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
1730
1731
            MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
1732
1733
            "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
1734
            "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
1735
            "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
1736
            "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
1737
            "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
1738
            "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
1739
            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
1740
            "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
1741
            "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
1742
            "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
1743
1744
            MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
1745
1746
            "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
1747
            "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
1748
            "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
1749
            "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
1750
            "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
1751
            "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
1752
            "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
1753
            "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
1754
1755
            MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
1756
            MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
1757
1758
            "add                        $8, %%"REG_a"   \n\t"
1759
            "cmp                        %4, %%"REG_a"   \n\t"
1760
            " jb                        1b              \n\t"
1761
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1762
            : "memory", "%"REG_a
1763 6e42e6c4 Diego Biurrun
        );
1764
1765
        ydst += lumStride;
1766
        src  += srcStride;
1767
1768 7ad6469e Diego Pettenò
        __asm__ volatile(
1769 9b734d44 Ramiro Polla
            "xor                 %%"REG_a", %%"REG_a"   \n\t"
1770
            ASMALIGN(4)
1771
            "1:                                         \n\t"
1772
            PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
1773
            "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
1774
            "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
1775
            "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
1776
            "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
1777
            "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
1778
            "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
1779
            "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
1780
            "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
1781
            "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
1782
            "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
1783
1784
            MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
1785
            MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
1786
1787
            "add                        $8, %%"REG_a"   \n\t"
1788
            "cmp                        %4, %%"REG_a"   \n\t"
1789
            " jb                        1b              \n\t"
1790
1791
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1792
            : "memory", "%"REG_a
1793 6e42e6c4 Diego Biurrun
        );
1794 bd09433f Michael Niedermayer
#else
1795 6e42e6c4 Diego Biurrun
        long i;
1796 dd68318c Ramiro Polla
        for (i=0; i<chromWidth; i++) {
1797 6e42e6c4 Diego Biurrun
            ydst[2*i+0]     = src[4*i+0];
1798
            udst[i]     = src[4*i+1];
1799
            ydst[2*i+1]     = src[4*i+2];
1800
            vdst[i]     = src[4*i+3];
1801
        }
1802
        ydst += lumStride;
1803
        src  += srcStride;
1804
1805 dd68318c Ramiro Polla
        for (i=0; i<chromWidth; i++) {
1806 6e42e6c4 Diego Biurrun
            ydst[2*i+0]     = src[4*i+0];
1807
            ydst[2*i+1]     = src[4*i+2];
1808
        }
1809
#endif
1810
        udst += chromStride;
1811
        vdst += chromStride;
1812
        ydst += lumStride;
1813
        src  += srcStride;
1814
    }
1815 b63f641e Aurelien Jacobs
#if HAVE_MMX
1816 9b734d44 Ramiro Polla
    __asm__ volatile(EMMS"       \n\t"
1817
                     SFENCE"     \n\t"
1818
                     :::"memory");
1819 bd09433f Michael Niedermayer
#endif
1820 42b5fcb8 Michael Niedermayer
}
1821 81c0590e Arpi
1822 d661d18d Alex Beregszaszi
static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
1823 6e42e6c4 Diego Biurrun
                                      uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1824
                                      long width, long height, long lumStride, long chromStride)
1825 d661d18d Alex Beregszaszi
{
1826 6e42e6c4 Diego Biurrun
    /* Y Plane */
1827
    memcpy(ydst, ysrc, width*height);
1828 d661d18d Alex Beregszaszi
1829 6e42e6c4 Diego Biurrun
    /* XXX: implement upscaling for U,V */
1830 d661d18d Alex Beregszaszi
}
1831
1832 7f526efd Reimar Döffinger
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
1833 b241cbf2 Michael Niedermayer
{
1834 6e42e6c4 Diego Biurrun
    long x,y;
1835 6a4970ab Diego Biurrun
1836 6e42e6c4 Diego Biurrun
    dst[0]= src[0];
1837 6a4970ab Diego Biurrun
1838 6e42e6c4 Diego Biurrun
    // first line
1839 dd68318c Ramiro Polla
    for (x=0; x<srcWidth-1; x++) {
1840 6e42e6c4 Diego Biurrun
        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
1841
        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
1842
    }
1843
    dst[2*srcWidth-1]= src[srcWidth-1];
1844 6a4970ab Diego Biurrun
1845 9b734d44 Ramiro Polla
    dst+= dstStride;
1846 b241cbf2 Michael Niedermayer
1847 dd68318c Ramiro Polla
    for (y=1; y<srcHeight; y++) {
1848 f4406ec1 Diego Biurrun
#if HAVE_MMX2 || HAVE_AMD3DNOW
1849 d0ce212a Ramiro Polla
        const x86_reg mmxSize= srcWidth&~15;
1850 7ad6469e Diego Pettenò
        __asm__ volatile(
1851 9b734d44 Ramiro Polla
            "mov           %4, %%"REG_a"            \n\t"
1852
            "1:                                     \n\t"
1853
            "movq         (%0, %%"REG_a"), %%mm0    \n\t"
1854
            "movq         (%1, %%"REG_a"), %%mm1    \n\t"
1855
            "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
1856
            "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
1857
            "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
1858
            "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
1859
            PAVGB"                  %%mm0, %%mm5    \n\t"
1860
            PAVGB"                  %%mm0, %%mm3    \n\t"
1861
            PAVGB"                  %%mm0, %%mm5    \n\t"
1862
            PAVGB"                  %%mm0, %%mm3    \n\t"
1863
            PAVGB"                  %%mm1, %%mm4    \n\t"
1864
            PAVGB"                  %%mm1, %%mm2    \n\t"
1865
            PAVGB"                  %%mm1, %%mm4    \n\t"
1866
            PAVGB"                  %%mm1, %%mm2    \n\t"
1867
            "movq                   %%mm5, %%mm7    \n\t"
1868
            "movq                   %%mm4, %%mm6    \n\t"
1869
            "punpcklbw              %%mm3, %%mm5    \n\t"
1870
            "punpckhbw              %%mm3, %%mm7    \n\t"
1871
            "punpcklbw              %%mm2, %%mm4    \n\t"
1872
            "punpckhbw              %%mm2, %%mm6    \n\t"
1873 b241cbf2 Michael Niedermayer
#if 1
1874 9b734d44 Ramiro Polla
            MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
1875
            MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
1876
            MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
1877
            MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
1878 b241cbf2 Michael Niedermayer
#else
1879 9b734d44 Ramiro Polla
            "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
1880
            "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
1881
            "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
1882
            "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
1883
#endif
1884
            "add                       $8, %%"REG_a"            \n\t"
1885
            " js                       1b                       \n\t"
1886
            :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
1887
            "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
1888
            "g" (-mmxSize)
1889
            : "%"REG_a
1890 6e42e6c4 Diego Biurrun
1891
        );
1892 b241cbf2 Michael Niedermayer
#else
1893 9326d3f3 Michael Niedermayer
        const x86_reg mmxSize=1;
1894 6e42e6c4 Diego Biurrun
#endif
1895
        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
1896
        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
1897
1898 dd68318c Ramiro Polla
        for (x=mmxSize-1; x<srcWidth-1; x++) {
1899 6e42e6c4 Diego Biurrun
            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
1900
            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
1901
            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
1902
            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
1903
        }
1904
        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
1905
        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
1906
1907
        dst+=dstStride*2;
1908
        src+=srcStride;
1909
    }
1910 6a4970ab Diego Biurrun
1911 6e42e6c4 Diego Biurrun
    // last line
1912 b2609d4c Michael Niedermayer
#if 1
1913 6e42e6c4 Diego Biurrun
    dst[0]= src[0];
1914 6a4970ab Diego Biurrun
1915 dd68318c Ramiro Polla
    for (x=0; x<srcWidth-1; x++) {
1916 6e42e6c4 Diego Biurrun
        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
1917
        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
1918
    }
1919
    dst[2*srcWidth-1]= src[srcWidth-1];
1920 b2609d4c Michael Niedermayer
#else
1921 dd68318c Ramiro Polla
    for (x=0; x<srcWidth; x++) {
1922 6e42e6c4 Diego Biurrun
        dst[2*x+0]=
1923
        dst[2*x+1]= src[x];
1924
    }
1925 b2609d4c Michael Niedermayer
#endif
1926
1927 b63f641e Aurelien Jacobs
#if HAVE_MMX
1928 9b734d44 Ramiro Polla
    __asm__ volatile(EMMS"       \n\t"
1929
                     SFENCE"     \n\t"
1930
                     :::"memory");
1931 b241cbf2 Michael Niedermayer
#endif
1932
}
1933
1934 81c0590e Arpi
/**
1935 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1936
 * (If this is a problem for anyone then tell me, and I will fix it.)
1937
 * Chrominance data is only taken from every second line, others are ignored.
1938 594ff7cc Diego Biurrun
 * FIXME: Write HQ version.
1939 81c0590e Arpi
 */
1940 1de97d84 Michael Niedermayer
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1941 6e42e6c4 Diego Biurrun
                                      long width, long height,
1942
                                      long lumStride, long chromStride, long srcStride)
1943 81c0590e Arpi
{
1944 6e42e6c4 Diego Biurrun
    /*
     * Split packed UYVY rows (bytes U, Y, V, Y for each pair of pixels) into
     * the separate ydst / udst / vdst planes.
     *
     * Rows are consumed in pairs: the first row of a pair supplies luma and
     * chroma, the second row supplies luma only, so its chroma bytes are
     * dropped. src and ydst advance by srcStride / lumStride once per row;
     * udst and vdst advance by chromStride once per row pair.
     */
    long y;
1945 9326d3f3 Michael Niedermayer
    /* Chroma samples per row: one U and one V for every two luma samples. */
    const x86_reg chromWidth= width>>1;
1946 dd68318c Ramiro Polla
    /*
     * y counts source rows, two per pass. NOTE: when height is odd the last
     * pass still reads and writes a second row.
     */
    for (y=0; y<height; y+=2) {
1947 b63f641e Aurelien Jacobs
#if HAVE_MMX
1948 7ad6469e Diego Pettenò
        /*
         * First row of the pair: extract Y, U and V.
         * REG_a is the chroma-sample index; it scales by 4 into src, by 2 into
         * ydst and by 1 into udst/vdst. Each pass handles 8 chroma samples
         * (32 source bytes -> 16 Y + 8 U + 8 V). The bound is tested at the
         * bottom, so the body always runs at least once.
         */
        __asm__ volatile(
1949 9b734d44 Ramiro Polla
            "xor                 %%"REG_a", %%"REG_a"   \n\t"
1950
            "pcmpeqw             %%mm7, %%mm7   \n\t"
1951
            "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
1952
            ASMALIGN(4)
1953
            "1:                                 \n\t"
1954
            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
1955
            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
1956
            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
1957
            "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
1958
            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
1959
            "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
1960
            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
1961
            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
1962
            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
1963
            "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
1964
            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
1965
1966
            // store luma samples 0..7 of this pass
            MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
1967
1968
            "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
1969
            "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
1970
            "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
1971
            "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
1972
            "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
1973
            "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
1974
            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
1975
            "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
1976
            "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
1977
            "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
1978
1979
            // store luma samples 8..15 of this pass
            MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
1980
1981
            // de-interleave the packed UV bytes held in mm0/mm1 into V and U
            "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
1982
            "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
1983
            "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
1984
            "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
1985
            "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
1986
            "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
1987
            "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
1988
            "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
1989
1990
            // %3 is vdst, %2 is udst
            MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
1991
            MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
1992
1993
            // advance 8 chroma samples; repeat while REG_a < chromWidth (unsigned)
            "add                    $8, %%"REG_a"   \n\t"
1994
            "cmp                    %4, %%"REG_a"   \n\t"
1995
            " jb                    1b          \n\t"
1996
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1997
            : "memory", "%"REG_a
1998 6e42e6c4 Diego Biurrun
        );
1999
2000
        ydst += lumStride;
2001
        src  += srcStride;
2002
2003 7ad6469e Diego Pettenò
        /*
         * Second row of the pair: luma only. The operand list matches the
         * first asm statement, but this template never references %2 / %3
         * (udst / vdst), so this row's chroma bytes are not stored.
         */
        __asm__ volatile(
2004 9b734d44 Ramiro Polla
            "xor                 %%"REG_a", %%"REG_a"   \n\t"
2005
            ASMALIGN(4)
2006
            "1:                                 \n\t"
2007
            PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
2008
            "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
2009
            "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
2010
            "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(8)
2011
            "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // UYVY UYVY(12)
2012
            "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
2013
            "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
2014
            "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
2015
            "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
2016
            "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
2017
            "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
2018
2019
            MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
2020
            MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
2021
2022
            // same stepping as the first loop: 8 chroma samples (16 luma) per pass
            "add                    $8, %%"REG_a"   \n\t"
2023
            "cmp                    %4, %%"REG_a"   \n\t"
2024
            " jb                    1b          \n\t"
2025
2026
            ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
2027
            : "memory", "%"REG_a
2028 6e42e6c4 Diego Biurrun
        );
2029 ed8c0670 Michael Niedermayer
#else
2030 6e42e6c4 Diego Biurrun
        /* Portable path: same row-pair scheme, one chroma sample per step. */
        long i;
2031 dd68318c Ramiro Polla
        /* First row of the pair: source byte order is U, Y, V, Y. */
        for (i=0; i<chromWidth; i++) {
2032 6e42e6c4 Diego Biurrun
            udst[i]     = src[4*i+0];
2033
            ydst[2*i+0] = src[4*i+1];
2034
            vdst[i]     = src[4*i+2];
2035
            ydst[2*i+1] = src[4*i+3];
2036
        }
2037
        ydst += lumStride;
2038
        src  += srcStride;
2039
2040 dd68318c Ramiro Polla
        /* Second row of the pair: copy luma only, skip the U and V bytes. */
        for (i=0; i<chromWidth; i++) {
2041 6e42e6c4 Diego Biurrun
            ydst[2*i+0] = src[4*i+1];
2042
            ydst[2*i+1] = src[4*i+3];
2043
        }
2044
#endif
2045
        /* Step past the second row and move the chroma planes down one row. */
        udst += chromStride;
2046
        vdst += chromStride;
2047
        ydst += lumStride;
2048
        src  += srcStride;
2049
    }
2050 b63f641e Aurelien Jacobs
#if HAVE_MMX
2051 9b734d44 Ramiro Polla
    /*
     * EMMS and SFENCE are macros defined elsewhere in this file; presumably
     * they leave MMX state and order the MOVNTQ stores -- TODO confirm
     * against their definitions.
     */
    __asm__ volatile(EMMS"       \n\t"
2052
                     SFENCE"     \n\t"
2053
                     :::"memory");
2054 ed8c0670 Michael Niedermayer
#endif
2055 81c0590e Arpi
}
2056
2057 1de97d84 Michael Niedermayer
/**
2058 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 2.
2059
 * (If this is a problem for anyone then tell me, and I will fix it.)
2060
 * Chrominance data is only taken from every second line,
2061 594ff7cc Diego Biurrun
 * others are ignored in the C version.
2062
 * FIXME: Write HQ version.
2063 1de97d84 Michael Niedermayer
 */
2064
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2065 6e42e6c4 Diego Biurrun
                                       long width, long height,
2066
                                       long lumStride, long chromStride, long srcStride)
2067 1de97d84 Michael Niedermayer
{
2068 6e42e6c4 Diego Biurrun
    long y;
2069 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
2070 b63f641e Aurelien Jacobs
#if HAVE_MMX
2071 dd68318c Ramiro Polla
    for (y=0; y<height-2; y+=2) {
2072 6e42e6c4 Diego Biurrun
        long i;
2073 dd68318c Ramiro Polla
        for (i=0; i<2; i++) {
2074 7ad6469e Diego Pettenò
            __asm__ volatile(
2075 9b734d44 Ramiro Polla
                "mov                        %2, %%"REG_a"   \n\t"
2076
                "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
2077
                "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
2078
                "pxor                    %%mm7, %%mm7       \n\t"
2079
                "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2080
                ASMALIGN(4)
2081
                "1:                                         \n\t"
2082
                PREFETCH"    64(%0, %%"REG_d")              \n\t"
2083
                "movd          (%0, %%"REG_d"), %%mm0       \n\t"
2084
                "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
2085
                "punpcklbw               %%mm7, %%mm0       \n\t"
2086
                "punpcklbw               %%mm7, %%mm1       \n\t"
2087
                "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
2088
                "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
2089
                "punpcklbw               %%mm7, %%mm2       \n\t"
2090
                "punpcklbw               %%mm7, %%mm3       \n\t"
2091
                "pmaddwd                 %%mm6, %%mm0       \n\t"
2092
                "pmaddwd                 %%mm6, %%mm1       \n\t"
2093
                "pmaddwd                 %%mm6, %%mm2       \n\t"
2094
                "pmaddwd                 %%mm6, %%mm3       \n\t"
2095
#ifndef FAST_BGR2YV12
2096
                "psrad                      $8, %%mm0       \n\t"
2097
                "psrad                      $8, %%mm1       \n\t"
2098
                "psrad                      $8, %%mm2       \n\t"
2099
                "psrad                      $8, %%mm3       \n\t"
2100
#endif
2101
                "packssdw                %%mm1, %%mm0       \n\t"
2102
                "packssdw                %%mm3, %%mm2       \n\t"
2103
                "pmaddwd                 %%mm5, %%mm0       \n\t"
2104
                "pmaddwd                 %%mm5, %%mm2       \n\t"
2105
                "packssdw                %%mm2, %%mm0       \n\t"
2106
                "psraw                      $7, %%mm0       \n\t"
2107
2108
                "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
2109
                "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
2110
                "punpcklbw               %%mm7, %%mm4       \n\t"
2111
                "punpcklbw               %%mm7, %%mm1       \n\t"
2112
                "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
2113
                "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
2114
                "punpcklbw               %%mm7, %%mm2       \n\t"
2115
                "punpcklbw               %%mm7, %%mm3       \n\t"
2116
                "pmaddwd                 %%mm6, %%mm4       \n\t"
2117
                "pmaddwd                 %%mm6, %%mm1       \n\t"
2118
                "pmaddwd                 %%mm6, %%mm2       \n\t"
2119
                "pmaddwd                 %%mm6, %%mm3       \n\t"
2120
#ifndef FAST_BGR2YV12
2121
                "psrad                      $8, %%mm4       \n\t"
2122
                "psrad                      $8, %%mm1       \n\t"
2123
                "psrad                      $8, %%mm2       \n\t"
2124
                "psrad                      $8, %%mm3       \n\t"
2125
#endif
2126
                "packssdw                %%mm1, %%mm4       \n\t"
2127
                "packssdw                %%mm3, %%mm2       \n\t"
2128
                "pmaddwd                 %%mm5, %%mm4       \n\t"
2129
                "pmaddwd                 %%mm5, %%mm2       \n\t"
2130
                "add                       $24, %%"REG_d"   \n\t"
2131
                "packssdw                %%mm2, %%mm4       \n\t"
2132
                "psraw                      $7, %%mm4       \n\t"
2133
2134
                "packuswb                %%mm4, %%mm0       \n\t"
2135
                "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
2136
2137
                MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
2138
                "add                        $8,      %%"REG_a"  \n\t"
2139
                " js                        1b                  \n\t"
2140
                : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
2141
                : "%"REG_a, "%"REG_d
2142
            );
2143
            ydst += lumStride;
2144
            src  += srcStride;
2145
        }
2146
        src -= srcStride*2;
2147
        __asm__ volatile(
2148
            "mov                        %4, %%"REG_a"   \n\t"
2149 5802683a Reimar Döffinger
            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
2150 9b734d44 Ramiro Polla
            "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
2151 6e42e6c4 Diego Biurrun
            "pxor                    %%mm7, %%mm7       \n\t"
2152
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2153 9b734d44 Ramiro Polla
            "add                 %%"REG_d", %%"REG_d"   \n\t"
2154 6e42e6c4 Diego Biurrun
            ASMALIGN(4)
2155
            "1:                                         \n\t"
2156
            PREFETCH"    64(%0, %%"REG_d")              \n\t"
2157 9b734d44 Ramiro Polla
            PREFETCH"    64(%1, %%"REG_d")              \n\t"
2158
#if HAVE_MMX2 || HAVE_AMD3DNOW
2159
            "movq          (%0, %%"REG_d"), %%mm0       \n\t"
2160
            "movq          (%1, %%"REG_d"), %%mm1       \n\t"
2161
            "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
2162
            "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
2163
            PAVGB"                   %%mm1, %%mm0       \n\t"
2164
            PAVGB"                   %%mm3, %%mm2       \n\t"
2165
            "movq                    %%mm0, %%mm1       \n\t"
2166
            "movq                    %%mm2, %%mm3       \n\t"
2167
            "psrlq                     $24, %%mm0       \n\t"
2168
            "psrlq                     $24, %%mm2       \n\t"
2169
            PAVGB"                   %%mm1, %%mm0       \n\t"
2170
            PAVGB"                   %%mm3, %%mm2       \n\t"
2171
            "punpcklbw               %%mm7, %%mm0       \n\t"
2172
            "punpcklbw               %%mm7, %%mm2       \n\t"
2173
#else
2174 6e42e6c4 Diego Biurrun
            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
2175 9b734d44 Ramiro Polla
            "movd          (%1, %%"REG_d"), %%mm1       \n\t"
2176
            "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
2177
            "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
2178 6e42e6c4 Diego Biurrun
            "punpcklbw               %%mm7, %%mm0       \n\t"
2179
            "punpcklbw               %%mm7, %%mm1       \n\t"
2180
            "punpcklbw               %%mm7, %%mm2       \n\t"
2181
            "punpcklbw               %%mm7, %%mm3       \n\t"
2182 9b734d44 Ramiro Polla
            "paddw                   %%mm1, %%mm0       \n\t"
2183
            "paddw                   %%mm3, %%mm2       \n\t"
2184
            "paddw                   %%mm2, %%mm0       \n\t"
2185
            "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
2186
            "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
2187
            "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
2188
            "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
2189
            "punpcklbw               %%mm7, %%mm4       \n\t"
2190
            "punpcklbw               %%mm7, %%mm1       \n\t"
2191
            "punpcklbw               %%mm7, %%mm2       \n\t"
2192
            "punpcklbw               %%mm7, %%mm3       \n\t"
2193
            "paddw                   %%mm1, %%mm4       \n\t"
2194
            "paddw                   %%mm3, %%mm2       \n\t"
2195
            "paddw                   %%mm4, %%mm2       \n\t"
2196
            "psrlw                      $2, %%mm0       \n\t"
2197
            "psrlw                      $2, %%mm2       \n\t"
2198
#endif
2199
            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
2200
            "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
2201
2202
            "pmaddwd                 %%mm0, %%mm1       \n\t"
2203
            "pmaddwd                 %%mm2, %%mm3       \n\t"
2204 6e42e6c4 Diego Biurrun
            "pmaddwd                 %%mm6, %%mm0       \n\t"
2205
            "pmaddwd                 %%mm6, %%mm2       \n\t"
2206 21316f3c Michael Niedermayer
#ifndef FAST_BGR2YV12
2207 6e42e6c4 Diego Biurrun
            "psrad                      $8, %%mm0       \n\t"
2208
            "psrad                      $8, %%mm1       \n\t"
2209
            "psrad                      $8, %%mm2       \n\t"
2210
            "psrad                      $8, %%mm3       \n\t"
2211
#endif
2212
            "packssdw                %%mm2, %%mm0       \n\t"
2213 9b734d44 Ramiro Polla
            "packssdw                %%mm3, %%mm1       \n\t"
2214
            "pmaddwd                 %%mm5, %%mm0       \n\t"
2215
            "pmaddwd                 %%mm5, %%mm1       \n\t"
2216
            "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
2217 6e42e6c4 Diego Biurrun
            "psraw                      $7, %%mm0       \n\t"
2218
2219 9b734d44 Ramiro Polla
#if HAVE_MMX2 || HAVE_AMD3DNOW
2220
            "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
2221
            "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
2222
            "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
2223
            "movq        18(%1, %%"REG_d"), %%mm3       \n\t"
2224
            PAVGB"                   %%mm1, %%mm4       \n\t"
2225
            PAVGB"                   %%mm3, %%mm2       \n\t"
2226
            "movq                    %%mm4, %%mm1       \n\t"
2227
            "movq                    %%mm2, %%mm3       \n\t"
2228
            "psrlq                     $24, %%mm4       \n\t"
2229
            "psrlq                     $24, %%mm2       \n\t"
2230
            PAVGB"                   %%mm1, %%mm4       \n\t"
2231
            PAVGB"                   %%mm3, %%mm2       \n\t"
2232
            "punpcklbw               %%mm7, %%mm4       \n\t"
2233
            "punpcklbw               %%mm7, %%mm2       \n\t"