Statistics
| Branch: | Revision:

ffmpeg / libswscale / rgb2rgb_template.c @ f6cf4ed0

History | View | Annotate | Download (106 KB)

1 fcfbc150 Michael Niedermayer
/*
2 8a322796 Diego Biurrun
 * software RGB to RGB converter
3
 * pluralize by software PAL8 to RGB converter
4
 *              software YUV to YUV converter
5
 *              software YUV to RGB converter
6
 * Written by Nick Kurshev.
7
 * palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
8
 * lot of big-endian byte order fixes by Alex Beregszaszi
9 4fadc2b4 Diego Biurrun
 *
10 d026b45e Diego Biurrun
 * This file is part of FFmpeg.
11
 *
12
 * FFmpeg is free software; you can redistribute it and/or modify
13 4fadc2b4 Diego Biurrun
 * it under the terms of the GNU General Public License as published by
14
 * the Free Software Foundation; either version 2 of the License, or
15
 * (at your option) any later version.
16
 *
17 d026b45e Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
18 4fadc2b4 Diego Biurrun
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20
 * GNU General Public License for more details.
21
 *
22
 * You should have received a copy of the GNU General Public License
23 d026b45e Diego Biurrun
 * along with FFmpeg; if not, write to the Free Software
24 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 6a4970ab Diego Biurrun
 *
26 8a322796 Diego Biurrun
 * The C code (not assembly, MMX, ...) of this file can be used
27 594ff7cc Diego Biurrun
 * under the LGPL license.
28 a3aece93 Nick Kurshev
 */
29
30 0d9f3d85 Arpi
#include <stddef.h>
31
32 1de97d84 Michael Niedermayer
/*
 * Instruction-name helper macros used by the inline assembly below.
 * Each one is #undef'd first so that a previous definition (if any) does not
 * leak into this copy of the template.
 */
#undef PREFETCH
#undef MOVNTQ
#undef EMMS
#undef SFENCE
#undef MMREG_SIZE
#undef PREFETCHW
#undef PAVGB

/* Size in bytes of the widest SIMD register selected by the configuration. */
#if HAVE_SSE2
#define MMREG_SIZE 16
#else
#define MMREG_SIZE 8
#endif

/*
 * Prefetch and byte-average mnemonics.
 * Without 3DNow! or MMX2 the prefetches expand to an assembler comment
 * (i.e. nothing); PAVGB is intentionally left undefined in that case.
 */
#if HAVE_AMD3DNOW
#define PREFETCH  "prefetch"
#define PREFETCHW "prefetchw"
#define PAVGB     "pavgusb"
#elif HAVE_MMX2
#define PREFETCH "prefetchnta"
#define PREFETCHW "prefetcht0"
#define PAVGB     "pavgb"
#else
#define PREFETCH  " # nop"
#define PREFETCHW " # nop"
#endif

#if HAVE_AMD3DNOW
/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */
#define EMMS     "femms"
#else
#define EMMS     "emms"
#endif

/*
 * Store mnemonics: with MMX2 a non-temporal store plus a store fence is used,
 * otherwise a plain movq and an assembler comment in place of the fence.
 */
#if HAVE_MMX2
#define MOVNTQ "movntq"
#define SFENCE "sfence"
#else
#define MOVNTQ "movq"
#define SFENCE " # nop"
#endif
73 1de97d84 Michael Niedermayer
74 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
75 b234ae81 Nick Kurshev
{
76 6e42e6c4 Diego Biurrun
    uint8_t *dest = dst;
77
    const uint8_t *s = src;
78
    const uint8_t *end;
79 b63f641e Aurelien Jacobs
    #if HAVE_MMX
80 6e42e6c4 Diego Biurrun
        const uint8_t *mm_end;
81
    #endif
82
    end = s + src_size;
83 b63f641e Aurelien Jacobs
    #if HAVE_MMX
84 7ad6469e Diego Pettenò
        __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
85 6e42e6c4 Diego Biurrun
        mm_end = end - 23;
86 f8a138be Cédric Schieli
        __asm__ volatile("movq        %0, %%mm7"::"m"(mask32a):"memory");
87 6e42e6c4 Diego Biurrun
        while (s < mm_end)
88
        {
89 7ad6469e Diego Pettenò
            __asm__ volatile(
90 6e42e6c4 Diego Biurrun
            PREFETCH"    32%1           \n\t"
91
            "movd          %1, %%mm0    \n\t"
92
            "punpckldq    3%1, %%mm0    \n\t"
93
            "movd         6%1, %%mm1    \n\t"
94
            "punpckldq    9%1, %%mm1    \n\t"
95
            "movd        12%1, %%mm2    \n\t"
96
            "punpckldq   15%1, %%mm2    \n\t"
97
            "movd        18%1, %%mm3    \n\t"
98
            "punpckldq   21%1, %%mm3    \n\t"
99 f8a138be Cédric Schieli
            "por        %%mm7, %%mm0    \n\t"
100
            "por        %%mm7, %%mm1    \n\t"
101
            "por        %%mm7, %%mm2    \n\t"
102
            "por        %%mm7, %%mm3    \n\t"
103 6e42e6c4 Diego Biurrun
            MOVNTQ"     %%mm0,   %0     \n\t"
104
            MOVNTQ"     %%mm1,  8%0     \n\t"
105
            MOVNTQ"     %%mm2, 16%0     \n\t"
106
            MOVNTQ"     %%mm3, 24%0"
107
            :"=m"(*dest)
108
            :"m"(*s)
109
            :"memory");
110
            dest += 32;
111
            s += 24;
112
        }
113 7ad6469e Diego Pettenò
        __asm__ volatile(SFENCE:::"memory");
114
        __asm__ volatile(EMMS:::"memory");
115 6e42e6c4 Diego Biurrun
    #endif
116
    while (s < end)
117
    {
118
    #ifdef WORDS_BIGENDIAN
119
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
120 f8a138be Cédric Schieli
        *dest++ = 255;
121 6e42e6c4 Diego Biurrun
        *dest++ = s[2];
122
        *dest++ = s[1];
123
        *dest++ = s[0];
124
        s+=3;
125
    #else
126
        *dest++ = *s++;
127
        *dest++ = *s++;
128
        *dest++ = *s++;
129 f8a138be Cédric Schieli
        *dest++ = 255;
130 6e42e6c4 Diego Biurrun
    #endif
131
    }
132 b234ae81 Nick Kurshev
}
133 59ac5a93 Nick Kurshev
134 6107059c Michael Niedermayer
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
135 59ac5a93 Nick Kurshev
{
136 6e42e6c4 Diego Biurrun
    uint8_t *dest = dst;
137
    const uint8_t *s = src;
138
    const uint8_t *end;
139 b63f641e Aurelien Jacobs
#if HAVE_MMX
140 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
141 494a6294 Nick Kurshev
#endif
142 6e42e6c4 Diego Biurrun
    end = s + src_size;
143 b63f641e Aurelien Jacobs
#if HAVE_MMX
144 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
145 6e42e6c4 Diego Biurrun
    mm_end = end - 31;
146
    while (s < mm_end)
147
    {
148 7ad6469e Diego Pettenò
        __asm__ volatile(
149 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
150
        "movq          %1, %%mm0    \n\t"
151
        "movq         8%1, %%mm1    \n\t"
152
        "movq        16%1, %%mm4    \n\t"
153
        "movq        24%1, %%mm5    \n\t"
154
        "movq       %%mm0, %%mm2    \n\t"
155
        "movq       %%mm1, %%mm3    \n\t"
156
        "movq       %%mm4, %%mm6    \n\t"
157
        "movq       %%mm5, %%mm7    \n\t"
158
        "psrlq         $8, %%mm2    \n\t"
159
        "psrlq         $8, %%mm3    \n\t"
160
        "psrlq         $8, %%mm6    \n\t"
161
        "psrlq         $8, %%mm7    \n\t"
162
        "pand          %2, %%mm0    \n\t"
163
        "pand          %2, %%mm1    \n\t"
164
        "pand          %2, %%mm4    \n\t"
165
        "pand          %2, %%mm5    \n\t"
166
        "pand          %3, %%mm2    \n\t"
167
        "pand          %3, %%mm3    \n\t"
168
        "pand          %3, %%mm6    \n\t"
169
        "pand          %3, %%mm7    \n\t"
170
        "por        %%mm2, %%mm0    \n\t"
171
        "por        %%mm3, %%mm1    \n\t"
172
        "por        %%mm6, %%mm4    \n\t"
173
        "por        %%mm7, %%mm5    \n\t"
174
175
        "movq       %%mm1, %%mm2    \n\t"
176
        "movq       %%mm4, %%mm3    \n\t"
177
        "psllq        $48, %%mm2    \n\t"
178
        "psllq        $32, %%mm3    \n\t"
179
        "pand          %4, %%mm2    \n\t"
180
        "pand          %5, %%mm3    \n\t"
181
        "por        %%mm2, %%mm0    \n\t"
182
        "psrlq        $16, %%mm1    \n\t"
183
        "psrlq        $32, %%mm4    \n\t"
184
        "psllq        $16, %%mm5    \n\t"
185
        "por        %%mm3, %%mm1    \n\t"
186
        "pand          %6, %%mm5    \n\t"
187
        "por        %%mm5, %%mm4    \n\t"
188
189
        MOVNTQ"     %%mm0,   %0     \n\t"
190
        MOVNTQ"     %%mm1,  8%0     \n\t"
191
        MOVNTQ"     %%mm4, 16%0"
192
        :"=m"(*dest)
193
        :"m"(*s),"m"(mask24l),
194
         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
195
        :"memory");
196
        dest += 24;
197
        s += 32;
198
    }
199 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
200
    __asm__ volatile(EMMS:::"memory");
201 6e42e6c4 Diego Biurrun
#endif
202
    while (s < end)
203
    {
204 6cb38650 Alex Beregszaszi
#ifdef WORDS_BIGENDIAN
205 6e42e6c4 Diego Biurrun
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
206
        s++;
207
        dest[2] = *s++;
208
        dest[1] = *s++;
209
        dest[0] = *s++;
210
        dest += 3;
211 6cb38650 Alex Beregszaszi
#else
212 6e42e6c4 Diego Biurrun
        *dest++ = *s++;
213
        *dest++ = *s++;
214
        *dest++ = *s++;
215
        s++;
216 6cb38650 Alex Beregszaszi
#endif
217 6e42e6c4 Diego Biurrun
    }
218 59ac5a93 Nick Kurshev
}
219 b238eb2e Nick Kurshev
220 a3aece93 Nick Kurshev
/*
221 8a322796 Diego Biurrun
 original by Strepto/Astral
222
 ported to gcc & bugfixed: A'rpi
223 51da31f1 Nick Kurshev
 MMX2, 3DNOW optimization by Nick Kurshev
224 8a322796 Diego Biurrun
 32-bit C version, and and&add trick by Michael Niedermayer
225 a3aece93 Nick Kurshev
*/
226 30c48a0a Benoit Fouet
static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_size)
227 b238eb2e Nick Kurshev
{
228 6e42e6c4 Diego Biurrun
    register const uint8_t* s=src;
229
    register uint8_t* d=dst;
230
    register const uint8_t *end;
231
    const uint8_t *mm_end;
232
    end = s + src_size;
233 b63f641e Aurelien Jacobs
#if HAVE_MMX
234 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
235
    __asm__ volatile("movq        %0, %%mm4"::"m"(mask15s));
236 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
237
    while (s<mm_end)
238
    {
239 7ad6469e Diego Pettenò
        __asm__ volatile(
240 6e42e6c4 Diego Biurrun
        PREFETCH"  32%1         \n\t"
241
        "movq        %1, %%mm0  \n\t"
242
        "movq       8%1, %%mm2  \n\t"
243
        "movq     %%mm0, %%mm1  \n\t"
244
        "movq     %%mm2, %%mm3  \n\t"
245
        "pand     %%mm4, %%mm0  \n\t"
246
        "pand     %%mm4, %%mm2  \n\t"
247
        "paddw    %%mm1, %%mm0  \n\t"
248
        "paddw    %%mm3, %%mm2  \n\t"
249
        MOVNTQ"   %%mm0,  %0    \n\t"
250
        MOVNTQ"   %%mm2, 8%0"
251
        :"=m"(*d)
252
        :"m"(*s)
253
        );
254
        d+=16;
255
        s+=16;
256
    }
257 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
258
    __asm__ volatile(EMMS:::"memory");
259 b238eb2e Nick Kurshev
#endif
260 d8dad2a5 Michael Niedermayer
    mm_end = end - 3;
261 6e42e6c4 Diego Biurrun
    while (s < mm_end)
262 0d9f3d85 Arpi
    {
263 994c1ef0 Baptiste Coudurier
        register unsigned x= *((const uint32_t *)s);
264 6e42e6c4 Diego Biurrun
        *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
265
        d+=4;
266
        s+=4;
267 0d9f3d85 Arpi
    }
268 6e42e6c4 Diego Biurrun
    if (s < end)
269 0d9f3d85 Arpi
    {
270 994c1ef0 Baptiste Coudurier
        register unsigned short x= *((const uint16_t *)s);
271 6e42e6c4 Diego Biurrun
        *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
272 0d9f3d85 Arpi
    }
273 b238eb2e Nick Kurshev
}
274 fcfbc150 Michael Niedermayer
275 30c48a0a Benoit Fouet
static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size)
276 ac4d0aea Michael Niedermayer
{
277 6e42e6c4 Diego Biurrun
    register const uint8_t* s=src;
278
    register uint8_t* d=dst;
279
    register const uint8_t *end;
280
    const uint8_t *mm_end;
281
    end = s + src_size;
282 b63f641e Aurelien Jacobs
#if HAVE_MMX
283 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s));
284
    __asm__ volatile("movq        %0, %%mm7"::"m"(mask15rg));
285
    __asm__ volatile("movq        %0, %%mm6"::"m"(mask15b));
286 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
287
    while (s<mm_end)
288
    {
289 7ad6469e Diego Pettenò
        __asm__ volatile(
290 6e42e6c4 Diego Biurrun
        PREFETCH"  32%1         \n\t"
291
        "movq        %1, %%mm0  \n\t"
292
        "movq       8%1, %%mm2  \n\t"
293
        "movq     %%mm0, %%mm1  \n\t"
294
        "movq     %%mm2, %%mm3  \n\t"
295
        "psrlq       $1, %%mm0  \n\t"
296
        "psrlq       $1, %%mm2  \n\t"
297
        "pand     %%mm7, %%mm0  \n\t"
298
        "pand     %%mm7, %%mm2  \n\t"
299
        "pand     %%mm6, %%mm1  \n\t"
300
        "pand     %%mm6, %%mm3  \n\t"
301
        "por      %%mm1, %%mm0  \n\t"
302
        "por      %%mm3, %%mm2  \n\t"
303
        MOVNTQ"   %%mm0,  %0    \n\t"
304
        MOVNTQ"   %%mm2, 8%0"
305
        :"=m"(*d)
306
        :"m"(*s)
307
        );
308
        d+=16;
309
        s+=16;
310
    }
311 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
312
    __asm__ volatile(EMMS:::"memory");
313 ac4d0aea Michael Niedermayer
#endif
314 0598bcbb Michael Niedermayer
    mm_end = end - 3;
315 6e42e6c4 Diego Biurrun
    while (s < mm_end)
316 ac4d0aea Michael Niedermayer
    {
317 ce3d365f Baptiste Coudurier
        register uint32_t x= *((const uint32_t*)s);
318 6e42e6c4 Diego Biurrun
        *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
319
        s+=4;
320
        d+=4;
321 ac4d0aea Michael Niedermayer
    }
322 6e42e6c4 Diego Biurrun
    if (s < end)
323 ac4d0aea Michael Niedermayer
    {
324 ce3d365f Baptiste Coudurier
        register uint16_t x= *((const uint16_t*)s);
325 6e42e6c4 Diego Biurrun
        *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
326
        s+=2;
327
        d+=2;
328 ac4d0aea Michael Niedermayer
    }
329
}
330
331 7f526efd Reimar Döffinger
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_size)
332 fcfbc150 Michael Niedermayer
{
333 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
334
    const uint8_t *end;
335 b63f641e Aurelien Jacobs
#if HAVE_MMX
336 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
337 0d9f3d85 Arpi
#endif
338 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
339
    end = s + src_size;
340 b63f641e Aurelien Jacobs
#if HAVE_MMX
341 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
342 594ff7cc Diego Biurrun
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
343 7ad6469e Diego Pettenò
    __asm__ volatile(
344 6e42e6c4 Diego Biurrun
    "movq           %3, %%mm5   \n\t"
345
    "movq           %4, %%mm6   \n\t"
346
    "movq           %5, %%mm7   \n\t"
347
    "jmp 2f                     \n\t"
348
    ASMALIGN(4)
349
    "1:                         \n\t"
350
    PREFETCH"   32(%1)          \n\t"
351
    "movd         (%1), %%mm0   \n\t"
352
    "movd        4(%1), %%mm3   \n\t"
353
    "punpckldq   8(%1), %%mm0   \n\t"
354
    "punpckldq  12(%1), %%mm3   \n\t"
355
    "movq        %%mm0, %%mm1   \n\t"
356
    "movq        %%mm3, %%mm4   \n\t"
357
    "pand        %%mm6, %%mm0   \n\t"
358
    "pand        %%mm6, %%mm3   \n\t"
359
    "pmaddwd     %%mm7, %%mm0   \n\t"
360
    "pmaddwd     %%mm7, %%mm3   \n\t"
361
    "pand        %%mm5, %%mm1   \n\t"
362
    "pand        %%mm5, %%mm4   \n\t"
363
    "por         %%mm1, %%mm0   \n\t"
364
    "por         %%mm4, %%mm3   \n\t"
365
    "psrld          $5, %%mm0   \n\t"
366
    "pslld         $11, %%mm3   \n\t"
367
    "por         %%mm3, %%mm0   \n\t"
368
    MOVNTQ"      %%mm0, (%0)    \n\t"
369
    "add           $16,  %1     \n\t"
370
    "add            $8,  %0     \n\t"
371
    "2:                         \n\t"
372
    "cmp            %2,  %1     \n\t"
373
    " jb            1b          \n\t"
374
    : "+r" (d), "+r"(s)
375
    : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
376
    );
377 aeae5d53 Michael Niedermayer
#else
378 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
379
    __asm__ volatile(
380 6e42e6c4 Diego Biurrun
        "movq    %0, %%mm7    \n\t"
381
        "movq    %1, %%mm6    \n\t"
382
        ::"m"(red_16mask),"m"(green_16mask));
383
    while (s < mm_end)
384
    {
385 7ad6469e Diego Pettenò
        __asm__ volatile(
386 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
387
        "movd          %1, %%mm0    \n\t"
388
        "movd         4%1, %%mm3    \n\t"
389
        "punpckldq    8%1, %%mm0    \n\t"
390
        "punpckldq   12%1, %%mm3    \n\t"
391
        "movq       %%mm0, %%mm1    \n\t"
392
        "movq       %%mm0, %%mm2    \n\t"
393
        "movq       %%mm3, %%mm4    \n\t"
394
        "movq       %%mm3, %%mm5    \n\t"
395
        "psrlq         $3, %%mm0    \n\t"
396
        "psrlq         $3, %%mm3    \n\t"
397
        "pand          %2, %%mm0    \n\t"
398
        "pand          %2, %%mm3    \n\t"
399
        "psrlq         $5, %%mm1    \n\t"
400
        "psrlq         $5, %%mm4    \n\t"
401
        "pand       %%mm6, %%mm1    \n\t"
402
        "pand       %%mm6, %%mm4    \n\t"
403
        "psrlq         $8, %%mm2    \n\t"
404
        "psrlq         $8, %%mm5    \n\t"
405
        "pand       %%mm7, %%mm2    \n\t"
406
        "pand       %%mm7, %%mm5    \n\t"
407
        "por        %%mm1, %%mm0    \n\t"
408
        "por        %%mm4, %%mm3    \n\t"
409
        "por        %%mm2, %%mm0    \n\t"
410
        "por        %%mm5, %%mm3    \n\t"
411
        "psllq        $16, %%mm3    \n\t"
412
        "por        %%mm3, %%mm0    \n\t"
413
        MOVNTQ"     %%mm0, %0       \n\t"
414
        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
415
        d += 4;
416
        s += 16;
417
    }
418
#endif
419 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
420
    __asm__ volatile(EMMS:::"memory");
421 6e42e6c4 Diego Biurrun
#endif
422
    while (s < end)
423
    {
424 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
425 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
426
    }
427 fcfbc150 Michael Niedermayer
}
428
429 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
430 ac4d0aea Michael Niedermayer
{
431 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
432
    const uint8_t *end;
433 b63f641e Aurelien Jacobs
#if HAVE_MMX
434 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
435 ac4d0aea Michael Niedermayer
#endif
436 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
437
    end = s + src_size;
438 b63f641e Aurelien Jacobs
#if HAVE_MMX
439 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
440
    __asm__ volatile(
441 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
442
        "movq          %1, %%mm6    \n\t"
443
        ::"m"(red_16mask),"m"(green_16mask));
444
    mm_end = end - 15;
445
    while (s < mm_end)
446
    {
447 7ad6469e Diego Pettenò
        __asm__ volatile(
448 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
449
        "movd          %1, %%mm0    \n\t"
450
        "movd         4%1, %%mm3    \n\t"
451
        "punpckldq    8%1, %%mm0    \n\t"
452
        "punpckldq   12%1, %%mm3    \n\t"
453
        "movq       %%mm0, %%mm1    \n\t"
454
        "movq       %%mm0, %%mm2    \n\t"
455
        "movq       %%mm3, %%mm4    \n\t"
456
        "movq       %%mm3, %%mm5    \n\t"
457
        "psllq         $8, %%mm0    \n\t"
458
        "psllq         $8, %%mm3    \n\t"
459
        "pand       %%mm7, %%mm0    \n\t"
460
        "pand       %%mm7, %%mm3    \n\t"
461
        "psrlq         $5, %%mm1    \n\t"
462
        "psrlq         $5, %%mm4    \n\t"
463
        "pand       %%mm6, %%mm1    \n\t"
464
        "pand       %%mm6, %%mm4    \n\t"
465
        "psrlq        $19, %%mm2    \n\t"
466
        "psrlq        $19, %%mm5    \n\t"
467
        "pand          %2, %%mm2    \n\t"
468
        "pand          %2, %%mm5    \n\t"
469
        "por        %%mm1, %%mm0    \n\t"
470
        "por        %%mm4, %%mm3    \n\t"
471
        "por        %%mm2, %%mm0    \n\t"
472
        "por        %%mm5, %%mm3    \n\t"
473
        "psllq        $16, %%mm3    \n\t"
474
        "por        %%mm3, %%mm0    \n\t"
475
        MOVNTQ"     %%mm0, %0       \n\t"
476
        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
477
        d += 4;
478
        s += 16;
479
    }
480 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
481
    __asm__ volatile(EMMS:::"memory");
482 6e42e6c4 Diego Biurrun
#endif
483
    while (s < end)
484
    {
485 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
486 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
487
    }
488 ac4d0aea Michael Niedermayer
}
489
490 7f526efd Reimar Döffinger
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_size)
491 fcfbc150 Michael Niedermayer
{
492 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
493
    const uint8_t *end;
494 b63f641e Aurelien Jacobs
#if HAVE_MMX
495 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
496 0d9f3d85 Arpi
#endif
497 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
498
    end = s + src_size;
499 b63f641e Aurelien Jacobs
#if HAVE_MMX
500 6e42e6c4 Diego Biurrun
    mm_end = end - 15;
501 594ff7cc Diego Biurrun
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
502 7ad6469e Diego Pettenò
    __asm__ volatile(
503 6e42e6c4 Diego Biurrun
    "movq           %3, %%mm5   \n\t"
504
    "movq           %4, %%mm6   \n\t"
505
    "movq           %5, %%mm7   \n\t"
506
    "jmp            2f          \n\t"
507
    ASMALIGN(4)
508
    "1:                         \n\t"
509
    PREFETCH"   32(%1)          \n\t"
510
    "movd         (%1), %%mm0   \n\t"
511
    "movd        4(%1), %%mm3   \n\t"
512
    "punpckldq   8(%1), %%mm0   \n\t"
513
    "punpckldq  12(%1), %%mm3   \n\t"
514
    "movq        %%mm0, %%mm1   \n\t"
515
    "movq        %%mm3, %%mm4   \n\t"
516
    "pand        %%mm6, %%mm0   \n\t"
517
    "pand        %%mm6, %%mm3   \n\t"
518
    "pmaddwd     %%mm7, %%mm0   \n\t"
519
    "pmaddwd     %%mm7, %%mm3   \n\t"
520
    "pand        %%mm5, %%mm1   \n\t"
521
    "pand        %%mm5, %%mm4   \n\t"
522
    "por         %%mm1, %%mm0   \n\t"
523
    "por         %%mm4, %%mm3   \n\t"
524
    "psrld          $6, %%mm0   \n\t"
525
    "pslld         $10, %%mm3   \n\t"
526
    "por         %%mm3, %%mm0   \n\t"
527
    MOVNTQ"      %%mm0, (%0)    \n\t"
528
    "add           $16,  %1     \n\t"
529
    "add            $8,  %0     \n\t"
530
    "2:                         \n\t"
531
    "cmp            %2,  %1     \n\t"
532
    " jb            1b          \n\t"
533
    : "+r" (d), "+r"(s)
534
    : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
535
    );
536 aeae5d53 Michael Niedermayer
#else
537 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
538
    __asm__ volatile(
539 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
540
        "movq          %1, %%mm6    \n\t"
541
        ::"m"(red_15mask),"m"(green_15mask));
542
    while (s < mm_end)
543
    {
544 7ad6469e Diego Pettenò
        __asm__ volatile(
545 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
546
        "movd          %1, %%mm0    \n\t"
547
        "movd         4%1, %%mm3    \n\t"
548
        "punpckldq    8%1, %%mm0    \n\t"
549
        "punpckldq   12%1, %%mm3    \n\t"
550
        "movq       %%mm0, %%mm1    \n\t"
551
        "movq       %%mm0, %%mm2    \n\t"
552
        "movq       %%mm3, %%mm4    \n\t"
553
        "movq       %%mm3, %%mm5    \n\t"
554
        "psrlq         $3, %%mm0    \n\t"
555
        "psrlq         $3, %%mm3    \n\t"
556
        "pand          %2, %%mm0    \n\t"
557
        "pand          %2, %%mm3    \n\t"
558
        "psrlq         $6, %%mm1    \n\t"
559
        "psrlq         $6, %%mm4    \n\t"
560
        "pand       %%mm6, %%mm1    \n\t"
561
        "pand       %%mm6, %%mm4    \n\t"
562
        "psrlq         $9, %%mm2    \n\t"
563
        "psrlq         $9, %%mm5    \n\t"
564
        "pand       %%mm7, %%mm2    \n\t"
565
        "pand       %%mm7, %%mm5    \n\t"
566
        "por        %%mm1, %%mm0    \n\t"
567
        "por        %%mm4, %%mm3    \n\t"
568
        "por        %%mm2, %%mm0    \n\t"
569
        "por        %%mm5, %%mm3    \n\t"
570
        "psllq        $16, %%mm3    \n\t"
571
        "por        %%mm3, %%mm0    \n\t"
572
        MOVNTQ"     %%mm0, %0       \n\t"
573
        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
574
        d += 4;
575
        s += 16;
576
    }
577
#endif
578 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
579
    __asm__ volatile(EMMS:::"memory");
580 6e42e6c4 Diego Biurrun
#endif
581
    while (s < end)
582
    {
583 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
584 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
585
    }
586 fcfbc150 Michael Niedermayer
}
587
588 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
589 ac4d0aea Michael Niedermayer
{
590 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
591
    const uint8_t *end;
592 b63f641e Aurelien Jacobs
#if HAVE_MMX
593 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
594 ac4d0aea Michael Niedermayer
#endif
595 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
596
    end = s + src_size;
597 b63f641e Aurelien Jacobs
#if HAVE_MMX
598 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
599
    __asm__ volatile(
600 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
601
        "movq          %1, %%mm6    \n\t"
602
        ::"m"(red_15mask),"m"(green_15mask));
603
    mm_end = end - 15;
604
    while (s < mm_end)
605
    {
606 7ad6469e Diego Pettenò
        __asm__ volatile(
607 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
608
        "movd          %1, %%mm0    \n\t"
609
        "movd         4%1, %%mm3    \n\t"
610
        "punpckldq    8%1, %%mm0    \n\t"
611
        "punpckldq   12%1, %%mm3    \n\t"
612
        "movq       %%mm0, %%mm1    \n\t"
613
        "movq       %%mm0, %%mm2    \n\t"
614
        "movq       %%mm3, %%mm4    \n\t"
615
        "movq       %%mm3, %%mm5    \n\t"
616
        "psllq         $7, %%mm0    \n\t"
617
        "psllq         $7, %%mm3    \n\t"
618
        "pand       %%mm7, %%mm0    \n\t"
619
        "pand       %%mm7, %%mm3    \n\t"
620
        "psrlq         $6, %%mm1    \n\t"
621
        "psrlq         $6, %%mm4    \n\t"
622
        "pand       %%mm6, %%mm1    \n\t"
623
        "pand       %%mm6, %%mm4    \n\t"
624
        "psrlq        $19, %%mm2    \n\t"
625
        "psrlq        $19, %%mm5    \n\t"
626
        "pand          %2, %%mm2    \n\t"
627
        "pand          %2, %%mm5    \n\t"
628
        "por        %%mm1, %%mm0    \n\t"
629
        "por        %%mm4, %%mm3    \n\t"
630
        "por        %%mm2, %%mm0    \n\t"
631
        "por        %%mm5, %%mm3    \n\t"
632
        "psllq        $16, %%mm3    \n\t"
633
        "por        %%mm3, %%mm0    \n\t"
634
        MOVNTQ"     %%mm0, %0       \n\t"
635
        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
636
        d += 4;
637
        s += 16;
638
    }
639 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
640
    __asm__ volatile(EMMS:::"memory");
641 6e42e6c4 Diego Biurrun
#endif
642
    while (s < end)
643
    {
644 994c1ef0 Baptiste Coudurier
        register int rgb = *(const uint32_t*)s; s += 4;
645 6e42e6c4 Diego Biurrun
        *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
646
    }
647 ac4d0aea Michael Niedermayer
}
648
649 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long src_size)
650 996e1a7c Nick Kurshev
{
651 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
652
    const uint8_t *end;
653 b63f641e Aurelien Jacobs
#if HAVE_MMX
654 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
655 0d9f3d85 Arpi
#endif
656 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
657
    end = s + src_size;
658 b63f641e Aurelien Jacobs
#if HAVE_MMX
659 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
660
    __asm__ volatile(
661 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
662
        "movq         %1, %%mm6     \n\t"
663
        ::"m"(red_16mask),"m"(green_16mask));
664
    mm_end = end - 11;
665
    while (s < mm_end)
666
    {
667 7ad6469e Diego Pettenò
        __asm__ volatile(
668 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
669
        "movd          %1, %%mm0    \n\t"
670
        "movd         3%1, %%mm3    \n\t"
671
        "punpckldq    6%1, %%mm0    \n\t"
672
        "punpckldq    9%1, %%mm3    \n\t"
673
        "movq       %%mm0, %%mm1    \n\t"
674
        "movq       %%mm0, %%mm2    \n\t"
675
        "movq       %%mm3, %%mm4    \n\t"
676
        "movq       %%mm3, %%mm5    \n\t"
677
        "psrlq         $3, %%mm0    \n\t"
678
        "psrlq         $3, %%mm3    \n\t"
679
        "pand          %2, %%mm0    \n\t"
680
        "pand          %2, %%mm3    \n\t"
681
        "psrlq         $5, %%mm1    \n\t"
682
        "psrlq         $5, %%mm4    \n\t"
683
        "pand       %%mm6, %%mm1    \n\t"
684
        "pand       %%mm6, %%mm4    \n\t"
685
        "psrlq         $8, %%mm2    \n\t"
686
        "psrlq         $8, %%mm5    \n\t"
687
        "pand       %%mm7, %%mm2    \n\t"
688
        "pand       %%mm7, %%mm5    \n\t"
689
        "por        %%mm1, %%mm0    \n\t"
690
        "por        %%mm4, %%mm3    \n\t"
691
        "por        %%mm2, %%mm0    \n\t"
692
        "por        %%mm5, %%mm3    \n\t"
693
        "psllq        $16, %%mm3    \n\t"
694
        "por        %%mm3, %%mm0    \n\t"
695
        MOVNTQ"     %%mm0, %0       \n\t"
696
        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
697
        d += 4;
698
        s += 12;
699
    }
700 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
701
    __asm__ volatile(EMMS:::"memory");
702 6e42e6c4 Diego Biurrun
#endif
703
    while (s < end)
704
    {
705
        const int b = *s++;
706
        const int g = *s++;
707
        const int r = *s++;
708
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
709
    }
710 996e1a7c Nick Kurshev
}
711
712 6107059c Michael Niedermayer
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size)
713 ac4d0aea Michael Niedermayer
{
714 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
715
    const uint8_t *end;
716 b63f641e Aurelien Jacobs
#if HAVE_MMX
717 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
718 ac4d0aea Michael Niedermayer
#endif
719 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
720
    end = s + src_size;
721 b63f641e Aurelien Jacobs
#if HAVE_MMX
722 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
723
    __asm__ volatile(
724 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
725
        "movq         %1, %%mm6     \n\t"
726
        ::"m"(red_16mask),"m"(green_16mask));
727
    mm_end = end - 15;
728
    while (s < mm_end)
729
    {
730 7ad6469e Diego Pettenò
        __asm__ volatile(
731 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
732
        "movd          %1, %%mm0    \n\t"
733
        "movd         3%1, %%mm3    \n\t"
734
        "punpckldq    6%1, %%mm0    \n\t"
735
        "punpckldq    9%1, %%mm3    \n\t"
736
        "movq       %%mm0, %%mm1    \n\t"
737
        "movq       %%mm0, %%mm2    \n\t"
738
        "movq       %%mm3, %%mm4    \n\t"
739
        "movq       %%mm3, %%mm5    \n\t"
740
        "psllq         $8, %%mm0    \n\t"
741
        "psllq         $8, %%mm3    \n\t"
742
        "pand       %%mm7, %%mm0    \n\t"
743
        "pand       %%mm7, %%mm3    \n\t"
744
        "psrlq         $5, %%mm1    \n\t"
745
        "psrlq         $5, %%mm4    \n\t"
746
        "pand       %%mm6, %%mm1    \n\t"
747
        "pand       %%mm6, %%mm4    \n\t"
748
        "psrlq        $19, %%mm2    \n\t"
749
        "psrlq        $19, %%mm5    \n\t"
750
        "pand          %2, %%mm2    \n\t"
751
        "pand          %2, %%mm5    \n\t"
752
        "por        %%mm1, %%mm0    \n\t"
753
        "por        %%mm4, %%mm3    \n\t"
754
        "por        %%mm2, %%mm0    \n\t"
755
        "por        %%mm5, %%mm3    \n\t"
756
        "psllq        $16, %%mm3    \n\t"
757
        "por        %%mm3, %%mm0    \n\t"
758
        MOVNTQ"     %%mm0, %0       \n\t"
759
        :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
760
        d += 4;
761
        s += 12;
762
    }
763 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
764
    __asm__ volatile(EMMS:::"memory");
765 6e42e6c4 Diego Biurrun
#endif
766
    while (s < end)
767
    {
768
        const int r = *s++;
769
        const int g = *s++;
770
        const int b = *s++;
771
        *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
772
    }
773 ac4d0aea Michael Niedermayer
}
774
775 6107059c Michael Niedermayer
static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long src_size)
776 996e1a7c Nick Kurshev
{
777 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
778
    const uint8_t *end;
779 b63f641e Aurelien Jacobs
#if HAVE_MMX
780 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
781 0d9f3d85 Arpi
#endif
782 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
783
    end = s + src_size;
784 b63f641e Aurelien Jacobs
#if HAVE_MMX
785 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
786
    __asm__ volatile(
787 6e42e6c4 Diego Biurrun
        "movq          %0, %%mm7    \n\t"
788
        "movq          %1, %%mm6    \n\t"
789
        ::"m"(red_15mask),"m"(green_15mask));
790
    mm_end = end - 11;
791
    while (s < mm_end)
792
    {
793 7ad6469e Diego Pettenò
        __asm__ volatile(
794 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
795
        "movd          %1, %%mm0    \n\t"
796
        "movd         3%1, %%mm3    \n\t"
797
        "punpckldq    6%1, %%mm0    \n\t"
798
        "punpckldq    9%1, %%mm3    \n\t"
799
        "movq       %%mm0, %%mm1    \n\t"
800
        "movq       %%mm0, %%mm2    \n\t"
801
        "movq       %%mm3, %%mm4    \n\t"
802
        "movq       %%mm3, %%mm5    \n\t"
803
        "psrlq         $3, %%mm0    \n\t"
804
        "psrlq         $3, %%mm3    \n\t"
805
        "pand          %2, %%mm0    \n\t"
806
        "pand          %2, %%mm3    \n\t"
807
        "psrlq         $6, %%mm1    \n\t"
808
        "psrlq         $6, %%mm4    \n\t"
809
        "pand       %%mm6, %%mm1    \n\t"
810
        "pand       %%mm6, %%mm4    \n\t"
811
        "psrlq         $9, %%mm2    \n\t"
812
        "psrlq         $9, %%mm5    \n\t"
813
        "pand       %%mm7, %%mm2    \n\t"
814
        "pand       %%mm7, %%mm5    \n\t"
815
        "por        %%mm1, %%mm0    \n\t"
816
        "por        %%mm4, %%mm3    \n\t"
817
        "por        %%mm2, %%mm0    \n\t"
818
        "por        %%mm5, %%mm3    \n\t"
819
        "psllq        $16, %%mm3    \n\t"
820
        "por        %%mm3, %%mm0    \n\t"
821
        MOVNTQ"     %%mm0, %0       \n\t"
822
        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
823
        d += 4;
824
        s += 12;
825
    }
826 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
827
    __asm__ volatile(EMMS:::"memory");
828 6e42e6c4 Diego Biurrun
#endif
829
    while (s < end)
830
    {
831
        const int b = *s++;
832
        const int g = *s++;
833
        const int r = *s++;
834
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
835
    }
836 0d9f3d85 Arpi
}
837
838 6107059c Michael Niedermayer
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size)
839 ac4d0aea Michael Niedermayer
{
840 6e42e6c4 Diego Biurrun
    const uint8_t *s = src;
841
    const uint8_t *end;
842 b63f641e Aurelien Jacobs
#if HAVE_MMX
843 6e42e6c4 Diego Biurrun
    const uint8_t *mm_end;
844 ac4d0aea Michael Niedermayer
#endif
845 6e42e6c4 Diego Biurrun
    uint16_t *d = (uint16_t *)dst;
846
    end = s + src_size;
847 b63f641e Aurelien Jacobs
#if HAVE_MMX
848 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*src):"memory");
849
    __asm__ volatile(
850 6e42e6c4 Diego Biurrun
        "movq         %0, %%mm7     \n\t"
851
        "movq         %1, %%mm6     \n\t"
852
        ::"m"(red_15mask),"m"(green_15mask));
853
    mm_end = end - 15;
854
    while (s < mm_end)
855
    {
856 7ad6469e Diego Pettenò
        __asm__ volatile(
857 6e42e6c4 Diego Biurrun
        PREFETCH"   32%1            \n\t"
858
        "movd         %1, %%mm0     \n\t"
859
        "movd        3%1, %%mm3     \n\t"
860
        "punpckldq   6%1, %%mm0     \n\t"
861
        "punpckldq   9%1, %%mm3     \n\t"
862
        "movq      %%mm0, %%mm1     \n\t"
863
        "movq      %%mm0, %%mm2     \n\t"
864
        "movq      %%mm3, %%mm4     \n\t"
865
        "movq      %%mm3, %%mm5     \n\t"
866
        "psllq        $7, %%mm0     \n\t"
867
        "psllq        $7, %%mm3     \n\t"
868
        "pand      %%mm7, %%mm0     \n\t"
869
        "pand      %%mm7, %%mm3     \n\t"
870
        "psrlq        $6, %%mm1     \n\t"
871
        "psrlq        $6, %%mm4     \n\t"
872
        "pand      %%mm6, %%mm1     \n\t"
873
        "pand      %%mm6, %%mm4     \n\t"
874
        "psrlq       $19, %%mm2     \n\t"
875
        "psrlq       $19, %%mm5     \n\t"
876
        "pand         %2, %%mm2     \n\t"
877
        "pand         %2, %%mm5     \n\t"
878
        "por       %%mm1, %%mm0     \n\t"
879
        "por       %%mm4, %%mm3     \n\t"
880
        "por       %%mm2, %%mm0     \n\t"
881
        "por       %%mm5, %%mm3     \n\t"
882
        "psllq       $16, %%mm3     \n\t"
883
        "por       %%mm3, %%mm0     \n\t"
884
        MOVNTQ"    %%mm0, %0        \n\t"
885
        :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
886
        d += 4;
887
        s += 12;
888
    }
889 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
890
    __asm__ volatile(EMMS:::"memory");
891 6e42e6c4 Diego Biurrun
#endif
892
    while (s < end)
893
    {
894
        const int r = *s++;
895
        const int g = *s++;
896
        const int b = *s++;
897
        *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
898
    }
899 ac4d0aea Michael Niedermayer
}
900
901 0d9f3d85 Arpi
/*
  A less accurate approximation is used here: the input value is simply
  shifted left and the low-order bits are filled with zeroes. This method
  improves PNG compression, but it cannot reproduce white exactly, since it
  does not generate an all-ones maximum value; the net effect is to darken
  the image slightly.

  The better method would be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      leftmost bits repeated to fill open bits
       |
   original bits
*/
922 6107059c Michael Niedermayer
static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
923 0d9f3d85 Arpi
{
924 6e42e6c4 Diego Biurrun
    const uint16_t *end;
925 b63f641e Aurelien Jacobs
#if HAVE_MMX
926 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
927 0d9f3d85 Arpi
#endif
928 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
929 994c1ef0 Baptiste Coudurier
    const uint16_t *s = (const uint16_t*)src;
930 6e42e6c4 Diego Biurrun
    end = s + src_size/2;
931 b63f641e Aurelien Jacobs
#if HAVE_MMX
932 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
933 6e42e6c4 Diego Biurrun
    mm_end = end - 7;
934
    while (s < mm_end)
935
    {
936 7ad6469e Diego Pettenò
        __asm__ volatile(
937 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
938
        "movq          %1, %%mm0    \n\t"
939
        "movq          %1, %%mm1    \n\t"
940
        "movq          %1, %%mm2    \n\t"
941
        "pand          %2, %%mm0    \n\t"
942
        "pand          %3, %%mm1    \n\t"
943
        "pand          %4, %%mm2    \n\t"
944
        "psllq         $3, %%mm0    \n\t"
945
        "psrlq         $2, %%mm1    \n\t"
946
        "psrlq         $7, %%mm2    \n\t"
947
        "movq       %%mm0, %%mm3    \n\t"
948
        "movq       %%mm1, %%mm4    \n\t"
949
        "movq       %%mm2, %%mm5    \n\t"
950
        "punpcklwd     %5, %%mm0    \n\t"
951
        "punpcklwd     %5, %%mm1    \n\t"
952
        "punpcklwd     %5, %%mm2    \n\t"
953
        "punpckhwd     %5, %%mm3    \n\t"
954
        "punpckhwd     %5, %%mm4    \n\t"
955
        "punpckhwd     %5, %%mm5    \n\t"
956
        "psllq         $8, %%mm1    \n\t"
957
        "psllq        $16, %%mm2    \n\t"
958
        "por        %%mm1, %%mm0    \n\t"
959
        "por        %%mm2, %%mm0    \n\t"
960
        "psllq         $8, %%mm4    \n\t"
961
        "psllq        $16, %%mm5    \n\t"
962
        "por        %%mm4, %%mm3    \n\t"
963
        "por        %%mm5, %%mm3    \n\t"
964
965
        "movq       %%mm0, %%mm6    \n\t"
966
        "movq       %%mm3, %%mm7    \n\t"
967
968
        "movq         8%1, %%mm0    \n\t"
969
        "movq         8%1, %%mm1    \n\t"
970
        "movq         8%1, %%mm2    \n\t"
971
        "pand          %2, %%mm0    \n\t"
972
        "pand          %3, %%mm1    \n\t"
973
        "pand          %4, %%mm2    \n\t"
974
        "psllq         $3, %%mm0    \n\t"
975
        "psrlq         $2, %%mm1    \n\t"
976
        "psrlq         $7, %%mm2    \n\t"
977
        "movq       %%mm0, %%mm3    \n\t"
978
        "movq       %%mm1, %%mm4    \n\t"
979
        "movq       %%mm2, %%mm5    \n\t"
980
        "punpcklwd     %5, %%mm0    \n\t"
981
        "punpcklwd     %5, %%mm1    \n\t"
982
        "punpcklwd     %5, %%mm2    \n\t"
983
        "punpckhwd     %5, %%mm3    \n\t"
984
        "punpckhwd     %5, %%mm4    \n\t"
985
        "punpckhwd     %5, %%mm5    \n\t"
986
        "psllq         $8, %%mm1    \n\t"
987
        "psllq        $16, %%mm2    \n\t"
988
        "por        %%mm1, %%mm0    \n\t"
989
        "por        %%mm2, %%mm0    \n\t"
990
        "psllq         $8, %%mm4    \n\t"
991
        "psllq        $16, %%mm5    \n\t"
992
        "por        %%mm4, %%mm3    \n\t"
993
        "por        %%mm5, %%mm3    \n\t"
994
995
        :"=m"(*d)
996
        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
997
        :"memory");
998 8a322796 Diego Biurrun
        /* borrowed 32 to 24 */
999 7ad6469e Diego Pettenò
        __asm__ volatile(
1000 6e42e6c4 Diego Biurrun
        "movq       %%mm0, %%mm4    \n\t"
1001
        "movq       %%mm3, %%mm5    \n\t"
1002
        "movq       %%mm6, %%mm0    \n\t"
1003
        "movq       %%mm7, %%mm1    \n\t"
1004
1005
        "movq       %%mm4, %%mm6    \n\t"
1006
        "movq       %%mm5, %%mm7    \n\t"
1007
        "movq       %%mm0, %%mm2    \n\t"
1008
        "movq       %%mm1, %%mm3    \n\t"
1009
1010
        "psrlq         $8, %%mm2    \n\t"
1011
        "psrlq         $8, %%mm3    \n\t"
1012
        "psrlq         $8, %%mm6    \n\t"
1013
        "psrlq         $8, %%mm7    \n\t"
1014
        "pand          %2, %%mm0    \n\t"
1015
        "pand          %2, %%mm1    \n\t"
1016
        "pand          %2, %%mm4    \n\t"
1017
        "pand          %2, %%mm5    \n\t"
1018
        "pand          %3, %%mm2    \n\t"
1019
        "pand          %3, %%mm3    \n\t"
1020
        "pand          %3, %%mm6    \n\t"
1021
        "pand          %3, %%mm7    \n\t"
1022
        "por        %%mm2, %%mm0    \n\t"
1023
        "por        %%mm3, %%mm1    \n\t"
1024
        "por        %%mm6, %%mm4    \n\t"
1025
        "por        %%mm7, %%mm5    \n\t"
1026
1027
        "movq       %%mm1, %%mm2    \n\t"
1028
        "movq       %%mm4, %%mm3    \n\t"
1029
        "psllq        $48, %%mm2    \n\t"
1030
        "psllq        $32, %%mm3    \n\t"
1031
        "pand          %4, %%mm2    \n\t"
1032
        "pand          %5, %%mm3    \n\t"
1033
        "por        %%mm2, %%mm0    \n\t"
1034
        "psrlq        $16, %%mm1    \n\t"
1035
        "psrlq        $32, %%mm4    \n\t"
1036
        "psllq        $16, %%mm5    \n\t"
1037
        "por        %%mm3, %%mm1    \n\t"
1038
        "pand          %6, %%mm5    \n\t"
1039
        "por        %%mm5, %%mm4    \n\t"
1040
1041
        MOVNTQ"     %%mm0,   %0     \n\t"
1042
        MOVNTQ"     %%mm1,  8%0     \n\t"
1043
        MOVNTQ"     %%mm4, 16%0"
1044
1045
        :"=m"(*d)
1046
        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
1047
        :"memory");
1048
        d += 24;
1049
        s += 8;
1050
    }
1051 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1052
    __asm__ volatile(EMMS:::"memory");
1053 6e42e6c4 Diego Biurrun
#endif
1054
    while (s < end)
1055
    {
1056
        register uint16_t bgr;
1057
        bgr = *s++;
1058
        *d++ = (bgr&0x1F)<<3;
1059
        *d++ = (bgr&0x3E0)>>2;
1060
        *d++ = (bgr&0x7C00)>>7;
1061
    }
1062 0d9f3d85 Arpi
}
1063
1064 6107059c Michael Niedermayer
static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1065 0d9f3d85 Arpi
{
1066 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1067 b63f641e Aurelien Jacobs
#if HAVE_MMX
1068 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1069 0d9f3d85 Arpi
#endif
1070 6e42e6c4 Diego Biurrun
    uint8_t *d = (uint8_t *)dst;
1071
    const uint16_t *s = (const uint16_t *)src;
1072
    end = s + src_size/2;
1073 b63f641e Aurelien Jacobs
#if HAVE_MMX
1074 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1075 6e42e6c4 Diego Biurrun
    mm_end = end - 7;
1076
    while (s < mm_end)
1077
    {
1078 7ad6469e Diego Pettenò
        __asm__ volatile(
1079 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
1080
        "movq          %1, %%mm0    \n\t"
1081
        "movq          %1, %%mm1    \n\t"
1082
        "movq          %1, %%mm2    \n\t"
1083
        "pand          %2, %%mm0    \n\t"
1084
        "pand          %3, %%mm1    \n\t"
1085
        "pand          %4, %%mm2    \n\t"
1086
        "psllq         $3, %%mm0    \n\t"
1087
        "psrlq         $3, %%mm1    \n\t"
1088
        "psrlq         $8, %%mm2    \n\t"
1089
        "movq       %%mm0, %%mm3    \n\t"
1090
        "movq       %%mm1, %%mm4    \n\t"
1091
        "movq       %%mm2, %%mm5    \n\t"
1092
        "punpcklwd     %5, %%mm0    \n\t"
1093
        "punpcklwd     %5, %%mm1    \n\t"
1094
        "punpcklwd     %5, %%mm2    \n\t"
1095
        "punpckhwd     %5, %%mm3    \n\t"
1096
        "punpckhwd     %5, %%mm4    \n\t"
1097
        "punpckhwd     %5, %%mm5    \n\t"
1098
        "psllq         $8, %%mm1    \n\t"
1099
        "psllq        $16, %%mm2    \n\t"
1100
        "por        %%mm1, %%mm0    \n\t"
1101
        "por        %%mm2, %%mm0    \n\t"
1102
        "psllq         $8, %%mm4    \n\t"
1103
        "psllq        $16, %%mm5    \n\t"
1104
        "por        %%mm4, %%mm3    \n\t"
1105
        "por        %%mm5, %%mm3    \n\t"
1106
1107
        "movq       %%mm0, %%mm6    \n\t"
1108
        "movq       %%mm3, %%mm7    \n\t"
1109
1110
        "movq         8%1, %%mm0    \n\t"
1111
        "movq         8%1, %%mm1    \n\t"
1112
        "movq         8%1, %%mm2    \n\t"
1113
        "pand          %2, %%mm0    \n\t"
1114
        "pand          %3, %%mm1    \n\t"
1115
        "pand          %4, %%mm2    \n\t"
1116
        "psllq         $3, %%mm0    \n\t"
1117
        "psrlq         $3, %%mm1    \n\t"
1118
        "psrlq         $8, %%mm2    \n\t"
1119
        "movq       %%mm0, %%mm3    \n\t"
1120
        "movq       %%mm1, %%mm4    \n\t"
1121
        "movq       %%mm2, %%mm5    \n\t"
1122
        "punpcklwd     %5, %%mm0    \n\t"
1123
        "punpcklwd     %5, %%mm1    \n\t"
1124
        "punpcklwd     %5, %%mm2    \n\t"
1125
        "punpckhwd     %5, %%mm3    \n\t"
1126
        "punpckhwd     %5, %%mm4    \n\t"
1127
        "punpckhwd     %5, %%mm5    \n\t"
1128
        "psllq         $8, %%mm1    \n\t"
1129
        "psllq        $16, %%mm2    \n\t"
1130
        "por        %%mm1, %%mm0    \n\t"
1131
        "por        %%mm2, %%mm0    \n\t"
1132
        "psllq         $8, %%mm4    \n\t"
1133
        "psllq        $16, %%mm5    \n\t"
1134
        "por        %%mm4, %%mm3    \n\t"
1135
        "por        %%mm5, %%mm3    \n\t"
1136
        :"=m"(*d)
1137
        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
1138
        :"memory");
1139 8a322796 Diego Biurrun
        /* borrowed 32 to 24 */
1140 7ad6469e Diego Pettenò
        __asm__ volatile(
1141 6e42e6c4 Diego Biurrun
        "movq       %%mm0, %%mm4    \n\t"
1142
        "movq       %%mm3, %%mm5    \n\t"
1143
        "movq       %%mm6, %%mm0    \n\t"
1144
        "movq       %%mm7, %%mm1    \n\t"
1145
1146
        "movq       %%mm4, %%mm6    \n\t"
1147
        "movq       %%mm5, %%mm7    \n\t"
1148
        "movq       %%mm0, %%mm2    \n\t"
1149
        "movq       %%mm1, %%mm3    \n\t"
1150
1151
        "psrlq         $8, %%mm2    \n\t"
1152
        "psrlq         $8, %%mm3    \n\t"
1153
        "psrlq         $8, %%mm6    \n\t"
1154
        "psrlq         $8, %%mm7    \n\t"
1155
        "pand          %2, %%mm0    \n\t"
1156
        "pand          %2, %%mm1    \n\t"
1157
        "pand          %2, %%mm4    \n\t"
1158
        "pand          %2, %%mm5    \n\t"
1159
        "pand          %3, %%mm2    \n\t"
1160
        "pand          %3, %%mm3    \n\t"
1161
        "pand          %3, %%mm6    \n\t"
1162
        "pand          %3, %%mm7    \n\t"
1163
        "por        %%mm2, %%mm0    \n\t"
1164
        "por        %%mm3, %%mm1    \n\t"
1165
        "por        %%mm6, %%mm4    \n\t"
1166
        "por        %%mm7, %%mm5    \n\t"
1167
1168
        "movq       %%mm1, %%mm2    \n\t"
1169
        "movq       %%mm4, %%mm3    \n\t"
1170
        "psllq        $48, %%mm2    \n\t"
1171
        "psllq        $32, %%mm3    \n\t"
1172
        "pand          %4, %%mm2    \n\t"
1173
        "pand          %5, %%mm3    \n\t"
1174
        "por        %%mm2, %%mm0    \n\t"
1175
        "psrlq        $16, %%mm1    \n\t"
1176
        "psrlq        $32, %%mm4    \n\t"
1177
        "psllq        $16, %%mm5    \n\t"
1178
        "por        %%mm3, %%mm1    \n\t"
1179
        "pand          %6, %%mm5    \n\t"
1180
        "por        %%mm5, %%mm4    \n\t"
1181
1182
        MOVNTQ"     %%mm0,   %0     \n\t"
1183
        MOVNTQ"     %%mm1,  8%0     \n\t"
1184
        MOVNTQ"     %%mm4, 16%0"
1185
1186
        :"=m"(*d)
1187
        :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
1188
        :"memory");
1189
        d += 24;
1190
        s += 8;
1191
    }
1192 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1193
    __asm__ volatile(EMMS:::"memory");
1194 6e42e6c4 Diego Biurrun
#endif
1195
    while (s < end)
1196
    {
1197
        register uint16_t bgr;
1198
        bgr = *s++;
1199
        *d++ = (bgr&0x1F)<<3;
1200
        *d++ = (bgr&0x7E0)>>3;
1201
        *d++ = (bgr&0xF800)>>8;
1202
    }
1203 0d9f3d85 Arpi
}
1204
1205 a284d030 Cédric Schieli
/*
 * PACK_RGB32: asm fragment that interleaves four B, G and R samples with a
 * constant FF byte into four 32-bit pixels and stores them to operand %0
 * (16 bytes, two MOVNTQ stores). mm3 is used as scratch.
 *
 * Register contents expected on entry:
 * mm0 = 00 B3 00 B2 00 B1 00 B0
 * mm1 = 00 G3 00 G2 00 G1 00 G0
 * mm2 = 00 R3 00 R2 00 R1 00 R0
 * mm6 = FF FF FF FF FF FF FF FF
 * mm7 = 00 00 00 00 00 00 00 00
 *
 * The last line deliberately has no line continuation, so the macro ends
 * there regardless of what follows it in the file.
 */
#define PACK_RGB32 \
    "packuswb   %%mm7, %%mm0    \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
    "packuswb   %%mm7, %%mm1    \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
    "packuswb   %%mm7, %%mm2    \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
    "punpcklbw  %%mm1, %%mm0    \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
    "punpcklbw  %%mm6, %%mm2    \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
    "movq       %%mm0, %%mm3    \n\t"                               \
    "punpcklwd  %%mm2, %%mm0    \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
    "punpckhwd  %%mm2, %%mm3    \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
    MOVNTQ"     %%mm0,  %0      \n\t"                               \
    MOVNTQ"     %%mm3, 8%0      \n\t"
1223
1224 7f526efd Reimar Döffinger
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size)
1225 0d9f3d85 Arpi
{
1226 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1227 b63f641e Aurelien Jacobs
#if HAVE_MMX
1228 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1229 0d9f3d85 Arpi
#endif
1230 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
1231 6e42e6c4 Diego Biurrun
    const uint16_t *s = (const uint16_t *)src;
1232
    end = s + src_size/2;
1233 b63f641e Aurelien Jacobs
#if HAVE_MMX
1234 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1235
    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
1236 a284d030 Cédric Schieli
    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
1237 6e42e6c4 Diego Biurrun
    mm_end = end - 3;
1238
    while (s < mm_end)
1239
    {
1240 7ad6469e Diego Pettenò
        __asm__ volatile(
1241 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
1242
        "movq          %1, %%mm0    \n\t"
1243
        "movq          %1, %%mm1    \n\t"
1244
        "movq          %1, %%mm2    \n\t"
1245
        "pand          %2, %%mm0    \n\t"
1246
        "pand          %3, %%mm1    \n\t"
1247
        "pand          %4, %%mm2    \n\t"
1248
        "psllq         $3, %%mm0    \n\t"
1249
        "psrlq         $2, %%mm1    \n\t"
1250
        "psrlq         $7, %%mm2    \n\t"
1251 a284d030 Cédric Schieli
        PACK_RGB32
1252 6e42e6c4 Diego Biurrun
        :"=m"(*d)
1253
        :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
1254
        :"memory");
1255
        d += 16;
1256
        s += 4;
1257
    }
1258 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1259
    __asm__ volatile(EMMS:::"memory");
1260 6e42e6c4 Diego Biurrun
#endif
1261
    while (s < end)
1262
    {
1263 594ff7cc Diego Biurrun
#if 0 //slightly slower on Athlon
1264 6e42e6c4 Diego Biurrun
        int bgr= *s++;
1265
        *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
1266 deb2277c Michael Niedermayer
#else
1267 6e42e6c4 Diego Biurrun
        register uint16_t bgr;
1268
        bgr = *s++;
1269 6cb38650 Alex Beregszaszi
#ifdef WORDS_BIGENDIAN
1270 f8a138be Cédric Schieli
        *d++ = 255;
1271 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x7C00)>>7;
1272
        *d++ = (bgr&0x3E0)>>2;
1273
        *d++ = (bgr&0x1F)<<3;
1274 6cb38650 Alex Beregszaszi
#else
1275 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x1F)<<3;
1276
        *d++ = (bgr&0x3E0)>>2;
1277
        *d++ = (bgr&0x7C00)>>7;
1278 f8a138be Cédric Schieli
        *d++ = 255;
1279 deb2277c Michael Niedermayer
#endif
1280 6cb38650 Alex Beregszaszi
1281
#endif
1282 6e42e6c4 Diego Biurrun
    }
1283 0d9f3d85 Arpi
}
1284 996e1a7c Nick Kurshev
1285 7f526efd Reimar Döffinger
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size)
1286 0d9f3d85 Arpi
{
1287 6e42e6c4 Diego Biurrun
    const uint16_t *end;
1288 b63f641e Aurelien Jacobs
#if HAVE_MMX
1289 6e42e6c4 Diego Biurrun
    const uint16_t *mm_end;
1290 0d9f3d85 Arpi
#endif
1291 baf7f7c6 Baptiste Coudurier
    uint8_t *d = dst;
1292 994c1ef0 Baptiste Coudurier
    const uint16_t *s = (const uint16_t*)src;
1293 6e42e6c4 Diego Biurrun
    end = s + src_size/2;
1294 b63f641e Aurelien Jacobs
#if HAVE_MMX
1295 7ad6469e Diego Pettenò
    __asm__ volatile(PREFETCH"    %0"::"m"(*s):"memory");
1296
    __asm__ volatile("pxor    %%mm7,%%mm7    \n\t":::"memory");
1297 a284d030 Cédric Schieli
    __asm__ volatile("pcmpeqd %%mm6,%%mm6    \n\t":::"memory");
1298 6e42e6c4 Diego Biurrun
    mm_end = end - 3;
1299
    while (s < mm_end)
1300
    {
1301 7ad6469e Diego Pettenò
        __asm__ volatile(
1302 6e42e6c4 Diego Biurrun
        PREFETCH"    32%1           \n\t"
1303
        "movq          %1, %%mm0    \n\t"
1304
        "movq          %1, %%mm1    \n\t"
1305
        "movq          %1, %%mm2    \n\t"
1306
        "pand          %2, %%mm0    \n\t"
1307
        "pand          %3, %%mm1    \n\t"
1308
        "pand          %4, %%mm2    \n\t"
1309
        "psllq         $3, %%mm0    \n\t"
1310
        "psrlq         $3, %%mm1    \n\t"
1311
        "psrlq         $8, %%mm2    \n\t"
1312 a284d030 Cédric Schieli
        PACK_RGB32
1313 6e42e6c4 Diego Biurrun
        :"=m"(*d)
1314
        :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
1315
        :"memory");
1316
        d += 16;
1317
        s += 4;
1318
    }
1319 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1320
    __asm__ volatile(EMMS:::"memory");
1321 6e42e6c4 Diego Biurrun
#endif
1322
    while (s < end)
1323
    {
1324
        register uint16_t bgr;
1325
        bgr = *s++;
1326 6cb38650 Alex Beregszaszi
#ifdef WORDS_BIGENDIAN
1327 f8a138be Cédric Schieli
        *d++ = 255;
1328 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0xF800)>>8;
1329
        *d++ = (bgr&0x7E0)>>3;
1330
        *d++ = (bgr&0x1F)<<3;
1331 6cb38650 Alex Beregszaszi
#else
1332 6e42e6c4 Diego Biurrun
        *d++ = (bgr&0x1F)<<3;
1333
        *d++ = (bgr&0x7E0)>>3;
1334
        *d++ = (bgr&0xF800)>>8;
1335 f8a138be Cédric Schieli
        *d++ = 255;
1336 6cb38650 Alex Beregszaszi
#endif
1337 6e42e6c4 Diego Biurrun
    }
1338 996e1a7c Nick Kurshev
}
1339 fcfbc150 Michael Niedermayer
1340 7f526efd Reimar Döffinger
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
1341 99969243 Michael Niedermayer
{
1342 9326d3f3 Michael Niedermayer
    x86_reg idx = 15 - src_size;
1343 994c1ef0 Baptiste Coudurier
    const uint8_t *s = src-idx;
1344
    uint8_t *d = dst-idx;
1345 b63f641e Aurelien Jacobs
#if HAVE_MMX
1346 7ad6469e Diego Pettenò
    __asm__ volatile(
1347 6e42e6c4 Diego Biurrun
    "test          %0, %0           \n\t"
1348
    "jns           2f               \n\t"
1349
    PREFETCH"       (%1, %0)        \n\t"
1350
    "movq          %3, %%mm7        \n\t"
1351
    "pxor          %4, %%mm7        \n\t"
1352
    "movq       %%mm7, %%mm6        \n\t"
1353
    "pxor          %5, %%mm7        \n\t"
1354
    ASMALIGN(4)
1355
    "1:                             \n\t"
1356
    PREFETCH"     32(%1, %0)        \n\t"
1357
    "movq           (%1, %0), %%mm0 \n\t"
1358
    "movq          8(%1, %0), %%mm1 \n\t"
1359 b63f641e Aurelien Jacobs
# if HAVE_MMX2
1360 6e42e6c4 Diego Biurrun
    "pshufw      $177, %%mm0, %%mm3 \n\t"
1361
    "pshufw      $177, %%mm1, %%mm5 \n\t"
1362
    "pand       %%mm7, %%mm0        \n\t"
1363
    "pand       %%mm6, %%mm3        \n\t"
1364
    "pand       %%mm7, %%mm1        \n\t"
1365
    "pand       %%mm6, %%mm5        \n\t"
1366
    "por        %%mm3, %%mm0        \n\t"
1367
    "por        %%mm5, %%mm1        \n\t"
1368 b38d4874 Ivo van Poorten
# else
1369 6e42e6c4 Diego Biurrun
    "movq       %%mm0, %%mm2        \n\t"
1370
    "movq       %%mm1, %%mm4        \n\t"
1371
    "pand       %%mm7, %%mm0        \n\t"
1372
    "pand       %%mm6, %%mm2        \n\t"
1373
    "pand       %%mm7, %%mm1        \n\t"
1374
    "pand       %%mm6, %%mm4        \n\t"
1375
    "movq       %%mm2, %%mm3        \n\t"
1376
    "movq       %%mm4, %%mm5        \n\t"
1377
    "pslld        $16, %%mm2        \n\t"
1378
    "psrld        $16, %%mm3        \n\t"
1379
    "pslld        $16, %%mm4        \n\t"
1380
    "psrld        $16, %%mm5        \n\t"
1381
    "por        %%mm2, %%mm0        \n\t"
1382
    "por        %%mm4, %%mm1        \n\t"
1383
    "por        %%mm3, %%mm0        \n\t"
1384
    "por        %%mm5, %%mm1        \n\t"
1385 b38d4874 Ivo van Poorten
# endif
1386 6e42e6c4 Diego Biurrun
    MOVNTQ"     %%mm0,  (%2, %0)    \n\t"
1387
    MOVNTQ"     %%mm1, 8(%2, %0)    \n\t"
1388
    "add          $16, %0           \n\t"
1389
    "js            1b               \n\t"
1390
    SFENCE"                         \n\t"
1391
    EMMS"                           \n\t"
1392
    "2:                             \n\t"
1393
    : "+&r"(idx)
1394
    : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
1395
    : "memory");
1396
#endif
1397
    for (; idx<15; idx+=4) {
1398 994c1ef0 Baptiste Coudurier
        register int v = *(const uint32_t *)&s[idx], g = v & 0xff00ff00;
1399 6e42e6c4 Diego Biurrun
        v &= 0xff00ff;
1400
        *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
1401
    }
1402 99969243 Michael Niedermayer
}
1403
1404 7f526efd Reimar Döffinger
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)
1405 74d35835 Michael Niedermayer
{
1406 6e42e6c4 Diego Biurrun
    unsigned i;
1407 b63f641e Aurelien Jacobs
#if HAVE_MMX
1408 d0ce212a Ramiro Polla
    x86_reg mmx_size= 23 - src_size;
1409 7ad6469e Diego Pettenò
    __asm__ volatile (
1410 6e42e6c4 Diego Biurrun
    "test             %%"REG_a", %%"REG_a"          \n\t"
1411
    "jns                     2f                     \n\t"
1412
    "movq     "MANGLE(mask24r)", %%mm5              \n\t"
1413
    "movq     "MANGLE(mask24g)", %%mm6              \n\t"
1414
    "movq     "MANGLE(mask24b)", %%mm7              \n\t"
1415
    ASMALIGN(4)
1416
    "1:                                             \n\t"
1417
    PREFETCH" 32(%1, %%"REG_a")                     \n\t"
1418
    "movq       (%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
1419
    "movq       (%1, %%"REG_a"), %%mm1              \n\t" // BGR BGR BG
1420
    "movq      2(%1, %%"REG_a"), %%mm2              \n\t" // R BGR BGR B
1421
    "psllq                  $16, %%mm0              \n\t" // 00 BGR BGR
1422
    "pand                 %%mm5, %%mm0              \n\t"
1423
    "pand                 %%mm6, %%mm1              \n\t"
1424
    "pand                 %%mm7, %%mm2              \n\t"
1425
    "por                  %%mm0, %%mm1              \n\t"
1426
    "por                  %%mm2, %%mm1              \n\t"
1427
    "movq      6(%1, %%"REG_a"), %%mm0              \n\t" // BGR BGR BG
1428
    MOVNTQ"               %%mm1,   (%2, %%"REG_a")  \n\t" // RGB RGB RG
1429
    "movq      8(%1, %%"REG_a"), %%mm1              \n\t" // R BGR BGR B
1430
    "movq     10(%1, %%"REG_a"), %%mm2              \n\t" // GR BGR BGR
1431
    "pand                 %%mm7, %%mm0              \n\t"
1432
    "pand                 %%mm5, %%mm1              \n\t"
1433
    "pand                 %%mm6, %%mm2              \n\t"
1434
    "por                  %%mm0, %%mm1              \n\t"
1435
    "por                  %%mm2, %%mm1              \n\t"
1436
    "movq     14(%1, %%"REG_a"), %%mm0              \n\t" // R BGR BGR B
1437
    MOVNTQ"               %%mm1,  8(%2, %%"REG_a")  \n\t" // B RGB RGB R
1438
    "movq     16(%1, %%"REG_a"), %%mm1              \n\t" // GR BGR BGR
1439
    "movq     18(%1, %%"REG_a"), %%mm2              \n\t" // BGR BGR BG
1440
    "pand                 %%mm6, %%mm0              \n\t"
1441
    "pand                 %%mm7, %%mm1              \n\t"
1442
    "pand                 %%mm5, %%mm2              \n\t"
1443
    "por                  %%mm0, %%mm1              \n\t"
1444
    "por                  %%mm2, %%mm1              \n\t"
1445
    MOVNTQ"               %%mm1, 16(%2, %%"REG_a")  \n\t"
1446
    "add                    $24, %%"REG_a"          \n\t"
1447
    " js                     1b                     \n\t"
1448
    "2:                                             \n\t"
1449
    : "+a" (mmx_size)
1450
    : "r" (src-mmx_size), "r"(dst-mmx_size)
1451
    );
1452
1453 7ad6469e Diego Pettenò
    __asm__ volatile(SFENCE:::"memory");
1454
    __asm__ volatile(EMMS:::"memory");
1455 6e42e6c4 Diego Biurrun
1456 8a322796 Diego Biurrun
    if (mmx_size==23) return; //finished, was multiple of 8
1457 6e42e6c4 Diego Biurrun
1458
    src+= src_size;
1459
    dst+= src_size;
1460
    src_size= 23-mmx_size;
1461
    src-= src_size;
1462
    dst-= src_size;
1463
#endif
1464
    for (i=0; i<src_size; i+=3)
1465
    {
1466
        register uint8_t x;
1467
        x          = src[i + 2];
1468
        dst[i + 1] = src[i + 1];
1469
        dst[i + 2] = src[i + 0];
1470
        dst[i + 0] = x;
1471
    }
1472 74d35835 Michael Niedermayer
}
1473
1474 b1ec5875 Michael Niedermayer
static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1475 6e42e6c4 Diego Biurrun
                                           long width, long height,
1476
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1477 d9d58d17 Michael Niedermayer
{
1478 6e42e6c4 Diego Biurrun
    long y;
1479 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1480 6e42e6c4 Diego Biurrun
    for (y=0; y<height; y++)
1481
    {
1482 b63f641e Aurelien Jacobs
#if HAVE_MMX
1483 594ff7cc Diego Biurrun
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1484 7ad6469e Diego Pettenò
        __asm__ volatile(
1485 6e42e6c4 Diego Biurrun
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
1486
        ASMALIGN(4)
1487
        "1:                                         \n\t"
1488
        PREFETCH"    32(%1, %%"REG_a", 2)           \n\t"
1489
        PREFETCH"    32(%2, %%"REG_a")              \n\t"
1490
        PREFETCH"    32(%3, %%"REG_a")              \n\t"
1491
        "movq          (%2, %%"REG_a"), %%mm0       \n\t" // U(0)
1492
        "movq                    %%mm0, %%mm2       \n\t" // U(0)
1493
        "movq          (%3, %%"REG_a"), %%mm1       \n\t" // V(0)
1494
        "punpcklbw               %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
1495
        "punpckhbw               %%mm1, %%mm2       \n\t" // UVUV UVUV(8)
1496
1497
        "movq        (%1, %%"REG_a",2), %%mm3       \n\t" // Y(0)
1498
        "movq       8(%1, %%"REG_a",2), %%mm5       \n\t" // Y(8)
1499
        "movq                    %%mm3, %%mm4       \n\t" // Y(0)
1500
        "movq                    %%mm5, %%mm6       \n\t" // Y(8)
1501
        "punpcklbw               %%mm0, %%mm3       \n\t" // YUYV YUYV(0)
1502
        "punpckhbw               %%mm0, %%mm4       \n\t" // YUYV YUYV(4)
1503
        "punpcklbw               %%mm2, %%mm5       \n\t" // YUYV YUYV(8)
1504
        "punpckhbw               %%mm2, %%mm6       \n\t" // YUYV YUYV(12)
1505
1506
        MOVNTQ"                  %%mm3,   (%0, %%"REG_a", 4)    \n\t"
1507
        MOVNTQ"                  %%mm4,  8(%0, %%"REG_a", 4)    \n\t"
1508
        MOVNTQ"                  %%mm5, 16(%0, %%"REG_a", 4)    \n\t"
1509
        MOVNTQ"                  %%mm6, 24(%0, %%"REG_a", 4)    \n\t"
1510
1511
        "add                        $8, %%"REG_a"   \n\t"
1512
        "cmp                        %4, %%"REG_a"   \n\t"
1513
        " jb                        1b              \n\t"
1514 9326d3f3 Michael Niedermayer
        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1515 6e42e6c4 Diego Biurrun
        : "%"REG_a
1516
        );
1517 4060205b Michael Niedermayer
#else
1518 b3b8bf64 Michael Niedermayer
1519 b63f641e Aurelien Jacobs
#if ARCH_ALPHA && HAVE_MVI
1520 6e42e6c4 Diego Biurrun
#define pl2yuy2(n)                  \
1521
    y1 = yc[n];                     \
1522
    y2 = yc2[n];                    \
1523
    u = uc[n];                      \
1524
    v = vc[n];                      \
1525 7ad6469e Diego Pettenò
    __asm__("unpkbw %1, %0" : "=r"(y1) : "r"(y1));  \
1526
    __asm__("unpkbw %1, %0" : "=r"(y2) : "r"(y2));  \
1527
    __asm__("unpkbl %1, %0" : "=r"(u) : "r"(u));    \
1528
    __asm__("unpkbl %1, %0" : "=r"(v) : "r"(v));    \
1529 6e42e6c4 Diego Biurrun
    yuv1 = (u << 8) + (v << 24);                \
1530
    yuv2 = yuv1 + y2;               \
1531
    yuv1 += y1;                     \
1532
    qdst[n]  = yuv1;                \
1533
    qdst2[n] = yuv2;
1534
1535
        int i;
1536
        uint64_t *qdst = (uint64_t *) dst;
1537
        uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
1538
        const uint32_t *yc = (uint32_t *) ysrc;
1539
        const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
1540
        const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
1541
        for (i = 0; i < chromWidth; i += 8){
1542
            uint64_t y1, y2, yuv1, yuv2;
1543
            uint64_t u, v;
1544
            /* Prefetch */
1545 7ad6469e Diego Pettenò
            __asm__("ldq $31,64(%0)" :: "r"(yc));
1546
            __asm__("ldq $31,64(%0)" :: "r"(yc2));
1547
            __asm__("ldq $31,64(%0)" :: "r"(uc));
1548
            __asm__("ldq $31,64(%0)" :: "r"(vc));
1549 6e42e6c4 Diego Biurrun
1550
            pl2yuy2(0);
1551
            pl2yuy2(1);
1552
            pl2yuy2(2);
1553
            pl2yuy2(3);
1554
1555
            yc    += 4;
1556
            yc2   += 4;
1557
            uc    += 4;
1558
            vc    += 4;
1559
            qdst  += 4;
1560
            qdst2 += 4;
1561
        }
1562
        y++;
1563
        ysrc += lumStride;
1564
        dst += dstStride;
1565 b3b8bf64 Michael Niedermayer
1566 02a6a6ee Diego Biurrun
#elif HAVE_FAST_64BIT
1567 6e42e6c4 Diego Biurrun
        int i;
1568
        uint64_t *ldst = (uint64_t *) dst;
1569
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1570
        for (i = 0; i < chromWidth; i += 2){
1571
            uint64_t k, l;
1572
            k = yc[0] + (uc[0] << 8) +
1573
                (yc[1] << 16) + (vc[0] << 24);
1574
            l = yc[2] + (uc[1] << 8) +
1575
                (yc[3] << 16) + (vc[1] << 24);
1576
            *ldst++ = k + (l << 32);
1577
            yc += 4;
1578
            uc += 2;
1579
            vc += 2;
1580
        }
1581 0d9f3d85 Arpi
1582
#else
1583 6e42e6c4 Diego Biurrun
        int i, *idst = (int32_t *) dst;
1584
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1585
        for (i = 0; i < chromWidth; i++){
1586 da7f8893 Michael Niedermayer
#ifdef WORDS_BIGENDIAN
1587 6e42e6c4 Diego Biurrun
            *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
1588
                (yc[1] << 8) + (vc[0] << 0);
1589 da7f8893 Michael Niedermayer
#else
1590 6e42e6c4 Diego Biurrun
            *idst++ = yc[0] + (uc[0] << 8) +
1591
                (yc[1] << 16) + (vc[0] << 24);
1592
#endif
1593
            yc += 2;
1594
            uc++;
1595
            vc++;
1596
        }
1597
#endif
1598
#endif
1599 8916b4b5 Benoit Fouet
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
1600 6e42e6c4 Diego Biurrun
        {
1601
            usrc += chromStride;
1602
            vsrc += chromStride;
1603
        }
1604
        ysrc += lumStride;
1605
        dst  += dstStride;
1606
    }
1607 b63f641e Aurelien Jacobs
#if HAVE_MMX
1608 7ad6469e Diego Pettenò
__asm__(    EMMS"       \n\t"
1609 6e42e6c4 Diego Biurrun
        SFENCE"     \n\t"
1610 42b5fcb8 Michael Niedermayer
        :::"memory");
1611 4060205b Michael Niedermayer
#endif
1612 d9d58d17 Michael Niedermayer
}
1613
1614 dabcdbc4 Michael Niedermayer
/**
1615 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1616
 * (If this is a problem for anyone then tell me, and I will fix it.)
1617 dabcdbc4 Michael Niedermayer
 */
1618 b1ec5875 Michael Niedermayer
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1619 6e42e6c4 Diego Biurrun
                                      long width, long height,
1620
                                      long lumStride, long chromStride, long dstStride)
1621 b1ec5875 Michael Niedermayer
{
1622 6e42e6c4 Diego Biurrun
    //FIXME interpolate chroma
1623
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1624 caeaabe7 Alex Beregszaszi
}
1625
1626
static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1627 6e42e6c4 Diego Biurrun
                                           long width, long height,
1628
                                           long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
1629 caeaabe7 Alex Beregszaszi
{
1630 6e42e6c4 Diego Biurrun
    long y;
1631 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1632 6e42e6c4 Diego Biurrun
    for (y=0; y<height; y++)
1633
    {
1634 b63f641e Aurelien Jacobs
#if HAVE_MMX
1635 594ff7cc Diego Biurrun
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1636 7ad6469e Diego Pettenò
        __asm__ volatile(
1637 6e42e6c4 Diego Biurrun
        "xor                %%"REG_a", %%"REG_a"    \n\t"
1638
        ASMALIGN(4)
1639
        "1:                                         \n\t"
1640
        PREFETCH"   32(%1, %%"REG_a", 2)            \n\t"
1641
        PREFETCH"   32(%2, %%"REG_a")               \n\t"
1642
        PREFETCH"   32(%3, %%"REG_a")               \n\t"
1643
        "movq         (%2, %%"REG_a"), %%mm0        \n\t" // U(0)
1644
        "movq                   %%mm0, %%mm2        \n\t" // U(0)
1645
        "movq         (%3, %%"REG_a"), %%mm1        \n\t" // V(0)
1646
        "punpcklbw              %%mm1, %%mm0        \n\t" // UVUV UVUV(0)
1647
        "punpckhbw              %%mm1, %%mm2        \n\t" // UVUV UVUV(8)
1648
1649
        "movq       (%1, %%"REG_a",2), %%mm3        \n\t" // Y(0)
1650
        "movq      8(%1, %%"REG_a",2), %%mm5        \n\t" // Y(8)
1651
        "movq                   %%mm0, %%mm4        \n\t" // Y(0)
1652
        "movq                   %%mm2, %%mm6        \n\t" // Y(8)
1653
        "punpcklbw              %%mm3, %%mm0        \n\t" // YUYV YUYV(0)
1654
        "punpckhbw              %%mm3, %%mm4        \n\t" // YUYV YUYV(4)
1655
        "punpcklbw              %%mm5, %%mm2        \n\t" // YUYV YUYV(8)
1656
        "punpckhbw              %%mm5, %%mm6        \n\t" // YUYV YUYV(12)
1657
1658
        MOVNTQ"                 %%mm0,   (%0, %%"REG_a", 4)     \n\t"
1659
        MOVNTQ"                 %%mm4,  8(%0, %%"REG_a", 4)     \n\t"
1660
        MOVNTQ"                 %%mm2, 16(%0, %%"REG_a", 4)     \n\t"
1661
        MOVNTQ"                 %%mm6, 24(%0, %%"REG_a", 4)     \n\t"
1662
1663
        "add                       $8, %%"REG_a"    \n\t"
1664
        "cmp                       %4, %%"REG_a"    \n\t"
1665
        " jb                       1b               \n\t"
1666 9326d3f3 Michael Niedermayer
        ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
1667 6e42e6c4 Diego Biurrun
        : "%"REG_a
1668
        );
1669 7ac25f2d Michael Niedermayer
#else
1670 594ff7cc Diego Biurrun
//FIXME adapt the Alpha ASM code from yv12->yuy2
1671 7ac25f2d Michael Niedermayer
1672 02a6a6ee Diego Biurrun
#if HAVE_FAST_64BIT
1673 6e42e6c4 Diego Biurrun
        int i;
1674
        uint64_t *ldst = (uint64_t *) dst;
1675
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1676
        for (i = 0; i < chromWidth; i += 2){
1677
            uint64_t k, l;
1678
            k = uc[0] + (yc[0] << 8) +
1679
                (vc[0] << 16) + (yc[1] << 24);
1680
            l = uc[1] + (yc[2] << 8) +
1681
                (vc[1] << 16) + (yc[3] << 24);
1682
            *ldst++ = k + (l << 32);
1683
            yc += 4;
1684
            uc += 2;
1685
            vc += 2;
1686
        }
1687 caeaabe7 Alex Beregszaszi
1688
#else
1689 6e42e6c4 Diego Biurrun
        int i, *idst = (int32_t *) dst;
1690
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
1691
        for (i = 0; i < chromWidth; i++){
1692 da7f8893 Michael Niedermayer
#ifdef WORDS_BIGENDIAN
1693 6e42e6c4 Diego Biurrun
            *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
1694
                (vc[0] << 8) + (yc[1] << 0);
1695 da7f8893 Michael Niedermayer
#else
1696 6e42e6c4 Diego Biurrun
            *idst++ = uc[0] + (yc[0] << 8) +
1697 8a322796 Diego Biurrun
               (vc[0] << 16) + (yc[1] << 24);
1698 6e42e6c4 Diego Biurrun
#endif
1699
            yc += 2;
1700
            uc++;
1701
            vc++;
1702
        }
1703
#endif
1704
#endif
1705 8916b4b5 Benoit Fouet
        if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
1706 6e42e6c4 Diego Biurrun
        {
1707
            usrc += chromStride;
1708
            vsrc += chromStride;
1709
        }
1710
        ysrc += lumStride;
1711
        dst += dstStride;
1712
    }
1713 b63f641e Aurelien Jacobs
#if HAVE_MMX
1714 7ad6469e Diego Pettenò
__asm__(    EMMS"       \n\t"
1715 6e42e6c4 Diego Biurrun
        SFENCE"     \n\t"
1716 7ac25f2d Michael Niedermayer
        :::"memory");
1717
#endif
1718 caeaabe7 Alex Beregszaszi
}
1719
1720
/**
1721 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16
1722
 * (If this is a problem for anyone then tell me, and I will fix it.)
1723 caeaabe7 Alex Beregszaszi
 */
1724
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1725 6e42e6c4 Diego Biurrun
                                      long width, long height,
1726
                                      long lumStride, long chromStride, long dstStride)
1727 caeaabe7 Alex Beregszaszi
{
1728 6e42e6c4 Diego Biurrun
    //FIXME interpolate chroma
1729
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 2);
1730 b1ec5875 Michael Niedermayer
}
1731
1732
/**
1733 594ff7cc Diego Biurrun
 * Width should be a multiple of 16.
1734 b1ec5875 Michael Niedermayer
 */
1735 a6100f39 Baptiste Coudurier
static inline void RENAME(yuv422ptouyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1736
                                         long width, long height,
1737
                                         long lumStride, long chromStride, long dstStride)
1738
{
1739
    RENAME(yuvPlanartouyvy)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1740
}
1741
1742
/**
1743
 * Width should be a multiple of 16.
1744
 */
1745 b1ec5875 Michael Niedermayer
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1746 6e42e6c4 Diego Biurrun
                                         long width, long height,
1747
                                         long lumStride, long chromStride, long dstStride)
1748 b1ec5875 Michael Niedermayer
{
1749 6e42e6c4 Diego Biurrun
    RENAME(yuvPlanartoyuy2)(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride, 1);
1750 b1ec5875 Michael Niedermayer
}
1751
1752
/**
1753 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1754
 * (If this is a problem for anyone then tell me, and I will fix it.)
1755 b1ec5875 Michael Niedermayer
 */
1756 1de97d84 Michael Niedermayer
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1757 6e42e6c4 Diego Biurrun
                                      long width, long height,
1758
                                      long lumStride, long chromStride, long srcStride)
1759 d9d58d17 Michael Niedermayer
{
1760 6e42e6c4 Diego Biurrun
    long y;
1761 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
1762 6e42e6c4 Diego Biurrun
    for (y=0; y<height; y+=2)
1763
    {
1764 b63f641e Aurelien Jacobs
#if HAVE_MMX
1765 7ad6469e Diego Pettenò
        __asm__ volatile(
1766 6e42e6c4 Diego Biurrun
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
1767
        "pcmpeqw                 %%mm7, %%mm7       \n\t"
1768
        "psrlw                      $8, %%mm7       \n\t" // FF,00,FF,00...
1769
        ASMALIGN(4)
1770
        "1:                \n\t"
1771
        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
1772
        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
1773
        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
1774
        "movq                    %%mm0, %%mm2       \n\t" // YUYV YUYV(0)
1775
        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(4)
1776
        "psrlw                      $8, %%mm0       \n\t" // U0V0 U0V0(0)
1777
        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(4)
1778
        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(0)
1779
        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(4)
1780
        "packuswb                %%mm1, %%mm0       \n\t" // UVUV UVUV(0)
1781
        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(0)
1782
1783
        MOVNTQ"                  %%mm2, (%1, %%"REG_a", 2)  \n\t"
1784
1785
        "movq     16(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(8)
1786
        "movq     24(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(12)
1787
        "movq                    %%mm1, %%mm3       \n\t" // YUYV YUYV(8)
1788
        "movq                    %%mm2, %%mm4       \n\t" // YUYV YUYV(12)
1789
        "psrlw                      $8, %%mm1       \n\t" // U0V0 U0V0(8)
1790
        "psrlw                      $8, %%mm2       \n\t" // U0V0 U0V0(12)
1791
        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(8)
1792
        "pand                    %%mm7, %%mm4       \n\t" // Y0Y0 Y0Y0(12)
1793
        "packuswb                %%mm2, %%mm1       \n\t" // UVUV UVUV(8)
1794
        "packuswb                %%mm4, %%mm3       \n\t" // YYYY YYYY(8)
1795
1796
        MOVNTQ"                  %%mm3, 8(%1, %%"REG_a", 2) \n\t"
1797
1798
        "movq                    %%mm0, %%mm2       \n\t" // UVUV UVUV(0)
1799
        "movq                    %%mm1, %%mm3       \n\t" // UVUV UVUV(8)
1800
        "psrlw                      $8, %%mm0       \n\t" // V0V0 V0V0(0)
1801
        "psrlw                      $8, %%mm1       \n\t" // V0V0 V0V0(8)
1802
        "pand                    %%mm7, %%mm2       \n\t" // U0U0 U0U0(0)
1803
        "pand                    %%mm7, %%mm3       \n\t" // U0U0 U0U0(8)
1804
        "packuswb                %%mm1, %%mm0       \n\t" // VVVV VVVV(0)
1805
        "packuswb                %%mm3, %%mm2       \n\t" // UUUU UUUU(0)
1806
1807
        MOVNTQ"                  %%mm0, (%3, %%"REG_a")     \n\t"
1808
        MOVNTQ"                  %%mm2, (%2, %%"REG_a")     \n\t"
1809
1810
        "add                        $8, %%"REG_a"   \n\t"
1811
        "cmp                        %4, %%"REG_a"   \n\t"
1812
        " jb                        1b              \n\t"
1813 9326d3f3 Michael Niedermayer
        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1814 6e42e6c4 Diego Biurrun
        : "memory", "%"REG_a
1815
        );
1816
1817
        ydst += lumStride;
1818
        src  += srcStride;
1819
1820 7ad6469e Diego Pettenò
        __asm__ volatile(
1821 6e42e6c4 Diego Biurrun
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
1822
        ASMALIGN(4)
1823
        "1:                                         \n\t"
1824
        PREFETCH" 64(%0, %%"REG_a", 4)              \n\t"
1825
        "movq       (%0, %%"REG_a", 4), %%mm0       \n\t" // YUYV YUYV(0)
1826
        "movq      8(%0, %%"REG_a", 4), %%mm1       \n\t" // YUYV YUYV(4)
1827
        "movq     16(%0, %%"REG_a", 4), %%mm2       \n\t" // YUYV YUYV(8)
1828
        "movq     24(%0, %%"REG_a", 4), %%mm3       \n\t" // YUYV YUYV(12)
1829
        "pand                    %%mm7, %%mm0       \n\t" // Y0Y0 Y0Y0(0)
1830
        "pand                    %%mm7, %%mm1       \n\t" // Y0Y0 Y0Y0(4)
1831
        "pand                    %%mm7, %%mm2       \n\t" // Y0Y0 Y0Y0(8)
1832
        "pand                    %%mm7, %%mm3       \n\t" // Y0Y0 Y0Y0(12)
1833
        "packuswb                %%mm1, %%mm0       \n\t" // YYYY YYYY(0)
1834
        "packuswb                %%mm3, %%mm2       \n\t" // YYYY YYYY(8)
1835
1836
        MOVNTQ"                  %%mm0,  (%1, %%"REG_a", 2) \n\t"
1837
        MOVNTQ"                  %%mm2, 8(%1, %%"REG_a", 2) \n\t"
1838
1839
        "add                        $8, %%"REG_a"   \n\t"
1840
        "cmp                        %4, %%"REG_a"   \n\t"
1841
        " jb                        1b              \n\t"
1842
1843 9326d3f3 Michael Niedermayer
        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
1844 6e42e6c4 Diego Biurrun
        : "memory", "%"REG_a
1845
        );
1846 bd09433f Michael Niedermayer
#else
1847 6e42e6c4 Diego Biurrun
        long i;
1848
        for (i=0; i<chromWidth; i++)
1849
        {
1850
            ydst[2*i+0]     = src[4*i+0];
1851
            udst[i]     = src[4*i+1];
1852
            ydst[2*i+1]     = src[4*i+2];
1853
            vdst[i]     = src[4*i+3];
1854
        }
1855
        ydst += lumStride;
1856
        src  += srcStride;
1857
1858
        for (i=0; i<chromWidth; i++)
1859
        {
1860
            ydst[2*i+0]     = src[4*i+0];
1861
            ydst[2*i+1]     = src[4*i+2];
1862
        }
1863
#endif
1864
        udst += chromStride;
1865
        vdst += chromStride;
1866
        ydst += lumStride;
1867
        src  += srcStride;
1868
    }
1869 b63f641e Aurelien Jacobs
#if HAVE_MMX
1870 7ad6469e Diego Pettenò
__asm__ volatile(   EMMS"       \n\t"
1871 6e42e6c4 Diego Biurrun
                SFENCE"     \n\t"
1872
                :::"memory");
1873 bd09433f Michael Niedermayer
#endif
1874 42b5fcb8 Michael Niedermayer
}
1875 81c0590e Arpi
1876 d661d18d Alex Beregszaszi
static inline void RENAME(yvu9toyv12)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc,
1877 6e42e6c4 Diego Biurrun
                                      uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1878
                                      long width, long height, long lumStride, long chromStride)
1879 d661d18d Alex Beregszaszi
{
1880 6e42e6c4 Diego Biurrun
    /* Y Plane */
1881
    memcpy(ydst, ysrc, width*height);
1882 d661d18d Alex Beregszaszi
1883 6e42e6c4 Diego Biurrun
    /* XXX: implement upscaling for U,V */
1884 d661d18d Alex Beregszaszi
}
1885
1886 7f526efd Reimar Döffinger
static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWidth, long srcHeight, long srcStride, long dstStride)
1887 b241cbf2 Michael Niedermayer
{
1888 6e42e6c4 Diego Biurrun
    long x,y;
1889 6a4970ab Diego Biurrun
1890 6e42e6c4 Diego Biurrun
    dst[0]= src[0];
1891 6a4970ab Diego Biurrun
1892 6e42e6c4 Diego Biurrun
    // first line
1893
    for (x=0; x<srcWidth-1; x++){
1894
        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
1895
        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
1896
    }
1897
    dst[2*srcWidth-1]= src[srcWidth-1];
1898 6a4970ab Diego Biurrun
1899 b2609d4c Michael Niedermayer
        dst+= dstStride;
1900 b241cbf2 Michael Niedermayer
1901 6e42e6c4 Diego Biurrun
    for (y=1; y<srcHeight; y++){
1902 f4406ec1 Diego Biurrun
#if HAVE_MMX2 || HAVE_AMD3DNOW
1903 d0ce212a Ramiro Polla
        const x86_reg mmxSize= srcWidth&~15;
1904 7ad6469e Diego Pettenò
        __asm__ volatile(
1905 6e42e6c4 Diego Biurrun
        "mov           %4, %%"REG_a"            \n\t"
1906
        "1:                                     \n\t"
1907
        "movq         (%0, %%"REG_a"), %%mm0    \n\t"
1908
        "movq         (%1, %%"REG_a"), %%mm1    \n\t"
1909
        "movq        1(%0, %%"REG_a"), %%mm2    \n\t"
1910
        "movq        1(%1, %%"REG_a"), %%mm3    \n\t"
1911
        "movq       -1(%0, %%"REG_a"), %%mm4    \n\t"
1912
        "movq       -1(%1, %%"REG_a"), %%mm5    \n\t"
1913
        PAVGB"                  %%mm0, %%mm5    \n\t"
1914
        PAVGB"                  %%mm0, %%mm3    \n\t"
1915
        PAVGB"                  %%mm0, %%mm5    \n\t"
1916
        PAVGB"                  %%mm0, %%mm3    \n\t"
1917
        PAVGB"                  %%mm1, %%mm4    \n\t"
1918
        PAVGB"                  %%mm1, %%mm2    \n\t"
1919
        PAVGB"                  %%mm1, %%mm4    \n\t"
1920
        PAVGB"                  %%mm1, %%mm2    \n\t"
1921
        "movq                   %%mm5, %%mm7    \n\t"
1922
        "movq                   %%mm4, %%mm6    \n\t"
1923
        "punpcklbw              %%mm3, %%mm5    \n\t"
1924
        "punpckhbw              %%mm3, %%mm7    \n\t"
1925
        "punpcklbw              %%mm2, %%mm4    \n\t"
1926
        "punpckhbw              %%mm2, %%mm6    \n\t"
1927 b241cbf2 Michael Niedermayer
#if 1
1928 6e42e6c4 Diego Biurrun
        MOVNTQ"                 %%mm5,  (%2, %%"REG_a", 2)  \n\t"
1929
        MOVNTQ"                 %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
1930
        MOVNTQ"                 %%mm4,  (%3, %%"REG_a", 2)  \n\t"
1931
        MOVNTQ"                 %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
1932 b241cbf2 Michael Niedermayer
#else
1933 6e42e6c4 Diego Biurrun
        "movq                   %%mm5,  (%2, %%"REG_a", 2)  \n\t"
1934
        "movq                   %%mm7, 8(%2, %%"REG_a", 2)  \n\t"
1935
        "movq                   %%mm4,  (%3, %%"REG_a", 2)  \n\t"
1936
        "movq                   %%mm6, 8(%3, %%"REG_a", 2)  \n\t"
1937
#endif
1938
        "add                       $8, %%"REG_a"            \n\t"
1939
        " js                       1b                       \n\t"
1940
        :: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
1941
           "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
1942
           "g" (-mmxSize)
1943
        : "%"REG_a
1944
1945
        );
1946 b241cbf2 Michael Niedermayer
#else
1947 9326d3f3 Michael Niedermayer
        const x86_reg mmxSize=1;
1948 6e42e6c4 Diego Biurrun
#endif
1949
        dst[0        ]= (3*src[0] +   src[srcStride])>>2;
1950
        dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
1951
1952
        for (x=mmxSize-1; x<srcWidth-1; x++){
1953
            dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
1954
            dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
1955
            dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
1956
            dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
1957
        }
1958
        dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
1959
        dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
1960
1961
        dst+=dstStride*2;
1962
        src+=srcStride;
1963
    }
1964 6a4970ab Diego Biurrun
1965 6e42e6c4 Diego Biurrun
    // last line
1966 b2609d4c Michael Niedermayer
#if 1
1967 6e42e6c4 Diego Biurrun
    dst[0]= src[0];
1968 6a4970ab Diego Biurrun
1969 6e42e6c4 Diego Biurrun
    for (x=0; x<srcWidth-1; x++){
1970
        dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
1971
        dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
1972
    }
1973
    dst[2*srcWidth-1]= src[srcWidth-1];
1974 b2609d4c Michael Niedermayer
#else
1975 6e42e6c4 Diego Biurrun
    for (x=0; x<srcWidth; x++){
1976
        dst[2*x+0]=
1977
        dst[2*x+1]= src[x];
1978
    }
1979 b2609d4c Michael Niedermayer
#endif
1980
1981 b63f641e Aurelien Jacobs
#if HAVE_MMX
1982 7ad6469e Diego Pettenò
__asm__ volatile(   EMMS"       \n\t"
1983 6e42e6c4 Diego Biurrun
                SFENCE"     \n\t"
1984
                :::"memory");
1985 b241cbf2 Michael Niedermayer
#endif
1986
}
1987
1988 81c0590e Arpi
/**
1989 8a322796 Diego Biurrun
 * Height should be a multiple of 2 and width should be a multiple of 16.
1990
 * (If this is a problem for anyone then tell me, and I will fix it.)
1991
 * Chrominance data is only taken from every second line, others are ignored.
1992 594ff7cc Diego Biurrun
 * FIXME: Write HQ version.
1993 81c0590e Arpi
 */
1994 1de97d84 Michael Niedermayer
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1995 6e42e6c4 Diego Biurrun
                                      long width, long height,
1996
                                      long lumStride, long chromStride, long srcStride)
1997 81c0590e Arpi
{
1998 6e42e6c4 Diego Biurrun
    long y;
1999 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
2000 6e42e6c4 Diego Biurrun
    for (y=0; y<height; y+=2)
2001
    {
2002 b63f641e Aurelien Jacobs
#if HAVE_MMX
2003 7ad6469e Diego Pettenò
        __asm__ volatile(
2004 217d8202 Reimar Döffinger
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
2005 6e42e6c4 Diego Biurrun
        "pcmpeqw             %%mm7, %%mm7   \n\t"
2006
        "psrlw                  $8, %%mm7   \n\t" // FF,00,FF,00...
2007
        ASMALIGN(4)
2008
        "1:                                 \n\t"
2009 217d8202 Reimar Döffinger
        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
2010
        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // UYVY UYVY(0)
2011
        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(4)
2012 6e42e6c4 Diego Biurrun
        "movq                %%mm0, %%mm2   \n\t" // UYVY UYVY(0)
2013
        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(4)
2014
        "pand                %%mm7, %%mm0   \n\t" // U0V0 U0V0(0)
2015
        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(4)
2016
        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(0)
2017
        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(4)
2018
        "packuswb            %%mm1, %%mm0   \n\t" // UVUV UVUV(0)
2019
        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(0)
2020
2021 217d8202 Reimar Döffinger
        MOVNTQ"              %%mm2,  (%1, %%"REG_a", 2) \n\t"
2022 6e42e6c4 Diego Biurrun
2023 217d8202 Reimar Döffinger
        "movq     16(%0, %%"REG_a", 4), %%mm1   \n\t" // UYVY UYVY(8)
2024
        "movq     24(%0, %%"REG_a", 4), %%mm2   \n\t" // UYVY UYVY(12)
2025 6e42e6c4 Diego Biurrun
        "movq                %%mm1, %%mm3   \n\t" // UYVY UYVY(8)
2026
        "movq                %%mm2, %%mm4   \n\t" // UYVY UYVY(12)
2027
        "pand                %%mm7, %%mm1   \n\t" // U0V0 U0V0(8)
2028
        "pand                %%mm7, %%mm2   \n\t" // U0V0 U0V0(12)
2029
        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(8)
2030
        "psrlw                  $8, %%mm4   \n\t" // Y0Y0 Y0Y0(12)
2031
        "packuswb            %%mm2, %%mm1   \n\t" // UVUV UVUV(8)
2032
        "packuswb            %%mm4, %%mm3   \n\t" // YYYY YYYY(8)
2033
2034 217d8202 Reimar Döffinger
        MOVNTQ"              %%mm3, 8(%1, %%"REG_a", 2) \n\t"
2035 6e42e6c4 Diego Biurrun
2036
        "movq                %%mm0, %%mm2   \n\t" // UVUV UVUV(0)
2037
        "movq                %%mm1, %%mm3   \n\t" // UVUV UVUV(8)
2038
        "psrlw                  $8, %%mm0   \n\t" // V0V0 V0V0(0)
2039
        "psrlw                  $8, %%mm1   \n\t" // V0V0 V0V0(8)
2040
        "pand                %%mm7, %%mm2   \n\t" // U0U0 U0U0(0)
2041
        "pand                %%mm7, %%mm3   \n\t" // U0U0 U0U0(8)
2042
        "packuswb            %%mm1, %%mm0   \n\t" // VVVV VVVV(0)
2043
        "packuswb            %%mm3, %%mm2   \n\t" // UUUU UUUU(0)
2044
2045 217d8202 Reimar Döffinger
        MOVNTQ"              %%mm0, (%3, %%"REG_a") \n\t"
2046
        MOVNTQ"              %%mm2, (%2, %%"REG_a") \n\t"
2047 6e42e6c4 Diego Biurrun
2048 217d8202 Reimar Döffinger
        "add                    $8, %%"REG_a"   \n\t"
2049
        "cmp                    %4, %%"REG_a"   \n\t"
2050 6e42e6c4 Diego Biurrun
        " jb                    1b          \n\t"
2051 9326d3f3 Michael Niedermayer
        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
2052 217d8202 Reimar Döffinger
        : "memory", "%"REG_a
2053 6e42e6c4 Diego Biurrun
        );
2054
2055
        ydst += lumStride;
2056
        src  += srcStride;
2057
2058 7ad6469e Diego Pettenò
        __asm__ volatile(
2059 217d8202 Reimar Döffinger
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
2060 6e42e6c4 Diego Biurrun
        ASMALIGN(4)
2061
        "1:                                 \n\t"
2062 217d8202 Reimar Döffinger
        PREFETCH" 64(%0, %%"REG_a", 4)          \n\t"
2063
        "movq       (%0, %%"REG_a", 4), %%mm0   \n\t" // YUYV YUYV(0)
2064
        "movq      8(%0, %%"REG_a", 4), %%mm1   \n\t" // YUYV YUYV(4)
2065
        "movq     16(%0, %%"REG_a", 4), %%mm2   \n\t" // YUYV YUYV(8)
2066
        "movq     24(%0, %%"REG_a", 4), %%mm3   \n\t" // YUYV YUYV(12)
2067 6e42e6c4 Diego Biurrun
        "psrlw                  $8, %%mm0   \n\t" // Y0Y0 Y0Y0(0)
2068
        "psrlw                  $8, %%mm1   \n\t" // Y0Y0 Y0Y0(4)
2069
        "psrlw                  $8, %%mm2   \n\t" // Y0Y0 Y0Y0(8)
2070
        "psrlw                  $8, %%mm3   \n\t" // Y0Y0 Y0Y0(12)
2071
        "packuswb            %%mm1, %%mm0   \n\t" // YYYY YYYY(0)
2072
        "packuswb            %%mm3, %%mm2   \n\t" // YYYY YYYY(8)
2073
2074 217d8202 Reimar Döffinger
        MOVNTQ"              %%mm0,  (%1, %%"REG_a", 2) \n\t"
2075
        MOVNTQ"              %%mm2, 8(%1, %%"REG_a", 2) \n\t"
2076 6e42e6c4 Diego Biurrun
2077 217d8202 Reimar Döffinger
        "add                    $8, %%"REG_a"   \n\t"
2078
        "cmp                    %4, %%"REG_a"   \n\t"
2079 6e42e6c4 Diego Biurrun
        " jb                    1b          \n\t"
2080
2081 9326d3f3 Michael Niedermayer
        ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
2082 217d8202 Reimar Döffinger
        : "memory", "%"REG_a
2083 6e42e6c4 Diego Biurrun
        );
2084 ed8c0670 Michael Niedermayer
#else
2085 6e42e6c4 Diego Biurrun
        long i;
2086
        for (i=0; i<chromWidth; i++)
2087
        {
2088
            udst[i]     = src[4*i+0];
2089
            ydst[2*i+0] = src[4*i+1];
2090
            vdst[i]     = src[4*i+2];
2091
            ydst[2*i+1] = src[4*i+3];
2092
        }
2093
        ydst += lumStride;
2094
        src  += srcStride;
2095
2096
        for (i=0; i<chromWidth; i++)
2097
        {
2098
            ydst[2*i+0] = src[4*i+1];
2099
            ydst[2*i+1] = src[4*i+3];
2100
        }
2101
#endif
2102
        udst += chromStride;
2103
        vdst += chromStride;
2104
        ydst += lumStride;
2105
        src  += srcStride;
2106
    }
2107 b63f641e Aurelien Jacobs
#if HAVE_MMX
2108 7ad6469e Diego Pettenò
__asm__ volatile(   EMMS"       \n\t"
2109 6e42e6c4 Diego Biurrun
                SFENCE"     \n\t"
2110
                :::"memory");
2111 ed8c0670 Michael Niedermayer
#endif
2112 81c0590e Arpi
}
2113
2114 1de97d84 Michael Niedermayer
/**
2115 8a322796 Diego Biurrun
 * Both height and width should be multiples of 2.
2116
 * (If this is a problem for anyone then tell me, and I will fix it.)
2117
 * Chrominance data is only taken from every second line,
2118 594ff7cc Diego Biurrun
 * others are ignored in the C version.
2119
 * FIXME: Write HQ version.
2120 1de97d84 Michael Niedermayer
 */
2121
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2122 6e42e6c4 Diego Biurrun
                                       long width, long height,
2123
                                       long lumStride, long chromStride, long srcStride)
2124 1de97d84 Michael Niedermayer
{
2125 6e42e6c4 Diego Biurrun
    long y;
2126 9326d3f3 Michael Niedermayer
    const x86_reg chromWidth= width>>1;
2127 b63f641e Aurelien Jacobs
#if HAVE_MMX
2128 6e42e6c4 Diego Biurrun
    for (y=0; y<height-2; y+=2)
2129
    {
2130
        long i;
2131
        for (i=0; i<2; i++)
2132
        {
2133 7ad6469e Diego Pettenò
            __asm__ volatile(
2134 6e42e6c4 Diego Biurrun
            "mov                        %2, %%"REG_a"   \n\t"
2135 5802683a Reimar Döffinger
            "movq  "MANGLE(ff_bgr2YCoeff)", %%mm6       \n\t"
2136
            "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
2137 6e42e6c4 Diego Biurrun
            "pxor                    %%mm7, %%mm7       \n\t"
2138
            "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2139
            ASMALIGN(4)
2140
            "1:                                         \n\t"
2141
            PREFETCH"    64(%0, %%"REG_d")              \n\t"
2142
            "movd          (%0, %%"REG_d"), %%mm0       \n\t"
2143
            "movd         3(%0, %%"REG_d"), %%mm1       \n\t"
2144
            "punpcklbw               %%mm7, %%mm0       \n\t"
2145
            "punpcklbw               %%mm7, %%mm1       \n\t"
2146
            "movd         6(%0, %%"REG_d"), %%mm2       \n\t"
2147
            "movd         9(%0, %%"REG_d"), %%mm3       \n\t"
2148
            "punpcklbw               %%mm7, %%mm2       \n\t"
2149
            "punpcklbw               %%mm7, %%mm3       \n\t"
2150
            "pmaddwd                 %%mm6, %%mm0       \n\t"
2151
            "pmaddwd                 %%mm6, %%mm1       \n\t"
2152
            "pmaddwd                 %%mm6, %%mm2       \n\t"
2153
            "pmaddwd                 %%mm6, %%mm3       \n\t"
2154 21316f3c Michael Niedermayer
#ifndef FAST_BGR2YV12
2155 6e42e6c4 Diego Biurrun
            "psrad                      $8, %%mm0       \n\t"
2156
            "psrad                      $8, %%mm1       \n\t"
2157
            "psrad                      $8, %%mm2       \n\t"
2158
            "psrad                      $8, %%mm3       \n\t"
2159
#endif
2160
            "packssdw                %%mm1, %%mm0       \n\t"
2161
            "packssdw                %%mm3, %%mm2       \n\t"
2162
            "pmaddwd                 %%mm5, %%mm0       \n\t"
2163
            "pmaddwd                 %%mm5, %%mm2       \n\t"
2164
            "packssdw                %%mm2, %%mm0       \n\t"
2165
            "psraw                      $7, %%mm0       \n\t"
2166
2167
            "movd        12(%0, %%"REG_d"), %%mm4       \n\t"
2168
            "movd        15(%0, %%"REG_d"), %%mm1       \n\t"
2169
            "punpcklbw               %%mm7, %%mm4       \n\t"
2170
            "punpcklbw               %%mm7, %%mm1       \n\t"
2171
            "movd        18(%0, %%"REG_d"), %%mm2       \n\t"
2172
            "movd        21(%0, %%"REG_d"), %%mm3       \n\t"
2173
            "punpcklbw               %%mm7, %%mm2       \n\t"
2174
            "punpcklbw               %%mm7, %%mm3       \n\t"
2175
            "pmaddwd                 %%mm6, %%mm4       \n\t"
2176
            "pmaddwd                 %%mm6, %%mm1       \n\t"
2177
            "pmaddwd                 %%mm6, %%mm2       \n\t"
2178
            "pmaddwd                 %%mm6, %%mm3       \n\t"
2179 21316f3c Michael Niedermayer
#ifndef FAST_BGR2YV12
2180 6e42e6c4 Diego Biurrun
            "psrad                      $8, %%mm4       \n\t"
2181
            "psrad                      $8, %%mm1       \n\t"
2182
            "psrad                      $8, %%mm2       \n\t"
2183
            "psrad                      $8, %%mm3       \n\t"
2184
#endif
2185
            "packssdw                %%mm1, %%mm4       \n\t"
2186
            "packssdw                %%mm3, %%mm2       \n\t"
2187
            "pmaddwd                 %%mm5, %%mm4       \n\t"
2188
            "pmaddwd                 %%mm5, %%mm2       \n\t"
2189
            "add                       $24, %%"REG_d"   \n\t"
2190
            "packssdw                %%mm2, %%mm4       \n\t"
2191
            "psraw                      $7, %%mm4       \n\t"
2192
2193
            "packuswb                %%mm4, %%mm0       \n\t"
2194 5802683a Reimar Döffinger
            "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0    \n\t"
2195 6e42e6c4 Diego Biurrun
2196
            MOVNTQ"                  %%mm0, (%1, %%"REG_a") \n\t"
2197
            "add                        $8,      %%"REG_a"  \n\t"
2198
            " js                        1b                  \n\t"
2199 d0ce212a Ramiro Polla
            : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
2200 6e42e6c4 Diego Biurrun
            : "%"REG_a, "%"REG_d
2201
            );
2202
            ydst += lumStride;
2203
            src  += srcStride;
2204
        }
2205
        src -= srcStride*2;
2206 7ad6469e Diego Pettenò
        __asm__ volatile(
2207 6e42e6c4 Diego Biurrun
        "mov                        %4, %%"REG_a"   \n\t"
2208 5802683a Reimar Döffinger
        "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
2209
        "movq  "MANGLE(ff_bgr2UCoeff)", %%mm6       \n\t"
2210 6e42e6c4 Diego Biurrun
        "pxor                    %%mm7, %%mm7       \n\t"
2211
        "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d"   \n\t"
2212 ce505b85 Diego Biurrun
        "add                 %%"REG_d", %%"REG_d"   \n\t"
2213 6e42e6c4 Diego Biurrun
        ASMALIGN(4)
2214
        "1:                                         \n\t"
2215
        PREFETCH"    64(%0, %%"REG_d")              \n\t"
2216
        PREFETCH"    64(%1, %%"REG_d")              \n\t"
2217 f4406ec1 Diego Biurrun
#if HAVE_MMX2 || HAVE_AMD3DNOW
2218 6e42e6c4 Diego Biurrun
        "movq          (%0, %%"REG_d"), %%mm0       \n\t"
2219
        "movq          (%1, %%"REG_d"), %%mm1       \n\t"
2220
        "movq         6(%0, %%"REG_d"), %%mm2       \n\t"
2221
        "movq         6(%1, %%"REG_d"), %%mm3       \n\t"
2222
        PAVGB"                   %%mm1, %%mm0       \n\t"
2223
        PAVGB"                   %%mm3, %%mm2       \n\t"
2224
        "movq                    %%mm0, %%mm1       \n\t"
2225
        "movq                    %%mm2, %%mm3       \n\t"
2226
        "psrlq                     $24, %%mm0       \n\t"
2227
        "psrlq                     $24, %%mm2       \n\t"
2228
        PAVGB"                   %%mm1, %%mm0       \n\t"
2229
        PAVGB"                   %%mm3, %%mm2       \n\t"
2230
        "punpcklbw               %%mm7, %%mm0       \n\t"
2231
        "punpcklbw               %%mm7, %%mm2       \n\t"
2232 21316f3c Michael Niedermayer
#else
2233 6e42e6c4 Diego Biurrun
        "movd          (%0, %%"REG_d"), %%mm0       \n\t"
2234
        "movd          (%1, %%"REG_d"), %%mm1       \n\t"
2235
        "movd         3(%0, %%"REG_d"), %%mm2       \n\t"
2236
        "movd         3(%1, %%"REG_d"), %%mm3       \n\t"
2237
        "punpcklbw               %%mm7, %%mm0       \n\t"
2238
        "punpcklbw               %%mm7, %%mm1       \n\t"
2239
        "punpcklbw               %%mm7, %%mm2       \n\t"
2240
        "punpcklbw               %%mm7, %%mm3       \n\t"
2241
        "paddw                   %%mm1, %%mm0       \n\t"
2242
        "paddw                   %%mm3, %%mm2       \n\t"
2243
        "paddw                   %%mm2, %%mm0       \n\t"
2244
        "movd         6(%0, %%"REG_d"), %%mm4       \n\t"
2245
        "movd         6(%1, %%"REG_d"), %%mm1       \n\t"
2246
        "movd         9(%0, %%"REG_d"), %%mm2       \n\t"
2247
        "movd         9(%1, %%"REG_d"), %%mm3       \n\t"
2248
        "punpcklbw               %%mm7, %%mm4       \n\t"
2249
        "punpcklbw               %%mm7, %%mm1       \n\t"
2250
        "punpcklbw               %%mm7, %%mm2       \n\t"
2251
        "punpcklbw               %%mm7, %%mm3       \n\t"
2252
        "paddw                   %%mm1, %%mm4       \n\t"
2253
        "paddw                   %%mm3, %%mm2       \n\t"
2254
        "paddw                   %%mm4, %%mm2       \n\t"
2255
        "psrlw                      $2, %%mm0       \n\t"
2256
        "psrlw                      $2, %%mm2       \n\t"
2257
#endif
2258 5802683a Reimar Döffinger
        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm1       \n\t"
2259
        "movq  "MANGLE(ff_bgr2VCoeff)", %%mm3       \n\t"
2260 6e42e6c4 Diego Biurrun
2261
        "pmaddwd                 %%mm0, %%mm1       \n\t"
2262
        "pmaddwd                 %%mm2, %%mm3       \n\t"
2263
        "pmaddwd                 %%mm6, %%mm0       \n\t"
2264
        "pmaddwd                 %%mm6, %%mm2       \n\t"
2265 21316f3c Michael Niedermayer
#ifndef FAST_BGR2YV12
2266 6e42e6c4 Diego Biurrun
        "psrad                      $8, %%mm0       \n\t"
2267
        "psrad                      $8, %%mm1       \n\t"
2268
        "psrad                      $8, %%mm2       \n\t"
2269
        "psrad                      $8, %%mm3       \n\t"
2270
#endif
2271
        "packssdw                %%mm2, %%mm0       \n\t"
2272
        "packssdw                %%mm3, %%mm1       \n\t"
2273
        "pmaddwd                 %%mm5, %%mm0       \n\t"
2274
        "pmaddwd                 %%mm5, %%mm1       \n\t"
2275
        "packssdw                %%mm1, %%mm0       \n\t" // V1 V0 U1 U0
2276
        "psraw                      $7, %%mm0       \n\t"
2277 21316f3c Michael Niedermayer
2278 f4406ec1 Diego Biurrun
#if HAVE_MMX2 || HAVE_AMD3DNOW
2279 6e42e6c4 Diego Biurrun
        "movq        12(%0, %%"REG_d"), %%mm4       \n\t"
2280
        "movq        12(%1, %%"REG_d"), %%mm1       \n\t"
2281
        "movq        18(%0, %%"REG_d"), %%mm2       \n\t"
2282
        "movq        18(%1, %%"REG_d"), %%mm3       \n\t"