Revision bffc36e0 libswscale/x86/yuv2rgb_template2.c

View differences:

libswscale/x86/yuv2rgb_template2.c
124 124
    "paddsw    %%mm6, %%mm0\n\t"                 \
125 125
    "paddsw    %%mm6, %%mm1\n\t"                 \
126 126
    "paddsw    %%mm6, %%mm2\n\t"                 \
127
\
127

  
128
#define RGB_PACK_INTERLEAVE                  \
128 129
    /* pack and interleave even/odd pixels */    \
129
    "packuswb  %%mm0, %%mm0\n\t"                 \
130
    "packuswb  %%mm1, %%mm1\n\t"                 \
130
    "packuswb  %%mm1, %%mm0\n\t"                 \
131
    "packuswb  %%mm5, %%mm3\n\t"                 \
131 132
    "packuswb  %%mm2, %%mm2\n\t"                 \
132
    "packuswb  %%mm3, %%mm3\n\t"                 \
133
    "packuswb  %%mm5, %%mm5\n\t"                 \
133
    "movq      %%mm0, %%mm1\n\n"                 \
134 134
    "packuswb  %%mm7, %%mm7\n\t"                 \
135 135
    "punpcklbw %%mm3, %%mm0\n\t"                 \
136
    "punpcklbw %%mm5, %%mm1\n\t"                 \
136
    "punpckhbw %%mm3, %%mm1\n\t"                 \
137 137
    "punpcklbw %%mm7, %%mm2\n\t"                 \
138 138

  
139 139
#define YUV2RGB_ENDLOOP(depth)                   \
......
210 210

  
211 211
        YUV2RGB_INITIAL_LOAD
212 212
        YUV2RGB
213
        RGB_PACK_INTERLEAVE
213 214
#ifdef DITHER1XBPP
214 215
        DITHER_RGB
215 216
#endif
......
237 238

  
238 239
        YUV2RGB_INITIAL_LOAD
239 240
        YUV2RGB
241
        RGB_PACK_INTERLEAVE
240 242
#ifdef DITHER1XBPP
241 243
        DITHER_RGB
242 244
#endif
......
247 249
    YUV2RGB_ENDFUNC
248 250
}
249 251

  
252
/*
 * RGB_PACK24(blue, red): asm-string fragment that saturates the 16-bit
 * colour words to bytes and shuffles them into 24-bit packed order.
 *
 * `blue` and `red` are string literals spliced into register names
 * ("%%mm" red), so each must be a single MMX register digit. The inline
 * lane comments are written for the case where `red` selects mm0 and
 * `blue` selects mm1.
 *
 * On entry mm0/mm1/mm2 are packed together with mm3/mm5/mm7; per the lane
 * comments the first of each pair carries the even pixels and the second
 * the odd pixels. On exit, before RGB_PACK24_B runs:
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 * mm6 and the `red` register are used as scratch and do not survive.
 *
 * The fragment ends by expanding RGB_PACK24_B, which performs the stores.
 * That name is looked up when RGB_PACK24 is expanded, so it only has to be
 * defined before the point of use.
 */
#define RGB_PACK24(blue, red)\
253
    "packuswb  %%mm3,      %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\
254
    "packuswb  %%mm5,      %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\
255
    "packuswb  %%mm7,      %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\
256
    "movq      %%mm"red",  %%mm3 \n"\
257
    "movq      %%mm"blue", %%mm6 \n"\
258
    "psrlq     $32,        %%mm"red" \n" /* R1 R3 R5 R7 */\
259
    "punpcklbw %%mm2,      %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\
260
    "punpcklbw %%mm"red",  %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\
261
    "movq      %%mm3,      %%mm5 \n"\
262
    "punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\
263
    "punpcklwd %%mm6,      %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\
264
    "punpckhwd %%mm6,      %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\
265
    RGB_PACK24_B
250 266

  
251
/*
 * RGB_PACK24(red, blue): asm-string fragment that assembles three 8-byte
 * groups of packed output and stores them with MOVNTQ at (%1), 8(%1) and
 * 16(%1), i.e. 24 bytes per expansion.
 *
 * `red` and `blue` are string literals spliced into register names
 * ("%%mm" red). Note the parameter order is (red, blue) here, the reverse
 * of the (blue, red) definition that appears elsewhere in this listing.
 *
 * Each group is built by copying the colour registers into scratch,
 * interleaving bytes and words, then positioning the pieces with 64-bit
 * shifts and OR-ing them together. The `red`, `blue` and mm2 registers are
 * only read; mm3, mm5, mm6 and mm7 are overwritten.
 *
 * NOTE(review): mm2 presumably holds green and mm4 presumably holds zero
 * (it is only ever used as the interleave partner of the red copy);
 * neither is established by this fragment -- confirm against the code that
 * precedes each expansion.
 */
#define RGB_PACK24(red, blue)              \
252
    /* generate first packed RGB octet */  \
253
    "movq      %%mm2,      %%mm5\n\t"      \
254
    "movq      %%mm"blue", %%mm6\n\t"      \
255
    "movq      %%mm"red",  %%mm7\n\t"      \
256
    "punpcklbw %%mm5,      %%mm6\n\t"      \
257
    "punpcklbw %%mm4,      %%mm7\n\t"      \
258
    "movq      %%mm6,      %%mm3\n\t"      \
259
    "punpcklwd %%mm7,      %%mm6\n\t"      \
260
    "psrlq     $32,        %%mm3\n\t"      \
261
    "movq      %%mm6,      %%mm5\n\t"      \
262
    "psllq     $40,        %%mm6\n\t"      \
263
    "psllq     $48,        %%mm3\n\t"      \
264
    "psrlq     $32,        %%mm5\n\t"      \
265
    "psrlq     $40,        %%mm6\n\t"      \
266
    "psllq     $24,        %%mm5\n\t"      \
267
    "por       %%mm3,      %%mm6\n\t"      \
268
    "por       %%mm5,      %%mm6\n\t"      \
269
    MOVNTQ "   %%mm6,      (%1)\n\t"       \
270
\
271
    /* generate second packed RGB octet */ \
272
    "movq      %%mm"red",  %%mm7\n\t"      \
273
    "movq      %%mm2,      %%mm5\n\t"      \
274
    "movq      %%mm"blue", %%mm6\n\t"      \
275
    "punpcklbw %%mm4,      %%mm7\n\t"      \
276
    "punpcklbw %%mm5,      %%mm6\n\t"      \
277
    "movq      %%mm7,      %%mm3\n\t"      \
278
    "punpckhwd %%mm7,      %%mm6\n\t"      \
279
    "psllq     $16,        %%mm3\n\t"      \
280
    "psrlq     $32,        %%mm6\n\t"      \
281
    "psrlq     $48,        %%mm3\n\t"      \
282
    "psllq     $8,         %%mm6\n\t"      \
283
    "movq      %%mm"red",  %%mm7\n\t"      \
284
    "por       %%mm6,      %%mm3\n\t"      \
285
    "movq      %%mm"blue", %%mm6\n\t"      \
286
    "movq      %%mm2,      %%mm5\n\t"      \
287
    "punpckhbw %%mm4,      %%mm7\n\t"      \
288
    "punpckhbw %%mm5,      %%mm6\n\t"      \
289
    "movq      %%mm6,      %%mm5\n\t"      \
290
    "punpcklwd %%mm7,      %%mm6\n\t"      \
291
    "psrlq     $16,        %%mm5\n\t"      \
292
    "psllq     $56,        %%mm5\n\t"      \
293
    "por       %%mm5,      %%mm3\n\t"      \
294
    "psllq     $32,        %%mm6\n\t"      \
295
    "por       %%mm6,      %%mm3\n\t"      \
296
    MOVNTQ "   %%mm3,      8(%1)\n\t"      \
297
\
298
    /* generate third packed RGB octet */  \
299
    "movq      %%mm"red",  %%mm7\n\t"      \
300
    "movq      %%mm2,      %%mm5\n\t"      \
301
    "movq      %%mm2,      %%mm3\n\t"      \
302
    "movq      %%mm"blue", %%mm6\n\t"      \
303
    "punpckhbw %%mm"red",  %%mm3\n\t"      \
304
    "punpckhbw %%mm4,      %%mm7\n\t"      \
305
    "psllq     $32,        %%mm3\n\t"      \
306
    "punpckhbw %%mm5,      %%mm6\n\t"      \
307
    "psrlq     $48,        %%mm3\n\t"      \
308
    "punpckhwd %%mm7,      %%mm6\n\t"      \
309
    "movq      %%mm6,      %%mm7\n\t"      \
310
    "psrlq     $32,        %%mm6\n\t"      \
311
    "psllq     $32,        %%mm7\n\t"      \
312
    "psllq     $40,        %%mm6\n\t"      \
313
    "psrlq     $16,        %%mm7\n\t"      \
314
    "por       %%mm6,      %%mm3\n\t"      \
315
    "por       %%mm7,      %%mm3\n\t"      \
316
    MOVNTQ "   %%mm3,      16(%1)\n\t"     \
267
#if HAVE_MMX2
268
/*
 * Word-select masks for the pshufw-based RGB_PACK24_B below. Each constant
 * is four 16-bit lanes; -1 (all bits set) keeps a lane under `pand`, 0
 * clears it. The digits in each name list the lanes in array order, i.e.
 * lowest-addressed lane first: mask1101 keeps lanes 0, 1 and 3 and clears
 * lane 2.
 * NOTE(review): the leading 8 is presumably the alignment in bytes
 * requested from DECLARE_ASM_CONST -- confirm against its definition.
 */
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1};
269
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0};
270
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0};
271
DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1};
272
DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0};
273
#undef RGB_PACK24_B
274
/*
 * RGB_PACK24_B (variant selected by `#if HAVE_MMX2`): store stage of
 * RGB_PACK24, built on pshufw.
 *
 * Expects the layout left by the shuffle stage of RGB_PACK24:
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 *
 * pshufw reorders the 16-bit lanes of each source into scratch registers,
 * the maskNNNN constants (memory operands via MANGLE) clear the lanes that
 * belong to another output quadword, and 64-bit shifts move the remaining
 * pieces of mm3 and mm5 into place. The pieces are OR-ed into three
 * quadwords:
 *   mm0 = R0 G0 B0 R1 G1 B1 R2 G2   -> (%1)
 *   mm1 = B2 R3 G3 B3 R4 G4 B4 R5   -> 8(%1)
 *   mm2 = G5 B5 R6 G6 B6 R7 G7 B7   -> 16(%1)
 * These are written with three MOVNTQ stores, 24 bytes in total.
 *
 * Overwrites mm0-mm3 and mm5-mm7.
 */
#define RGB_PACK24_B\
275
    "pshufw    $0xc6,  %%mm2, %%mm1 \n"\
276
    "pshufw    $0x84,  %%mm3, %%mm6 \n"\
277
    "pshufw    $0x38,  %%mm5, %%mm7 \n"\
278
    "pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\
279
    "movq      %%mm1,         %%mm0 \n"\
280
    "pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\
281
    "movq      %%mm1,         %%mm2 \n"\
282
    "pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\
283
    "psrlq       $48,         %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\
284
    "pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\
285
    "psllq       $32,         %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\
286
    "pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\
287
    "por       %%mm3,         %%mm1 \n"\
288
    "por       %%mm6,         %%mm0 \n"\
289
    "por       %%mm5,         %%mm1 \n"\
290
    "por       %%mm7,         %%mm2 \n"\
291
    MOVNTQ"    %%mm0,          (%1) \n"\
292
    MOVNTQ"    %%mm1,         8(%1) \n"\
293
    MOVNTQ"    %%mm2,        16(%1) \n"\
294

  
295
#else
296
#undef RGB_PACK24_B
297
/*
 * RGB_PACK24_B (variant on the `#else` side of `#if HAVE_MMX2`): store
 * stage of RGB_PACK24 using only movd stores and 64-bit shifts.
 *
 * Expects the layout left by the shuffle stage of RGB_PACK24:
 *   mm3 = R0 G0 B0 R1 R2 G2 B2 R3
 *   mm5 = R4 G4 B4 R5 R6 G6 B6 R7
 *   mm2 = G1 B1 G3 B3 G5 B5 G7 B7
 *
 * Every movd writes 4 bytes, but the green/blue pairs are only 2 bytes
 * wide, so each mm2 store also writes 2 surplus bytes. The statement order
 * is chosen so that a later 4-byte store of R/G/B/R data overwrites that
 * surplus: 6(%1) follows 4(%1), 12(%1) follows 10(%1), and 18(%1) is
 * issued last so it overwrites the tail of the 16(%1) store and the head
 * of the 20(%1) store. Do not reorder the stores.
 *
 * Writes bytes 0..23 at (%1); shifts mm2, mm3 and mm5 in place.
 */
#define RGB_PACK24_B\
298
    "movd      %%mm3,       (%1) \n" /* R0 G0 B0 R1 */\
299
    "movd      %%mm2,      4(%1) \n" /* G1 B1 */\
300
    "psrlq     $32,        %%mm3 \n"\
301
    "psrlq     $16,        %%mm2 \n"\
302
    "movd      %%mm3,      6(%1) \n" /* R2 G2 B2 R3 */\
303
    "movd      %%mm2,     10(%1) \n" /* G3 B3 */\
304
    "psrlq     $16,        %%mm2 \n"\
305
    "movd      %%mm5,     12(%1) \n" /* R4 G4 B4 R5 */\
306
    "movd      %%mm2,     16(%1) \n" /* G5 B5 */\
307
    "psrlq     $32,        %%mm5 \n"\
308
    "movd      %%mm2,     20(%1) \n" /* -- -- G7 B7 */\
309
    "movd      %%mm5,     18(%1) \n" /* R6 G6 B6 R7 */\
310

  
311
#endif
317 312

  
318 313
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[],
319 314
                                       int srcStride[],
......
387 382

  
388 383
        YUV2RGB_INITIAL_LOAD
389 384
        YUV2RGB
385
        RGB_PACK_INTERLEAVE
390 386
        SET_EMPTY_ALPHA
391 387
        RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
392 388

  
......
408 404
        const uint8_t *pa = src[3] + y * srcStride[3];
409 405
        YUV2RGB_INITIAL_LOAD
410 406
        YUV2RGB
407
        RGB_PACK_INTERLEAVE
411 408
        LOAD_ALPHA
412 409
        RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA)
413 410

  
......
428 425

  
429 426
        YUV2RGB_INITIAL_LOAD
430 427
        YUV2RGB
428
        RGB_PACK_INTERLEAVE
431 429
        SET_EMPTY_ALPHA
432 430
        RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
433 431

  
......
449 447
        const uint8_t *pa = src[3] + y * srcStride[3];
450 448
        YUV2RGB_INITIAL_LOAD
451 449
        YUV2RGB
450
        RGB_PACK_INTERLEAVE
452 451
        LOAD_ALPHA
453 452
        RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA)
454 453

  

Also available in: Unified diff