Revision bffc36e0 libswscale/x86/yuv2rgb_template2.c
libswscale/x86/yuv2rgb_template2.c | ||
---|---|---|
124 | 124 |
"paddsw %%mm6, %%mm0\n\t" \ |
125 | 125 |
"paddsw %%mm6, %%mm1\n\t" \ |
126 | 126 |
"paddsw %%mm6, %%mm2\n\t" \ |
127 |
\ |
|
127 |
|
|
128 |
#define RGB_PACK_INTERLEAVE \ |
|
128 | 129 |
/* pack and interleave even/odd pixels */ \ |
129 |
"packuswb %%mm0, %%mm0\n\t" \
|
|
130 |
"packuswb %%mm1, %%mm1\n\t" \
|
|
130 |
"packuswb %%mm1, %%mm0\n\t" \
|
|
131 |
"packuswb %%mm5, %%mm3\n\t" \
|
|
131 | 132 |
"packuswb %%mm2, %%mm2\n\t" \ |
132 |
"packuswb %%mm3, %%mm3\n\t" \ |
|
133 |
"packuswb %%mm5, %%mm5\n\t" \ |
|
133 |
"movq %%mm0, %%mm1\n\n" \ |
|
134 | 134 |
"packuswb %%mm7, %%mm7\n\t" \ |
135 | 135 |
"punpcklbw %%mm3, %%mm0\n\t" \ |
136 |
"punpcklbw %%mm5, %%mm1\n\t" \
|
|
136 |
"punpckhbw %%mm3, %%mm1\n\t" \
|
|
137 | 137 |
"punpcklbw %%mm7, %%mm2\n\t" \ |
138 | 138 |
|
139 | 139 |
#define YUV2RGB_ENDLOOP(depth) \ |
... | ... | |
210 | 210 |
|
211 | 211 |
YUV2RGB_INITIAL_LOAD |
212 | 212 |
YUV2RGB |
213 |
RGB_PACK_INTERLEAVE |
|
213 | 214 |
#ifdef DITHER1XBPP |
214 | 215 |
DITHER_RGB |
215 | 216 |
#endif |
... | ... | |
237 | 238 |
|
238 | 239 |
YUV2RGB_INITIAL_LOAD |
239 | 240 |
YUV2RGB |
241 |
RGB_PACK_INTERLEAVE |
|
240 | 242 |
#ifdef DITHER1XBPP |
241 | 243 |
DITHER_RGB |
242 | 244 |
#endif |
... | ... | |
247 | 249 |
YUV2RGB_ENDFUNC |
248 | 250 |
} |
249 | 251 |
|
252 |
#define RGB_PACK24(blue, red)\ |
|
253 |
"packuswb %%mm3, %%mm0 \n" /* R0 R2 R4 R6 R1 R3 R5 R7 */\ |
|
254 |
"packuswb %%mm5, %%mm1 \n" /* B0 B2 B4 B6 B1 B3 B5 B7 */\ |
|
255 |
"packuswb %%mm7, %%mm2 \n" /* G0 G2 G4 G6 G1 G3 G5 G7 */\ |
|
256 |
"movq %%mm"red", %%mm3 \n"\ |
|
257 |
"movq %%mm"blue", %%mm6 \n"\ |
|
258 |
"psrlq $32, %%mm"red" \n" /* R1 R3 R5 R7 */\ |
|
259 |
"punpcklbw %%mm2, %%mm3 \n" /* R0 G0 R2 G2 R4 G4 R6 G6 */\ |
|
260 |
"punpcklbw %%mm"red", %%mm6 \n" /* B0 R1 B2 R3 B4 R5 B6 R7 */\ |
|
261 |
"movq %%mm3, %%mm5 \n"\ |
|
262 |
"punpckhbw %%mm"blue", %%mm2 \n" /* G1 B1 G3 B3 G5 B5 G7 B7 */\ |
|
263 |
"punpcklwd %%mm6, %%mm3 \n" /* R0 G0 B0 R1 R2 G2 B2 R3 */\ |
|
264 |
"punpckhwd %%mm6, %%mm5 \n" /* R4 G4 B4 R5 R6 G6 B6 R7 */\ |
|
265 |
RGB_PACK24_B |
|
250 | 266 |
|
251 |
#define RGB_PACK24(red, blue) \ |
|
252 |
/* generate first packed RGB octet */ \ |
|
253 |
"movq %%mm2, %%mm5\n\t" \ |
|
254 |
"movq %%mm"blue", %%mm6\n\t" \ |
|
255 |
"movq %%mm"red", %%mm7\n\t" \ |
|
256 |
"punpcklbw %%mm5, %%mm6\n\t" \ |
|
257 |
"punpcklbw %%mm4, %%mm7\n\t" \ |
|
258 |
"movq %%mm6, %%mm3\n\t" \ |
|
259 |
"punpcklwd %%mm7, %%mm6\n\t" \ |
|
260 |
"psrlq $32, %%mm3\n\t" \ |
|
261 |
"movq %%mm6, %%mm5\n\t" \ |
|
262 |
"psllq $40, %%mm6\n\t" \ |
|
263 |
"psllq $48, %%mm3\n\t" \ |
|
264 |
"psrlq $32, %%mm5\n\t" \ |
|
265 |
"psrlq $40, %%mm6\n\t" \ |
|
266 |
"psllq $24, %%mm5\n\t" \ |
|
267 |
"por %%mm3, %%mm6\n\t" \ |
|
268 |
"por %%mm5, %%mm6\n\t" \ |
|
269 |
MOVNTQ " %%mm6, (%1)\n\t" \ |
|
270 |
\ |
|
271 |
/* generate second packed RGB octet */ \ |
|
272 |
"movq %%mm"red", %%mm7\n\t" \ |
|
273 |
"movq %%mm2, %%mm5\n\t" \ |
|
274 |
"movq %%mm"blue", %%mm6\n\t" \ |
|
275 |
"punpcklbw %%mm4, %%mm7\n\t" \ |
|
276 |
"punpcklbw %%mm5, %%mm6\n\t" \ |
|
277 |
"movq %%mm7, %%mm3\n\t" \ |
|
278 |
"punpckhwd %%mm7, %%mm6\n\t" \ |
|
279 |
"psllq $16, %%mm3\n\t" \ |
|
280 |
"psrlq $32, %%mm6\n\t" \ |
|
281 |
"psrlq $48, %%mm3\n\t" \ |
|
282 |
"psllq $8, %%mm6\n\t" \ |
|
283 |
"movq %%mm"red", %%mm7\n\t" \ |
|
284 |
"por %%mm6, %%mm3\n\t" \ |
|
285 |
"movq %%mm"blue", %%mm6\n\t" \ |
|
286 |
"movq %%mm2, %%mm5\n\t" \ |
|
287 |
"punpckhbw %%mm4, %%mm7\n\t" \ |
|
288 |
"punpckhbw %%mm5, %%mm6\n\t" \ |
|
289 |
"movq %%mm6, %%mm5\n\t" \ |
|
290 |
"punpcklwd %%mm7, %%mm6\n\t" \ |
|
291 |
"psrlq $16, %%mm5\n\t" \ |
|
292 |
"psllq $56, %%mm5\n\t" \ |
|
293 |
"por %%mm5, %%mm3\n\t" \ |
|
294 |
"psllq $32, %%mm6\n\t" \ |
|
295 |
"por %%mm6, %%mm3\n\t" \ |
|
296 |
MOVNTQ " %%mm3, 8(%1)\n\t" \ |
|
297 |
\ |
|
298 |
/* generate third packed RGB octet */ \ |
|
299 |
"movq %%mm"red", %%mm7\n\t" \ |
|
300 |
"movq %%mm2, %%mm5\n\t" \ |
|
301 |
"movq %%mm2, %%mm3\n\t" \ |
|
302 |
"movq %%mm"blue", %%mm6\n\t" \ |
|
303 |
"punpckhbw %%mm"red", %%mm3\n\t" \ |
|
304 |
"punpckhbw %%mm4, %%mm7\n\t" \ |
|
305 |
"psllq $32, %%mm3\n\t" \ |
|
306 |
"punpckhbw %%mm5, %%mm6\n\t" \ |
|
307 |
"psrlq $48, %%mm3\n\t" \ |
|
308 |
"punpckhwd %%mm7, %%mm6\n\t" \ |
|
309 |
"movq %%mm6, %%mm7\n\t" \ |
|
310 |
"psrlq $32, %%mm6\n\t" \ |
|
311 |
"psllq $32, %%mm7\n\t" \ |
|
312 |
"psllq $40, %%mm6\n\t" \ |
|
313 |
"psrlq $16, %%mm7\n\t" \ |
|
314 |
"por %%mm6, %%mm3\n\t" \ |
|
315 |
"por %%mm7, %%mm3\n\t" \ |
|
316 |
MOVNTQ " %%mm3, 16(%1)\n\t" \ |
|
267 |
#if HAVE_MMX2 |
|
268 |
DECLARE_ASM_CONST(8, int16_t, mask1101[4]) = {-1,-1, 0,-1}; |
|
269 |
DECLARE_ASM_CONST(8, int16_t, mask0010[4]) = { 0, 0,-1, 0}; |
|
270 |
DECLARE_ASM_CONST(8, int16_t, mask0110[4]) = { 0,-1,-1, 0}; |
|
271 |
DECLARE_ASM_CONST(8, int16_t, mask1001[4]) = {-1, 0, 0,-1}; |
|
272 |
DECLARE_ASM_CONST(8, int16_t, mask0100[4]) = { 0,-1, 0, 0}; |
|
273 |
#undef RGB_PACK24_B |
|
274 |
#define RGB_PACK24_B\ |
|
275 |
"pshufw $0xc6, %%mm2, %%mm1 \n"\ |
|
276 |
"pshufw $0x84, %%mm3, %%mm6 \n"\ |
|
277 |
"pshufw $0x38, %%mm5, %%mm7 \n"\ |
|
278 |
"pand "MANGLE(mask1101)", %%mm6 \n" /* R0 G0 B0 R1 -- -- R2 G2 */\ |
|
279 |
"movq %%mm1, %%mm0 \n"\ |
|
280 |
"pand "MANGLE(mask0110)", %%mm7 \n" /* -- -- R6 G6 B6 R7 -- -- */\ |
|
281 |
"movq %%mm1, %%mm2 \n"\ |
|
282 |
"pand "MANGLE(mask0100)", %%mm1 \n" /* -- -- G3 B3 -- -- -- -- */\ |
|
283 |
"psrlq $48, %%mm3 \n" /* B2 R3 -- -- -- -- -- -- */\ |
|
284 |
"pand "MANGLE(mask0010)", %%mm0 \n" /* -- -- -- -- G1 B1 -- -- */\ |
|
285 |
"psllq $32, %%mm5 \n" /* -- -- -- -- R4 G4 B4 R5 */\ |
|
286 |
"pand "MANGLE(mask1001)", %%mm2 \n" /* G5 B5 -- -- -- -- G7 B7 */\ |
|
287 |
"por %%mm3, %%mm1 \n"\ |
|
288 |
"por %%mm6, %%mm0 \n"\ |
|
289 |
"por %%mm5, %%mm1 \n"\ |
|
290 |
"por %%mm7, %%mm2 \n"\ |
|
291 |
MOVNTQ" %%mm0, (%1) \n"\ |
|
292 |
MOVNTQ" %%mm1, 8(%1) \n"\ |
|
293 |
MOVNTQ" %%mm2, 16(%1) \n"\ |
|
294 |
|
|
295 |
#else |
|
296 |
#undef RGB_PACK24_B |
|
297 |
#define RGB_PACK24_B\ |
|
298 |
"movd %%mm3, (%1) \n" /* R0 G0 B0 R1 */\ |
|
299 |
"movd %%mm2, 4(%1) \n" /* G1 B1 */\ |
|
300 |
"psrlq $32, %%mm3 \n"\ |
|
301 |
"psrlq $16, %%mm2 \n"\ |
|
302 |
"movd %%mm3, 6(%1) \n" /* R2 G2 B2 R3 */\ |
|
303 |
"movd %%mm2, 10(%1) \n" /* G3 B3 */\ |
|
304 |
"psrlq $16, %%mm2 \n"\ |
|
305 |
"movd %%mm5, 12(%1) \n" /* R4 G4 B4 R5 */\ |
|
306 |
"movd %%mm2, 16(%1) \n" /* G5 B5 */\ |
|
307 |
"psrlq $32, %%mm5 \n"\ |
|
308 |
"movd %%mm2, 20(%1) \n" /* -- -- G7 B7 */\ |
|
309 |
"movd %%mm5, 18(%1) \n" /* R6 G6 B6 R7 */\ |
|
310 |
|
|
311 |
#endif |
|
317 | 312 |
|
318 | 313 |
static inline int RENAME(yuv420_rgb24)(SwsContext *c, const uint8_t *src[], |
319 | 314 |
int srcStride[], |
... | ... | |
387 | 382 |
|
388 | 383 |
YUV2RGB_INITIAL_LOAD |
389 | 384 |
YUV2RGB |
385 |
RGB_PACK_INTERLEAVE |
|
390 | 386 |
SET_EMPTY_ALPHA |
391 | 387 |
RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) |
392 | 388 |
|
... | ... | |
408 | 404 |
const uint8_t *pa = src[3] + y * srcStride[3]; |
409 | 405 |
YUV2RGB_INITIAL_LOAD |
410 | 406 |
YUV2RGB |
407 |
RGB_PACK_INTERLEAVE |
|
411 | 408 |
LOAD_ALPHA |
412 | 409 |
RGB_PACK32(REG_RED, REG_GREEN, REG_BLUE, REG_ALPHA) |
413 | 410 |
|
... | ... | |
428 | 425 |
|
429 | 426 |
YUV2RGB_INITIAL_LOAD |
430 | 427 |
YUV2RGB |
428 |
RGB_PACK_INTERLEAVE |
|
431 | 429 |
SET_EMPTY_ALPHA |
432 | 430 |
RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) |
433 | 431 |
|
... | ... | |
449 | 447 |
const uint8_t *pa = src[3] + y * srcStride[3]; |
450 | 448 |
YUV2RGB_INITIAL_LOAD |
451 | 449 |
YUV2RGB |
450 |
RGB_PACK_INTERLEAVE |
|
452 | 451 |
LOAD_ALPHA |
453 | 452 |
RGB_PACK32(REG_BLUE, REG_GREEN, REG_RED, REG_ALPHA) |
454 | 453 |
|
Also available in: Unified diff