ffmpeg / libswscale / rgb2rgb_template.c @ c0038328
History | View | Annotate | Download (28.2 KB)
1 |
/*
|
---|---|
2 |
* software RGB to RGB converter
|
3 |
* software PAL8 to RGB converter
|
4 |
* software YUV to YUV converter
|
5 |
* software YUV to RGB converter
|
6 |
* Written by Nick Kurshev.
|
7 |
* palette & YUV & runtime CPU stuff by Michael (michaelni@gmx.at)
|
8 |
* lot of big-endian byte order fixes by Alex Beregszaszi
|
9 |
*
|
10 |
* This file is part of Libav.
|
11 |
*
|
12 |
* Libav is free software; you can redistribute it and/or
|
13 |
* modify it under the terms of the GNU Lesser General Public
|
14 |
* License as published by the Free Software Foundation; either
|
15 |
* version 2.1 of the License, or (at your option) any later version.
|
16 |
*
|
17 |
* Libav is distributed in the hope that it will be useful,
|
18 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
19 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
20 |
* Lesser General Public License for more details.
|
21 |
*
|
22 |
* You should have received a copy of the GNU Lesser General Public
|
23 |
* License along with Libav; if not, write to the Free Software
|
24 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
25 |
*/
|
26 |
|
27 |
#include <stddef.h> |
28 |
|
29 |
/**
 * Expand packed 3-byte pixels into 4-byte pixels with a constant 255 byte.
 *
 * Without HAVE_BIGENDIAN the three source bytes are copied in order and
 * followed by 255; with HAVE_BIGENDIAN the 255 byte comes first and the three
 * source bytes follow in reverse order.
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, produced 4 at a time
 * @param src_size number of source bytes
 */
static inline void rgb24tobgr32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint8_t *out              = dst;

    for (; in < stop; in += 3, out += 4) {
#if HAVE_BIGENDIAN
        /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
        out[0] = 255;
        out[1] = in[2];
        out[2] = in[1];
        out[3] = in[0];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
        out[3] = 255;
#endif
    }
}
52 |
|
53 |
/**
 * Reduce packed 4-byte pixels to 3-byte pixels by dropping one byte.
 *
 * Without HAVE_BIGENDIAN the first three bytes of each pixel are copied in
 * order and the fourth is skipped; with HAVE_BIGENDIAN the first byte is
 * skipped and the remaining three are written in reverse order.
 *
 * @param src      source bytes, consumed 4 at a time
 * @param dst      destination bytes, produced 3 at a time
 * @param src_size number of source bytes
 */
static inline void rgb32tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint8_t *out              = dst;

    for (; in < stop; in += 4, out += 3) {
#if HAVE_BIGENDIAN
        /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
        out[2] = in[1];
        out[1] = in[2];
        out[0] = in[3];
#else
        out[0] = in[0];
        out[1] = in[1];
        out[2] = in[2];
#endif
    }
}
77 |
|
78 |
/*
|
79 |
original by Strepto/Astral
|
80 |
ported to gcc & bugfixed: A'rpi
|
81 |
MMX2, 3DNOW optimization by Nick Kurshev
|
82 |
32-bit C version, and and&add trick by Michael Niedermayer
|
83 |
*/
|
84 |
/**
 * Convert 16-bit words from a 15-bit layout to a 16-bit layout.
 *
 * For each 16-bit word the low 5 bits stay in place and bits 5..14 move up by
 * one position. This is done without a shift: adding (x & 0x7FE0) to
 * (x & 0x7FFF) doubles the upper field. Two words are handled per iteration
 * through a 32-bit access; a trailing single word is handled separately.
 *
 * NOTE(review): src and dst are accessed through uint32_t/uint16_t casts, so
 * this presumably relies on suitably aligned buffers - confirm with callers.
 *
 * @param src      source bytes
 * @param dst      destination bytes
 * @param src_size number of source bytes
 */
static inline void rgb15to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;
    const uint8_t *in              = src;
    uint8_t *out                   = dst;

    /* two pixels per 32-bit word */
    for (; in < pair_stop; in += 4, out += 4) {
        const unsigned two = *((const uint32_t *)in);
        *((uint32_t *)out) = (two & 0x7FFF7FFF) + (two & 0x7FE07FE0);
    }

    /* leftover single pixel */
    if (in < stop) {
        const unsigned short one = *((const uint16_t *)in);
        *((uint16_t *)out) = (one & 0x7FFF) + (one & 0x7FE0);
    }
}
103 |
|
104 |
/**
 * Convert 16-bit words from a 16-bit layout to a 15-bit layout.
 *
 * For each 16-bit word the low 5 bits stay in place, bits 6..15 move down by
 * one position and bit 5 is discarded. Two words are handled per iteration
 * through a 32-bit access; a trailing single word is handled separately.
 *
 * NOTE(review): src and dst are accessed through uint32_t/uint16_t casts, so
 * this presumably relies on suitably aligned buffers - confirm with callers.
 *
 * @param src      source bytes
 * @param dst      destination bytes
 * @param src_size number of source bytes
 */
static inline void rgb16to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop      = src + src_size;
    const uint8_t *const pair_stop = stop - 3;
    const uint8_t *in              = src;
    uint8_t *out                   = dst;

    /* two pixels per 32-bit word */
    for (; in < pair_stop; in += 4, out += 4) {
        const uint32_t two = *((const uint32_t *)in);
        *((uint32_t *)out) = ((two >> 1) & 0x7FE07FE0) | (two & 0x001F001F);
    }

    /* leftover single pixel */
    if (in < stop) {
        const uint16_t one = *((const uint16_t *)in);
        *((uint16_t *)out) = ((one >> 1) & 0x7FE0) | (one & 0x001F);
    }
}
124 |
|
125 |
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words, keeping the field order.
 *
 * From each 32-bit word: the top 5 bits of bits 0..7 land in output bits 0..4,
 * the top 6 bits of bits 8..15 in output bits 5..10, and the top 5 bits of
 * bits 16..23 in output bits 11..15. Bits 24..31 are ignored.
 *
 * @param src      source bytes, read as uint32_t words
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb32to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF) >> 3;
        const int mid  = (px & 0xFC00) >> 5;
        const int high = (px & 0xF80000) >> 8;

        *out++ = low + mid + high;
    }
}
137 |
|
138 |
/**
 * Pack 32-bit pixels into 16-bit 5-6-5 words, swapping the outer fields.
 *
 * From each 32-bit word: the top 5 bits of bits 0..7 land in output bits
 * 11..15, the top 6 bits of bits 8..15 in output bits 5..10, and the top 5
 * bits of bits 16..23 in output bits 0..4. Bits 24..31 are ignored.
 *
 * @param src      source bytes, read as uint32_t words
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb32tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8) << 8;
        const int mid  = (px & 0xFC00) >> 5;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
149 |
|
150 |
/**
 * Pack 32-bit pixels into 16-bit 5-5-5 words, keeping the field order.
 *
 * From each 32-bit word: the top 5 bits of bits 0..7 land in output bits 0..4,
 * the top 5 bits of bits 8..15 in output bits 5..9, and the top 5 bits of
 * bits 16..23 in output bits 10..14. Bits 24..31 are ignored.
 *
 * @param src      source bytes, read as uint32_t words
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb32to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int low  = (px & 0xFF) >> 3;
        const int mid  = (px & 0xF800) >> 6;
        const int high = (px & 0xF80000) >> 9;

        *out++ = low + mid + high;
    }
}
161 |
|
162 |
/**
 * Pack 32-bit pixels into 16-bit 5-5-5 words, swapping the outer fields.
 *
 * From each 32-bit word: the top 5 bits of bits 0..7 land in output bits
 * 10..14, the top 5 bits of bits 8..15 in output bits 5..9, and the top 5
 * bits of bits 16..23 in output bits 0..4. Bits 24..31 are ignored.
 *
 * @param src      source bytes, read as uint32_t words
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb32tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 4) {
        const int px   = *(const uint32_t *)in;
        const int high = (px & 0xF8) << 7;
        const int mid  = (px & 0xF800) >> 6;
        const int low  = (px & 0xF80000) >> 19;

        *out++ = high + mid + low;
    }
}
173 |
|
174 |
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words.
 *
 * The first byte of each pixel supplies output bits 0..4, the second byte
 * bits 5..10 and the third byte bits 11..15 (top bits of each byte are kept).
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb24tobgr16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
187 |
|
188 |
/**
 * Pack 3-byte pixels into 16-bit 5-6-5 words, reversing the byte order.
 *
 * The first byte of each pixel supplies output bits 11..15, the second byte
 * bits 5..10 and the third byte bits 0..4 (top bits of each byte are kept).
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb24to16_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
    }
}
201 |
|
202 |
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words.
 *
 * The first byte of each pixel supplies output bits 0..4, the second byte
 * bits 5..9 and the third byte bits 10..14 (top 5 bits of each byte are kept).
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb24tobgr15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int b = in[0];
        const int g = in[1];
        const int r = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
215 |
|
216 |
/**
 * Pack 3-byte pixels into 16-bit 5-5-5 words, reversing the byte order.
 *
 * The first byte of each pixel supplies output bits 10..14, the second byte
 * bits 5..9 and the third byte bits 0..4 (top 5 bits of each byte are kept).
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, written as uint16_t words
 * @param src_size number of source bytes
 */
static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint8_t *const stop = src + src_size;
    const uint8_t *in         = src;
    uint16_t *out             = (uint16_t *)dst;

    for (; in < stop; in += 3) {
        const int r = in[0];
        const int g = in[1];
        const int b = in[2];

        *out++ = (b >> 3) | ((g & 0xF8) << 2) | ((r & 0xF8) << 7);
    }
}
229 |
|
230 |
/*
  I use a less accurate approximation here by simply left-shifting the input
  value and filling the low-order bits with zeroes. This method improves PNG
  compression, but this scheme cannot reproduce white exactly, since it does
  not generate an all-ones maximum value; the net effect is to darken the
  image slightly.

  The better method should be "left bit replication":

   4 3 2 1 0
   ---------
   1 1 0 1 1

   7 6 5 4 3  2 1 0
   ----------------
   1 1 0 1 1  1 1 0
   |=======|  |===|
       |      leftmost bits repeated to fill open bits
       |
   original bits
*/
|
251 |
/**
 * Expand 16-bit 5-5-5 words into 3-byte pixels.
 *
 * Each 5-bit field is moved to the top of its output byte and the low three
 * bits are left zero: word bits 0..4 go to the first byte, bits 5..9 to the
 * second and bits 10..14 to the third.
 *
 * @param src      source bytes, read as uint16_t words
 * @param dst      destination bytes, produced 3 at a time
 * @param src_size number of source bytes
 */
static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F) << 3;
        out[1] = (px & 0x3E0) >> 2;
        out[2] = (px & 0x7C00) >> 7;
    }
}
265 |
|
266 |
/**
 * Expand 16-bit 5-6-5 words into 3-byte pixels.
 *
 * Each field is moved to the top of its output byte and the remaining low
 * bits are left zero: word bits 0..4 go to the first byte, bits 5..10 to the
 * second and bits 11..15 to the third.
 *
 * @param src      source bytes, read as uint16_t words
 * @param dst      destination bytes, produced 3 at a time
 * @param src_size number of source bytes
 */
static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = (uint8_t *)dst;

    for (; in < stop; in++, out += 3) {
        const uint16_t px = *in;

        out[0] = (px & 0x1F) << 3;
        out[1] = (px & 0x7E0) >> 3;
        out[2] = (px & 0xF800) >> 8;
    }
}
280 |
|
281 |
/*
 * Inline-assembly fragment that packs four pixels held as separate
 * word-sized channel values into two quadwords of 4-byte pixels and stores
 * them at %0 and 8%0 using MOVNTQ (a macro defined outside this view).
 *
 * Expected register contents on entry:
 * mm0 = 00 B3 00 B2 00 B1 00 B0
 * mm1 = 00 G3 00 G2 00 G1 00 G0
 * mm2 = 00 R3 00 R2 00 R1 00 R0
 * mm6 = FF FF FF FF FF FF FF FF
 * mm7 = 00 00 00 00 00 00 00 00
 *
 * The final line deliberately has no trailing backslash so that the macro
 * ends here and cannot absorb whatever line follows it.
 */
#define PACK_RGB32 \
    "packuswb %%mm7, %%mm0 \n\t" /* 00 00 00 00 B3 B2 B1 B0 */ \
    "packuswb %%mm7, %%mm1 \n\t" /* 00 00 00 00 G3 G2 G1 G0 */ \
    "packuswb %%mm7, %%mm2 \n\t" /* 00 00 00 00 R3 R2 R1 R0 */ \
    "punpcklbw %%mm1, %%mm0 \n\t" /* G3 B3 G2 B2 G1 B1 G0 B0 */ \
    "punpcklbw %%mm6, %%mm2 \n\t" /* FF R3 FF R2 FF R1 FF R0 */ \
    "movq %%mm0, %%mm3 \n\t" \
    "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
    "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
    MOVNTQ" %%mm0, %0 \n\t" \
    MOVNTQ" %%mm3, 8%0 \n\t"
|
299 |
|
300 |
/**
 * Expand 16-bit 5-5-5 words into 4-byte pixels with a constant 255 byte.
 *
 * Each 5-bit field is moved to the top of its output byte (low bits zero).
 * Without HAVE_BIGENDIAN the bytes are written as bits 0..4, bits 5..9,
 * bits 10..14, 255; with HAVE_BIGENDIAN the order is reversed (255 first).
 *
 * @param src      source bytes, read as uint16_t words
 * @param dst      destination bytes, produced 4 at a time
 * @param src_size number of source bytes
 */
static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0x7C00) >> 7;
        out[2] = (px & 0x3E0) >> 2;
        out[3] = (px & 0x1F) << 3;
#else
        out[0] = (px & 0x1F) << 3;
        out[1] = (px & 0x3E0) >> 2;
        out[2] = (px & 0x7C00) >> 7;
        out[3] = 255;
#endif
    }
}
322 |
|
323 |
/**
 * Expand 16-bit 5-6-5 words into 4-byte pixels with a constant 255 byte.
 *
 * Each field is moved to the top of its output byte (low bits zero).
 * Without HAVE_BIGENDIAN the bytes are written as bits 0..4, bits 5..10,
 * bits 11..15, 255; with HAVE_BIGENDIAN the order is reversed (255 first).
 *
 * @param src      source bytes, read as uint16_t words
 * @param dst      destination bytes, produced 4 at a time
 * @param src_size number of source bytes
 */
static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    const uint16_t *in         = (const uint16_t *)src;
    const uint16_t *const stop = in + src_size / 2;
    uint8_t *out               = dst;

    for (; in < stop; in++, out += 4) {
        const uint16_t px = *in;
#if HAVE_BIGENDIAN
        out[0] = 255;
        out[1] = (px & 0xF800) >> 8;
        out[2] = (px & 0x7E0) >> 3;
        out[3] = (px & 0x1F) << 3;
#else
        out[0] = (px & 0x1F) << 3;
        out[1] = (px & 0x7E0) >> 3;
        out[2] = (px & 0xF800) >> 8;
        out[3] = 255;
#endif
    }
}
345 |
|
346 |
/**
 * Swap bits 0..7 with bits 16..23 of every 32-bit word; bits 8..15 and
 * bits 24..31 stay where they are.
 *
 * Words are processed at byte offsets 0, 4, 8, ... while the offset is below
 * src_size, so a size that is not a multiple of 4 still causes one whole
 * extra word to be read and written (same as before).
 *
 * The word is handled as an unsigned value: shifting the masked value left
 * by 16 in a signed int would overflow. The loop indexes from the start of
 * the buffers with a long offset, so the size is not narrowed to int and no
 * pointer outside the buffers is formed.
 *
 * @param src      source bytes, read as uint32_t words
 * @param dst      destination bytes, written as uint32_t words
 * @param src_size number of source bytes
 */
static inline void shuffle_bytes_2103_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    long off;

    for (off = 0; off < src_size; off += 4) {
        const uint32_t px    = *(const uint32_t *)&src[off];
        const uint32_t fixed = px & 0xff00ff00u; /* bytes that stay in place */
        const uint32_t moved = px & 0x00ff00ffu; /* bytes that trade places  */

        *(uint32_t *)&dst[off] = (moved >> 16) + fixed + (moved << 16);
    }
}
357 |
|
358 |
/**
 * Reverse the byte order inside every 3-byte pixel.
 *
 * The third source byte is read before any destination byte is written, so
 * the conversion also works when src and dst are the same buffer.
 *
 * The loop counter has the same type as src_size. An unsigned counter would
 * wrap before reaching sizes above UINT_MAX, and where long is no wider than
 * unsigned a negative size would be compared as a huge positive value.
 *
 * @param src      source bytes, consumed 3 at a time
 * @param dst      destination bytes, produced 3 at a time
 * @param src_size number of source bytes
 */
static inline void rgb24tobgr24_c(const uint8_t *src, uint8_t *dst, long src_size)
{
    long i;

    for (i = 0; i < src_size; i += 3) {
        const uint8_t third = src[i + 2];

        dst[i + 1] = src[i + 1];
        dst[i + 2] = src[i + 0];
        dst[i + 0] = third;
    }
}
369 |
|
370 |
/**
 * Interleave planar Y, U and V data into packed 4-byte groups laid out as
 * Y0 U Y1 V, one line at a time.
 *
 * The chroma pointers advance by chromStride after every line y for which
 * (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1; callers in this file
 * pass 1 or 2.
 * NOTE(review): that test presumably requires vertLumPerChroma to be a power
 * of two - confirm.
 *
 * The byte shifted into bits 24..31 is converted to unsigned first. As a
 * plain int shift, a value >= 128 overflows; in the 64-bit path the negative
 * result was then sign-extended into k and corrupted the upper four bytes of
 * the stored quadword.
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant and
 * handles two chroma samples per iteration - confirm it is only enabled
 * where that is valid.
 *
 * @param ysrc, usrc, vsrc  source planes
 * @param dst               packed destination
 * @param width             luma width; width >> 1 groups are written per line
 * @param height            number of lines
 * @param lumStride         byte step between luma lines
 * @param chromStride       byte step between chroma lines
 * @param dstStride         byte step between destination lines
 * @param vertLumPerChroma  luma lines per chroma line (see above)
 */
static inline void yuvPlanartoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride, long vertLumPerChroma)
{
    long y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;

            k = yc[0] + (uc[0] << 8) +
                (yc[1] << 16) + ((unsigned) vc[0] << 24);
            l = yc[2] + (uc[1] << 8) +
                (yc[3] << 16) + ((unsigned) vc[1] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }
#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((unsigned) yc[0] << 24) + (uc[0] << 16) +
                      (yc[1] << 8) + (vc[0] << 0);
#else
            *idst++ = yc[0] + (uc[0] << 8) +
                      (yc[1] << 16) + ((unsigned) vc[0] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
419 |
|
420 |
/**
 * Planar-to-YUY2 wrapper that advances the chroma planes every second line.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12toyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long dstStride)
{
    /* each chroma line is reused for two luma lines */
    const long lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
434 |
|
435 |
/**
 * Interleave planar Y, U and V data into packed 4-byte groups laid out as
 * U Y0 V Y1, one line at a time.
 *
 * The chroma pointers advance by chromStride after every line y for which
 * (y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1; callers in this file
 * pass 1 or 2.
 * NOTE(review): that test presumably requires vertLumPerChroma to be a power
 * of two - confirm.
 *
 * The byte shifted into bits 24..31 is converted to unsigned first. As a
 * plain int shift, a value >= 128 overflows; in the 64-bit path the negative
 * result was then sign-extended into k and corrupted the upper four bytes of
 * the stored quadword.
 *
 * NOTE(review): the HAVE_FAST_64BIT path has no HAVE_BIGENDIAN variant and
 * handles two chroma samples per iteration - confirm it is only enabled
 * where that is valid.
 *
 * @param ysrc, usrc, vsrc  source planes
 * @param dst               packed destination
 * @param width             luma width; width >> 1 groups are written per line
 * @param height            number of lines
 * @param lumStride         byte step between luma lines
 * @param chromStride       byte step between chroma lines
 * @param dstStride         byte step between destination lines
 * @param vertLumPerChroma  luma lines per chroma line (see above)
 */
static inline void yuvPlanartouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                     const uint8_t *vsrc, uint8_t *dst,
                                     long width, long height,
                                     long lumStride, long chromStride,
                                     long dstStride, long vertLumPerChroma)
{
    long y;
    const int chromWidth = width >> 1;

    for (y = 0; y < height; y++) {
#if HAVE_FAST_64BIT
        int i;
        uint64_t *ldst = (uint64_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i += 2) {
            uint64_t k, l;

            k = uc[0] + (yc[0] << 8) +
                (vc[0] << 16) + ((unsigned) yc[1] << 24);
            l = uc[1] + (yc[2] << 8) +
                (vc[1] << 16) + ((unsigned) yc[3] << 24);
            *ldst++ = k + (l << 32);
            yc += 4;
            uc += 2;
            vc += 2;
        }
#else
        int i;
        uint32_t *idst = (uint32_t *) dst;
        const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;

        for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN
            *idst++ = ((unsigned) uc[0] << 24) + (yc[0] << 16) +
                      (vc[0] << 8) + (yc[1] << 0);
#else
            *idst++ = uc[0] + (yc[0] << 8) +
                      (vc[0] << 16) + ((unsigned) yc[1] << 24);
#endif
            yc += 2;
            uc++;
            vc++;
        }
#endif
        if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
            usrc += chromStride;
            vsrc += chromStride;
        }
        ysrc += lumStride;
        dst  += dstStride;
    }
}
484 |
|
485 |
/**
 * Planar-to-UYVY wrapper that advances the chroma planes every second line.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yv12touyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                const uint8_t *vsrc, uint8_t *dst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long dstStride)
{
    /* each chroma line is reused for two luma lines */
    const long lumaLinesPerChromaLine = 2;

    //FIXME interpolate chroma
    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
499 |
|
500 |
/**
 * Planar-to-UYVY wrapper that advances the chroma planes on every line.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptouyvy_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   long width, long height,
                                   long lumStride, long chromStride,
                                   long dstStride)
{
    /* every luma line has its own chroma line */
    const long lumaLinesPerChromaLine = 1;

    yuvPlanartouyvy_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
512 |
|
513 |
/**
 * Planar-to-YUY2 wrapper that advances the chroma planes on every line.
 *
 * Width should be a multiple of 16.
 */
static inline void yuv422ptoyuy2_c(const uint8_t *ysrc, const uint8_t *usrc,
                                   const uint8_t *vsrc, uint8_t *dst,
                                   long width, long height,
                                   long lumStride, long chromStride,
                                   long dstStride)
{
    /* every luma line has its own chroma line */
    const long lumaLinesPerChromaLine = 1;

    yuvPlanartoyuy2_c(ysrc, usrc, vsrc, dst,
                      width, height,
                      lumStride, chromStride, dstStride,
                      lumaLinesPerChromaLine);
}
525 |
|
526 |
/**
 * Split packed Y0 U Y1 V groups into separate Y, U and V planes.
 *
 * Lines are processed in pairs: the first line of each pair supplies luma
 * and chroma, the second line supplies luma only (its chroma bytes are not
 * read).
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 */
static inline void yuy2toyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long srcStride)
{
    const int chromWidth = width >> 1;
    long line;

    for (line = 0; line < height; line += 2) {
        long n;

        /* first line of the pair: luma and chroma */
        for (n = 0; n < chromWidth; n++) {
            const uint8_t *grp = &src[4 * n];

            ydst[2 * n + 0] = grp[0];
            udst[n]         = grp[1];
            ydst[2 * n + 1] = grp[2];
            vdst[n]         = grp[3];
        }
        src  += srcStride;
        ydst += lumStride;

        /* second line of the pair: luma only */
        for (n = 0; n < chromWidth; n++) {
            const uint8_t *grp = &src[4 * n];

            ydst[2 * n + 0] = grp[0];
            ydst[2 * n + 1] = grp[2];
        }
        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
559 |
|
560 |
/**
 * Upscale one plane to twice its width and height using 3:1 weighted
 * averages of neighbouring samples.
 *
 * The first and last output lines are interpolated horizontally only, from
 * the first and last source line respectively. Every pair of output lines in
 * between is built from two adjacent source lines: the outermost columns
 * blend vertically, the inner columns blend diagonal neighbours.
 *
 * @param src        source plane
 * @param dst        destination plane
 * @param srcWidth   source width in samples
 * @param srcHeight  source height in lines
 * @param srcStride  byte step between source lines
 * @param dstStride  byte step between destination lines
 */
static inline void planar2x_c(const uint8_t *src, uint8_t *dst, long srcWidth,
                              long srcHeight, long srcStride, long dstStride)
{
    const long last = srcWidth - 1;
    long col, row;

    /* first line */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] +     src[col + 1]) >> 2;
        dst[2 * col + 2] = (    src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[last];

    dst += dstStride;

    for (row = 1; row < srcHeight; row++) {
        const uint8_t *upper = src;             /* current source line */
        const uint8_t *lower = src + srcStride; /* next source line    */
        uint8_t *out0        = dst;
        uint8_t *out1        = dst + dstStride;

        out0[0] = (3 * upper[0] +     lower[0]) >> 2;
        out1[0] = (    upper[0] + 3 * lower[0]) >> 2;

        for (col = 0; col < last; col++) {
            out0[2 * col + 1] = (3 * upper[col]     +     lower[col + 1]) >> 2;
            out1[2 * col + 2] = (    upper[col]     + 3 * lower[col + 1]) >> 2;
            out1[2 * col + 1] = (    upper[col + 1] + 3 * lower[col])     >> 2;
            out0[2 * col + 2] = (3 * upper[col + 1] +     lower[col])     >> 2;
        }
        out0[2 * srcWidth - 1] = (3 * upper[last] +     lower[last]) >> 2;
        out1[2 * srcWidth - 1] = (    upper[last] + 3 * lower[last]) >> 2;

        dst += dstStride * 2;
        src += srcStride;
    }

    /* last line */
    dst[0] = src[0];
    for (col = 0; col < last; col++) {
        dst[2 * col + 1] = (3 * src[col] +     src[col + 1]) >> 2;
        dst[2 * col + 2] = (    src[col] + 3 * src[col + 1]) >> 2;
    }
    dst[2 * srcWidth - 1] = src[last];
}
611 |
|
612 |
/**
 * Split packed U Y0 V Y1 groups into separate Y, U and V planes.
 *
 * Height should be a multiple of 2 and width should be a multiple of 16.
 * (If this is a problem for anyone then tell me, and I will fix it.)
 * Chrominance data is only taken from every second line, others are ignored.
 * FIXME: Write HQ version.
 */
static inline void uyvytoyv12_c(const uint8_t *src, uint8_t *ydst,
                                uint8_t *udst, uint8_t *vdst,
                                long width, long height,
                                long lumStride, long chromStride,
                                long srcStride)
{
    const int chromWidth = width >> 1;
    long line;

    for (line = 0; line < height; line += 2) {
        long n;

        /* first line of the pair: luma and chroma */
        for (n = 0; n < chromWidth; n++) {
            const uint8_t *grp = &src[4 * n];

            udst[n]         = grp[0];
            ydst[2 * n + 0] = grp[1];
            vdst[n]         = grp[2];
            ydst[2 * n + 1] = grp[3];
        }
        src  += srcStride;
        ydst += lumStride;

        /* second line of the pair: luma only */
        for (n = 0; n < chromWidth; n++) {
            const uint8_t *grp = &src[4 * n];

            ydst[2 * n + 0] = grp[1];
            ydst[2 * n + 1] = grp[3];
        }
        src  += srcStride;
        ydst += lumStride;
        udst += chromStride;
        vdst += chromStride;
    }
}
647 |
|
648 |
/**
|
649 |
* Height should be a multiple of 2 and width should be a multiple of 2.
|
650 |
* (If this is a problem for anyone then tell me, and I will fix it.)
|
651 |
* Chrominance data is only taken from every second line,
|
652 |
* others are ignored in the C version.
|
653 |
* FIXME: Write HQ version.
|
654 |
*/
|
655 |
static inline void rgb24toyv12_c(const uint8_t *src, uint8_t *ydst, |
656 |
uint8_t *udst, uint8_t *vdst, |
657 |
long width, long height, |
658 |
long lumStride, long chromStride, |
659 |
long srcStride)
|
660 |
{ |
661 |
long y;
|
662 |
const int chromWidth = width >> 1; |
663 |
y=0;
|
664 |
for (; y<height; y+=2) { |
665 |
long i;
|
666 |
for (i=0; i<chromWidth; i++) { |
667 |
unsigned int b = src[6*i+0]; |
668 |
unsigned int g = src[6*i+1]; |
669 |
unsigned int r = src[6*i+2]; |
670 |
|
671 |
unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
672 |
unsigned int V = ((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128; |
673 |
unsigned int U = ((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128; |
674 |
|
675 |
udst[i] = U; |
676 |
vdst[i] = V; |
677 |
ydst[2*i] = Y;
|
678 |
|
679 |
b = src[6*i+3]; |
680 |
g = src[6*i+4]; |
681 |
r = src[6*i+5]; |
682 |
|
683 |
Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
|
684 |
ydst[2*i+1] = Y; |
685 |
} |
686 |
ydst += lumStride; |
687 |
src += srcStride; |
688 |
|
689 |
for (i=0; i<chromWidth; i++) { |
690 |
unsigned int b = src[6*i+0]; |
691 |
unsigned int g = src[6*i+1]; |
692 |
unsigned int r = src[6*i+2]; |
693 |
|
694 |
unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
695 |
|
696 |
ydst[2*i] = Y;
|
697 |
|
698 |
b = src[6*i+3]; |
699 |
g = src[6*i+4]; |
700 |
r = src[6*i+5]; |
701 |
|
702 |
Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
|
703 |
ydst[2*i+1] = Y; |
704 |
} |
705 |
udst += chromStride; |
706 |
vdst += chromStride; |
707 |
ydst += lumStride; |
708 |
src += srcStride; |
709 |
} |
710 |
} |
711 |
|
712 |
/**
 * Interleave two byte planes line by line: output byte 2*w comes from src1
 * and output byte 2*w+1 from src2.
 *
 * @param src1, src2  source planes
 * @param dest        destination, 2 * width bytes written per line
 * @param width       bytes taken from each source per line
 * @param height      number of lines
 * @param src1Stride  byte step between src1 lines
 * @param src2Stride  byte step between src2 lines
 * @param dstStride   byte step between destination lines
 */
static void interleaveBytes_c(const uint8_t *src1, const uint8_t *src2,
                              uint8_t *dest, long width,
                              long height, long src1Stride,
                              long src2Stride, long dstStride)
{
    long row;

    for (row = 0; row < height; row++) {
        uint8_t *out = dest;
        long col;

        for (col = 0; col < width; col++) {
            *out++ = src1[col];
            *out++ = src2[col];
        }
        src1 += src1Stride;
        src2 += src2Stride;
        dest += dstStride;
    }
}
730 |
|
731 |
/**
 * Upscale two planes by 2 in both directions by sample repetition.
 *
 * For each plane, height/2 output lines are produced. Output line y reads
 * source line y >> 1, and each of the first width/2 source samples is
 * written twice in a row.
 *
 * @param src1, src2              source planes
 * @param dst1, dst2              destination planes
 * @param width, height           halved to obtain per-line sample count and
 *                                line count
 * @param srcStride1, srcStride2  byte step between source lines
 * @param dstStride1, dstStride2  byte step between destination lines
 */
static inline void vu9_to_vu12_c(const uint8_t *src1, const uint8_t *src2,
                                 uint8_t *dst1, uint8_t *dst2,
                                 long width, long height,
                                 long srcStride1, long srcStride2,
                                 long dstStride1, long dstStride2)
{
    const long samples = width / 2;
    const long lines   = height / 2;
    long col;
    int row;

    /* first plane */
    for (row = 0; row < lines; row++) {
        const uint8_t *in = src1 + srcStride1 * (row >> 1);
        uint8_t *out      = dst1 + dstStride1 * row;

        for (col = 0; col < samples; col++)
            out[2 * col] = out[2 * col + 1] = in[col];
    }

    /* second plane */
    for (row = 0; row < lines; row++) {
        const uint8_t *in = src2 + srcStride2 * (row >> 1);
        uint8_t *out      = dst2 + dstStride2 * row;

        for (col = 0; col < samples; col++)
            out[2 * col] = out[2 * col + 1] = in[col];
    }
}
753 |
|
754 |
/**
 * Pack three planes into Y U Y V byte groups, reusing each chroma sample for
 * four luma samples horizontally and each chroma line for four luma lines.
 *
 * For every x below width/2, 8 output bytes are written from 4 bytes of src1
 * and one byte each of src2 and src3. Line y reads line y >> 2 of src2/src3.
 *
 * NOTE(review): with width/2 iterations of 4 luma samples each, a line reads
 * 2 * width bytes of src1 and writes 4 * width bytes of dst - confirm the
 * units callers pass for width.
 *
 * @param src1                                first plane (4 samples per step)
 * @param src2, src3                          second/third plane (1 per step)
 * @param dst                                 packed destination
 * @param width, height                       see above
 * @param srcStride1, srcStride2, srcStride3  byte step between source lines
 * @param dstStride                           byte step between output lines
 */
static inline void yvu9_to_yuy2_c(const uint8_t *src1, const uint8_t *src2,
                                  const uint8_t *src3, uint8_t *dst,
                                  long width, long height,
                                  long srcStride1, long srcStride2,
                                  long srcStride3, long dstStride)
{
    const long steps = width / 2;
    const long lines = height;
    long row;

    for (row = 0; row < lines; row++) {
        const uint8_t *y_line = src1 + srcStride1 * row;
        const uint8_t *u_line = src2 + srcStride2 * (row >> 2);
        const uint8_t *v_line = src3 + srcStride3 * (row >> 2);
        uint8_t *out          = dst + dstStride * row;
        int x;

        for (x = 0; x < steps; x++) {
            const long x4 = x << 2;

            out[8 * x + 0] = y_line[x4];
            out[8 * x + 1] = u_line[x];
            out[8 * x + 2] = y_line[x4 + 1];
            out[8 * x + 3] = v_line[x];
            out[8 * x + 4] = y_line[x4 + 2];
            out[8 * x + 5] = u_line[x];
            out[8 * x + 6] = y_line[x4 + 3];
            out[8 * x + 7] = v_line[x];
        }
    }
}
782 |
|
783 |
/**
 * Copy every second byte of src, starting with src[0], into dst.
 *
 * @param src    source, 2 * count bytes are spanned
 * @param dst    destination, count bytes are written
 * @param count  number of bytes to produce; nothing happens when <= 0
 */
static void extract_even_c(const uint8_t *src, uint8_t *dst, int count)
{
    int n;

    for (n = 0; n < count; n++)
        dst[n] = src[2 * n];
}
794 |
|
795 |
/**
 * De-interleave 4-byte groups: byte 0 of each group goes to dst0 and byte 2
 * to dst1.
 *
 * @param src    source, 4 * count bytes are spanned
 * @param dst0   receives byte 0 of every group
 * @param dst1   receives byte 2 of every group
 * @param count  number of groups; nothing happens when <= 0
 */
static void extract_even2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                            int count)
{
    int n;

    for (n = 0; n < count; n++) {
        dst0[n] = src[4 * n + 0];
        dst1[n] = src[4 * n + 2];
    }
}
808 |
|
809 |
/**
 * De-interleave 4-byte groups from two sources and average them: dst0 gets
 * the truncated mean of byte 0 of both sources, dst1 that of byte 2.
 *
 * @param src0, src1  sources, 4 * count bytes are spanned in each
 * @param dst0        receives the averaged byte 0 of every group
 * @param dst1        receives the averaged byte 2 of every group
 * @param count       number of groups; nothing happens when <= 0
 */
static void extract_even2avg_c(const uint8_t *src0, const uint8_t *src1,
                               uint8_t *dst0, uint8_t *dst1, int count)
{
    int n;

    for (n = 0; n < count; n++) {
        dst0[n] = (src0[4 * n + 0] + src1[4 * n + 0]) >> 1;
        dst1[n] = (src0[4 * n + 2] + src1[4 * n + 2]) >> 1;
    }
}
} |
823 |
|
824 |
/**
 * De-interleave 4-byte groups: byte 1 of each group goes to dst0 and byte 3
 * to dst1.
 *
 * @param src    source, 4 * count bytes are spanned
 * @param dst0   receives byte 1 of every group
 * @param dst1   receives byte 3 of every group
 * @param count  number of groups; nothing happens when <= 0
 */
static void extract_odd2_c(const uint8_t *src, uint8_t *dst0, uint8_t *dst1,
                           int count)
{
    const uint8_t *odd = src + 1;
    int n;

    for (n = 0; n < count; n++) {
        dst0[n] = odd[4 * n + 0];
        dst1[n] = odd[4 * n + 2];
    }
}
838 |
|
839 |
/**
 * De-interleave 4-byte groups from two sources and average them: dst0 gets
 * the truncated mean of byte 1 of both sources, dst1 that of byte 3.
 *
 * @param src0, src1  sources, 4 * count bytes are spanned in each
 * @param dst0        receives the averaged byte 1 of every group
 * @param dst1        receives the averaged byte 3 of every group
 * @param count       number of groups; nothing happens when <= 0
 */
static void extract_odd2avg_c(const uint8_t *src0, const uint8_t *src1,
                              uint8_t *dst0, uint8_t *dst1, int count)
{
    const uint8_t *odd0 = src0 + 1;
    const uint8_t *odd1 = src1 + 1;
    int n;

    for (n = 0; n < count; n++) {
        dst0[n] = (odd0[4 * n + 0] + odd1[4 * n + 0]) >> 1;
        dst1[n] = (odd0[4 * n + 2] + odd1[4 * n + 2]) >> 1;
    }
}
855 |
|
856 |
/**
 * Split a packed source with luma in the even bytes into Y, U and V planes,
 * emitting one chroma line per two source lines.
 *
 * Every line has its even bytes copied to ydst. On odd lines, the odd bytes
 * of the previous and current source lines are averaged into udst/vdst and
 * the chroma destinations advance.
 *
 * The chroma width is -((-width) >> 1), i.e. width / 2 rounded up where
 * right-shifting a negative value is an arithmetic shift.
 */
static void yuyvtoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width)>>1);
    long line;

    for (line = 0; line < height; line++) {
        extract_even_c(src, ydst, width);

        if (line & 1) {
            /* average chroma of the previous and the current line */
            extract_odd2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            vdst += chromStride;
            udst += chromStride;
        }

        ydst += lumStride;
        src  += srcStride;
    }
}
875 |
|
876 |
/**
 * Split a packed source with luma in the even bytes into Y, U and V planes,
 * emitting one chroma line per source line.
 *
 * For every line the even bytes go to ydst and the odd bytes are split
 * between udst and vdst.
 *
 * The chroma width is -((-width) >> 1), i.e. width / 2 rounded up where
 * right-shifting a negative value is an arithmetic shift.
 */
static void yuyvtoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width)>>1);
    long line;

    for (line = 0; line < height; line++) {
        extract_even_c(src, ydst, width);
        extract_odd2_c(src, udst, vdst, chromWidth);

        vdst += chromStride;
        udst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
893 |
|
894 |
/**
 * Split a packed source with luma in the odd bytes into Y, U and V planes,
 * emitting one chroma line per two source lines.
 *
 * Every line has its odd bytes copied to ydst. On odd lines, the even bytes
 * of the previous and current source lines are averaged into udst/vdst and
 * the chroma destinations advance.
 *
 * The chroma width is -((-width) >> 1), i.e. width / 2 rounded up where
 * right-shifting a negative value is an arithmetic shift.
 */
static void uyvytoyuv420_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width)>>1);
    long line;

    for (line = 0; line < height; line++) {
        /* luma sits one byte into each pair */
        extract_even_c(src + 1, ydst, width);

        if (line & 1) {
            /* average chroma of the previous and the current line */
            extract_even2avg_c(src - srcStride, src, udst, vdst, chromWidth);
            vdst += chromStride;
            udst += chromStride;
        }

        ydst += lumStride;
        src  += srcStride;
    }
}
913 |
|
914 |
/**
 * Split a packed source with luma in the odd bytes into Y, U and V planes,
 * emitting one chroma line per source line.
 *
 * For every line the odd bytes go to ydst and the even bytes are split
 * between udst and vdst.
 *
 * The chroma width is -((-width) >> 1), i.e. width / 2 rounded up where
 * right-shifting a negative value is an arithmetic shift.
 */
static void uyvytoyuv422_c(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, long width, long height,
                           long lumStride, long chromStride, long srcStride)
{
    const long chromWidth = -((-width)>>1);
    long line;

    for (line = 0; line < height; line++) {
        /* luma sits one byte into each pair */
        extract_even_c(src + 1, ydst, width);
        extract_even2_c(src, udst, vdst, chromWidth);

        vdst += chromStride;
        udst += chromStride;
        ydst += lumStride;
        src  += srcStride;
    }
}
931 |
|
932 |
static inline void rgb2rgb_init_c(void) |
933 |
{ |
934 |
rgb15to16 = rgb15to16_c; |
935 |
rgb15tobgr24 = rgb15tobgr24_c; |
936 |
rgb15to32 = rgb15to32_c; |
937 |
rgb16tobgr24 = rgb16tobgr24_c; |
938 |
rgb16to32 = rgb16to32_c; |
939 |
rgb16to15 = rgb16to15_c; |
940 |
rgb24tobgr16 = rgb24tobgr16_c; |
941 |
rgb24tobgr15 = rgb24tobgr15_c; |
942 |
rgb24tobgr32 = rgb24tobgr32_c; |
943 |
rgb32to16 = rgb32to16_c; |
944 |
rgb32to15 = rgb32to15_c; |
945 |
rgb32tobgr24 = rgb32tobgr24_c; |
946 |
rgb24to15 = rgb24to15_c; |
947 |
rgb24to16 = rgb24to16_c; |
948 |
rgb24tobgr24 = rgb24tobgr24_c; |
949 |
shuffle_bytes_2103 = shuffle_bytes_2103_c; |
950 |
rgb32tobgr16 = rgb32tobgr16_c; |
951 |
rgb32tobgr15 = rgb32tobgr15_c; |
952 |
yv12toyuy2 = yv12toyuy2_c; |
953 |
yv12touyvy = yv12touyvy_c; |
954 |
yuv422ptoyuy2 = yuv422ptoyuy2_c; |
955 |
yuv422ptouyvy = yuv422ptouyvy_c; |
956 |
yuy2toyv12 = yuy2toyv12_c; |
957 |
planar2x = planar2x_c; |
958 |
rgb24toyv12 = rgb24toyv12_c; |
959 |
interleaveBytes = interleaveBytes_c; |
960 |
vu9_to_vu12 = vu9_to_vu12_c; |
961 |
yvu9_to_yuy2 = yvu9_to_yuy2_c; |
962 |
|
963 |
uyvytoyuv420 = uyvytoyuv420_c; |
964 |
uyvytoyuv422 = uyvytoyuv422_c; |
965 |
yuyvtoyuv420 = yuyvtoyuv420_c; |
966 |
yuyvtoyuv422 = yuyvtoyuv422_c; |
967 |
} |