Statistics
| Branch: | Revision:

ffmpeg / libswscale / yuv2rgb_altivec.c @ a23c9c4a

History | View | Annotate | Download (38.6 KB)

1 a31de956 Michael Niedermayer
/*
2
  marc.hoffman@analog.com    March 8, 2004
3

4 298726ba Diego Biurrun
  AltiVec acceleration for colorspace conversion revision 0.2
5 a31de956 Michael Niedermayer

6
  convert I420 YV12 to RGB in various formats,
7
    it rejects images that are not in 420 formats
8
    it rejects images that don't have widths of multiples of 16
9
    it rejects images that don't have heights of multiples of 2
10
  reject defers to C simulation codes.
11

12
  lots of optimizations to be done here
13

14
  1. need to fix saturation code, I just couldn't get it to fly with packs and adds.
15
     so we currently use max min to clip
16

17
  2. the inefficient use of chroma loading needs a bit of brushing up
18

19
  3. analysis of pipeline stalls needs to be done, use shark to identify pipeline stalls
20

21

22
  MODIFIED to calculate coeffs from currently selected color space.
23
  MODIFIED core to be a macro which you spec the output format.
24
  ADDED UYVY conversion which is never called due to something in SWSCALE.
25
  CORRECTED algorithm selection to be strict on input formats.
26
  ADDED runtime detection of altivec.
27

28
  ADDED altivec_yuv2packedX vertical scl + RGB converter
29

30
  March 27,2004
31
  PERFORMANCE ANALYSIS
32

33
  The C version uses 25% of the processor or ~250Mips for D1 video rawvideo used as test
34
  The ALTIVEC version uses 10% of the processor or ~100Mips for D1 video same sequence
35

36
  720*480*30  ~10MPS
37

38
  so we have roughly 10clocks per pixel this is too high something has to be wrong.
39

40
  OPTIMIZED clip codes to utilize vec_max and vec_packs removing the need for vec_min.
41

42
  OPTIMIZED DST OUTPUT cache/dma controls. we are pretty much
43
  guaranteed to have the input video frame it was just decompressed so
44
  it probably resides in L1 caches.  However we are creating the
45
  output video stream this needs to use the DSTST instruction to
46
  optimize for the cache.  We couple this with the fact that we are
47
  not going to be visiting the input buffer again so we mark it Least
48
  Recently Used.  This shaves 25% of the processor cycles off.
49

50
  Now MEMCPY is the largest mips consumer in the system, probably due
51
  to the inefficient X11 stuff.
52

53
  GL libraries seem to be very slow on this machine 1.33Ghz PB running
54
  Jaguar, this is not the case for my 1Ghz PB.  I thought it might be
55
  a versioning issues, however i have libGL.1.2.dylib for both
56
  machines. ((We need to figure this out now))
57

58
  GL2 libraries work now with patch for RGB32
59

60
  NOTE quartz vo driver ARGB32_to_RGB24 consumes 30% of the processor
61

62 6a4970ab Diego Biurrun
  Integrated luma prescaling adjustment for saturation/contrast/brightness adjustment.
63 d026b45e Diego Biurrun
*/
64 a31de956 Michael Niedermayer
65 d026b45e Diego Biurrun
/*
66
 * This file is part of FFmpeg.
67
 *
68
 * FFmpeg is free software; you can redistribute it and/or modify
69
 * it under the terms of the GNU General Public License as published by
70
 * the Free Software Foundation; either version 2 of the License, or
71
 * (at your option) any later version.
72
 *
73
 * FFmpeg is distributed in the hope that it will be useful,
74
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
75
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
76
 * GNU General Public License for more details.
77
 *
78
 * You should have received a copy of the GNU General Public License
79
 * along with FFmpeg; if not, write to the Free Software
80 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
81 d026b45e Diego Biurrun
 */
82 2111440a Diego Biurrun
83 a31de956 Michael Niedermayer
#include <stdio.h>
84
#include <stdlib.h>
85 84fdd642 Alex Beregszaszi
#include <string.h>
86 a31de956 Michael Niedermayer
#include <inttypes.h>
87
#include <assert.h>
88
#include "config.h"
89 5edb653b Alan Curry
#ifdef HAVE_MALLOC_H
90
#include <malloc.h>
91
#endif
92 a31de956 Michael Niedermayer
#include "rgb2rgb.h"
93
#include "swscale.h"
94
#include "swscale_internal.h"
95
96
#undef PROFILE_THE_BEAST
97
#undef INC_SCALING
98
99
/* Shorthand names for the 8-bit sample types used throughout this file. */
typedef unsigned char ubyte;
typedef signed char   sbyte;
101
102
103
/* RGB interleaver, 16 planar pels 8-bit samples per channel in
104
   homogeneous vector registers x0,x1,x2 are interleaved with the
105
   following technique:
106

107
      o0 = vec_mergeh (x0,x1);
108
      o1 = vec_perm (o0, x2, perm_rgb_0);
109
      o2 = vec_perm (o0, x2, perm_rgb_1);
110
      o3 = vec_mergel (x0,x1);
111
      o4 = vec_perm (o3,o2,perm_rgb_2);
112
      o5 = vec_perm (o3,o2,perm_rgb_3);
113

114
  perm_rgb_0:   o0(RG).h v1(B) --> o1*
115
              0   1  2   3   4
116
             rgbr|gbrg|brgb|rgbr
117
             0010 0100 1001 0010
118
             0102 3145 2673 894A
119

120
  perm_rgb_1:   o0(RG).h v1(B) --> o2
121
              0   1  2   3   4
122
             gbrg|brgb|bbbb|bbbb
123
             0100 1001 1111 1111
124
             B5CD 6EF7 89AB CDEF
125

126
  perm_rgb_2:   o3(RG).l o2(rgbB.l) --> o4*
127
              0   1  2   3   4
128
             gbrg|brgb|rgbr|gbrg
129
             1111 1111 0010 0100
130
             89AB CDEF 0182 3945
131

132
  perm_rgb_3:   o3(RG).l o2(rgbB.l) ---> o5*
133
              0   1  2   3   4
134
             brgb|rgbr|gbrg|brgb
135
             1001 0010 0100 1001
136
             a67b 89cA BdCD eEFf
137

138
*/
139
static
140
const vector unsigned char
141 582552fb Luca Barbato
  perm_rgb_0 = (const vector unsigned char)AVV(0x00,0x01,0x10,0x02,0x03,0x11,0x04,0x05,
142 42809816 Diego Biurrun
                                               0x12,0x06,0x07,0x13,0x08,0x09,0x14,0x0a),
143 582552fb Luca Barbato
  perm_rgb_1 = (const vector unsigned char)AVV(0x0b,0x15,0x0c,0x0d,0x16,0x0e,0x0f,0x17,
144 42809816 Diego Biurrun
                                               0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f),
145 582552fb Luca Barbato
  perm_rgb_2 = (const vector unsigned char)AVV(0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
146 42809816 Diego Biurrun
                                               0x00,0x01,0x18,0x02,0x03,0x19,0x04,0x05),
147 582552fb Luca Barbato
  perm_rgb_3 = (const vector unsigned char)AVV(0x1a,0x06,0x07,0x1b,0x08,0x09,0x1c,0x0a,
148 42809816 Diego Biurrun
                                               0x0b,0x1d,0x0c,0x0d,0x1e,0x0e,0x0f,0x1f);
149
150
/*
 * vec_merge3(x2,x1,x0, y0,y1,y2)
 *
 * Interleave three planar 16-byte channel vectors into three vectors of
 * packed 3-byte pixels, assigned to y0, y1 and y2 in memory order.
 * Within each pixel the byte order is x0, x1, x2.
 * Arguments are expanded textually and more than once, so pass plain
 * variables only.
 */
#define vec_merge3(x2,x1,x0,y0,y1,y2)                       \
do {                                                        \
    typeof(x0) m3_pair_hi, m3_spill, m3_pair_lo;            \
    /* x0/x1 byte pairs for pixels 0..7 */                  \
    m3_pair_hi = vec_mergeh (x0,x1);                        \
    y0         = vec_perm (m3_pair_hi, x2, perm_rgb_0);     \
    /* leftover of the first half plus x2 bytes 8..15 */    \
    m3_spill   = vec_perm (m3_pair_hi, x2, perm_rgb_1);     \
    /* x0/x1 byte pairs for pixels 8..15 */                 \
    m3_pair_lo = vec_mergel (x0,x1);                        \
    y1         = vec_perm (m3_pair_lo, m3_spill, perm_rgb_2); \
    y2         = vec_perm (m3_pair_lo, m3_spill, perm_rgb_3); \
} while(0)
160
161 42809816 Diego Biurrun
/*
 * vec_mstbgr24(x0,x1,x2,ptr)
 *
 * Interleave three 16-byte channel vectors into 16 packed 3-byte pixels and
 * store the resulting three vectors with vec_st at ptr.  Within each pixel
 * the byte order is x2, x1, x0.  ptr is post-incremented three times, so it
 * must be a plain pointer variable (no side effects in the argument).
 *
 * The expansion deliberately does NOT end in a semicolon: the caller supplies
 * it, which keeps the macro usable as a single statement (e.g. before else).
 */
#define vec_mstbgr24(x0,x1,x2,ptr)      \
do {                                    \
    typeof(x0) _0,_1,_2;                \
    vec_merge3 (x0,x1,x2,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
169
170 42809816 Diego Biurrun
/*
 * vec_mstrgb24(x0,x1,x2,ptr)
 *
 * Interleave three 16-byte channel vectors into 16 packed 3-byte pixels and
 * store the resulting three vectors with vec_st at ptr.  Within each pixel
 * the byte order is x0, x1, x2.  ptr is post-incremented three times, so it
 * must be a plain pointer variable (no side effects in the argument).
 *
 * The expansion deliberately does NOT end in a semicolon: the caller supplies
 * it, which keeps the macro usable as a single statement (e.g. before else).
 */
#define vec_mstrgb24(x0,x1,x2,ptr)      \
do {                                    \
    typeof(x0) _0,_1,_2;                \
    vec_merge3 (x2,x1,x0,_0,_1,_2);     \
    vec_st (_0, 0, ptr++);              \
    vec_st (_1, 0, ptr++);              \
    vec_st (_2, 0, ptr++);              \
} while (0)
178
179
/* pack the pixels in rgb0 format
180
   msb R
181
   lsb 0
182
*/
183 42809816 Diego Biurrun
/*
 * vec_mstrgb32(T,x0,x1,x2,x3,ptr)
 *
 * Interleave four 16-byte channel vectors of type T into 16 packed 4-byte
 * pixels (byte order x0, x1, x2, x3 within each pixel) and store them with
 * vec_st at byte offsets 0, 16, 32 and 48 from ptr.  Afterwards ptr is
 * advanced by 4 elements of its own type.
 *
 * The expansion deliberately does NOT end in a semicolon: the caller supplies
 * it, which keeps the macro usable as a single statement (e.g. before else).
 */
#define vec_mstrgb32(T,x0,x1,x2,x3,ptr)                                       \
do {                                                                          \
    T _0,_1,_2,_3;                                                            \
    /* pixels 0..7: pair bytes, then pair the 16-bit pairs */                 \
    _0 = vec_mergeh (x0,x1);                                                  \
    _1 = vec_mergeh (x2,x3);                                                  \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 0*16, (T *)ptr);                                              \
    vec_st (_3, 1*16, (T *)ptr);                                              \
    /* pixels 8..15 */                                                        \
    _0 = vec_mergel (x0,x1);                                                  \
    _1 = vec_mergel (x2,x3);                                                  \
    _2 = (T)vec_mergeh ((vector unsigned short)_0,(vector unsigned short)_1); \
    _3 = (T)vec_mergel ((vector unsigned short)_0,(vector unsigned short)_1); \
    vec_st (_2, 2*16, (T *)ptr);                                              \
    vec_st (_3, 3*16, (T *)ptr);                                              \
    ptr += 4;                                                                 \
} while (0)
200
201
/*
202

203
  | 1     0       1.4021   | | Y |
204
  | 1    -0.3441 -0.7142   |x| Cb|
205 42809816 Diego Biurrun
  | 1     1.7718  0        | | Cr|
206 a31de956 Michael Niedermayer

207

208
  Y:      [-128 127]
209
  Cb/Cr : [-128 127]
210

211
  Typical YUV conversion works on Y: 0-255; this version has been optimized for JPEG decode.
212

213
*/
214
215
216
217
218
/*
 * vec_unh(x): zero-extend bytes 0..7 of x into eight 16-bit lanes.
 * Selector 0x10 picks byte 0 of the all-zero second operand as the first
 * byte of every lane (the most significant one on big-endian PowerPC).
 * The result is typed vector signed short; the whole expansion is
 * parenthesized so it can be used inside larger expressions.
 */
#define vec_unh(x) \
    ((vector signed short) \
        vec_perm((x),(typeof(x))AVV(0),\
                 (vector unsigned char)AVV(0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03,\
                                           0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07)))
223 a31de956 Michael Niedermayer
/*
 * vec_unl(x): zero-extend bytes 8..15 of x into eight 16-bit lanes.
 * Selector 0x10 picks byte 0 of the all-zero second operand as the first
 * byte of every lane (the most significant one on big-endian PowerPC).
 * The result is typed vector signed short; the whole expansion is
 * parenthesized so it can be used inside larger expressions.
 */
#define vec_unl(x) \
    ((vector signed short) \
        vec_perm((x),(typeof(x))AVV(0),\
                 (vector unsigned char)AVV(0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B,\
                                           0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F)))
228 a31de956 Michael Niedermayer
229 cbddd5df Alan Curry
/*
 * vec_clip_s16(x): clamp every signed 16-bit lane of x to the range 16..235.
 * The expansion is fully parenthesized so it composes safely in expressions.
 */
#define vec_clip_s16(x) \
    (vec_max (vec_min ((x), (vector signed short)AVV(235,235,235,235,235,235,235,235)),\
                            (vector signed short)AVV( 16, 16, 16, 16, 16, 16, 16, 16)))
232 a31de956 Michael Niedermayer
233
/*
 * vec_packclp(x,y): pack two vectors of eight signed 16-bit lanes into one
 * vector of sixteen unsigned bytes (x supplies bytes 0..7, y bytes 8..15).
 * Negative lanes are first raised to 0 with vec_max; the values are then
 * treated as unsigned and narrowed with the saturating vec_packs, which
 * caps them at 255.  The expansion is fully parenthesized.
 */
#define vec_packclp(x,y) \
    ((vector unsigned char)vec_packs \
        ((vector unsigned short)vec_max ((x),(vector signed short) AVV(0)), \
         (vector unsigned short)vec_max ((y),(vector signed short) AVV(0))))
237 a31de956 Michael Niedermayer
238 582552fb Luca Barbato
//#define out_pixels(a,b,c,ptr) vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,a,a,ptr)
239 a31de956 Michael Niedermayer
240
241 84fdd642 Alex Beregszaszi
/*
 * Convert one vector of eight 16-bit Y/U/V samples to R, G and B using the
 * coefficient vectors stored in the context (CY, OY, CSHIFT, CBU, CRV, CGU,
 * CGV).  128 is subtracted from every U and V lane before the chroma terms
 * are computed.  Results are written through R, G and B as signed 16-bit
 * lanes; this function does not clip them to 0..255.
 */
static inline void cvtyuvtoRGB (SwsContext *c,
                                vector signed short Y, vector signed short U, vector signed short V,
                                vector signed short *R, vector signed short *G, vector signed short *B)
{
    const vector signed short chroma_bias =
        (vector signed short) vec_splat((vector signed short)AVV(128),0);
    vector signed short u_shifted, v_shifted, green_acc;

    /* luma gain and offset */
    Y = vec_mradds (Y, c->CY, c->OY);

    /* centre chroma around zero */
    U = vec_sub (U, chroma_bias);
    V = vec_sub (V, chroma_bias);

    /* B = Y + ((CBU*(u<<CSHIFT) + 0x4000) >> 15) */
    u_shifted = vec_sl (U, c->CSHIFT);
    *B        = vec_mradds (u_shifted, c->CBU, Y);

    /* R = Y + ((CRV*(v<<CSHIFT) + 0x4000) >> 15) */
    v_shifted = vec_sl (V, c->CSHIFT);
    *R        = vec_mradds (v_shifted, c->CRV, Y);

    /* G = Y + ((CGU*u + 0x4000) >> 15) + ((CGV*v + 0x4000) >> 15) */
    green_acc = vec_mradds (U, c->CGU, Y);
    *G        = vec_mradds (V, c->CGV, green_acc);
}
265
266
267
/*
268
  ------------------------------------------------------------------------------
269
  CS converters
270
  ------------------------------------------------------------------------------
271
*/
272
273
274 42809816 Diego Biurrun
/*
 * DEFCSP420_CVT(name, out_pixels)
 *
 * Defines altivec_<name>(), a planar YUV 4:2:0 to packed RGB slice converter.
 * out_pixels(R,G,B,ptr) is the store macro that fixes the output byte layout;
 * it receives three vectors of sixteen 8-bit samples plus the output row
 * pointer and is responsible for advancing that pointer.
 *
 * Every inner iteration converts a 16x2 pixel block: sixteen luma samples
 * from an even row and from the odd row below it, plus eight U and eight V
 * samples shared by both rows.  Only w/16 whole blocks per row and
 * srcSliceH/2 row pairs are processed.  Input pointers may be unaligned
 * (two aligned loads combined through vec_lvsl/vec_perm).  The function
 * returns srcSliceH.
 *
 * NOTE(review): after each row pair the output pointers are moved by
 * outstrides[0] in addition to what out_pixels already advanced them, which
 * looks like it assumes outstrides[0] equals the bytes written per row --
 * confirm against the callers.
 */
#define DEFCSP420_CVT(name,out_pixels)                                      \
static int altivec_##name (SwsContext *c,                                   \
                           unsigned char **in, int *instrides,              \
                           int srcSliceY,        int srcSliceH,             \
                           unsigned char **oplanes, int *outstrides)        \
{                                                                           \
    const int w         = c->srcW;                                          \
    const int row_pairs = srcSliceH/2;                                      \
    const int blocks    = w/16;                                             \
    /* control word handed to the vec_dstst hints on the output rows */     \
    const int dst_ctl   = 0x02000002|(((w*3+32)/32)<<16);                   \
    /* remaining advance per row pair: the inner loop has already moved */  \
    /* the luma pointers by w and the chroma pointers by w/2            */  \
    const int y_skip    = instrides[0]*2-w;                                 \
    const int u_skip    = instrides[1]-w/2;                                 \
    const int v_skip    = instrides[2]-w/2;                                 \
    int i,j;                                                                \
                                                                            \
    const vector signed short zero16 = (vector signed short)AVV(0);         \
    const vector signed char  bias8  = (vector signed char)                 \
        vec_splat((vector signed char)AVV(128),0);                          \
                                                                            \
    /* local copies of the conversion coefficients */                       \
    vector signed short                                                     \
        lCY  = c->CY,                                                       \
        lOY  = c->OY,                                                       \
        lCRV = c->CRV,                                                      \
        lCBU = c->CBU,                                                      \
        lCGU = c->CGU,                                                      \
        lCGV = c->CGV;                                                      \
    vector unsigned short lCSHIFT = c->CSHIFT;                              \
                                                                            \
    ubyte *y1i = in[0];                 /* even luma row */                 \
    ubyte *y2i = in[0]+instrides[0];    /* odd luma row  */                 \
    ubyte *ui  = in[1];                                                     \
    ubyte *vi  = in[2];                                                     \
                                                                            \
    vector unsigned char *oute = (vector unsigned char *)                   \
        (oplanes[0]+srcSliceY*outstrides[0]);                               \
    vector unsigned char *outo = (vector unsigned char *)                   \
        (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);                 \
                                                                            \
    for (i = 0; i < row_pairs; i++) {                                       \
        vec_dstst (outo, dst_ctl, 0);                                       \
        vec_dstst (oute, dst_ctl, 1);                                       \
                                                                            \
        for (j = 0; j < blocks; j++) {                                      \
            vector unsigned char *y1ivP = (vector unsigned char *)y1i;      \
            vector unsigned char *y2ivP = (vector unsigned char *)y2i;      \
            vector unsigned char *uivP  = (vector unsigned char *)ui;       \
            vector unsigned char *vivP  = (vector unsigned char *)vi;       \
            vector unsigned char y_even, y_odd;                             \
            vector unsigned char R,G,B;                                     \
            vector signed char   u,v;                                       \
            vector signed short  Y0,Y1,Y2,Y3;                               \
            vector signed short  U,V;                                       \
            vector signed short  vx,ux,uvx;                                 \
            vector signed short  vx0,ux0,uvx0;                              \
            vector signed short  vx1,ux1,uvx1;                              \
            vector signed short  R0,G0,B0;                                  \
            vector signed short  R1,G1,B1;                                  \
                                                                            \
            /* unaligned loads: two aligned vectors merged via vec_lvsl */  \
            y_even = (vector unsigned char)                                 \
                     vec_perm (y1ivP[0], y1ivP[1], vec_lvsl (0, y1i));      \
            y_odd  = (vector unsigned char)                                 \
                     vec_perm (y2ivP[0], y2ivP[1], vec_lvsl (0, y2i));      \
            u = (vector signed char)                                        \
                vec_perm (uivP[0], uivP[1], vec_lvsl (0, ui));              \
            v = (vector signed char)                                        \
                vec_perm (vivP[0], vivP[1], vec_lvsl (0, vi));              \
                                                                            \
            /* subtract 128 from chroma, then widen the first 8 samples */  \
            u = (vector signed char) vec_sub (u, bias8);                    \
            v = (vector signed char) vec_sub (v, bias8);                    \
            U = vec_unpackh (u);                                            \
            V = vec_unpackh (v);                                            \
                                                                            \
            /* widen luma to 16 bits and apply gain/offset */               \
            Y0 = vec_unh (y_even);                                          \
            Y1 = vec_unl (y_even);                                          \
            Y2 = vec_unh (y_odd);                                           \
            Y3 = vec_unl (y_odd);                                           \
            Y0 = vec_mradds (Y0, lCY, lOY);                                 \
            Y1 = vec_mradds (Y1, lCY, lOY);                                 \
            Y2 = vec_mradds (Y2, lCY, lOY);                                 \
            Y3 = vec_mradds (Y3, lCY, lOY);                                 \
                                                                            \
            /* ux = (CBU*(u<<CSHIFT)+0x4000)>>15, doubled per pixel pair */ \
            ux  = vec_sl (U, lCSHIFT);                                      \
            ux  = vec_mradds (ux, lCBU, zero16);                            \
            ux0 = vec_mergeh (ux,ux);                                       \
            ux1 = vec_mergel (ux,ux);                                       \
                                                                            \
            /* vx = (CRV*(v<<CSHIFT)+0x4000)>>15, doubled per pixel pair */ \
            vx  = vec_sl (V, lCSHIFT);                                      \
            vx  = vec_mradds (vx, lCRV, zero16);                            \
            vx0 = vec_mergeh (vx,vx);                                       \
            vx1 = vec_mergel (vx,vx);                                       \
                                                                            \
            /* uvx = ((CGU*u) + (CGV*v))>>15, doubled per pixel pair */     \
            uvx  = vec_mradds (U, lCGU, zero16);                            \
            uvx  = vec_mradds (V, lCGV, uvx);                               \
            uvx0 = vec_mergeh (uvx,uvx);                                    \
            uvx1 = vec_mergel (uvx,uvx);                                    \
                                                                            \
            /* even row */                                                  \
            R0 = vec_add (Y0,vx0);                                          \
            G0 = vec_add (Y0,uvx0);                                         \
            B0 = vec_add (Y0,ux0);                                          \
            R1 = vec_add (Y1,vx1);                                          \
            G1 = vec_add (Y1,uvx1);                                         \
            B1 = vec_add (Y1,ux1);                                          \
            R  = vec_packclp (R0,R1);                                       \
            G  = vec_packclp (G0,G1);                                       \
            B  = vec_packclp (B0,B1);                                       \
            out_pixels(R,G,B,oute);                                         \
                                                                            \
            /* odd row, same chroma */                                      \
            R0 = vec_add (Y2,vx0);                                          \
            G0 = vec_add (Y2,uvx0);                                         \
            B0 = vec_add (Y2,ux0);                                          \
            R1 = vec_add (Y3,vx1);                                          \
            G1 = vec_add (Y3,uvx1);                                         \
            B1 = vec_add (Y3,ux1);                                          \
            R  = vec_packclp (R0,R1);                                       \
            G  = vec_packclp (G0,G1);                                       \
            B  = vec_packclp (B0,B1);                                       \
            out_pixels(R,G,B,outo);                                         \
                                                                            \
            y1i += 16;                                                      \
            y2i += 16;                                                      \
            ui  += 8;                                                       \
            vi  += 8;                                                       \
        }                                                                   \
                                                                            \
        outo += (outstrides[0])>>4;                                         \
        oute += (outstrides[0])>>4;                                         \
                                                                            \
        ui  += u_skip;                                                      \
        vi  += v_skip;                                                      \
        y1i += y_skip;                                                      \
        y2i += y_skip;                                                      \
    }                                                                       \
    return srcSliceH;                                                       \
}
437
438
439 582552fb Luca Barbato
/*
 * Output-format adaptors passed to DEFCSP420_CVT as out_pixels.
 * a, b, c are the three colour byte vectors in the order the converter
 * passes them (R, G, B); ptr is the output row pointer.
 */

/* packed 24-bit layouts */
#define out_rgb24(a,b,c,ptr) vec_mstrgb24(a,b,c,ptr)
#define out_bgr24(a,b,c,ptr) vec_mstbgr24(a,b,c,ptr)

/* packed 32-bit layouts; the fourth byte of every pixel is zero */
#define out_rgba(a,b,c,ptr)  vec_mstrgb32(typeof(a),a,b,c,((typeof (a))AVV(0)),ptr)
#define out_bgra(a,b,c,ptr)  vec_mstrgb32(typeof(a),c,b,a,((typeof (a))AVV(0)),ptr)
#define out_argb(a,b,c,ptr)  vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),a,b,c,ptr)
#define out_abgr(a,b,c,ptr)  vec_mstrgb32(typeof(a),((typeof (a))AVV(0)),c,b,a,ptr)
445 a31de956 Michael Niedermayer
446 340ea251 Alan Curry
DEFCSP420_CVT (yuv2_abgr, out_abgr)
447 582552fb Luca Barbato
#if 1
448 340ea251 Alan Curry
DEFCSP420_CVT (yuv2_bgra, out_bgra)
449 582552fb Luca Barbato
#else
450 6a4970ab Diego Biurrun
static int altivec_yuv2_bgra32 (SwsContext *c,
451 42809816 Diego Biurrun
                                unsigned char **in, int *instrides,
452
                                int srcSliceY,        int srcSliceH,
453
                                unsigned char **oplanes, int *outstrides)
454 6a4970ab Diego Biurrun
{
455 42809816 Diego Biurrun
    int w = c->srcW;
456
    int h = srcSliceH;
457
    int i,j;
458
    int instrides_scl[3];
459
    vector unsigned char y0,y1;
460
461
    vector signed char  u,v;
462
463
    vector signed short Y0,Y1,Y2,Y3;
464
    vector signed short U,V;
465
    vector signed short vx,ux,uvx;
466
    vector signed short vx0,ux0,uvx0;
467
    vector signed short vx1,ux1,uvx1;
468
    vector signed short R0,G0,B0;
469
    vector signed short R1,G1,B1;
470
    vector unsigned char R,G,B;
471
472
    vector unsigned char *uivP, *vivP;
473
    vector unsigned char align_perm;
474
475
    vector signed short
476
        lCY  = c->CY,
477
        lOY  = c->OY,
478
        lCRV = c->CRV,
479
        lCBU = c->CBU,
480
        lCGU = c->CGU,
481
        lCGV = c->CGV;
482
483
    vector unsigned short lCSHIFT = c->CSHIFT;
484
485
    ubyte *y1i   = in[0];
486
    ubyte *y2i   = in[0]+w;
487
    ubyte *ui    = in[1];
488
    ubyte *vi    = in[2];
489
490
    vector unsigned char *oute
491
        = (vector unsigned char *)
492
          (oplanes[0]+srcSliceY*outstrides[0]);
493
    vector unsigned char *outo
494
        = (vector unsigned char *)
495
          (oplanes[0]+srcSliceY*outstrides[0]+outstrides[0]);
496
497
498
    instrides_scl[0] = instrides[0];
499
    instrides_scl[1] = instrides[1]-w/2;  /* the loop moves ui by w/2 */
500
    instrides_scl[2] = instrides[2]-w/2;  /* the loop moves vi by w/2 */
501
502
503
    for (i=0;i<h/2;i++) {
504
        vec_dstst (outo, (0x02000002|(((w*3+32)/32)<<16)), 0);
505
        vec_dstst (oute, (0x02000002|(((w*3+32)/32)<<16)), 1);
506
507
        for (j=0;j<w/16;j++) {
508
509
            y0 = vec_ldl (0,y1i);
510
            y1 = vec_ldl (0,y2i);
511
            uivP = (vector unsigned char *)ui;
512
            vivP = (vector unsigned char *)vi;
513
514
            align_perm = vec_lvsl (0, ui);
515
            u  = (vector signed char)vec_perm (uivP[0], uivP[1], align_perm);
516
517
            align_perm = vec_lvsl (0, vi);
518
            v  = (vector signed char)vec_perm (vivP[0], vivP[1], align_perm);
519
            u  = (vector signed char)
520
                 vec_sub (u,(vector signed char)
521
                          vec_splat((vector signed char)AVV(128),0));
522
523
            v  = (vector signed char)
524
                 vec_sub (v, (vector signed char)
525
                          vec_splat((vector signed char)AVV(128),0));
526
527
            U  = vec_unpackh (u);
528
            V  = vec_unpackh (v);
529
530
531
            Y0 = vec_unh (y0);
532
            Y1 = vec_unl (y0);
533
            Y2 = vec_unh (y1);
534
            Y3 = vec_unl (y1);
535
536
            Y0 = vec_mradds (Y0, lCY, lOY);
537
            Y1 = vec_mradds (Y1, lCY, lOY);
538
            Y2 = vec_mradds (Y2, lCY, lOY);
539
            Y3 = vec_mradds (Y3, lCY, lOY);
540
541
            /*   ux  = (CBU*(u<<CSHIFT)+0x4000)>>15 */
542
            ux = vec_sl (U, lCSHIFT);
543
            ux = vec_mradds (ux, lCBU, (vector signed short)AVV(0));
544
            ux0  = vec_mergeh (ux,ux);
545
            ux1  = vec_mergel (ux,ux);
546
547
            /* vx  = (CRV*(v<<CSHIFT)+0x4000)>>15;        */
548
            vx = vec_sl (V, lCSHIFT);
549
            vx = vec_mradds (vx, lCRV, (vector signed short)AVV(0));
550
            vx0  = vec_mergeh (vx,vx);
551
            vx1  = vec_mergel (vx,vx);
552
            /* uvx = ((CGU*u) + (CGV*v))>>15 */
553
            uvx = vec_mradds (U, lCGU, (vector signed short)AVV(0));
554
            uvx = vec_mradds (V, lCGV, uvx);
555
            uvx0 = vec_mergeh (uvx,uvx);
556
            uvx1 = vec_mergel (uvx,uvx);
557
            R0 = vec_add (Y0,vx0);
558
            G0 = vec_add (Y0,uvx0);
559
            B0 = vec_add (Y0,ux0);
560
            R1 = vec_add (Y1,vx1);
561
            G1 = vec_add (Y1,uvx1);
562
            B1 = vec_add (Y1,ux1);
563
            R  = vec_packclp (R0,R1);
564
            G  = vec_packclp (G0,G1);
565
            B  = vec_packclp (B0,B1);
566
567
            out_argb(R,G,B,oute);
568
            R0 = vec_add (Y2,vx0);
569
            G0 = vec_add (Y2,uvx0);
570
            B0 = vec_add (Y2,ux0);
571
            R1 = vec_add (Y3,vx1);
572
            G1 = vec_add (Y3,uvx1);
573
            B1 = vec_add (Y3,ux1);
574
            R  = vec_packclp (R0,R1);
575
            G  = vec_packclp (G0,G1);
576
            B  = vec_packclp (B0,B1);
577
578
            out_argb(R,G,B,outo);
579
            y1i  += 16;
580
            y2i  += 16;
581
            ui   += 8;
582
            vi   += 8;
583 6a4970ab Diego Biurrun
584 42809816 Diego Biurrun
        }
585 6a4970ab Diego Biurrun
586 42809816 Diego Biurrun
        outo  += (outstrides[0])>>4;
587
        oute  += (outstrides[0])>>4;
588 6a4970ab Diego Biurrun
589 42809816 Diego Biurrun
        ui    += instrides_scl[1];
590
        vi    += instrides_scl[2];
591
        y1i   += instrides_scl[0];
592
        y2i   += instrides_scl[0];
593
    }
594
    return srcSliceH;
595 582552fb Luca Barbato
}
596
597
#endif
598
599
600 340ea251 Alan Curry
DEFCSP420_CVT (yuv2_rgba, out_rgba)
601
DEFCSP420_CVT (yuv2_argb, out_argb)
602 a31de956 Michael Niedermayer
DEFCSP420_CVT (yuv2_rgb24,  out_rgb24)
603
DEFCSP420_CVT (yuv2_bgr24,  out_bgr24)
604
605
606
// uyvy|uyvy|uyvy|uyvy
607
// 0123 4567 89ab cdef
608
static
609
const vector unsigned char
610 42809816 Diego Biurrun
    demux_u = (const vector unsigned char)AVV(0x10,0x00,0x10,0x00,
611
                                              0x10,0x04,0x10,0x04,
612
                                              0x10,0x08,0x10,0x08,
613
                                              0x10,0x0c,0x10,0x0c),
614
    demux_v = (const vector unsigned char)AVV(0x10,0x02,0x10,0x02,
615
                                              0x10,0x06,0x10,0x06,
616
                                              0x10,0x0A,0x10,0x0A,
617
                                              0x10,0x0E,0x10,0x0E),
618
    demux_y = (const vector unsigned char)AVV(0x10,0x01,0x10,0x03,
619
                                              0x10,0x05,0x10,0x07,
620
                                              0x10,0x09,0x10,0x0B,
621
                                              0x10,0x0D,0x10,0x0F);
622 a31de956 Michael Niedermayer
623
/*
624
  this is so I can play live CCIR raw video
625
*/
626
static int altivec_uyvy_rgb32 (SwsContext *c,
627 42809816 Diego Biurrun
                               unsigned char **in, int *instrides,
628
                               int srcSliceY,        int srcSliceH,
629
                               unsigned char **oplanes, int *outstrides)
630 a31de956 Michael Niedermayer
{
631 42809816 Diego Biurrun
    int w = c->srcW;
632
    int h = srcSliceH;
633
    int i,j;
634
    vector unsigned char uyvy;
635
    vector signed   short Y,U,V;
636
    vector signed   short R0,G0,B0,R1,G1,B1;
637
    vector unsigned char  R,G,B;
638
    vector unsigned char *out;
639
    ubyte *img;
640 a31de956 Michael Niedermayer
641 42809816 Diego Biurrun
    img = in[0];
642
    out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]);
643 a31de956 Michael Niedermayer
644 42809816 Diego Biurrun
    for (i=0;i<h;i++) {
645
        for (j=0;j<w/16;j++) {
646
            uyvy = vec_ld (0, img);
647
            U = (vector signed short)
648
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
649 a31de956 Michael Niedermayer
650 42809816 Diego Biurrun
            V = (vector signed short)
651
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
652 a31de956 Michael Niedermayer
653 42809816 Diego Biurrun
            Y = (vector signed short)
654
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
655 a31de956 Michael Niedermayer
656 42809816 Diego Biurrun
            cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0);
657 a31de956 Michael Niedermayer
658 42809816 Diego Biurrun
            uyvy = vec_ld (16, img);
659
            U = (vector signed short)
660
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u);
661 a31de956 Michael Niedermayer
662 42809816 Diego Biurrun
            V = (vector signed short)
663
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v);
664 a31de956 Michael Niedermayer
665 42809816 Diego Biurrun
            Y = (vector signed short)
666
                vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y);
667 a31de956 Michael Niedermayer
668 42809816 Diego Biurrun
            cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1);
669 a31de956 Michael Niedermayer
670 42809816 Diego Biurrun
            R  = vec_packclp (R0,R1);
671
            G  = vec_packclp (G0,G1);
672
            B  = vec_packclp (B0,B1);
673 a31de956 Michael Niedermayer
674 42809816 Diego Biurrun
            //      vec_mstbgr24 (R,G,B, out);
675
            out_rgba (R,G,B,out);
676 a31de956 Michael Niedermayer
677 42809816 Diego Biurrun
            img += 32;
678
        }
679 a31de956 Michael Niedermayer
    }
680 42809816 Diego Biurrun
    return srcSliceH;
681 a31de956 Michael Niedermayer
}
682
683
684
685
/* Ok currently the acceleration routine only supports
686
   inputs of widths a multiple of 16
687
   and heights a multiple 2
688

689
   So we just fall back to the C codes for this.
690
*/
691
SwsFunc yuv2rgb_init_altivec (SwsContext *c)
692
{
693 42809816 Diego Biurrun
    if (!(c->flags & SWS_CPU_CAPS_ALTIVEC))
694
        return NULL;
695 a31de956 Michael Niedermayer
696 42809816 Diego Biurrun
    /*
697
      and this seems not to matter too much I tried a bunch of
698
      videos with abnormal widths and mplayer crashes else where.
699
      mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv
700
      boom with X11 bad match.
701 a31de956 Michael Niedermayer

702 42809816 Diego Biurrun
    */
703
    if ((c->srcW & 0xf) != 0)    return NULL;
704
705
    switch (c->srcFormat) {
706
    case PIX_FMT_YUV410P:
707
    case PIX_FMT_YUV420P:
708
    /*case IMGFMT_CLPL:        ??? */
709
    case PIX_FMT_GRAY8:
710
    case PIX_FMT_NV12:
711
    case PIX_FMT_NV21:
712
        if ((c->srcH & 0x1) != 0)
713
            return NULL;
714
715
        switch(c->dstFormat){
716
        case PIX_FMT_RGB24:
717
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
718
            return altivec_yuv2_rgb24;
719
        case PIX_FMT_BGR24:
720
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n");
721
            return altivec_yuv2_bgr24;
722
        case PIX_FMT_ARGB:
723
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n");
724
            return altivec_yuv2_argb;
725
        case PIX_FMT_ABGR:
726
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n");
727
            return altivec_yuv2_abgr;
728
        case PIX_FMT_RGBA:
729
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n");
730
            return altivec_yuv2_rgba;
731
        case PIX_FMT_BGRA:
732
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n");
733
            return altivec_yuv2_bgra;
734
        default: return NULL;
735
        }
736
        break;
737
738
    case PIX_FMT_UYVY422:
739
        switch(c->dstFormat){
740
        case PIX_FMT_BGR32:
741
            av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
742
            return altivec_uyvy_rgb32;
743
        default: return NULL;
744
        }
745
        break;
746
747
    }
748
    return NULL;
749 a31de956 Michael Niedermayer
}
750
751 582552fb Luca Barbato
/**
 * Round a 48.16 fixed-point value to the nearest integer and saturate it
 * to the signed 16-bit range.
 *
 * The result is returned as the 16-bit two's-complement bit pattern:
 * rounded values above 0x7FFF give 0x7FFF, rounded values below -0x7FFF
 * give 0x8000, anything in between is returned modulo 2^16.
 *
 * The saturation test is done on the full 64-bit value, so inputs whose
 * rounded result does not fit in an int still clamp instead of wrapping.
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r;

    /* adding the rounding bias would overflow; the result saturates anyway */
    if (f > INT64_MAX - (1<<15))
        return 0x7FFF;

    r = (f + (1<<15))>>16;
    if (r < -0x7FFF) return 0x8000;
    if (r >  0x7FFF) return 0x7FFF;
    return (uint16_t)r;
}
757 84fdd642 Alex Beregszaszi
758 582552fb Luca Barbato
/*
 * Fill the AltiVec coefficient vectors of the context (CY, OY, CRV, CBU,
 * CGU, CGV and the shift count CSHIFT) from the colourspace table and the
 * brightness / contrast / saturation settings.
 *
 * The six scalars are computed into a 16-byte aligned array that shares
 * storage with a vector, and each one is then broadcast to all lanes of
 * its destination with vec_splat.  Array slots 6 and 7 are never used.
 */
void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation)
{
    union {
        signed short tmp[8] __attribute__ ((aligned(16)));
        vector signed short vec;
    } coef;
    const int con = contrast   >> 16;
    const int sat = saturation >> 16;

    coef.tmp[0] =  ((0xffffLL) * contrast>>8)>>9;        /* cy  */
    coef.tmp[1] =  -256*brightness;                      /* oy  */
    coef.tmp[2] =  (inv_table[0]>>3) * con * sat;        /* crv */
    coef.tmp[3] =  (inv_table[1]>>3) * con * sat;        /* cbu */
    coef.tmp[4] = -((inv_table[2]>>1) * con * sat);      /* cgu */
    coef.tmp[5] = -((inv_table[3]>>1) * con * sat);      /* cgv */

    c->CSHIFT = (vector unsigned short)vec_splat_u16(2);

    /* vec_splat needs a literal lane number, hence one line per coefficient */
    c->CY   = vec_splat ((vector signed short)coef.vec, 0);
    c->OY   = vec_splat ((vector signed short)coef.vec, 1);
    c->CRV  = vec_splat ((vector signed short)coef.vec, 2);
    c->CBU  = vec_splat ((vector signed short)coef.vec, 3);
    c->CGU  = vec_splat ((vector signed short)coef.vec, 4);
    c->CGV  = vec_splat ((vector signed short)coef.vec, 5);
#if 0
    {
        /* debugging aid: dump the six scalar coefficients on one line */
        char *v[6]={"cy","oy","crv","cbu","cgu","cgv"};
        int i;
        for (i=0; i<6; i++) {
            printf("%s %d ", v[i],coef.tmp[i] );
        }
        printf("\n");
    }
#endif
}
791
792
793
void
794
altivec_yuv2packedX (SwsContext *c,
795 42809816 Diego Biurrun
                     int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
796
                     int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
797
                     uint8_t *dest, int dstW, int dstY)
798 a31de956 Michael Niedermayer
{
799 42809816 Diego Biurrun
    int i,j;
800
    vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V;
801
    vector signed short R0,G0,B0,R1,G1,B1;
802 582552fb Luca Barbato
803 42809816 Diego Biurrun
    vector unsigned char R,G,B;
804
    vector unsigned char *out,*nout;
805 a31de956 Michael Niedermayer
806 42809816 Diego Biurrun
    vector signed short   RND = vec_splat_s16(1<<3);
807
    vector unsigned short SCL = vec_splat_u16(4);
808
    unsigned long scratch[16] __attribute__ ((aligned (16)));
809 a31de956 Michael Niedermayer
810 42809816 Diego Biurrun
    vector signed short *YCoeffs, *CCoeffs;
811 a31de956 Michael Niedermayer
812 42809816 Diego Biurrun
    YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize;
813
    CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize;
814 a31de956 Michael Niedermayer
815 42809816 Diego Biurrun
    out = (vector unsigned char *)dest;
816 a31de956 Michael Niedermayer
817 42809816 Diego Biurrun
    for (i=0; i<dstW; i+=16){
818
        Y0 = RND;
819
        Y1 = RND;
820
        /* extract 16 coeffs from lumSrc */
821
        for (j=0; j<lumFilterSize; j++) {
822
            X0 = vec_ld (0,  &lumSrc[j][i]);
823
            X1 = vec_ld (16, &lumSrc[j][i]);
824
            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
825
            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
826
        }
827 a31de956 Michael Niedermayer
828 42809816 Diego Biurrun
        U = RND;
829
        V = RND;
830
        /* extract 8 coeffs from U,V */
831
        for (j=0; j<chrFilterSize; j++) {
832
            X  = vec_ld (0, &chrSrc[j][i/2]);
833
            U  = vec_mradds (X, CCoeffs[j], U);
834
            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
835
            V  = vec_mradds (X, CCoeffs[j], V);
836 3845b56d Alan Curry
        }
837 a31de956 Michael Niedermayer
838 42809816 Diego Biurrun
        /* scale and clip signals */
839
        Y0 = vec_sra (Y0, SCL);
840
        Y1 = vec_sra (Y1, SCL);
841
        U  = vec_sra (U,  SCL);
842
        V  = vec_sra (V,  SCL);
843
844
        Y0 = vec_clip_s16 (Y0);
845
        Y1 = vec_clip_s16 (Y1);
846
        U  = vec_clip_s16 (U);
847
        V  = vec_clip_s16 (V);
848
849
        /* now we have
850
          Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
851
          U= u0 u1 u2 u3 u4 u5 u6 u7      V= v0 v1 v2 v3 v4 v5 v6 v7
852

853
          Y0= y0 y1 y2 y3 y4 y5 y6 y7    Y1= y8 y9 y10 y11 y12 y13 y14 y15
854
          U0= u0 u0 u1 u1 u2 u2 u3 u3    U1= u4 u4 u5 u5 u6 u6 u7 u7
855
          V0= v0 v0 v1 v1 v2 v2 v3 v3    V1= v4 v4 v5 v5 v6 v6 v7 v7
856
        */
857
858
        U0 = vec_mergeh (U,U);
859
        V0 = vec_mergeh (V,V);
860
861
        U1 = vec_mergel (U,U);
862
        V1 = vec_mergel (V,V);
863
864
        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
865
        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
866
867
        R  = vec_packclp (R0,R1);
868
        G  = vec_packclp (G0,G1);
869
        B  = vec_packclp (B0,B1);
870
871
        switch(c->dstFormat) {
872
            case PIX_FMT_ABGR:  out_abgr  (R,G,B,out); break;
873
            case PIX_FMT_BGRA:  out_bgra  (R,G,B,out); break;
874
            case PIX_FMT_RGBA:  out_rgba  (R,G,B,out); break;
875
            case PIX_FMT_ARGB:  out_argb  (R,G,B,out); break;
876
            case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break;
877
            case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break;
878
            default:
879
            {
880
                /* If this is reached, the caller should have called yuv2packedXinC
881
                   instead. */
882
                static int printed_error_message;
883
                if (!printed_error_message) {
884
                    av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
885
                           sws_format_name(c->dstFormat));
886
                    printed_error_message=1;
887
                }
888
                return;
889
            }
890
        }
891 a31de956 Michael Niedermayer
    }
892
893 42809816 Diego Biurrun
    if (i < dstW) {
894
        i -= 16;
895
896
        Y0 = RND;
897
        Y1 = RND;
898
        /* extract 16 coeffs from lumSrc */
899
        for (j=0; j<lumFilterSize; j++) {
900
            X0 = vec_ld (0,  &lumSrc[j][i]);
901
            X1 = vec_ld (16, &lumSrc[j][i]);
902
            Y0 = vec_mradds (X0, YCoeffs[j], Y0);
903
            Y1 = vec_mradds (X1, YCoeffs[j], Y1);
904
        }
905 a31de956 Michael Niedermayer
906 42809816 Diego Biurrun
        U = RND;
907
        V = RND;
908
        /* extract 8 coeffs from U,V */
909
        for (j=0; j<chrFilterSize; j++) {
910
            X  = vec_ld (0, &chrSrc[j][i/2]);
911
            U  = vec_mradds (X, CCoeffs[j], U);
912
            X  = vec_ld (0, &chrSrc[j][i/2+2048]);
913
            V  = vec_mradds (X, CCoeffs[j], V);
914
        }
915 a31de956 Michael Niedermayer
916 42809816 Diego Biurrun
        /* scale and clip signals */
917
        Y0 = vec_sra (Y0, SCL);
918
        Y1 = vec_sra (Y1, SCL);
919
        U  = vec_sra (U,  SCL);
920
        V  = vec_sra (V,  SCL);
921
922
        Y0 = vec_clip_s16 (Y0);
923
        Y1 = vec_clip_s16 (Y1);
924
        U  = vec_clip_s16 (U);
925
        V  = vec_clip_s16 (V);
926
927
        /* now we have
928
           Y0= y0 y1 y2 y3 y4 y5 y6 y7     Y1= y8 y9 y10 y11 y12 y13 y14 y15
929
           U = u0 u1 u2 u3 u4 u5 u6 u7     V = v0 v1 v2 v3 v4 v5 v6 v7
930

931
           Y0= y0 y1 y2 y3 y4 y5 y6 y7    Y1= y8 y9 y10 y11 y12 y13 y14 y15
932
           U0= u0 u0 u1 u1 u2 u2 u3 u3    U1= u4 u4 u5 u5 u6 u6 u7 u7
933
           V0= v0 v0 v1 v1 v2 v2 v3 v3    V1= v4 v4 v5 v5 v6 v6 v7 v7
934
        */
935
936
        U0 = vec_mergeh (U,U);
937
        V0 = vec_mergeh (V,V);
938
939
        U1 = vec_mergel (U,U);
940
        V1 = vec_mergel (V,V);
941
942
        cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0);
943
        cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1);
944
945
        R  = vec_packclp (R0,R1);
946
        G  = vec_packclp (G0,G1);
947
        B  = vec_packclp (B0,B1);
948
949
        nout = (vector unsigned char *)scratch;
950
        switch(c->dstFormat) {
951
            case PIX_FMT_ABGR:  out_abgr  (R,G,B,nout); break;
952
            case PIX_FMT_BGRA:  out_bgra  (R,G,B,nout); break;
953
            case PIX_FMT_RGBA:  out_rgba  (R,G,B,nout); break;
954
            case PIX_FMT_ARGB:  out_argb  (R,G,B,nout); break;
955
            case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break;
956
            case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break;
957
            default:
958
                /* Unreachable, I think. */
959
                av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n",
960
                       sws_format_name(c->dstFormat));
961
                return;
962
        }
963 a31de956 Michael Niedermayer
964 42809816 Diego Biurrun
        memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4);
965 3845b56d Alan Curry
    }
966 a31de956 Michael Niedermayer
967
}