Statistics
| Branch: | Revision:

ffmpeg / postproc / swscale.c @ 42225a30

History | View | Annotate | Download (76.7 KB)

1 fe8054c0 Michael Niedermayer
/*
2 5427e242 Michael Niedermayer
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 fe8054c0 Michael Niedermayer

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8 31190492 Arpi

9 fe8054c0 Michael Niedermayer
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13 31190492 Arpi

14 fe8054c0 Michael Niedermayer
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16 660204c6 Diego Biurrun
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 fe8054c0 Michael Niedermayer
*/
18 783e9cc9 Michael Niedermayer
19 28bf81c9 Michael Niedermayer
/*
20 7322a67c Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
21 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
22 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
23 a861d4d7 Michael Niedermayer
  
24 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
25
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
26
  x -> x
27
  YUV9 -> YV12
28
  YUV9/YV12 -> Y800
29
  Y800 -> YUV9/YV12
30 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
31
  BGR32 -> BGR24 & RGB32 -> RGB24
32 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
33 b935781b Michael Niedermayer
*/
34
35
/* 
36 e09d12f4 Michael Niedermayer
tested special converters (most are actually tested, but I didn't write it down ...)
37
 YV12 -> BGR16
38 b935781b Michael Niedermayer
 YV12 -> YV12
39 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
40 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
41 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
42 b935781b Michael Niedermayer

43
untested special converters
44 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
45
  YV12/I420 -> YV12/I420
46
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
47 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
48
  BGR32 -> BGR24 & RGB32 -> RGB24
49 ec22603f Michael Niedermayer
  BGR24 -> YV12
50 28bf81c9 Michael Niedermayer
*/
51
52 d3f41512 Michael Niedermayer
#include <inttypes.h>
53 dda87e9f Pierre Lombard
#include <string.h>
54 077ea8a7 Michael Niedermayer
#include <math.h>
55 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
56 171d7d78 Bohdan Horst
#include <unistd.h>
57 b2d374c9 Diego Biurrun
#include "config.h"
58
#include "mangle.h"
59 81b7c056 Michael Niedermayer
#include <assert.h>
60 c1b0bfb4 Michael Niedermayer
#ifdef HAVE_MALLOC_H
61
#include <malloc.h>
62 b6acbc3c Björn Sandell
#else
63
#include <stdlib.h>
64 c1b0bfb4 Michael Niedermayer
#endif
65 38d5c282 Aurelien Jacobs
#ifdef HAVE_SYS_MMAN_H
66
#include <sys/mman.h>
67 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68
#define MAP_ANONYMOUS MAP_ANON
69
#endif
70 38d5c282 Aurelien Jacobs
#endif
71 d604bab9 Michael Niedermayer
#include "swscale.h"
72 5427e242 Michael Niedermayer
#include "swscale_internal.h"
73 b2d374c9 Diego Biurrun
#include "cpudetect.h"
74
#include "bswap.h"
75
#include "libvo/img_format.h"
76 37079906 Michael Niedermayer
#include "rgb2rgb.h"
77 b2d374c9 Diego Biurrun
#include "libvo/fastmemcpy.h"
78 0d9f3d85 Arpi
79 541c4eb9 Michael Niedermayer
#undef MOVNTQ
80 7d7f78b5 Michael Niedermayer
#undef PAVGB
81 d3f41512 Michael Niedermayer
82 783e9cc9 Michael Niedermayer
//#undef HAVE_MMX2
83 7f56a527 Michael Niedermayer
//#define HAVE_3DNOW
84 d3f41512 Michael Niedermayer
//#undef HAVE_MMX
85 783e9cc9 Michael Niedermayer
//#undef ARCH_X86
86 2ba1bff0 Michael Niedermayer
//#define WORDS_BIGENDIAN
87 d604bab9 Michael Niedermayer
#define DITHER1XBPP
88 d3f41512 Michael Niedermayer
89 ac6a2e45 Michael Niedermayer
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
90
91 1e621b18 Michael Niedermayer
#define RET 0xC3 //near return opcode for X86
92 c1b0bfb4 Michael Niedermayer
93 28bf81c9 Michael Niedermayer
#ifdef MP_DEBUG
94 81b7c056 Michael Niedermayer
#define ASSERT(x) assert(x);
95 28bf81c9 Michael Niedermayer
#else
96 c1b0bfb4 Michael Niedermayer
#define ASSERT(x) ;
97 28bf81c9 Michael Niedermayer
#endif
98
99
#ifdef M_PI
100
#define PI M_PI
101
#else
102
#define PI 3.14159265358979323846
103
#endif
104 c1b0bfb4 Michael Niedermayer
105 6c7506de Michael Niedermayer
//FIXME replace this with something faster
106 fccb9b2b Michael Niedermayer
/*
 * Pixel-format classification predicates.
 *
 * Each macro takes an IMGFMT_* format code and yields non-zero when the
 * code belongs to the named class.  The argument is expanded several
 * times, so it must not have side effects.
 */

/* Formats handled as planar YUV. */
#define isPlanarYUV(x)    (   (x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
                           || (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
                           || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)

/* Any YUV format: the two packed ones (UYVY, YUY2) or a planar one. */
#define isYUV(x)          ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))

/* Y800 only. */
#define isGray(x)         ((x)==IMGFMT_Y800)

/* RGB / BGR family membership is decided by masking the format code. */
#define isRGB(x)          (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)          (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)

/* Formats accepted as scaler input. */
#define isSupportedIn(x)  (   (x)==IMGFMT_YV12  || (x)==IMGFMT_YUY2  || (x)==IMGFMT_UYVY \
                           || (x)==IMGFMT_BGR32 || (x)==IMGFMT_BGR24 || (x)==IMGFMT_BGR16 || (x)==IMGFMT_BGR15 \
                           || (x)==IMGFMT_RGB32 || (x)==IMGFMT_RGB24 \
                           || (x)==IMGFMT_Y800  || (x)==IMGFMT_YVU9 \
                           || (x)==IMGFMT_444P  || (x)==IMGFMT_422P  || (x)==IMGFMT_411P)

/* Formats accepted as scaler output (every RGB/BGR depth, plus NV12/NV21). */
#define isSupportedOut(x) (   (x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY \
                           || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P \
                           || isRGB(x) || isBGR(x) \
                           || (x)==IMGFMT_NV12 || (x)==IMGFMT_NV21 \
                           || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)

/* Packed formats: packed YUV (YUY2, UYVY) and every RGB/BGR format. */
#define isPacked(x)       ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY || isRGB(x) || isBGR(x))
124 6ff0ad6b Michael Niedermayer
125
/*
 * Fixed-point RGB -> YUV conversion coefficients.
 *
 * Every coefficient is a floating-point weight multiplied by
 * 2^RGB2YUV_SHIFT, offset by 0.5 and then converted to int (which
 * truncates towards zero).  Judging by the names, the first letter is the
 * input channel (R, G, B) and the second the output component (Y, U, V).
 */
#define RGB2YUV_SHIFT 16
#define RGB2YUV_COEFF(w) ((int)((w)*(1<<RGB2YUV_SHIFT)+0.5))

#define BY RGB2YUV_COEFF( 0.098)
#define BV RGB2YUV_COEFF(-0.071)
#define BU RGB2YUV_COEFF( 0.439)
#define GY RGB2YUV_COEFF( 0.504)
#define GV RGB2YUV_COEFF(-0.368)
#define GU RGB2YUV_COEFF(-0.291)
#define RY RGB2YUV_COEFF( 0.257)
#define RV RGB2YUV_COEFF( 0.439)
#define RU RGB2YUV_COEFF(-0.148)
135 6c7506de Michael Niedermayer
136 0481412a Michael Niedermayer
extern const int32_t Inverse_Table_6_9[8][4];
137
138 783e9cc9 Michael Niedermayer
/*
139
NOTES
140 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
141 31190492 Arpi

142 783e9cc9 Michael Niedermayer
TODO
143 d604bab9 Michael Niedermayer
more intelligent misalignment avoidance for the horizontal scaler
144 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
145
Optimize C code (yv12 / minmax)
146 ff7ba856 Michael Niedermayer
add support for packed pixel yuv input & output
147 6ff0ad6b Michael Niedermayer
add support for Y8 output
148
optimize bgr24 & bgr32
149 ff7ba856 Michael Niedermayer
add BGR4 output support
150 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
151 783e9cc9 Michael Niedermayer
*/
152 31190492 Arpi
153 d604bab9 Michael Niedermayer
/* Absolute value; the argument is evaluated twice, so avoid side effects. */
#define ABS(a) (0 < (a) ? (a) : (-(a)))
154 2ff198c1 Michael Niedermayer
/* Smaller of two values (yields a when equal); arguments are evaluated twice. */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
155
/* Larger of two values (yields a when equal); arguments are evaluated twice. */
#define MAX(a,b) ((b) > (a) ? (b) : (a))
156 d604bab9 Michael Niedermayer
157 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
158 db7a2e0d Matthieu Castet
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
159
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
160 d604bab9 Michael Niedermayer
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
161 db7a2e0d Matthieu Castet
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
162
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
163
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
164
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
165
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
166 d604bab9 Michael Niedermayer
167 db7a2e0d Matthieu Castet
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
168
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
169
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
170
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
171 d8fa3c54 Michael Niedermayer
172
static uint64_t __attribute__((aligned(8))) dither4[2]={
173
        0x0103010301030103LL,
174
        0x0200020002000200LL,};
175
176
static uint64_t __attribute__((aligned(8))) dither8[2]={
177
        0x0602060206020602LL,
178
        0x0004000400040004LL,};
179 d604bab9 Michael Niedermayer
180
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
181 db7a2e0d Matthieu Castet
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
182
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
183 d604bab9 Michael Niedermayer
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
184 db7a2e0d Matthieu Castet
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
185
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;
186 d604bab9 Michael Niedermayer
187 db7a2e0d Matthieu Castet
static uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
188
static uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
189
static uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;
190 99d2cb72 Michael Niedermayer
191 ac6a2e45 Michael Niedermayer
#ifdef FAST_BGR2YV12
192 db7a2e0d Matthieu Castet
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
193
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
194
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
195 ac6a2e45 Michael Niedermayer
#else
196 db7a2e0d Matthieu Castet
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
197
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
198
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
199 ac6a2e45 Michael Niedermayer
#endif
200 db7a2e0d Matthieu Castet
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
201
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
202
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
203 d604bab9 Michael Niedermayer
#endif
204 783e9cc9 Michael Niedermayer
205
// clipping helper table for C implementations:
206
static unsigned char clip_table[768];
207
208 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
209
                  
210 cf7d1c1a Michael Niedermayer
extern const uint8_t dither_2x2_4[2][8];
211
extern const uint8_t dither_2x2_8[2][8];
212
extern const uint8_t dither_8x8_32[8][8];
213
extern const uint8_t dither_8x8_73[8][8];
214
extern const uint8_t dither_8x8_220[8][8];
215 5cebb24b Michael Niedermayer
216 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
217 96034638 Michael Niedermayer
void in_asm_used_var_warning_killer()
218
{
219 20380eb8 Michael Niedermayer
 volatile int i= bF8+bFC+w10+
220 5ac80202 Michael Niedermayer
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
221 6ff0ad6b Michael Niedermayer
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
222 96034638 Michael Niedermayer
 if(i) i=0;
223
}
224
#endif
225 d604bab9 Michael Niedermayer
226 5859233b Michael Niedermayer
/**
 * Plain-C vertical filter producing one line of planar YUV output.
 *
 * Every output sample is the sum of filterSize source lines weighted by the
 * matching filter coefficient, plus a rounding term of 1<<18, shifted right
 * by 19 and clipped to 0..255.
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma source lines, dstW samples each
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize chroma source lines; U samples start at
 *                      index 0 and V samples at index 2048 of each line
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, chrDstW bytes; NULL skips chroma entirely
 * @param vDest         V output, chrDstW bytes; written whenever uDest is
 *                      non-NULL (it is not checked separately)
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
        //FIXME Optimize (just quickly written, not optimized)
        int x;
        int tap;

        /* luma plane */
        for(x=0; x<dstW; x++)
        {
                int acc= 1<<18; /* rounding term: half of the 1<<19 divisor */

                for(tap=0; tap<lumFilterSize; tap++)
                        acc += lumSrc[tap][x] * lumFilter[tap];

                acc >>= 19;
                dest[x]= MIN(MAX(acc, 0), 255);
        }

        /* no chroma requested */
        if(uDest == NULL)
                return;

        /* chroma planes; U and V share the same source lines */
        for(x=0; x<chrDstW; x++)
        {
                int accU= 1<<18;
                int accV= 1<<18;

                for(tap=0; tap<chrFilterSize; tap++)
                {
                        accU += chrSrc[tap][x] * chrFilter[tap];
                        accV += chrSrc[tap][x + 2048] * chrFilter[tap];
                }

                accU >>= 19;
                accV >>= 19;
                uDest[x]= MIN(MAX(accU, 0), 255);
                vDest[x]= MIN(MAX(accV, 0), 255);
        }
}
258
259 6118e52e Ville Syrjälä
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
260
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
261
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
262
{
263
        //FIXME Optimize (just quickly writen not opti..)
264
        int i;
265
        for(i=0; i<dstW; i++)
266
        {
267
                int val=1<<18;
268
                int j;
269
                for(j=0; j<lumFilterSize; j++)
270
                        val += lumSrc[j][i] * lumFilter[j];
271
272
                dest[i]= MIN(MAX(val>>19, 0), 255);
273
        }
274
275
        if(uDest == NULL)
276
                return;
277
278
        if(dstFormat == IMGFMT_NV12)
279
                for(i=0; i<chrDstW; i++)
280
                {
281
                        int u=1<<18;
282
                        int v=1<<18;
283
                        int j;
284
                        for(j=0; j<chrFilterSize; j++)
285
                        {
286
                                u += chrSrc[j][i] * chrFilter[j];
287
                                v += chrSrc[j][i + 2048] * chrFilter[j];
288
                        }
289
290
                        uDest[2*i]= MIN(MAX(u>>19, 0), 255);
291
                        uDest[2*i+1]= MIN(MAX(v>>19, 0), 255);
292
                }
293
        else
294
                for(i=0; i<chrDstW; i++)
295
                {
296
                        int u=1<<18;
297
                        int v=1<<18;
298
                        int j;
299
                        for(j=0; j<chrFilterSize; j++)
300
                        {
301
                                u += chrSrc[j][i] * chrFilter[j];
302
                                v += chrSrc[j][i + 2048] * chrFilter[j];
303
                        }
304
305
                        uDest[2*i]= MIN(MAX(v>>19, 0), 255);
306
                        uDest[2*i+1]= MIN(MAX(u>>19, 0), 255);
307
                }
308
}
309 46de8b73 Michael Niedermayer
310 25593e29 Michael Niedermayer
/*
 * Loop-opening helpers for the C packed-pixel writers.
 *
 * Each YSCALE_YUV_2_*_C macro expands to
 *     for(i=0; i<(dstW>>1); i++){ ...
 * and deliberately leaves the loop body OPEN: the code that follows the
 * macro stores the pixel pair and supplies the closing '}'.
 * Inside the body these names are available:
 *     i2       index of the first pixel of the pair (2*i)
 *     Y1, Y2   luma of the two pixels
 *     U, V     chroma shared by the pair (V is read 2048 entries after U)
 * The *_RGB*_C variants additionally declare r, g, b as "type *" and load
 * them from c->table_rV / table_gU+table_gV / table_bU.
 *
 * Names the macros expect in the enclosing scope:
 *     PACKEDX : i, dstW, lumSrc, lumFilter, lumFilterSize,
 *               chrSrc, chrFilter, chrFilterSize
 *     PACKED2 : i, dstW, buf0, buf1, uvbuf0, uvbuf1,
 *               yalpha, yalpha1, uvalpha, uvalpha1
 *     PACKED1 : i, dstW, buf0, uvbuf1
 *     PACKED1B: i, dstW, buf0, uvbuf0, uvbuf1
 *     RGB*    : the above plus c
 */

/* Colour-table lookup shared by all RGB variants (assignments only). */
#define YSCALE_YUV_2_RGB_LOOKUP_C \
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];\

/*
 * N-tap vertical filter.  Sums are seeded with 1<<18 for rounding, shifted
 * right by 19 and clipped to 0..255.  The clip is only entered when bit 8
 * of some result is set, i.e. it detects values in -256..-1 and 256..511;
 * this presumably relies on filter overshoot never exceeding that range.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        type *r, *b, *g;\
                        int Y1=1<<18;\
                        int Y2=1<<18;\
                        int U=1<<18;\
                        int V=1<<18;\
                        int j;\
                        \
                        for(j=0; j<lumFilterSize; j++)\
                        {\
                                Y1 += lumSrc[j][i2] * lumFilter[j];\
                                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
                        }\
                        for(j=0; j<chrFilterSize; j++)\
                        {\
                                U += chrSrc[j][i] * chrFilter[j];\
                                V += chrSrc[j][i+2048] * chrFilter[j];\
                        }\
                        Y1= Y1>>19;\
                        Y2= Y2>>19;\
                        U = U >>19;\
                        V = V >>19;\
                        if((Y1|Y2|U|V)&256)\
                        {\
                                Y1= Y1>255 ? 255 : (Y1<0 ? 0 : Y1);\
                                Y2= Y2>255 ? 255 : (Y2<0 ? 0 : Y2);\
                                U = U >255 ? 255 : (U <0 ? 0 : U );\
                                V = V >255 ? 255 : (V <0 ? 0 : V );\
                        }

#define YSCALE_YUV_2_RGBX_C(type) \
                        YSCALE_YUV_2_PACKEDX_C(type)\
                        YSCALE_YUV_2_RGB_LOOKUP_C

/* Two-line blend: buf0/uvbuf0 weighted by *alpha1, buf1/uvbuf1 by *alpha. */
#define YSCALE_YUV_2_PACKED2_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
                        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
                        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
                        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\

#define YSCALE_YUV_2_RGB2_C(type) \
                        YSCALE_YUV_2_PACKED2_C\
                        type *r, *b, *g;\
                        YSCALE_YUV_2_RGB_LOOKUP_C

/* Single line: luma from buf0, chroma from uvbuf1, both shifted right by 7. */
#define YSCALE_YUV_2_PACKED1_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf1[i     ])>>7;\
                        int V= (uvbuf1[i+2048])>>7;\

#define YSCALE_YUV_2_RGB1_C(type) \
                        YSCALE_YUV_2_PACKED1_C\
                        type *r, *b, *g;\
                        YSCALE_YUV_2_RGB_LOOKUP_C

/* Single luma line from buf0, chroma is the sum of uvbuf0 and uvbuf1 >> 8. */
#define YSCALE_YUV_2_PACKED1B_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
                        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\

#define YSCALE_YUV_2_RGB1B_C(type) \
                        YSCALE_YUV_2_PACKED1B_C\
                        type *r, *b, *g;\
                        YSCALE_YUV_2_RGB_LOOKUP_C
396
397 46de8b73 Michael Niedermayer
/*
 * Complete packed-output switch on c->dstFormat.
 *
 *   func(type)  must open the pixel-pair loop and provide i2, Y1, Y2 and the
 *               colour tables r, g, b declared as "type *"
 *               (e.g. one of the YSCALE_YUV_2_RGB*_C macros).
 *   func2       must open the pixel-pair loop and provide i2, Y1, Y2, U, V
 *               (e.g. one of the YSCALE_YUV_2_PACKED*_C macros); it is used
 *               for the YUY2 and UYVY cases.
 *
 * Expected in the enclosing scope: c, dest, dstW, i, y, plus whatever func
 * and func2 need.  The 24 bpp and 1 bpp cases advance dest as they write.
 * The 16/15/8/4 bpp cases add ordered-dither offsets, selected by y (and by
 * the pixel index for the 8x8 tables), to the table index.
 * The 1 bpp case ignores func: it reads buf0, buf1, yalpha1 and yalpha
 * directly and packs 8 pixels per output byte, first pixel in the most
 * significant bit; a trailing group of fewer than 8 pixels is not written.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
        switch(c->dstFormat)\
        {\
        case IMGFMT_BGR32:\
        case IMGFMT_RGB32:\
                func(uint32_t)\
                        ((uint32_t*)dest)[i2  ]= r[Y1] + g[Y1] + b[Y1];\
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
                break;\
        case IMGFMT_RGB24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= r[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= b[Y1];\
                        ((uint8_t*)dest)[3]= r[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= b[Y2];\
                        dest+=6;\
                }\
                break;\
        case IMGFMT_BGR24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= b[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= r[Y1];\
                        ((uint8_t*)dest)[3]= b[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= r[Y2];\
                        dest+=6;\
                }\
                break;\
        case IMGFMT_RGB16:\
        case IMGFMT_BGR16:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_4[y&1    ][0];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_4[y&1    ][1];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2  ]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB15:\
        case IMGFMT_BGR15:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_8[y&1    ][1];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_8[y&1    ][0];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2  ]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB8:\
        case IMGFMT_BGR8:\
                {\
                        const uint8_t * const d64= dither_8x8_73[y&7];\
                        const uint8_t * const d32= dither_8x8_32[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2  ]= r[Y1+d32[ i2   &7]] + g[Y1+d32[ i2   &7]] + b[Y1+d64[ i2   &7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB4:\
        case IMGFMT_BGR4:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i]= r[Y1+d128[ i2   &7]] + g[Y1+d64[ i2   &7]] + b[Y1+d128[ i2   &7]]\
                                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
                        }\
                }\
                break;\
        case IMGFMT_RG4B:\
        case IMGFMT_BG4B:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2  ]= r[Y1+d128[ i2   &7]] + g[Y1+d64[ i2   &7]] + b[Y1+d128[ i2   &7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB1:\
        case IMGFMT_BGR1:\
                {\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];\
                        for(i=0; i<dstW-7; i+=8){\
                                int acc= 0;\
                                int k;\
                                for(k=0; k<8; k++)\
                                        acc+= acc + g[((buf0[i+k]*yalpha1+buf1[i+k]*yalpha)>>19) + d128[k]];\
                                ((uint8_t*)dest)[0]= acc;\
                                dest++;\
                        }\
                }\
                break;\
        case IMGFMT_YUY2:\
                func2\
                        ((uint8_t*)dest)[2*i2  ]= Y1;\
                        ((uint8_t*)dest)[2*i2+1]= U;\
                        ((uint8_t*)dest)[2*i2+2]= Y2;\
                        ((uint8_t*)dest)[2*i2+3]= V;\
                }\
                break;\
        case IMGFMT_UYVY:\
                func2\
                        ((uint8_t*)dest)[2*i2  ]= U;\
                        ((uint8_t*)dest)[2*i2+1]= Y1;\
                        ((uint8_t*)dest)[2*i2+2]= V;\
                        ((uint8_t*)dest)[2*i2+3]= Y2;\
                }\
                break;\
        }
574
575
576 25593e29 Michael Niedermayer
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
577 e3d2500f Michael Niedermayer
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
578 cf7d1c1a Michael Niedermayer
                                    uint8_t *dest, int dstW, int y)
579 e3d2500f Michael Niedermayer
{
580 cf7d1c1a Michael Niedermayer
        int i;
581
        switch(c->dstFormat)
582 e3d2500f Michael Niedermayer
        {
583 cf7d1c1a Michael Niedermayer
        case IMGFMT_RGB32:
584
        case IMGFMT_BGR32:
585
                YSCALE_YUV_2_RGBX_C(uint32_t)
586
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
587
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
588 e3d2500f Michael Niedermayer
                }
589 cf7d1c1a Michael Niedermayer
                break;
590
        case IMGFMT_RGB24:
591
                YSCALE_YUV_2_RGBX_C(uint8_t)
592
                        ((uint8_t*)dest)[0]= r[Y1];
593
                        ((uint8_t*)dest)[1]= g[Y1];
594
                        ((uint8_t*)dest)[2]= b[Y1];
595
                        ((uint8_t*)dest)[3]= r[Y2];
596
                        ((uint8_t*)dest)[4]= g[Y2];
597
                        ((uint8_t*)dest)[5]= b[Y2];
598 ae4cffd9 D Richard Felker III
                        dest+=6;
599 cf7d1c1a Michael Niedermayer
                }
600
                break;
601
        case IMGFMT_BGR24:
602
                YSCALE_YUV_2_RGBX_C(uint8_t)
603
                        ((uint8_t*)dest)[0]= b[Y1];
604
                        ((uint8_t*)dest)[1]= g[Y1];
605
                        ((uint8_t*)dest)[2]= r[Y1];
606
                        ((uint8_t*)dest)[3]= b[Y2];
607
                        ((uint8_t*)dest)[4]= g[Y2];
608
                        ((uint8_t*)dest)[5]= r[Y2];
609 ae4cffd9 D Richard Felker III
                        dest+=6;
610 cf7d1c1a Michael Niedermayer
                }
611
                break;
612
        case IMGFMT_RGB16:
613
        case IMGFMT_BGR16:
614
                {
615
                        const int dr1= dither_2x2_8[y&1    ][0];
616
                        const int dg1= dither_2x2_4[y&1    ][0];
617
                        const int db1= dither_2x2_8[(y&1)^1][0];
618
                        const int dr2= dither_2x2_8[y&1    ][1];
619
                        const int dg2= dither_2x2_4[y&1    ][1];
620
                        const int db2= dither_2x2_8[(y&1)^1][1];
621
                        YSCALE_YUV_2_RGBX_C(uint16_t)
622
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
623
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
624 e3d2500f Michael Niedermayer
                        }
625
                }
626 cf7d1c1a Michael Niedermayer
                break;
627
        case IMGFMT_RGB15:
628
        case IMGFMT_BGR15:
629
                {
630
                        const int dr1= dither_2x2_8[y&1    ][0];
631
                        const int dg1= dither_2x2_8[y&1    ][1];
632
                        const int db1= dither_2x2_8[(y&1)^1][0];
633
                        const int dr2= dither_2x2_8[y&1    ][1];
634
                        const int dg2= dither_2x2_8[y&1    ][0];
635
                        const int db2= dither_2x2_8[(y&1)^1][1];
636
                        YSCALE_YUV_2_RGBX_C(uint16_t)
637
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
638
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
639 e3d2500f Michael Niedermayer
                        }
640 cf7d1c1a Michael Niedermayer
                }
641
                break;
642
        case IMGFMT_RGB8:
643
        case IMGFMT_BGR8:
644
                {
645
                        const uint8_t * const d64= dither_8x8_73[y&7];
646
                        const uint8_t * const d32= dither_8x8_32[y&7];
647
                        YSCALE_YUV_2_RGBX_C(uint8_t)
648
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
649
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
650 e3d2500f Michael Niedermayer
                        }
651
                }
652 cf7d1c1a Michael Niedermayer
                break;
653
        case IMGFMT_RGB4:
654
        case IMGFMT_BGR4:
655
                {
656
                        const uint8_t * const d64= dither_8x8_73 [y&7];
657
                        const uint8_t * const d128=dither_8x8_220[y&7];
658
                        YSCALE_YUV_2_RGBX_C(uint8_t)
659 799fd467 Michael Niedermayer
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
660 f17457ac Michael Niedermayer
                                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
661
                        }
662
                }
663
                break;
664
        case IMGFMT_RG4B:
665
        case IMGFMT_BG4B:
666
                {
667
                        const uint8_t * const d64= dither_8x8_73 [y&7];
668
                        const uint8_t * const d128=dither_8x8_220[y&7];
669
                        YSCALE_YUV_2_RGBX_C(uint8_t)
670 cf7d1c1a Michael Niedermayer
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
671
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
672 e3d2500f Michael Niedermayer
                        }
673 cf7d1c1a Michael Niedermayer
                }
674
                break;
675
        case IMGFMT_RGB1:
676
        case IMGFMT_BGR1:
677
                {
678
                        const uint8_t * const d128=dither_8x8_220[y&7];
679
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];
680
                        int acc=0;
681
                        for(i=0; i<dstW-1; i+=2){
682
                                int j;
683 379a2036 Michael Niedermayer
                                int Y1=1<<18;
684
                                int Y2=1<<18;
685 cf7d1c1a Michael Niedermayer
686
                                for(j=0; j<lumFilterSize; j++)
687
                                {
688
                                        Y1 += lumSrc[j][i] * lumFilter[j];
689
                                        Y2 += lumSrc[j][i+1] * lumFilter[j];
690
                                }
691
                                Y1>>=19;
692
                                Y2>>=19;
693
                                if((Y1|Y2)&256)
694
                                {
695
                                        if(Y1>255)   Y1=255;
696
                                        else if(Y1<0)Y1=0;
697
                                        if(Y2>255)   Y2=255;
698
                                        else if(Y2<0)Y2=0;
699
                                }
700
                                acc+= acc + g[Y1+d128[(i+0)&7]];
701
                                acc+= acc + g[Y2+d128[(i+1)&7]];
702
                                if((i&7)==6){
703
                                        ((uint8_t*)dest)[0]= acc;
704 ae4cffd9 D Richard Felker III
                                        dest++;
705 cf7d1c1a Michael Niedermayer
                                }
706 e3d2500f Michael Niedermayer
                        }
707
                }
708 cf7d1c1a Michael Niedermayer
                break;
709 46de8b73 Michael Niedermayer
        case IMGFMT_YUY2:
710 25593e29 Michael Niedermayer
                YSCALE_YUV_2_PACKEDX_C(void)
711 46de8b73 Michael Niedermayer
                        ((uint8_t*)dest)[2*i2+0]= Y1;
712
                        ((uint8_t*)dest)[2*i2+1]= U;
713
                        ((uint8_t*)dest)[2*i2+2]= Y2;
714
                        ((uint8_t*)dest)[2*i2+3]= V;
715
                }
716
                break;
717 caeaabe7 Alex Beregszaszi
        case IMGFMT_UYVY:
718
                YSCALE_YUV_2_PACKEDX_C(void)
719
                        ((uint8_t*)dest)[2*i2+0]= U;
720
                        ((uint8_t*)dest)[2*i2+1]= Y1;
721
                        ((uint8_t*)dest)[2*i2+2]= V;
722
                        ((uint8_t*)dest)[2*i2+3]= Y2;
723
                }
724
                break;
725 e3d2500f Michael Niedermayer
        }
726
}
727
728
729 7630f2e0 Michael Niedermayer
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
//
// swscale_template.c is included once per enabled CPU variant. Before each
// inclusion the HAVE_* macros are set to describe that variant and RENAME()
// is redefined so that every templated symbol gets a variant specific suffix
// (_C, _altivec, _MMX, _MMX2, _3DNow).
// The COMPILE_* decisions below must be taken first, because they test the
// build-time HAVE_* macros which are destroyed further down.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#if defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86 || ARCH_X86_64

// from here on the HAVE_* macros no longer reflect the build configuration,
// they only describe the variant of the template being compiled
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

//X86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86 || ARCH_X86_64

// minor note: the HAVE_xyz macros are messed up after this line, so don't use them below
823 d604bab9 Michael Niedermayer
824 a86c461c Michael Niedermayer
/*
 * Evaluates a piecewise cubic at distance dist from its origin.
 *
 * On the first unit interval (dist <= 1.0) the value is
 *     a + b*dist + c*dist^2 + d*dist^3.
 * For larger distances the coefficients are advanced to the next unit
 * interval (a becomes 0, b/c/d are recombined as below) and dist is reduced
 * by 1.0, repeatedly, until dist <= 1.0.
 *
 * This is the iterative form of the former tail recursion: it needs constant
 * stack regardless of dist and, because "NaN > 1.0" is false, returns NaN for
 * a NaN dist instead of recursing without bound.
 *
 * a, b, c, d  cubic coefficients of the first interval
 * dist        distance at which to evaluate (callers pass a value >= 0)
 * returns     the spline value at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
//        printf("%f %f %f %f %f\n", a,b,c,d,dist);
        while(dist > 1.0)
        {
                // all three are computed from the old b, c, d before any is overwritten
                const double nextB=  b + 2.0*c + 3.0*d;
                const double nextC=      c + 3.0*d;
                const double nextD= -b - 3.0*c - 6.0*d;

                a= 0.0;
                b= nextB;
                c= nextC;
                d= nextD;
                dist-= 1.0;
        }
        return ((d*dist + c)*dist + b)*dist +a;
}
834 6c7506de Michael Niedermayer
835 c7f822d9 Michael Niedermayer
/*
 * Builds the integer scaling filter for one dimension.
 *
 * For every destination sample i the function produces a first source index
 * (*filterPos)[i] and *outFilterSize int16 coefficients
 * (*outFilter)[i*(*outFilterSize) ...]. Steps:
 *   1. compute double precision coefficients for the kernel chosen by flags
 *   2. optionally convolve them with srcFilter
 *   3. drop near-zero taps (threshold SWS_MAX_REDUCE_CUTOFF) and round the
 *      tap count up to a multiple of filterAlign
 *   4. fold taps that would read outside 0..srcW-1 back onto the border
 *   5. scale each row so that it sums to "one" and round to integers while
 *      carrying the rounding error to the next tap
 *
 * outFilter      receives a memalign()ed array of *outFilterSize*(dstW+1)
 *                coefficients; the extra row duplicates the last real one
 * filterPos      receives a memalign()ed array of dstW+1 positions; the extra
 *                entry duplicates the last real one
 * outFilterSize  receives the number of taps per destination sample
 * xInc           source step per destination sample in 16.16 fixed point
 *                (values within 10 of 0x10000 are treated as unscaled)
 * srcW, dstW     source / destination size in samples
 * filterAlign    tap count granularity; used as a bit mask, so it has to be
 *                a power of two
 * one            integer value each coefficient row is normalized to
 * flags          SWS_* scaler selection and SWS_CPU_CAPS_* / SWS_PRINT_INFO
 * srcFilter      optional vector convolved into every row, may be NULL
 * dstFilter      may be NULL; only its length is accounted for, it is not
 *                applied (see the FIXME below)
 * param          kernel tuning values, SWS_PARAM_DEFAULT selects the built-in
 *                default of the respective kernel
 *
 * Both output arrays are allocated here and are not freed by this function.
 * Allocation failures are not detected.
 */
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                              int srcW, int dstW, int filterAlign, int one, int flags,
                              SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
        int i;
        int filterSize;
        int filter2Size;
        int minFilterSize;
        double *filter=NULL;
        double *filter2=NULL;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(flags & SWS_CPU_CAPS_MMX)
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

        // Note the +1 is for the MMXscaler which reads over the end
        *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));

        if(ABS(xInc - 0x10000) <10) // unscaled
        {
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
                for(i=0; i<dstW*filterSize; i++) filter[i]=0;

                for(i=0; i<dstW; i++)
                {
                        filter[i*filterSize]=1;
                        (*filterPos)[i]=i;
                }

        }
        else if(flags&SWS_POINT) // lame looking point sampling mode
        {
                int xDstInSrc;
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                // position of the first destination sample centre in 16.16 source coordinates
                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

                        (*filterPos)[i]= xx;
                        filter[i]= 1.0;
                        xDstInSrc+= xInc;
                }
        }
        else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
        {
                int xDstInSrc;
                if     (flags&SWS_BICUBIC) filterSize= 4;
                else if(flags&SWS_X      ) filterSize= 4;
                else                       filterSize= 2; // SWS_BILINEAR / SWS_AREA
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
                        int j;

                        (*filterPos)[i]= xx;
                        //Bilinear upscale / linear interpolate / Area averaging
                        for(j=0; j<filterSize; j++)
                        {
                                double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                                double coeff= 1.0 - d;
                                if(coeff<0) coeff=0;
                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc;
                }
        }
        else
        {
                double xDstInSrc;
                double sizeFactor, filterSizeInSrc;
                const double xInc1= (double)xInc / (double)(1<<16);

                // support width of the kernel in destination samples
                if     (flags&SWS_BICUBIC)      sizeFactor= 4.0;
                else if(flags&SWS_X)            sizeFactor= 8.0;
                else if(flags&SWS_AREA)         sizeFactor= 1.0; //downscale only, for upscale it is bilinear
                else if(flags&SWS_GAUSS)        sizeFactor= 8.0;   // infinite ;)
                else if(flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
                else if(flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
                else if(flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
                else if(flags&SWS_BILINEAR)     sizeFactor= 2.0;
                else {
                        sizeFactor= 0.0; //GCC warning killer
                        ASSERT(0)
                }

                if(xInc1 <= 1.0)        filterSizeInSrc= sizeFactor; // upscale
                else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

                filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
                if(filterSize > srcW-2) filterSize=srcW-2;
                // for srcW <= 2 the clamp above yields a size <= 0, which would turn
                // into a bogus allocation size below; always keep at least one tap
                if(filterSize < 1) filterSize= 1;

                filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc1 / 2.0 - 0.5;
                for(i=0; i<dstW; i++)
                {
                        int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
                        int j;
                        (*filterPos)[i]= xx;
                        for(j=0; j<filterSize; j++)
                        {
                                // distance of tap xx from the sample centre, rescaled to kernel units
                                double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                                double coeff;
                                if(flags & SWS_BICUBIC)
                                {
                                        double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                                        double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                                        if(d<1.0)
                                                coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                                        else if(d<2.0)
                                                coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                                        else
                                                coeff=0.0;
                                }
                                else if(flags & SWS_X)
                                {
                                        double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                                        if(d<1.0)
                                                coeff = cos(d*PI);
                                        else
                                                coeff=-1.0;
                                        if(coeff<0.0)   coeff= -pow(-coeff, A);
                                        else            coeff=  pow( coeff, A);
                                        coeff= coeff*0.5 + 0.5;
                                }
                                else if(flags & SWS_AREA)
                                {
                                        double srcPixelSize= 1.0/xInc1;
                                        if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
                                        else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                                        else coeff=0.0;
                                }
                                else if(flags & SWS_GAUSS)
                                {
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                                        coeff = pow(2.0, - p*d*d);
                                }
                                else if(flags & SWS_SINC)
                                {
                                        coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                                }
                                else if(flags & SWS_LANCZOS)
                                {
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                                        coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                                        if(d>p) coeff=0;
                                }
                                else if(flags & SWS_BILINEAR)
                                {
                                        coeff= 1.0 - d;
                                        if(coeff<0) coeff=0;
                                }
                                else if(flags & SWS_SPLINE)
                                {
                                        double p=-2.196152422706632;
                                        coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                                }
                                else {
                                        coeff= 0.0; //GCC warning killer
                                        ASSERT(0)
                                }

                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc1;
                }
        }

        /* apply src & dst Filter to filter -> filter2
           free(filter);
        */
        ASSERT(filterSize>0)
        filter2Size= filterSize;
        if(srcFilter) filter2Size+= srcFilter->length - 1;
        if(dstFilter) filter2Size+= dstFilter->length - 1;
        ASSERT(filter2Size>0)
        filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
        // dstFilter enlarges filter2Size but is not applied below, so without
        // this the tail of every row would be read uninitialized later on
        memset(filter2, 0, filter2Size*dstW*sizeof(double));

        for(i=0; i<dstW; i++)
        {
                int j;
                SwsVector scaleFilter;
                SwsVector *outVec;

                // wrap row i of filter in a vector without copying it
                scaleFilter.coeff= filter + i*filterSize;
                scaleFilter.length= filterSize;

                if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
                else          outVec= &scaleFilter;

                ASSERT(outVec->length == filter2Size)
                //FIXME dstFilter

                for(j=0; j<outVec->length; j++)
                {
                        filter2[i*filter2Size + j]= outVec->coeff[j];
                }

                // the row grew, move its start so that it stays centred
                (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

                if(outVec != &scaleFilter) sws_freeVec(outVec);
        }
        free(filter); filter=NULL;

        /* try to reduce the filter-size (step1 find size and shift left) */
        // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
        minFilterSize= 0;
        for(i=dstW-1; i>=0; i--)
        {
                int min= filter2Size;
                int j;
                double cutOff=0.0;

                /* get rid off near zero elements on the left by shifting left */
                for(j=0; j<filter2Size; j++)
                {
                        int k;
                        // always element 0: the row is shifted left on every iteration
                        cutOff += ABS(filter2[i*filter2Size]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;

                        /* preserve Monotonicity because the core can't handle the filter otherwise */
                        if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

                        // Move filter coeffs left
                        for(k=1; k<filter2Size; k++)
                                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
                        filter2[i*filter2Size + k - 1]= 0.0;
                        (*filterPos)[i]++;
                }

                cutOff=0.0;
                /* count near zeros on the right */
                for(j=filter2Size-1; j>0; j--)
                {
                        cutOff += ABS(filter2[i*filter2Size + j]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
                        min--;
                }

                if(min>minFilterSize) minFilterSize= min;
        }

        if (flags & SWS_CPU_CAPS_ALTIVEC) {
          // we can handle the special case 4,
          // so we don't want to go to the full 8
          if (minFilterSize < 5)
            filterAlign = 4;

          // we really don't want to waste our time
          // doing useless computation, so fall-back on
          // the scalar C code for very small filter.
          // vectorizing is worth it only if you have
          // decent-sized vector.
          if (minFilterSize < 3)
            filterAlign = 1;
        }

        ASSERT(minFilterSize > 0)
        // round up to a multiple of filterAlign (power of two)
        filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
        ASSERT(filterSize > 0)
        filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
        *outFilterSize= filterSize;

        if(flags&SWS_PRINT_INFO)
                MSG_V("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
        /* try to reduce the filter-size (step2 reduce it) */
        for(i=0; i<dstW; i++)
        {
                int j;

                for(j=0; j<filterSize; j++)
                {
                        if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
                        else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
                }
        }
        free(filter2); filter2=NULL;

        //FIXME try to align filterpos if possible

        //fix borders
        for(i=0; i<dstW; i++)
        {
                int j;
                if((*filterPos)[i] < 0)
                {
                        // Move filter coeffs left to compensate for filterPos
                        for(j=1; j<filterSize; j++)
                        {
                                int left= MAX(j + (*filterPos)[i], 0);
                                filter[i*filterSize + left] += filter[i*filterSize + j];
                                filter[i*filterSize + j]=0;
                        }
                        (*filterPos)[i]= 0;
                }

                if((*filterPos)[i] + filterSize > srcW)
                {
                        int shift= (*filterPos)[i] + filterSize - srcW;
                        // Move filter coeffs right to compensate for filterPos
                        for(j=filterSize-2; j>=0; j--)
                        {
                                int right= MIN(j + shift, filterSize-1);
                                filter[i*filterSize +right] += filter[i*filterSize +j];
                                filter[i*filterSize +j]=0;
                        }
                        (*filterPos)[i]= srcW - filterSize;
                }
        }

        // Note the +1 is for the MMXscaler which reads over the end
        /* align at 16 for AltiVec (needed by hScale_altivec_real) */
        *outFilter= (int16_t*)memalign(16, *outFilterSize*(dstW+1)*sizeof(int16_t));
        memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

        /* Normalize & Store in outFilter */
        for(i=0; i<dstW; i++)
        {
                int j;
                double error=0;
                double sum=0;
                double scale= one;

                for(j=0; j<filterSize; j++)
                {
                        sum+= filter[i*filterSize + j];
                }
                scale/= sum;
                for(j=0; j<*outFilterSize; j++)
                {
                        // the rounding error of each tap is carried into the next one
                        double v= filter[i*filterSize + j]*scale + error;
                        int intV= floor(v + 0.5);
                        (*outFilter)[i*(*outFilterSize) + j]= intV;
                        error = v - intV;
                }
        }

        (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
        for(i=0; i<*outFilterSize; i++)
        {
                // duplicate the last coefficient row into the extra row
                int j= dstW*(*outFilterSize);
                (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
        }

        free(filter);
}
1204 31190492 Arpi
1205 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1206 b7dc6f66 Michael Niedermayer
/*
 * Builds the run-time generated horizontal scaling routine for the MMX2 path.
 *
 * The two asm statements below each contain a template code fragment (between
 * the local labels 0 and 9). The statement itself jumps over the fragment and
 * only reports where the fragment starts, how long it is, and the offsets of
 * the immediate bytes of its two pshufw instructions. The loop further down
 * copies one fragment per group of 4 output pixels into funnyCode and patches
 * those immediate bytes.
 *
 * dstW      - output width; dstW/numSplits output pixels are processed here
 * xInc      - source step per output pixel; used as 16.16 fixed point
 *             (xpos>>16 is the source index, xpos&0xFFFF the fraction)
 * funnyCode - receives the generated code; a RET is kept after the last fragment
 * filter    - receives one coefficient per output pixel
 * filterPos - receives one source position per group of 4 pixels, at index i/2
 * numSplits - divisor applied to dstW
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1207 28bf81c9 Michael Niedermayer
{
1208 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentA;
1209 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1A;
1210
        long imm8OfPShufW2A;
1211
        long fragmentLengthA;
1212 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentB;
1213 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1B;
1214
        long imm8OfPShufW2B;
1215
        long fragmentLengthB;
1216 b7dc6f66 Michael Niedermayer
        int fragmentPos;
1217 28bf81c9 Michael Niedermayer
1218
        int xpos, i;
1219
1220
        // create an optimized horizontal scaling routine
1221
1222
        //code fragment
1223
1224
        // Fragment A: loads source bytes twice (at offset 0 and at offset 1).
        // The fragment is never executed here, "jmp 9f" skips it.
        asm volatile(
1225
                "jmp 9f                                \n\t"
1226
        // Begin
1227
                "0:                                \n\t"
1228 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1229
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1230
                "movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
1231 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm1                \n\t"
1232
                "punpcklbw %%mm7, %%mm0                \n\t"
1233 28bf81c9 Michael Niedermayer
                "pshufw $0xFF, %%mm1, %%mm1        \n\t"
1234
                "1:                                \n\t"
1235
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1236
                "2:                                \n\t"
1237
                "psubw %%mm1, %%mm0                \n\t"
1238 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1239 28bf81c9 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1240
                "psllw $7, %%mm1                \n\t"
1241
                "paddw %%mm1, %%mm0                \n\t"
1242
1243 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1244 28bf81c9 Michael Niedermayer
1245 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1246 28bf81c9 Michael Niedermayer
        // End
1247
                "9:                                \n\t"
1248
//                "int $3\n\t"
1249 6e1c66bc Aurelien Jacobs
        // %0 = fragment start, %1/%2 = offset of the byte just before labels 1/2
        // (stored as the pshufw immediates), %3 = fragment length
                "lea 0b, %0                        \n\t"
1250
                "lea 1b, %1                        \n\t"
1251
                "lea 2b, %2                        \n\t"
1252
                "dec %1                                \n\t"
1253
                "dec %2                                \n\t"
1254
                "sub %0, %1                        \n\t"
1255
                "sub %0, %2                        \n\t"
1256
                "lea 9b, %3                        \n\t"
1257
                "sub %0, %3                        \n\t"
1258 b7dc6f66 Michael Niedermayer
1259
1260
                :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1261
                "=r" (fragmentLengthA)
1262 28bf81c9 Michael Niedermayer
        );
1263
1264 b7dc6f66 Michael Niedermayer
        // Fragment B: like fragment A but with a single movd load; both pshufw
        // instructions shuffle the same loaded register (mm0).
        asm volatile(
1265
                "jmp 9f                                \n\t"
1266
        // Begin
1267
                "0:                                \n\t"
1268 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1269
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1270 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm0                \n\t"
1271
                "pshufw $0xFF, %%mm0, %%mm1        \n\t"
1272
                "1:                                \n\t"
1273
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1274
                "2:                                \n\t"
1275
                "psubw %%mm1, %%mm0                \n\t"
1276 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1277 b7dc6f66 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1278
                "psllw $7, %%mm1                \n\t"
1279
                "paddw %%mm1, %%mm0                \n\t"
1280
1281 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1282 28bf81c9 Michael Niedermayer
1283 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1284 b7dc6f66 Michael Niedermayer
        // End
1285
                "9:                                \n\t"
1286
//                "int $3\n\t"
1287 6e1c66bc Aurelien Jacobs
        // same address/offset/length computation as for fragment A
                "lea 0b, %0                        \n\t"
1288
                "lea 1b, %1                        \n\t"
1289
                "lea 2b, %2                        \n\t"
1290
                "dec %1                                \n\t"
1291
                "dec %2                                \n\t"
1292
                "sub %0, %1                        \n\t"
1293
                "sub %0, %2                        \n\t"
1294
                "lea 9b, %3                        \n\t"
1295
                "sub %0, %3                        \n\t"
1296 b7dc6f66 Michael Niedermayer
1297
1298
                :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1299
                "=r" (fragmentLengthB)
1300
        );
1301
1302
        xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1303
        fragmentPos=0;
1304
        
1305
        for(i=0; i<dstW/numSplits; i++)
1306 28bf81c9 Michael Niedermayer
        {
1307
                int xx=xpos>>16;
1308
1309
                // one code fragment is emitted per group of 4 output pixels
                if((i&3) == 0)
1310
                {
1311
                        // a..d: source index of each of the 4 pixels relative to xx
                        int a=0;
1312
                        int b=((xpos+xInc)>>16) - xx;
1313
                        int c=((xpos+xInc*2)>>16) - xx;
1314
                        int d=((xpos+xInc*3)>>16) - xx;
1315
1316 b7dc6f66 Michael Niedermayer
                        // coefficient = inverted 16 bit fraction of the position, reduced to 7 bits
                        filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1317
                        filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1318
                        filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1319
                        filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1320
                        filterPos[i/2]= xx;
1321
1322
                        // all indices up to d+1 fit into a 2 bit pshufw selector -> single-load fragment B
                        if(d+1<4)
1323
                        {
1324
                                int maxShift= 3-(d+1);
1325
                                int shift=0;
1326
1327
                                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1328
1329
                                // first shuffle selects the "+1" neighbours, second one the pixels themselves
                                funnyCode[fragmentPos + imm8OfPShufW1B]=
1330
                                        (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1331
                                funnyCode[fragmentPos + imm8OfPShufW2B]=
1332
                                        a | (b<<2) | (c<<4) | (d<<6);
1333
1334
                                if(i+3>=dstW) shift=maxShift; //avoid overread
1335
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1336
1337
                                // 0x55*shift adds shift to each of the four 2 bit selectors,
                                // the load position is moved back by the same amount
                                if(shift && i>=shift)
1338
                                {
1339
                                        funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1340
                                        funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1341
                                        filterPos[i/2]-=shift;
1342
                                }
1343
1344
                                fragmentPos+= fragmentLengthB;
1345
                        }
1346
                        else
1347
                        {
1348
                                int maxShift= 3-d;
1349
                                int shift=0;
1350
1351
                                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1352 28bf81c9 Michael Niedermayer
1353 b7dc6f66 Michael Niedermayer
                                // fragment A does two loads, so both shuffles use the same selectors
                                funnyCode[fragmentPos + imm8OfPShufW1A]=
1354
                                funnyCode[fragmentPos + imm8OfPShufW2A]=
1355
                                        a | (b<<2) | (c<<4) | (d<<6);
1356 28bf81c9 Michael Niedermayer
1357 b7dc6f66 Michael Niedermayer
                                if(i+4>=dstW) shift=maxShift; //avoid overread
1358
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1359 28bf81c9 Michael Niedermayer
1360 b7dc6f66 Michael Niedermayer
                                if(shift && i>=shift)
1361
                                {
1362
                                        funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1363
                                        funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1364
                                        filterPos[i/2]-=shift;
1365
                                }
1366
1367
                                fragmentPos+= fragmentLengthA;
1368
                        }
1369
1370
                        // terminate the code; the next fragment (if any) is copied over this byte
                        funnyCode[fragmentPos]= RET;
1371 28bf81c9 Michael Niedermayer
                }
1372
                xpos+=xInc;
1373
        }
1374 b7dc6f66 Michael Niedermayer
        filterPos[i/2]= xpos>>16; // needed to jump to the next part
1375 28bf81c9 Michael Niedermayer
}
1376 6e1c66bc Aurelien Jacobs
#endif // ARCH_X86 || ARCH_X86_64
1377 28bf81c9 Michael Niedermayer
1378 9b2283cc Stefan Huehner
static void globalInit(void){
1379 31190492 Arpi
    // generating tables:
1380
    int i;
1381 c1b0bfb4 Michael Niedermayer
    for(i=0; i<768; i++){
1382
        int c= MIN(MAX(i-256, 0), 255);
1383
        clip_table[i]=c;
1384 b18ea156 Michael Niedermayer
    }
1385 516b1f82 Michael Niedermayer
}
1386 c1b0bfb4 Michael Niedermayer
1387 516b1f82 Michael Niedermayer
static SwsFunc getSwsFunc(int flags){
1388
    
1389 28bf81c9 Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
1390 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1391 28bf81c9 Michael Niedermayer
        // ordered per speed fasterst first
1392 516b1f82 Michael Niedermayer
        if(flags & SWS_CPU_CAPS_MMX2)
1393
                return swScale_MMX2;
1394
        else if(flags & SWS_CPU_CAPS_3DNOW)
1395
                return swScale_3DNow;
1396
        else if(flags & SWS_CPU_CAPS_MMX)
1397
                return swScale_MMX;
1398 28bf81c9 Michael Niedermayer
        else
1399 516b1f82 Michael Niedermayer
                return swScale_C;
1400 28bf81c9 Michael Niedermayer
1401
#else
1402 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
1403
        if(flags & SWS_CPU_CAPS_ALTIVEC)
1404
          return swScale_altivec;
1405
        else
1406
          return swScale_C;
1407
#endif
1408 516b1f82 Michael Niedermayer
        return swScale_C;
1409 28bf81c9 Michael Niedermayer
#endif
1410
#else //RUNTIME_CPUDETECT
1411
#ifdef HAVE_MMX2
1412 516b1f82 Michael Niedermayer
        return swScale_MMX2;
1413 28bf81c9 Michael Niedermayer
#elif defined (HAVE_3DNOW)
1414 516b1f82 Michael Niedermayer
        return swScale_3DNow;
1415 28bf81c9 Michael Niedermayer
#elif defined (HAVE_MMX)
1416 516b1f82 Michael Niedermayer
        return swScale_MMX;
1417 a2faa401 Romain Dolbeau
#elif defined (HAVE_ALTIVEC)
1418
        return swScale_altivec;
1419 28bf81c9 Michael Niedermayer
#else
1420 516b1f82 Michael Niedermayer
        return swScale_C;
1421 28bf81c9 Michael Niedermayer
#endif
1422
#endif //!RUNTIME_CPUDETECT
1423 31190492 Arpi
}
1424 7630f2e0 Michael Niedermayer
1425 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice from 3-plane YUV to a 2-plane layout
 * (plane 0 copied, planes 1 and 2 byte-interleaved into dstParam[1]).
 *
 * Plane 0 is copied line by line (or in one memcpy when both strides are equal
 * and positive). For IMGFMT_NV12 the interleave order is src[1],src[2]; for any
 * other destination format it is src[2],src[1].
 *
 * @param srcSliceY first line of the slice, also used as the destination line
 * @param srcSliceH number of lines in the slice
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
        /* Copy Y plane */
        if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
                memcpy(dst, src[0], srcSliceH*dstStride[0]);
        else
        {
                int i;
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst;
                for(i=0; i<srcSliceH; i++)
                {
                        memcpy(dstPtr, srcPtr, c->srcW);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }

        /* Interleave the two chroma planes into destination plane 1.
           The row pitch must be that of plane 1 (the plane being written),
           matching the stride used to compute dst just below. */
        dst = dstParam[1] + dstStride[1]*srcSliceY/2;
        if (c->dstFormat == IMGFMT_NV12)
                interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[1] );
        else
                interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[1] );

        return srcSliceH;
}
1451
1452 d4e24275 Michael Niedermayer
/**
 * Unscaled planar -> packed conversion of one slice via yv12toyuy2().
 * Output starts at line srcSliceY of dstParam[0].
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        const int plane0Stride= srcStride[0];
        const int plane1Stride= srcStride[1];
        uint8_t * const out= dstParam[0] + dstStride[0]*srcSliceY;

        yv12toyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
                   plane0Stride, plane1Stride, dstStride[0]);

        return srcSliceH;
}
1460
1461 caeaabe7 Alex Beregszaszi
/**
 * Unscaled planar -> packed conversion of one slice via yv12touyvy().
 * Output starts at line srcSliceY of dstParam[0].
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        const int plane0Stride= srcStride[0];
        const int plane1Stride= srcStride[1];
        uint8_t * const out= dstParam[0] + dstStride[0]*srcSliceY;

        yv12touyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
                   plane0Stride, plane1Stride, dstStride[0]);

        return srcSliceH;
}
1469
1470 e09d12f4 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1471 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1472 e09d12f4 Michael Niedermayer
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
1473
        const int srcFormat= c->srcFormat;
1474
        const int dstFormat= c->dstFormat;
1475
        const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
1476
        const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
1477
        const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 
1478
        const int dstId= (dstFormat&0xFF)>>2;
1479 73cbfb1c D Richard Felker III
        void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1480 e09d12f4 Michael Niedermayer
1481
        /* BGR -> BGR */
1482 20380eb8 Michael Niedermayer
        if(   (isBGR(srcFormat) && isBGR(dstFormat))
1483
           || (isRGB(srcFormat) && isRGB(dstFormat))){
1484 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1485
                case 0x34: conv= rgb16to15; break;
1486
                case 0x36: conv= rgb24to15; break;
1487
                case 0x38: conv= rgb32to15; break;
1488
                case 0x43: conv= rgb15to16; break;
1489
                case 0x46: conv= rgb24to16; break;
1490
                case 0x48: conv= rgb32to16; break;
1491
                case 0x63: conv= rgb15to24; break;
1492
                case 0x64: conv= rgb16to24; break;
1493
                case 0x68: conv= rgb32to24; break;
1494
                case 0x83: conv= rgb15to32; break;
1495
                case 0x84: conv= rgb16to32; break;
1496
                case 0x86: conv= rgb24to32; break;
1497
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1498
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1499 b935781b Michael Niedermayer
                }
1500 20380eb8 Michael Niedermayer
        }else if(   (isBGR(srcFormat) && isRGB(dstFormat))
1501
                 || (isRGB(srcFormat) && isBGR(dstFormat))){
1502 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1503
                case 0x33: conv= rgb15tobgr15; break;
1504
                case 0x34: conv= rgb16tobgr15; break;
1505
                case 0x36: conv= rgb24tobgr15; break;
1506
                case 0x38: conv= rgb32tobgr15; break;
1507
                case 0x43: conv= rgb15tobgr16; break;
1508
                case 0x44: conv= rgb16tobgr16; break;
1509
                case 0x46: conv= rgb24tobgr16; break;
1510
                case 0x48: conv= rgb32tobgr16; break;
1511
                case 0x63: conv= rgb15tobgr24; break;
1512
                case 0x64: conv= rgb16tobgr24; break;
1513
                case 0x66: conv= rgb24tobgr24; break;
1514
                case 0x68: conv= rgb32tobgr24; break;
1515
                case 0x83: conv= rgb15tobgr32; break;
1516
                case 0x84: conv= rgb16tobgr32; break;
1517
                case 0x86: conv= rgb24tobgr32; break;
1518
                case 0x88: conv= rgb32tobgr32; break;
1519
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1520
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1521 0d9f3d85 Arpi
                }
1522 20380eb8 Michael Niedermayer
        }else{
1523
                MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1524
                         vo_format_name(srcFormat), vo_format_name(dstFormat));
1525 e09d12f4 Michael Niedermayer
        }
1526 20380eb8 Michael Niedermayer
1527 e09d12f4 Michael Niedermayer
        if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1528
                conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1529 0d9f3d85 Arpi
        else
1530
        {
1531
                int i;
1532
                uint8_t *srcPtr= src[0];
1533
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1534
1535
                for(i=0; i<srcSliceH; i++)
1536
                {
1537 e09d12f4 Michael Niedermayer
                        conv(srcPtr, dstPtr, c->srcW*srcBpp);
1538 0d9f3d85 Arpi
                        srcPtr+= srcStride[0];
1539
                        dstPtr+= dstStride[0];
1540
                }
1541
        }     
1542 d4e24275 Michael Niedermayer
        return srcSliceH;
1543 0d9f3d85 Arpi
}
1544
1545 d4e24275 Michael Niedermayer
/**
 * Unscaled packed 24 bit -> 3-plane conversion of one slice via rgb24toyv12().
 * Plane 0 output starts at line srcSliceY, planes 1 and 2 at line srcSliceY>>1.
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        const int subRow= srcSliceY>>1;
        uint8_t * const out0= dst[0] + srcSliceY*dstStride[0];
        uint8_t * const out1= dst[1] + subRow   *dstStride[1];
        uint8_t * const out2= dst[2] + subRow   *dstStride[2];

        rgb24toyv12(src[0], out0, out1, out2,
                    c->srcW, srcSliceH,
                    dstStride[0], dstStride[1], srcStride[0]);

        return srcSliceH;
}
1557
1558 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice: plane 0 is copied, planes 1 and 2 are
 * upscaled with planar2x(). For IMGFMT_YV12 output the planes keep their index,
 * for any other destination format planes 1 and 2 are exchanged.
 *
 * NOTE(review): the planar2x() calls always start at the top of the chroma
 * planes and use c->chrSrcH, independent of srcSliceY/srcSliceH - confirm that
 * this wrapper is only used with whole frames.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *lineOut= dst[0] + dstStride[0]*srcSliceY;
        int firstOut, secondOut;

        /* copy Y */
        if(srcStride[0]==dstStride[0] && srcStride[0] > 0){
                memcpy(lineOut, src[0], srcStride[0]*srcSliceH);
        }else{
                uint8_t *lineIn= src[0];
                int row;

                for(row=0; row<srcSliceH; row++){
                        memcpy(lineOut, lineIn, c->srcW);
                        lineIn += srcStride[0];
                        lineOut+= dstStride[0];
                }
        }

        /* destination plane index for source plane 1 and source plane 2 */
        if(c->dstFormat==IMGFMT_YV12){
                firstOut = 1;
                secondOut= 2;
        }else{
                firstOut = 2;
                secondOut= 1;
        }
        planar2x(src[1], dst[firstOut ], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[firstOut ]);
        planar2x(src[2], dst[secondOut], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[secondOut]);

        return srcSliceH;
}
1586
1587 44c1035c Michael Niedermayer
/**
1588
 * bring pointers in YUV order instead of YVU
1589
 */
1590 fccb9b2b Michael Niedermayer
static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1591
        if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
1592 d80e2fa2 Michael Niedermayer
           || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
1593 44c1035c Michael Niedermayer
                sortedP[0]= p[0];
1594 fccb9b2b Michael Niedermayer
                sortedP[1]= p[2];
1595
                sortedP[2]= p[1];
1596 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1597 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[2];
1598
                sortedStride[2]= stride[1];
1599 44c1035c Michael Niedermayer
        }
1600 a4c90ea3 Michael Niedermayer
        else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
1601 44c1035c Michael Niedermayer
        {
1602
                sortedP[0]= p[0];
1603
                sortedP[1]= 
1604
                sortedP[2]= NULL;
1605 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1606 44c1035c Michael Niedermayer
                sortedStride[1]= 
1607
                sortedStride[2]= 0;
1608
        }
1609 fccb9b2b Michael Niedermayer
        else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
1610 44c1035c Michael Niedermayer
        {
1611
                sortedP[0]= p[0];
1612 fccb9b2b Michael Niedermayer
                sortedP[1]= p[1];
1613
                sortedP[2]= p[2];
1614 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1615 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[1];
1616
                sortedStride[2]= stride[2];
1617 6118e52e Ville Syrjälä
        }
1618
        else if(format == IMGFMT_NV12 || format == IMGFMT_NV21)
1619
        {
1620
                sortedP[0]= p[0];
1621
                sortedP[1]= p[1];
1622
                sortedP[2]= NULL;
1623
                sortedStride[0]= stride[0];
1624
                sortedStride[1]= stride[1];
1625
                sortedStride[2]= 0;
1626 e09d12f4 Michael Niedermayer
        }else{
1627
                MSG_ERR("internal error in orderYUV\n");
1628 44c1035c Michael Niedermayer
        }
1629
}
1630 b935781b Michael Niedermayer
1631 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
1632 3e499f53 Michael Niedermayer
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1633
             int srcSliceH, uint8_t* dst[], int dstStride[]){
1634 b6654a54 Michael Niedermayer
1635
        if(isPacked(c->srcFormat))
1636
        {
1637 12e11662 Jindřich Makovička
                if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
1638 b6654a54 Michael Niedermayer
                        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1639
                else
1640
                {
1641
                        int i;
1642
                        uint8_t *srcPtr= src[0];
1643
                        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1644 a861d4d7 Michael Niedermayer
                        int length=0;
1645
1646
                        /* universal length finder */
1647 9bd8bd1a Michael Niedermayer
                        while(length+c->srcW <= ABS(dstStride[0]) 
1648
                           && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1649 a861d4d7 Michael Niedermayer
                        ASSERT(length!=0);
1650 b6654a54 Michael Niedermayer
1651
                        for(i=0; i<srcSliceH; i++)
1652
                        {
1653
                                memcpy(dstPtr, srcPtr, length);
1654
                                srcPtr+= srcStride[0];
1655
                                dstPtr+= dstStride[0];
1656
                        }
1657
                }
1658
        }
1659
        else 
1660 44c1035c Michael Niedermayer
        { /* Planar YUV or gray */
1661 b6654a54 Michael Niedermayer
                int plane;
1662
                for(plane=0; plane<3; plane++)
1663
                {
1664 e616aa93 Michael Niedermayer
                        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1665
                        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1666
                        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1667 44c1035c Michael Niedermayer
1668
                        if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1669 0d9f3d85 Arpi
                        {
1670 44c1035c Michael Niedermayer
                                if(!isGray(c->dstFormat))
1671 e616aa93 Michael Niedermayer
                                        memset(dst[plane], 128, dstStride[plane]*height);
1672 0d9f3d85 Arpi
                        }
1673 b6654a54 Michael Niedermayer
                        else
1674
                        {
1675 12e11662 Jindřich Makovička
                                if(dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1676 44c1035c Michael Niedermayer
                                        memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1677
                                else
1678 b6654a54 Michael Niedermayer
                                {
1679 44c1035c Michael Niedermayer
                                        int i;
1680
                                        uint8_t *srcPtr= src[plane];
1681
                                        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1682
                                        for(i=0; i<height; i++)
1683
                                        {
1684
                                                memcpy(dstPtr, srcPtr, length);
1685
                                                srcPtr+= srcStride[plane];
1686
                                                dstPtr+= dstStride[plane];
1687
                                        }
1688 b6654a54 Michael Niedermayer
                                }
1689
                        }
1690
                }
1691
        }
1692 d4e24275 Michael Niedermayer
        return srcSliceH;
1693 37079906 Michael Niedermayer
}
1694 28bf81c9 Michael Niedermayer
1695 44c1035c Michael Niedermayer
static int remove_dup_fourcc(int fourcc)
1696 0d9f3d85 Arpi
{
1697
        switch(fourcc)
1698
        {
1699 fccb9b2b Michael Niedermayer
            case IMGFMT_I420:
1700
            case IMGFMT_IYUV: return IMGFMT_YV12;
1701 0d9f3d85 Arpi
            case IMGFMT_Y8  : return IMGFMT_Y800;
1702 0c51ef97 Arpi
            case IMGFMT_IF09: return IMGFMT_YVU9;
1703 0d9f3d85 Arpi
            default: return fourcc;
1704
        }
1705
}
1706
1707 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
1708
        switch(format){
1709 7322a67c Michael Niedermayer
        case IMGFMT_UYVY:
1710 c7a810cc Michael Niedermayer
        case IMGFMT_YUY2:
1711
                *h=1;
1712
                *v=0;
1713
                break;
1714
        case IMGFMT_YV12:
1715 e616aa93 Michael Niedermayer
        case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
1716 6118e52e Ville Syrjälä
        case IMGFMT_NV12:
1717
        case IMGFMT_NV21:
1718 c7a810cc Michael Niedermayer
                *h=1;
1719
                *v=1;
1720
                break;
1721
        case IMGFMT_YVU9:
1722
                *h=2;
1723
                *v=2;
1724
                break;
1725 d80e2fa2 Michael Niedermayer
        case IMGFMT_444P:
1726
                *h=0;
1727
                *v=0;
1728
                break;
1729
        case IMGFMT_422P:
1730
                *h=1;
1731
                *v=0;
1732
                break;
1733
        case IMGFMT_411P:
1734
                *h=2;
1735
                *v=0;
1736
                break;
1737 c7a810cc Michael Niedermayer
        default:
1738
                *h=0;
1739
                *v=0;
1740
                break;
1741
        }
1742
}
1743
1744 5427e242 Michael Niedermayer
/**
 * Convert a 16.16 fixed point value to a rounded, saturated 16 bit integer.
 *
 * @param f value scaled by 1<<16
 * @return the bit pattern of the signed 16 bit result: 0x7FFF when the rounded
 *         value is above 0x7FFF, 0x8000 when it is below -0x7FFF
 */
static uint16_t roundToInt16(int64_t f){
        int64_t r;

        /* Values this far out saturate anyway; handling them first also keeps
           the rounding addition below from overflowing. */
        if(f >=  ((int64_t)1<<32)) return 0x7FFF;
        if(f <= -((int64_t)1<<32)) return 0x8000;

        /* keep the full 64 bit result: narrowing to int before the range check
           would let large values wrap around and escape the clamping */
        r= (f + (1<<15))>>16;

        if(r<-0x7FFF) return 0x8000;
        if(r> 0x7FFF) return 0x7FFF;
        return (uint16_t)r;
}
1750
1751
/**
1752 5427e242 Michael Niedermayer
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1753 0481412a Michael Niedermayer
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1754 5427e242 Michael Niedermayer
 * @return -1 if not supported
1755 0481412a Michael Niedermayer
 */
1756 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1757
        int64_t crv =  inv_table[0];
1758
        int64_t cbu =  inv_table[1];
1759
        int64_t cgu = -inv_table[2];
1760
        int64_t cgv = -inv_table[3];
1761
        int64_t cy  = 1<<16;
1762
        int64_t oy  = 0;
1763
1764
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1765
        memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1766
        memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1767
1768
        c->brightness= brightness;
1769
        c->contrast  = contrast;
1770
        c->saturation= saturation;
1771
        c->srcRange  = srcRange;
1772
        c->dstRange  = dstRange;
1773 0481412a Michael Niedermayer
1774
        c->uOffset=   0x0400040004000400LL;
1775
        c->vOffset=   0x0400040004000400LL;
1776
1777 5427e242 Michael Niedermayer
        if(!srcRange){
1778
                cy= (cy*255) / 219;
1779
                oy= 16<<16;
1780 0481412a Michael Niedermayer
        }
1781
1782 5427e242 Michael Niedermayer
        cy = (cy *contrast             )>>16;
1783
        crv= (crv*contrast * saturation)>>32;
1784
        cbu= (cbu*contrast * saturation)>>32;
1785
        cgu= (cgu*contrast * saturation)>>32;
1786
        cgv= (cgv*contrast * saturation)>>32;
1787 0481412a Michael Niedermayer
1788 5427e242 Michael Niedermayer
        oy -= 256*brightness;
1789 0481412a Michael Niedermayer
1790
        c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1791
        c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1792
        c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1793
        c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1794
        c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1795
        c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1796 5427e242 Michael Niedermayer
1797
        yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1798
        //FIXME factorize
1799 a31de956 Michael Niedermayer
1800 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
1801
        if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1802
            yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1803 a31de956 Michael Niedermayer
#endif        
1804 5427e242 Michael Niedermayer
        return 0;
1805
}
1806
1807
/**
1808
 * @return -1 if not supported
1809
 */
1810
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1811
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1812
1813
        *inv_table = c->srcColorspaceTable;
1814
        *table     = c->dstColorspaceTable;
1815
        *srcRange  = c->srcRange;
1816
        *dstRange  = c->dstRange;
1817
        *brightness= c->brightness;
1818
        *contrast  = c->contrast;
1819
        *saturation= c->saturation;
1820
        
1821
        return 0;        
1822 0481412a Michael Niedermayer
}
1823
1824 fccb9b2b Michael Niedermayer
/**
 * Allocates and initializes a scaler / format converter context.
 *
 * @param srcW          source width, must be at least 4
 * @param srcH          source height, must be at least 1
 * @param origSrcFormat source image format as given by the caller; it is
 *                      normalized with remove_dup_fourcc() before use
 * @param dstW          destination width, must be at least 8
 * @param dstH          destination height, must be at least 1
 * @param origDstFormat destination image format, normalized like the source one
 * @param flags         scaler algorithm selection, SWS_CPU_CAPS_* bits,
 *                      SWS_PRINT_INFO and the chroma handling options; when
 *                      RUNTIME_CPUDETECT is not defined the CPU capability
 *                      bits are overwritten with the compiled-in variant
 * @param srcFilter     optional source filter, NULL is treated as an empty filter
 * @param dstFilter     optional destination filter, NULL is treated as an empty filter
 * @param param         optional array of 2 tuning values copied into the
 *                      context; NULL selects SWS_PARAM_DEFAULT for both
 * @return the new context, or NULL if a format is not supported, the
 *         dimensions are out of range or the context could not be allocated
 *
 * If source and destination have the same size and no filter with more than
 * one tap is given, a matching unscaled special converter is installed and
 * the function returns early without building any scaler filters.
 */
SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
                         SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){

        SwsContext *c;
        int i;
        int usesVFilter, usesHFilter;
        int unscaled, needsDither;
        int srcFormat, dstFormat;
        SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(flags & SWS_CPU_CAPS_MMX)
                asm volatile("emms\n\t"::: "memory");
#endif

#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
        flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
        flags |= SWS_CPU_CAPS_MMX;
#elif defined (HAVE_ALTIVEC)
        flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
        /* one-time global initialization */
        if(clip_table[512] != 255) globalInit();
        if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);

        /* avoid duplicate formats, so we don't need to check too much */
        srcFormat = remove_dup_fourcc(origSrcFormat);
        dstFormat = remove_dup_fourcc(origDstFormat);

        unscaled = (srcW == dstW && srcH == dstH);
        /* dithering is needed for RGB/BGR output below 24 bits (low byte of the
           format) unless the source is RGB/BGR of the same or lower depth */
        needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
                     && (dstFormat&0xFF)<24
                     && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

        if(!isSupportedIn(srcFormat))
        {
                MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
                return NULL;
        }
        if(!isSupportedOut(dstFormat))
        {
                MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
                return NULL;
        }

        /* sanity check */
        if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
        {
                MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
                        srcW, srcH, dstW, dstH);
                return NULL;
        }

        if(!dstFilter) dstFilter= &dummyFilter;
        if(!srcFilter) srcFilter= &dummyFilter;

        c= memalign(64, sizeof(SwsContext));
        if(!c) return NULL; // out of memory; nothing else has been allocated yet
        memset(c, 0, sizeof(SwsContext));

        c->srcW= srcW;
        c->srcH= srcH;
        c->dstW= dstW;
        c->dstH= dstH;
        /* 16.16 fixed point source step per destination pixel, rounded to nearest */
        c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
        c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
        c->flags= flags;
        c->dstFormat= dstFormat;
        c->srcFormat= srcFormat;
        c->origDstFormat= origDstFormat;
        c->origSrcFormat= origSrcFormat;
        c->vRounder= 4* 0x0001000100010001ULL;

        /* a filter only counts as used if it has more than one tap */
        usesHFilter= usesVFilter= 0;
        if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
        if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
        if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
        if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
        if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
        if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
        if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
        if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;

        getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
        getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

        // reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
        if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;

        // drop some chroma lines if the user wants it
        c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
        c->chrSrcVSubSample+= c->vChrDrop;

        // drop every 2. pixel for chroma calculation unless user wants full chroma
        if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
                c->chrSrcHSubSample=1;

        if(param){
                c->param[0] = param[0];
                c->param[1] = param[1];
        }else{
                c->param[0] =
                c->param[1] = SWS_PARAM_DEFAULT;
        }

        c->chrIntHSubSample= c->chrDstHSubSample;
        c->chrIntVSubSample= c->chrSrcVSubSample;

        // note the -((-x)>>y) is so that we always round toward +inf
        c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
        c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
        c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
        c->chrDstH= -((-dstH) >> c->chrDstVSubSample);

        sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);

        /* unscaled special Cases */
        /* the checks below are not exclusive: a later match overrides an earlier one */
        if(unscaled && !usesHFilter && !usesVFilter)
        {
                /* yv12_to_nv12 */
                if(srcFormat == IMGFMT_YV12 && (dstFormat == IMGFMT_NV12 || dstFormat == IMGFMT_NV21))
                {
                        c->swScale= PlanarToNV12Wrapper;
                }
                /* yuv2bgr */
                if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
                {
                        c->swScale= yuv2rgb_get_func_ptr(c);
                }

                if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
                {
                        c->swScale= yvu9toyv12Wrapper;
                }

                /* bgr24toYV12 */
                if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
                        c->swScale= bgr24toyv12Wrapper;

                /* rgb/bgr -> rgb/bgr (no dither needed forms) */
                if(   (isBGR(srcFormat) || isRGB(srcFormat))
                   && (isBGR(dstFormat) || isRGB(dstFormat))
                   && !needsDither)
                        c->swScale= rgb2rgbWrapper;

                /* LQ converters if -sws 0 or -sws 4*/
                if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
                        /* rgb/bgr -> rgb/bgr (dither needed forms) */
                        if(  (isBGR(srcFormat) || isRGB(srcFormat))
                          && (isBGR(dstFormat) || isRGB(dstFormat))
                          && needsDither)
                                c->swScale= rgb2rgbWrapper;

                        /* yv12_to_yuy2 */
                        if(srcFormat == IMGFMT_YV12 &&
                            (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
                        {
                                if (dstFormat == IMGFMT_YUY2)
                                    c->swScale= PlanarToYuy2Wrapper;
                                else
                                    c->swScale= PlanarToUyvyWrapper;
                        }
                }

#ifdef COMPILE_ALTIVEC
                if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
                    ((srcFormat == IMGFMT_YV12 &&
                      (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY)))) {
                  // unscaled YV12 -> packed YUV, we want speed
                  if (dstFormat == IMGFMT_YUY2)
                    c->swScale= yv12toyuy2_unscaled_altivec;
                  else
                    c->swScale= yv12touyvy_unscaled_altivec;
                }
#endif

                /* simple copy */
                if(   srcFormat == dstFormat
                   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
                   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
                  )
                {
                        c->swScale= simpleCopy;
                }

                /* a special converter was found, no scaler setup is needed */
                if(c->swScale){
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
        }

        if(flags & SWS_CPU_CAPS_MMX2)
        {
                c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
                if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
                {
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
                }
                if(usesHFilter) c->canMMX2BeUsed=0;
        }
        else
                c->canMMX2BeUsed=0;

        c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
        c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;

        // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
        // but only for the FAST_BILINEAR mode otherwise do correct scaling
        // n-2 is the last chrominance sample available
        // this is not perfect, but no one should notice the difference, the more correct variant
        // would be like the vertical one, but that would require some special code for the
        // first and last pixel
        if(flags&SWS_FAST_BILINEAR)
        {
                if(c->canMMX2BeUsed)
                {
                        c->lumXInc+= 20;
                        c->chrXInc+= 20;
                }
                //we don't use the x86asm scaler if mmx is available
                else if(flags & SWS_CPU_CAPS_MMX)
                {
                        c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
                        // NOTE(review): c->chrDstW-2 is 0 when chrDstW is 2 (e.g. dstW==8 with 4x chroma subsampling) -- confirm and guard
                        c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
                }
        }

        /* precalculate horizontal scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_MMX) ? 4 :
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                /* with SWS_BICUBLIN luma is filtered bicubic and chroma bilinear */
                initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                                 srcW      ,       dstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                 srcFilter->lumH, dstFilter->lumH, c->param);
                initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                                 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                 srcFilter->chrH, dstFilter->chrH, c->param);

#if defined(ARCH_X86) || defined(ARCH_X86_64)
// can't downscale !!!
                if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
                {
#define MAX_FUNNY_CODE_SIZE 10000
#ifdef MAP_ANONYMOUS
                        /* buffers for the generated scaler code must be executable;
                           fd is -1 as some systems require that for anonymous mappings */
                        c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                        c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#else
                        c->funnyYCode = (uint8_t*)memalign(32, MAX_FUNNY_CODE_SIZE);
                        c->funnyUVCode = (uint8_t*)memalign(32, MAX_FUNNY_CODE_SIZE);
#endif

                        c->lumMmx2Filter   = (int16_t*)memalign(8, (dstW        /8+8)*sizeof(int16_t));
                        c->chrMmx2Filter   = (int16_t*)memalign(8, (c->chrDstW  /4+8)*sizeof(int16_t));
                        c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW      /2/8+8)*sizeof(int32_t));
                        c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));

                        initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
                        initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
                }
#endif
        } // Init Horizontal stuff



        /* precalculate vertical scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                                srcH      ,        dstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                srcFilter->lumV, dstFilter->lumV, c->param);
                initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                                c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                srcFilter->chrV, dstFilter->chrV, c->param);

#ifdef HAVE_ALTIVEC
                c->vYCoeffsBank = memalign (16, sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
                c->vCCoeffsBank = memalign (16, sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);

                /* replicate every vertical coefficient into all 8 lanes of a vector */
                for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
                  int j;
                  short *p = (short *)&c->vYCoeffsBank[i];
                  for (j=0;j<8;j++)
                    p[j] = c->vLumFilter[i];
                }

                for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
                  int j;
                  short *p = (short *)&c->vCCoeffsBank[i];
                  for (j=0;j<8;j++)
                    p[j] = c->vChrFilter[i];
                }
#endif
        }

        // Calculate Buffer Sizes so that they won't run out while handling these damn slices
        c->vLumBufSize= c->vLumFilterSize;
        c->vChrBufSize= c->vChrFilterSize;
        for(i=0; i<dstH; i++)
        {
                int chrI= i*c->chrDstH / dstH;
                /* last source line needed for output line i, in luma line units */
                int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
                                 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));

                /* round down to a multiple of the vertical chroma subsampling */
                nextSlice>>= c->chrSrcVSubSample;
                nextSlice<<= c->chrSrcVSubSample;
                if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
                        c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
                if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
                        c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
        }

        // allocate pixbufs dynamically; every line pointer is stored twice (at i and i+BufSize)
        // NOTE(review): the allocations below are not checked for failure
        c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
        c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
        //Note we need at least one pixel more at the end because of the mmx code (just in case someone wants to replace the 4000/8000)
        /* align at 16 bytes for AltiVec */
        for(i=0; i<c->vLumBufSize; i++)
                c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (int16_t*)memalign(16, 4000);
        for(i=0; i<c->vChrBufSize; i++)
                c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (int16_t*)memalign(16, 8000);

        //try to avoid drawing green stuff between the right end and the stride end
        for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
        for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);

        ASSERT(c->chrDstH <= dstH)

        if(flags&SWS_PRINT_INFO)
        {
#ifdef DITHER1XBPP
                char *dither= " dithered";
#else
                char *dither= "";
#endif
                if(flags&SWS_FAST_BILINEAR)
                        MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
                else if(flags&SWS_BILINEAR)
                        MSG_INFO("\nSwScaler: BILINEAR scaler, ");
                else if(flags&SWS_BICUBIC)
                        MSG_INFO("\nSwScaler: BICUBIC scaler, ");
                else if(flags&SWS_X)
                        MSG_INFO("\nSwScaler: Experimental scaler, ");
                else if(flags&SWS_POINT)
                        MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
                else if(flags&SWS_AREA)
                        MSG_INFO("\nSwScaler: Area Averageing scaler, ");
                else if(flags&SWS_BICUBLIN)
                        MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
                else if(flags&SWS_GAUSS)
                        MSG_INFO("\nSwScaler: Gaussian scaler, ");
                else if(flags&SWS_SINC)
                        MSG_INFO("\nSwScaler: Sinc scaler, ");
                else if(flags&SWS_LANCZOS)
                        MSG_INFO("\nSwScaler: Lanczos scaler, ");
                else if(flags&SWS_SPLINE)
                        MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
                else
                        MSG_INFO("\nSwScaler: ehh flags invalid?! ");

                if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
                        MSG_INFO("from %s to%s %s ",
                                vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
                else
                        MSG_INFO("from %s to %s ",
                                vo_format_name(srcFormat), vo_format_name(dstFormat));

                if(flags & SWS_CPU_CAPS_MMX2)
                        MSG_INFO("using MMX2\n");
                else if(flags & SWS_CPU_CAPS_3DNOW)
                        MSG_INFO("using 3DNOW\n");
                else if(flags & SWS_CPU_CAPS_MMX)
                        MSG_INFO("using MMX\n");
                else if(flags & SWS_CPU_CAPS_ALTIVEC)
                        MSG_INFO("using AltiVec\n");
                else
                        MSG_INFO("using C\n");
        }

        if(flags & SWS_PRINT_INFO)
        {
                if(flags & SWS_CPU_CAPS_MMX)
                {
                        if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                                MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
                        else
                        {
                                if(c->hLumFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
                                else if(c->hLumFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");

                                if(c->hChrFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
                                else if(c->hChrFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
                        }
                }
                else
                {
#if defined(ARCH_X86) || defined(ARCH_X86_64)
                        MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
#else
                        if(flags & SWS_FAST_BILINEAR)
                                MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
                        else
                                MSG_V("SwScaler: using C scaler for horizontal scaling\n");
#endif
                }
                if(isPlanarYUV(dstFormat))
                {
                        if(c->vLumFilterSize==1)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }
                else
                {
                        if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
                                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }

                if(dstFormat==IMGFMT_BGR24)
                        MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
                                (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
                else if(dstFormat==IMGFMT_BGR32)
                        MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR16)
                        MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR15)
                        MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");

                MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
        }
        if(flags & SWS_PRINT_INFO)
        {
                MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
                MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
        }

        /* generic scaler for everything not handled by a special converter */
        c->swScale= getSwsFunc(flags);
        return c;
}
2293
2294
/**
2295 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext.
2296 fccb9b2b Michael Niedermayer
 * assumes planar YUV to be in YUV order instead of YVU
2297
 */
2298
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2299
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
2300 e63ac25d Jindřich Makovička
        if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2301
            MSG_ERR("swScaler: slices start in the middle!\n");
2302
            return 0;
2303
        }
2304
        if (c->sliceDir == 0) {
2305
            if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2306
        }
2307
2308
        // copy strides, so they can safely be modified
2309
        if (c->sliceDir == 1) {
2310
            // slices go from top to bottom
2311
            int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]};
2312
            int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]};
2313
            return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2314
        } else {
2315
            // slices go from bottom to top => we flip the image internally
2316
            uint8_t* src2[3]= {src[0] + (srcSliceH-1)*srcStride[0],
2317
                               src[1] + ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1],
2318
                               src[2] + ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2]
2319
            };
2320
            uint8_t* dst2[3]= {dst[0] + (c->dstH-1)*dstStride[0],
2321
                               dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2322
                               dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2323
            int srcStride2[3]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2324
            int dstStride2[3]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2325
            
2326
            return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2327
        }
2328 fccb9b2b Michael Niedermayer
}
2329
2330
/**
2331 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext
2332 d4e24275 Michael Niedermayer
 */
2333 3e499f53 Michael Niedermayer
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
2334
                           int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
2335
        int srcStride[3];
2336
        int dstStride[3];
2337
        uint8_t *src[3];
2338
        uint8_t *dst[3];
2339 fccb9b2b Michael Niedermayer
        sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
2340
        sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
2341 3e499f53 Michael Niedermayer
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
2342 a4c90ea3 Michael Niedermayer
2343 5bf01354 Michael Niedermayer
        return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2344 d4e24275 Michael Niedermayer
}
2345
2346 e21206a8 Michael Niedermayer
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, 
2347
                                float lumaSharpen, float chromaSharpen,
2348
                                float chromaHShift, float chromaVShift,
2349
                                int verbose)
2350
{
2351
        SwsFilter *filter= malloc(sizeof(SwsFilter));
2352
2353
        if(lumaGBlur!=0.0){
2354
                filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2355
                filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2356
        }else{
2357
                filter->lumH= sws_getIdentityVec();
2358
                filter->lumV= sws_getIdentityVec();
2359
        }
2360
2361
        if(chromaGBlur!=0.0){
2362
                filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2363
                filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2364
        }else{
2365
                filter->chrH= sws_getIdentityVec();
2366
                filter->chrV= sws_getIdentityVec();
2367
        }
2368
2369
        if(chromaSharpen!=0.0){
2370 2e728364 Michael Niedermayer
                SwsVector *id= sws_getIdentityVec();
2371
                sws_scaleVec(filter->chrH, -chromaSharpen);
2372
                sws_scaleVec(filter->chrV, -chromaSharpen);
2373
                sws_addVec(filter->chrH, id);
2374
                sws_addVec(filter->chrV, id);
2375 e21206a8 Michael Niedermayer
                sws_freeVec(id);
2376
        }
2377
2378
        if(lumaSharpen!=0.0){
2379 2e728364 Michael Niedermayer
                SwsVector *id= sws_getIdentityVec();
2380
                sws_scaleVec(filter->lumH, -lumaSharpen);
2381
                sws_scaleVec(filter->lumV, -lumaSharpen);
2382
                sws_addVec(filter->lumH, id);
2383
                sws_addVec(filter->lumV, id);
2384 e21206a8 Michael Niedermayer
                sws_freeVec(id);
2385
        }
2386
2387
        if(chromaHShift != 0.0)
2388
                sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2389
2390
        if(chromaVShift != 0.0)
2391
                sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2392
2393
        sws_normalizeVec(filter->chrH, 1.0);
2394
        sws_normalizeVec(filter->chrV, 1.0);
2395
        sws_normalizeVec(filter->lumH, 1.0);
2396
        sws_normalizeVec(filter->lumV, 1.0);
2397
2398
        if(verbose) sws_printVec(filter->chrH);
2399
        if(verbose) sws_printVec(filter->lumH);
2400
2401
        return filter;
2402
}
2403
2404 d4e24275 Michael Niedermayer
/**
2405 28bf81c9 Michael Niedermayer
 * returns a normalized gaussian curve used to filter stuff
2406
 * quality=3 is high quality, lowwer is lowwer quality
2407
 */
2408 d4e24275 Michael Niedermayer
SwsVector *sws_getGaussianVec(double variance, double quality){
2409 28bf81c9 Michael Niedermayer
        const int length= (int)(variance*quality + 0.5) | 1;
2410
        int i;
2411
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2412
        double middle= (length-1)*0.5;
2413 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2414
2415
        vec->coeff= coeff;
2416
        vec->length= length;
2417 28bf81c9 Michael Niedermayer
2418
        for(i=0; i<length; i++)
2419
        {
2420
                double dist= i-middle;
2421
                coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2422
        }
2423
2424 d4e24275 Michael Niedermayer
        sws_normalizeVec(vec, 1.0);
2425 c7f822d9 Michael Niedermayer
2426
        return vec;
2427 28bf81c9 Michael Niedermayer
}
2428
2429 d4e24275 Michael Niedermayer
SwsVector *sws_getConstVec(double c, int length){
2430 5521b193 Michael Niedermayer
        int i;
2431
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2432
        SwsVector *vec= malloc(sizeof(SwsVector));
2433
2434
        vec->coeff= coeff;
2435
        vec->length= length;
2436
2437
        for(i=0; i<length; i++)
2438
                coeff[i]= c;
2439
2440
        return vec;
2441
}
2442
2443
2444 d4e24275 Michael Niedermayer
/**
 * Returns a one-tap vector whose only coefficient is 1.0.
 */
SwsVector *sws_getIdentityVec(void){
        SwsVector *identity= sws_getConstVec(1.0, 1);

        return identity;
}
2447
2448 2e728364 Michael Niedermayer
/**
 * Returns the sum of all coefficients of a (its DC gain).
 */
double sws_dcVec(SwsVector *a){
        double total= 0;
        int k= 0;

        while(k < a->length){
                total+= a->coeff[k];
                k++;
        }

        return total;
}
2457
2458 d4e24275 Michael Niedermayer
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
        int k= 0;

        while(k < a->length){
                a->coeff[k]= a->coeff[k] * scalar;
                k++;
        }
}
2464
2465 2e728364 Michael Niedermayer
/**
 * Rescales a in place so that its coefficients sum to height.
 */
void sws_normalizeVec(SwsVector *a, double height){
        const double dc= sws_dcVec(a);
        const double gain= height/dc;

        sws_scaleVec(a, gain);
}
2468
2469 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding the convolution of a and b;
 * its length is a->length + b->length - 1.
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
        const int outLen= a->length + b->length - 1;
        double *out= memalign(sizeof(double), outLen*sizeof(double));
        SwsVector *result= malloc(sizeof(SwsVector));
        int ia, ib, k;

        result->coeff= out;
        result->length= outLen;

        k= 0;
        while(k < outLen){
                out[k]= 0.0;
                k++;
        }

        /* every tap of a spreads a scaled copy of b, starting at its own position */
        for(ia=0; ia<a->length; ia++)
        {
                const double tapA= a->coeff[ia];
                double *dst= out + ia;

                for(ib=0; ib<b->length; ib++)
                        dst[ib]+= tapA*b->coeff[ib];
        }

        return result;
}
2490
2491 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding a + b. The result is as long as
 * the longer input; both inputs are centered inside it before being added.
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
        const int outLen= MAX(a->length, b->length);
        const int center= (outLen-1)/2;
        double *out= memalign(sizeof(double), outLen*sizeof(double));
        SwsVector *result= malloc(sizeof(SwsVector));
        int k;

        result->coeff= out;
        result->length= outLen;

        k= 0;
        while(k < outLen){
                out[k]= 0.0;
                k++;
        }

        for(k=0; k<a->length; k++)
                out[k + center - (a->length-1)/2]+= a->coeff[k];

        for(k=0; k<b->length; k++)
                out[k + center - (b->length-1)/2]+= b->coeff[k];

        return result;
}
2507 c7f822d9 Michael Niedermayer
2508 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding a - b. The result is as long as
 * the longer input; both inputs are centered inside it before b is
 * subtracted from a.
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
        const int outLen= MAX(a->length, b->length);
        const int center= (outLen-1)/2;
        double *out= memalign(sizeof(double), outLen*sizeof(double));
        SwsVector *result= malloc(sizeof(SwsVector));
        int k;

        result->coeff= out;
        result->length= outLen;

        k= 0;
        while(k < outLen){
                out[k]= 0.0;
                k++;
        }

        for(k=0; k<a->length; k++)
                out[k + center - (a->length-1)/2]+= a->coeff[k];

        for(k=0; k<b->length; k++)
                out[k + center - (b->length-1)/2]-= b->coeff[k];

        return result;
}
2524
2525
/* shift left / or right if "shift" is negative */
2526 d4e24275 Michael Niedermayer
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2527 c7f822d9 Michael Niedermayer
        int length= a->length + ABS(shift)*2;
2528
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2529 ff7ba856 Michael Niedermayer
        int i;
2530 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2531
2532
        vec->coeff= coeff;
2533
        vec->length= length;
2534
2535
        for(i=0; i<length; i++) coeff[i]= 0.0;
2536
2537
        for(i=0; i<a->length; i++)
2538
        {
2539
                coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2540
        }
2541
2542
        return vec;
2543
}
2544
2545 d4e24275 Michael Niedermayer
/**
 * Shifts a in place: its coefficient buffer and length are replaced by
 * those of sws_getShiftedVec(a, shift).
 */
void sws_shiftVec(SwsVector *a, int shift){
        SwsVector *tmp= sws_getShiftedVec(a, shift);
        double *oldCoeff= a->coeff;

        /* steal the buffer of the temporary, then drop its empty shell */
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
        free(oldCoeff);
}
2552
2553 d4e24275 Michael Niedermayer
/**
 * Adds b to a in place: a's coefficient buffer and length are replaced by
 * those of sws_sumVec(a, b).
 */
void sws_addVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_sumVec(a, b);
        double *oldCoeff= a->coeff;

        /* steal the buffer of the temporary, then drop its empty shell */
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
        free(oldCoeff);
}
2560
2561 d4e24275 Michael Niedermayer
/**
 * Subtracts b from a in place: a's coefficient buffer and length are
 * replaced by those of sws_diffVec(a, b).
 */
void sws_subVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_diffVec(a, b);
        double *oldCoeff= a->coeff;

        /* steal the buffer of the temporary, then drop its empty shell */
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
        free(oldCoeff);
}
2568
2569 d4e24275 Michael Niedermayer
/**
 * Convolves a with b in place: a's coefficient buffer and length are
 * replaced by those of sws_getConvVec(a, b).
 */
void sws_convVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_getConvVec(a, b);
        double *oldCoeff= a->coeff;

        /* steal the buffer of the temporary, then drop its empty shell */
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
        free(oldCoeff);
}
2576
2577 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated copy of a (same length, same coefficients).
 *
 * @return the copy (release it with sws_freeVec()),
 *         or NULL if an allocation fails
 */
SwsVector *sws_cloneVec(SwsVector *a){
        double *coeff= memalign(sizeof(double), a->length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        /* a missing coefficient buffer is only fatal if there is something to copy */
        if(!vec || (a->length > 0 && !coeff)){
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= a->length;

        for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];

        return vec;
}
2589
2590 d4e24275 Michael Niedermayer
/**
 * Prints a as a crude bar chart through MSG_DBG2: one line per coefficient,
 * showing its value followed by a '|' whose column reflects where the value
 * lies between the smallest and largest coefficient (both clamped to include
 * 0), on a scale of 60 columns.
 */
void sws_printVec(SwsVector *a){
        int i;
        double max=0;
        double min=0;
        double range;

        for(i=0; i<a->length; i++)
                if(a->coeff[i]>max) max= a->coeff[i];

        for(i=0; i<a->length; i++)
                if(a->coeff[i]<min) min= a->coeff[i];

        range= max - min;

        for(i=0; i<a->length; i++)
        {
                /* range is 0 when every coefficient is 0; dividing would give
                   NaN, and converting NaN to int is undefined behaviour */
                int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
                MSG_DBG2("%1.3f ", a->coeff[i]);
                for(;x>0; x--) MSG_DBG2(" ");
                MSG_DBG2("|\n");
        }
}
2612
2613 d4e24275 Michael Niedermayer
/**
 * Releases a vector and its coefficient buffer. Passing NULL is allowed
 * and does nothing.
 */
void sws_freeVec(SwsVector *a){
        if(a){
                free(a->coeff); /* free(NULL) is a no-op, no guard needed */
                a->coeff= NULL;
                a->length= 0;
                free(a);
        }
}
2620
2621 e21206a8 Michael Niedermayer
/**
 * Releases a filter together with its four kernels (lumH, lumV, chrH,
 * chrV). Passing NULL is allowed and does nothing.
 */
void sws_freeFilter(SwsFilter *filter){
        SwsVector *kernels[4];
        int k;

        if(!filter) return;

        kernels[0]= filter->lumH;
        kernels[1]= filter->lumV;
        kernels[2]= filter->chrH;
        kernels[3]= filter->chrV;

        for(k=0; k<4; k++)
                if(kernels[k]) sws_freeVec(kernels[k]);

        free(filter);
}
2630
2631
2632 d4e24275 Michael Niedermayer
void sws_freeContext(SwsContext *c){
2633 c7f822d9 Michael Niedermayer