Statistics
| Branch: | Revision:

ffmpeg / postproc / swscale.c @ 6d606c4f

History | View | Annotate | Download (72.3 KB)

1 fe8054c0 Michael Niedermayer
/*
2 5427e242 Michael Niedermayer
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 fe8054c0 Michael Niedermayer

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8 31190492 Arpi

9 fe8054c0 Michael Niedermayer
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13 31190492 Arpi

14 fe8054c0 Michael Niedermayer
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18 783e9cc9 Michael Niedermayer
19 28bf81c9 Michael Niedermayer
/*
20 7322a67c Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
21 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
22 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
23 a861d4d7 Michael Niedermayer
  
24 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
25
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
26
  x -> x
27
  YUV9 -> YV12
28
  YUV9/YV12 -> Y800
29
  Y800 -> YUV9/YV12
30 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
31
  BGR32 -> BGR24 & RGB32 -> RGB24
32 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
33 b935781b Michael Niedermayer
*/
34
35
/* 
36 e09d12f4 Michael Niedermayer
tested special converters (most are actually tested, but I didn't write it down ...)
37
 YV12 -> BGR16
38 b935781b Michael Niedermayer
 YV12 -> YV12
39 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
40 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
41 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
42 b935781b Michael Niedermayer

43
untested special converters
44 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> BGR15/BGR24/BGR32 (its the yuv2rgb stuff, so it should be ok)
45
  YV12/I420 -> YV12/I420
46
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
47 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
48
  BGR32 -> BGR24 & RGB32 -> RGB24
49 ec22603f Michael Niedermayer
  BGR24 -> YV12
50 28bf81c9 Michael Niedermayer
*/
51
52 d3f41512 Michael Niedermayer
#include <inttypes.h>
53 dda87e9f Pierre Lombard
#include <string.h>
54 077ea8a7 Michael Niedermayer
#include <math.h>
55 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
56 d3f41512 Michael Niedermayer
#include "../config.h"
57 9b464428 Felix Bünemann
#include "../mangle.h"
58 81b7c056 Michael Niedermayer
#include <assert.h>
59 c1b0bfb4 Michael Niedermayer
#ifdef HAVE_MALLOC_H
60
#include <malloc.h>
61 b6acbc3c Björn Sandell
#else
62
#include <stdlib.h>
63 c1b0bfb4 Michael Niedermayer
#endif
64 d604bab9 Michael Niedermayer
#include "swscale.h"
65 5427e242 Michael Niedermayer
#include "swscale_internal.h"
66 7630f2e0 Michael Niedermayer
#include "../cpudetect.h"
67 a861d4d7 Michael Niedermayer
#include "../bswap.h"
68 28bf81c9 Michael Niedermayer
#include "../libvo/img_format.h"
69 37079906 Michael Niedermayer
#include "rgb2rgb.h"
70 b0db4198 Michael Niedermayer
#include "../libvo/fastmemcpy.h"
71 0d9f3d85 Arpi
72 541c4eb9 Michael Niedermayer
/* Discard any MOVNTQ / PAVGB definitions that are already in effect. */
#undef MOVNTQ
#undef PAVGB

/* Debugging switches: uncomment one to override the build configuration. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/*
 * ASSERT() evaluates its argument only in MP_DEBUG builds.  Both variants
 * carry their own trailing semicolon, so the macro expands to a complete
 * statement (or an empty statement) by itself.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: take the math library's M_PI when it exists, else a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
97 c1b0bfb4 Michael Niedermayer
98 6c7506de Michael Niedermayer
/*
 * Pixel-format classification helpers.  Each macro yields non-zero when the
 * IMGFMT_* code x belongs to the named group.  x may be evaluated several
 * times, so pass an expression without side effects.
 */
//FIXME replace this with something faster
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
#define isGray(x)      ((x)==IMGFMT_Y800)
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
                        || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
                        || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
                        || isRGB(x) || isBGR(x)\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))
115 6ff0ad6b Michael Niedermayer
116
/*
 * Fixed-point colour conversion weights, scaled by 2^RGB2YUV_SHIFT.
 * Presumably the first letter names the source channel (B, G, R) and the
 * second the produced component (Y, U, V) -- confirm against the users.
 *
 * NOTE(review): "+0.5" followed by the truncating (int) cast rounds to
 * nearest only for the positive weights; the negative ones land one step
 * closer to zero.  The values are deliberately left as they were.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
126 6c7506de Michael Niedermayer
127 0481412a Michael Niedermayer
extern const int32_t Inverse_Table_6_9[8][4];
128
129 783e9cc9 Michael Niedermayer
/*
130
NOTES
131 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
132 31190492 Arpi

133 783e9cc9 Michael Niedermayer
TODO
134 d604bab9 Michael Niedermayer
more intelligent misalignment avoidance for the horizontal scaler
135 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
136
Optimize C code (yv12 / minmax)
137 ff7ba856 Michael Niedermayer
add support for packed pixel yuv input & output
138 6ff0ad6b Michael Niedermayer
add support for Y8 output
139
optimize bgr24 & bgr32
140 ff7ba856 Michael Niedermayer
add BGR4 output support
141 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
142 783e9cc9 Michael Niedermayer
*/
143 31190492 Arpi
144 d604bab9 Michael Niedermayer
/* Generic helpers; every argument may be evaluated more than once. */
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
147 d604bab9 Michael Niedermayer
148 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants, compiled for x86 / x86-64 only.
 * Presumably they are operands of the MMX inline assembly elsewhere in the
 * file (nothing in the C code of this section reads them except the
 * warning-killer function) -- confirm against the assembly templates.
 */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* Dither values; not initialised here (zero at start-up as static storage). */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* Per-channel bit masks replicated over four 16-bit lanes (16 bpp and 15 bpp layouts). */
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* Byte-select masks with one byte set out of every three. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;

/* FAST_BGR2YV12 selects the low-precision coefficient set (see its #define). */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff  attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif
195 783e9cc9 Michael Niedermayer
196
// clipping helper table for C implementations:
197
static unsigned char clip_table[768];
198
199 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
200
                  
201 cf7d1c1a Michael Niedermayer
extern const uint8_t dither_2x2_4[2][8];
202
extern const uint8_t dither_2x2_8[2][8];
203
extern const uint8_t dither_8x8_32[8][8];
204
extern const uint8_t dither_8x8_73[8][8];
205
extern const uint8_t dither_8x8_220[8][8];
206 5cebb24b Michael Niedermayer
207 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
208 96034638 Michael Niedermayer
void in_asm_used_var_warning_killer()
209
{
210 20380eb8 Michael Niedermayer
 volatile int i= bF8+bFC+w10+
211 5ac80202 Michael Niedermayer
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
212 6ff0ad6b Michael Niedermayer
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
213 96034638 Michael Niedermayer
 if(i) i=0;
214
}
215
#endif
216 d604bab9 Michael Niedermayer
217 5859233b Michael Niedermayer
/**
 * Vertical filter, plain C version, writing separate Y, U and V lines.
 *
 * Each output sample is the sum over all filter taps of source sample times
 * coefficient, plus 1<<18 for rounding, shifted right by 19 bits and clamped
 * to 0..255.
 *
 * @param lumFilter      lumFilterSize luma coefficients
 * @param lumSrc         lumFilterSize pointers to luma source lines (>= dstW samples each)
 * @param lumFilterSize  number of luma taps
 * @param chrFilter      chrFilterSize chroma coefficients
 * @param chrSrc         chrFilterSize pointers to chroma source lines; U is read
 *                       at index i and V at index i + 2048 of the same line
 * @param chrFilterSize  number of chroma taps
 * @param dest           luma output, dstW bytes
 * @param uDest          U output, chrDstW bytes; when NULL no chroma is written
 * @param vDest          V output, chrDstW bytes; must be valid whenever uDest is non-NULL
 * @param dstW           number of luma samples to produce
 * @param chrDstW        number of chroma samples to produce per plane
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
        /* distance, in samples, from a U sample to its V sample inside a chroma line */
        const int vOffset= 2048;
        int i;

        //FIXME optimize (written quickly, not tuned)
        for(i=0; i<dstW; i++)
        {
                int val=1<<18;
                int j;
                for(j=0; j<lumFilterSize; j++)
                        val += lumSrc[j][i] * lumFilter[j];

                val>>=19;
                if(val<0)        val=0;
                else if(val>255) val=255;
                dest[i]= val;
        }

        if(uDest == NULL)
                return;

        for(i=0; i<chrDstW; i++)
        {
                int u=1<<18;
                int v=1<<18;
                int j;
                for(j=0; j<chrFilterSize; j++)
                {
                        u += chrSrc[j][i] * chrFilter[j];
                        v += chrSrc[j][i + vOffset] * chrFilter[j];
                }

                u>>=19;
                v>>=19;
                if(u<0)        u=0;
                else if(u>255) u=255;
                if(v<0)        v=0;
                else if(v>255) v=255;
                uDest[i]= u;
                vDest[i]= v;
        }
}
249
250 46de8b73 Michael Niedermayer
251 25593e29 Michael Niedermayer
/*
 * Opens a loop over pixel pairs (i = 0 .. dstW/2 - 1) and computes, for each
 * pair, the vertically filtered values Y1, Y2 (luma of pixels 2*i and 2*i+1),
 * U and V, each rounded, shifted right by 19 bits and clamped to 0..255.
 * It also declares `type *r, *b, *g` and `i2 = 2*i` for the caller's use.
 *
 * The loop body is left OPEN: the user of the macro supplies the remaining
 * statements and the closing brace.
 *
 * Names expected in scope: i, dstW, lumSrc, lumFilter, lumFilterSize,
 * chrSrc, chrFilter, chrFilterSize.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
                for(i=0; i<(dstW>>1); i++){\
                        int j;\
                        int Y1=1<<18;\
                        int Y2=1<<18;\
                        int U=1<<18;\
                        int V=1<<18;\
                        type *r, *b, *g;\
                        const int i2= 2*i;\
                        \
                        for(j=0; j<lumFilterSize; j++)\
                        {\
                                Y1 += lumSrc[j][i2] * lumFilter[j];\
                                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
                        }\
                        for(j=0; j<chrFilterSize; j++)\
                        {\
                                U += chrSrc[j][i] * chrFilter[j];\
                                V += chrSrc[j][i+2048] * chrFilter[j];\
                        }\
                        Y1>>=19;\
                        Y2>>=19;\
                        U >>=19;\
                        V >>=19;\
                        /* any bit above the low eight set => some value lies outside 0..255 */\
                        /* (testing bit 8 alone would let 512..767 through unclamped)        */\
                        if((Y1|Y2|U|V)&~255)\
                        {\
                                if(Y1>255)   Y1=255;\
                                else if(Y1<0)Y1=0;\
                                if(Y2>255)   Y2=255;\
                                else if(Y2<0)Y2=0;\
                                if(U>255)    U=255;\
                                else if(U<0) U=0;\
                                if(V>255)    V=255;\
                                else if(V<0) V=0;\
                        }
286
                        
287
/*
 * YSCALE_YUV_2_PACKEDX_C plus the colour-table lookups: after the filtered
 * Y1/Y2/U/V are available, r, g and b are pointed at the table rows selected
 * by V and U in the context c.  The loop body stays open for the caller.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
                        YSCALE_YUV_2_PACKEDX_C(type)\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
292
293 25593e29 Michael Niedermayer
/*
 * Opens a loop over pixel pairs and blends two source lines: luma from
 * buf0/buf1 weighted by yalpha1/yalpha, chroma from uvbuf0/uvbuf1 weighted by
 * uvalpha1/uvalpha (V is read 2048 samples after U); each sum is shifted
 * right by 19 bits.  No clamping is done.  The loop body is left open for
 * the caller, which must also supply the closing brace.
 */
#define YSCALE_YUV_2_PACKED2_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
                        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
                        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
                        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
300 46de8b73 Michael Niedermayer
301
/*
 * YSCALE_YUV_2_PACKED2_C plus the colour-table lookups: declares r, g, b of
 * the given element type and points them at the table rows selected by V and
 * U in the context c.  The loop body stays open for the caller.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
                        YSCALE_YUV_2_PACKED2_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
307
308 25593e29 Michael Niedermayer
/*
 * Opens a loop over pixel pairs without any vertical blending: luma comes
 * from buf0 and chroma from uvbuf1 alone (V is read 2048 samples after U),
 * each shifted right by 7 bits.  The loop body is left open for the caller,
 * which must also supply the closing brace.
 */
#define YSCALE_YUV_2_PACKED1_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf1[i     ])>>7;\
                        int V= (uvbuf1[i+2048])>>7;
315 46de8b73 Michael Niedermayer
316
/*
 * YSCALE_YUV_2_PACKED1_C plus the colour-table lookups: declares r, g, b of
 * the given element type and points them at the table rows selected by V and
 * U in the context c.  The loop body stays open for the caller.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
                        YSCALE_YUV_2_PACKED1_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
322
323 25593e29 Michael Niedermayer
/*
 * Like YSCALE_YUV_2_PACKED1_C for luma (buf0 >> 7), but chroma is the sum of
 * the uvbuf0 and uvbuf1 samples shifted right by 8 bits, i.e. their average
 * at the same scale.  V is read 2048 samples after U.  The loop body is left
 * open for the caller, which must also supply the closing brace.
 */
#define YSCALE_YUV_2_PACKED1B_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
                        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;
330 46de8b73 Michael Niedermayer
331
/*
 * YSCALE_YUV_2_PACKED1B_C plus the colour-table lookups: declares r, g, b of
 * the given element type and points them at the table rows selected by V and
 * U in the context c.  The loop body stays open for the caller.
 */
#define YSCALE_YUV_2_RGB1B_C(type) \
                        YSCALE_YUV_2_PACKED1B_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
337
338 46de8b73 Michael Niedermayer
/*
 * Expands to a switch over c->dstFormat that writes one output line to dest.
 *
 *   func(type)  must open the per-pixel-pair loop and provide i2, Y1, Y2 and
 *               the table pointers r, g, b with the given element type
 *               (the YSCALE_YUV_2_RGB*_C macros do this);
 *   func2       must open the loop and provide i2, Y1, Y2, U and V
 *               (the YSCALE_YUV_2_PACKED*_C macros); used for YUY2 / UYVY.
 *
 * Every case closes the loop opened by func / func2.
 * Names expected in scope besides c and dest: i, y (selects the dither row)
 * and dstW.  The 24 bpp cases advance dest by 6 bytes per pixel pair.
 * The 1 bpp case does not use func: it always blends buf0 and buf1 with
 * yalpha1 / yalpha, packs eight pixels into one byte and advances dest.
 * Formats not listed are ignored (there is no default label).
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
        switch(c->dstFormat)\
        {\
        case IMGFMT_BGR32:\
        case IMGFMT_RGB32:\
                func(uint32_t)\
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }                \
                break;\
        case IMGFMT_RGB24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= r[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= b[Y1];\
                        ((uint8_t*)dest)[3]= r[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= b[Y2];\
                        dest+=6;\
                }\
                break;\
        case IMGFMT_BGR24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= b[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= r[Y1];\
                        ((uint8_t*)dest)[3]= b[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= r[Y2];\
                        dest+=6;\
                }\
                break;\
        case IMGFMT_RGB16:\
        case IMGFMT_BGR16:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_4[y&1    ][0];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_4[y&1    ][1];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB15:\
        case IMGFMT_BGR15:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_8[y&1    ][1];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_8[y&1    ][0];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB8:\
        case IMGFMT_BGR8:\
                {\
                        const uint8_t * const d64= dither_8x8_73[y&7];\
                        const uint8_t * const d32= dither_8x8_32[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB4:\
        case IMGFMT_BGR4:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
                        }\
                }\
                break;\
        case IMGFMT_RG4B:\
        case IMGFMT_BG4B:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB1:\
        case IMGFMT_BGR1:\
                {\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];\
                        for(i=0; i<dstW-7; i+=8){\
                                int acc;\
                                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                                ((uint8_t*)dest)[0]= acc;\
                                dest++;\
                        }\
                }\
                break;\
        case IMGFMT_YUY2:\
                func2\
                        ((uint8_t*)dest)[2*i2+0]= Y1;\
                        ((uint8_t*)dest)[2*i2+1]= U;\
                        ((uint8_t*)dest)[2*i2+2]= Y2;\
                        ((uint8_t*)dest)[2*i2+3]= V;\
                }                \
                break;\
        case IMGFMT_UYVY:\
                func2\
                        ((uint8_t*)dest)[2*i2+0]= U;\
                        ((uint8_t*)dest)[2*i2+1]= Y1;\
                        ((uint8_t*)dest)[2*i2+2]= V;\
                        ((uint8_t*)dest)[2*i2+3]= Y2;\
                }                \
                break;\
        }
515
516
517 25593e29 Michael Niedermayer
/**
 * Plain C output stage: writes one line of packed pixels to dest, in the
 * layout selected by c->dstFormat.
 *
 * The RGB1/BGR1 case filters the luma taps itself in the loop written out
 * below. Every other case starts with YSCALE_YUV_2_RGBX_C or
 * YSCALE_YUV_2_PACKEDX_C, both defined outside this function.
 * NOTE(review): those macros presumably open the per-pixel-pair loop and
 * provide Y1/Y2/U/V, the r/g/b lookup tables and the index i2 used by the
 * case bodies (each case closes a brace it did not open) -- confirm against
 * the macro definitions.
 *
 * @param c             scaler context; dstFormat, table_gU and table_gV are read here
 * @param lumFilter     luma filter coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize luma input lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma filter coefficients (not referenced directly in
 *                      this body; presumably consumed by the macros)
 * @param chrSrc        chroma input lines (same remark as chrFilter)
 * @param chrFilterSize number of chroma taps (same remark as chrFilter)
 * @param dest          output line
 * @param dstW          output width in pixels
 * @param y             output line number; its low bits select the dither row
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
518 e3d2500f Michael Niedermayer
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
519 cf7d1c1a Michael Niedermayer
                                    uint8_t *dest, int dstW, int y)
520 e3d2500f Michael Niedermayer
{
521 cf7d1c1a Michael Niedermayer
        int i;
522
        switch(c->dstFormat)
523 e3d2500f Michael Niedermayer
        {
524 cf7d1c1a Michael Niedermayer
        // 32 bit: one word per pixel, built by adding the three table lookups
        case IMGFMT_RGB32:
525
        case IMGFMT_BGR32:
526
                YSCALE_YUV_2_RGBX_C(uint32_t)
527
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
528
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
529 e3d2500f Michael Niedermayer
                }
530 cf7d1c1a Michael Niedermayer
                break;
531
        // 24 bit: bytes stored in r,g,b order; dest advances 6 bytes per pixel pair
        case IMGFMT_RGB24:
532
                YSCALE_YUV_2_RGBX_C(uint8_t)
533
                        ((uint8_t*)dest)[0]= r[Y1];
534
                        ((uint8_t*)dest)[1]= g[Y1];
535
                        ((uint8_t*)dest)[2]= b[Y1];
536
                        ((uint8_t*)dest)[3]= r[Y2];
537
                        ((uint8_t*)dest)[4]= g[Y2];
538
                        ((uint8_t*)dest)[5]= b[Y2];
539 ae4cffd9 D Richard Felker III
                        dest+=6;
540 cf7d1c1a Michael Niedermayer
                }
541
                break;
542
        // 24 bit: same as above but bytes stored in b,g,r order
        case IMGFMT_BGR24:
543
                YSCALE_YUV_2_RGBX_C(uint8_t)
544
                        ((uint8_t*)dest)[0]= b[Y1];
545
                        ((uint8_t*)dest)[1]= g[Y1];
546
                        ((uint8_t*)dest)[2]= r[Y1];
547
                        ((uint8_t*)dest)[3]= b[Y2];
548
                        ((uint8_t*)dest)[4]= g[Y2];
549
                        ((uint8_t*)dest)[5]= r[Y2];
550 ae4cffd9 D Richard Felker III
                        dest+=6;
551 cf7d1c1a Michael Niedermayer
                }
552
                break;
553
        // 16 bit: 2x2 ordered dither; the offsets are added to the table index.
        // The row is picked by y&1 (blue uses the opposite row), the column by
        // whether the pixel is the first or second of the pair.
        case IMGFMT_RGB16:
554
        case IMGFMT_BGR16:
555
                {
556
                        const int dr1= dither_2x2_8[y&1    ][0];
557
                        const int dg1= dither_2x2_4[y&1    ][0];
558
                        const int db1= dither_2x2_8[(y&1)^1][0];
559
                        const int dr2= dither_2x2_8[y&1    ][1];
560
                        const int dg2= dither_2x2_4[y&1    ][1];
561
                        const int db2= dither_2x2_8[(y&1)^1][1];
562
                        YSCALE_YUV_2_RGBX_C(uint16_t)
563
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
564
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
565 e3d2500f Michael Niedermayer
                        }
566
                }
567 cf7d1c1a Michael Niedermayer
                break;
568
        // 15 bit: like the 16 bit case, but all three channels use dither_2x2_8
        // and green takes the column opposite to red
        case IMGFMT_RGB15:
569
        case IMGFMT_BGR15:
570
                {
571
                        const int dr1= dither_2x2_8[y&1    ][0];
572
                        const int dg1= dither_2x2_8[y&1    ][1];
573
                        const int db1= dither_2x2_8[(y&1)^1][0];
574
                        const int dr2= dither_2x2_8[y&1    ][1];
575
                        const int dg2= dither_2x2_8[y&1    ][0];
576
                        const int db2= dither_2x2_8[(y&1)^1][1];
577
                        YSCALE_YUV_2_RGBX_C(uint16_t)
578
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
579
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
580 e3d2500f Michael Niedermayer
                        }
581 cf7d1c1a Michael Niedermayer
                }
582
                break;
583
        // 8 bit: 8x8 ordered dither, row chosen by y&7, column by pixel index &7
        case IMGFMT_RGB8:
584
        case IMGFMT_BGR8:
585
                {
586
                        const uint8_t * const d64= dither_8x8_73[y&7];
587
                        const uint8_t * const d32= dither_8x8_32[y&7];
588
                        YSCALE_YUV_2_RGBX_C(uint8_t)
589
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
590
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
591 e3d2500f Michael Niedermayer
                        }
592
                }
593 cf7d1c1a Michael Niedermayer
                break;
594
        // 4 bit packed: two pixels share one byte, the second pixel is shifted
        // into the upper nibble; the byte is indexed by i, not i2
        case IMGFMT_RGB4:
595
        case IMGFMT_BGR4:
596
                {
597
                        const uint8_t * const d64= dither_8x8_73 [y&7];
598
                        const uint8_t * const d128=dither_8x8_220[y&7];
599
                        YSCALE_YUV_2_RGBX_C(uint8_t)
600 799fd467 Michael Niedermayer
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
601 f17457ac Michael Niedermayer
                                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
602
                        }
603
                }
604
                break;
605
        // 4 bit, one pixel per byte: same lookups as above without the packing
        case IMGFMT_RG4B:
606
        case IMGFMT_BG4B:
607
                {
608
                        const uint8_t * const d64= dither_8x8_73 [y&7];
609
                        const uint8_t * const d128=dither_8x8_220[y&7];
610
                        YSCALE_YUV_2_RGBX_C(uint8_t)
611 cf7d1c1a Michael Niedermayer
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
612
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
613 e3d2500f Michael Niedermayer
                        }
614 cf7d1c1a Michael Niedermayer
                }
615
                break;
616
        // 1 bit: only luma is filtered; 8 dithered pixels are packed per output byte
        case IMGFMT_RGB1:
617
        case IMGFMT_BGR1:
618
                {
619
                        const uint8_t * const d128=dither_8x8_220[y&7];
620
                        // lookup table taken from the table_gU / table_gV entries at index 128
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];
621
                        int acc=0;
622
                        for(i=0; i<dstW-1; i+=2){
623
                                int j;
624 379a2036 Michael Niedermayer
                                // 1<<18 is the rounding term for the >>19 below
                                int Y1=1<<18;
625
                                int Y2=1<<18;
626 cf7d1c1a Michael Niedermayer
627
                                for(j=0; j<lumFilterSize; j++)
628
                                {
629
                                        Y1 += lumSrc[j][i] * lumFilter[j];
630
                                        Y2 += lumSrc[j][i+1] * lumFilter[j];
631
                                }
632
                                Y1>>=19;
633
                                Y2>>=19;
634
                                // bit 8 set in either value: at least one of them left 0..255, so clamp both
                                if((Y1|Y2)&256)
635
                                {
636
                                        if(Y1>255)   Y1=255;
637
                                        else if(Y1<0)Y1=0;
638
                                        if(Y2>255)   Y2=255;
639
                                        else if(Y2<0)Y2=0;
640
                                }
641
                                // acc+= acc + x shifts acc left by one and appends x
                                acc+= acc + g[Y1+d128[(i+0)&7]];
642
                                acc+= acc + g[Y2+d128[(i+1)&7]];
643
                                // i advances by 2, so (i&7)==6 marks the 8th pixel of a byte;
                                // the store keeps only the low 8 bits of acc
                                if((i&7)==6){
644
                                        ((uint8_t*)dest)[0]= acc;
645 ae4cffd9 D Richard Felker III
                                        dest++;
646 cf7d1c1a Michael Niedermayer
                                }
647 e3d2500f Michael Niedermayer
                        }
648
                }
649 cf7d1c1a Michael Niedermayer
                break;
650 46de8b73 Michael Niedermayer
        // packed YUV 4:2:2, byte order Y1 U Y2 V
        case IMGFMT_YUY2:
651 25593e29 Michael Niedermayer
                YSCALE_YUV_2_PACKEDX_C(void)
652 46de8b73 Michael Niedermayer
                        ((uint8_t*)dest)[2*i2+0]= Y1;
653
                        ((uint8_t*)dest)[2*i2+1]= U;
654
                        ((uint8_t*)dest)[2*i2+2]= Y2;
655
                        ((uint8_t*)dest)[2*i2+3]= V;
656
                }
657
                break;
658 caeaabe7 Alex Beregszaszi
        // packed YUV 4:2:2, byte order U Y1 V Y2
        case IMGFMT_UYVY:
659
                YSCALE_YUV_2_PACKEDX_C(void)
660
                        ((uint8_t*)dest)[2*i2+0]= U;
661
                        ((uint8_t*)dest)[2*i2+1]= Y1;
662
                        ((uint8_t*)dest)[2*i2+2]= V;
663
                        ((uint8_t*)dest)[2*i2+3]= Y2;
664
                }
665
                break;
666 e3d2500f Michael Niedermayer
        }
667
}
668
669
670 7630f2e0 Michael Niedermayer
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there's no 3DNOW+MMX2 one
// This section decides which variants get built (COMPILE_*) and then includes
// swscale_template.c once per variant, each time with a different RENAME()
// suffix and a different set of HAVE_* macros.
671
//Plain C versions
672 726a959a Michael Niedermayer
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
673
#define COMPILE_C
674
#endif
675
676 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
677
#ifdef HAVE_ALTIVEC
678
#define COMPILE_ALTIVEC
679
#endif //HAVE_ALTIVEC
680
#endif //ARCH_POWERPC
681
682 6e1c66bc Aurelien Jacobs
// On x86: without RUNTIME_CPUDETECT at most one of MMX / MMX2 / 3DNOW is selected
// (MMX2 takes precedence over 3DNOW); with RUNTIME_CPUDETECT all three are built.
#if defined(ARCH_X86) || defined(ARCH_X86_64)
683 726a959a Michael Niedermayer
684
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
685
#define COMPILE_MMX
686
#endif
687
688
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
689
#define COMPILE_MMX2
690
#endif
691
692
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
693
#define COMPILE_3DNOW
694
#endif
695 6e1c66bc Aurelien Jacobs
#endif //ARCH_X86 || ARCH_X86_64
696 726a959a Michael Niedermayer
697
// From here on HAVE_MMX / HAVE_MMX2 / HAVE_3DNOW no longer reflect the build
// configuration; they are redefined per variant before each template include.
#undef HAVE_MMX
698
#undef HAVE_MMX2
699
#undef HAVE_3DNOW
700
701
// C variant: all SIMD macros off, functions get the _C suffix
#ifdef COMPILE_C
702 7630f2e0 Michael Niedermayer
#undef HAVE_MMX
703
#undef HAVE_MMX2
704
#undef HAVE_3DNOW
705 a2faa401 Romain Dolbeau
#undef HAVE_ALTIVEC
706 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _C
707
#include "swscale_template.c"
708 726a959a Michael Niedermayer
#endif
709 397c035e Michael Niedermayer
710 a2faa401 Romain Dolbeau
// AltiVec variant: functions get the _altivec suffix
#ifdef ARCH_POWERPC
711
#ifdef COMPILE_ALTIVEC
712
#undef RENAME
713
#define HAVE_ALTIVEC
714
#define RENAME(a) a ## _altivec
715
#include "swscale_template.c"
716
#endif
717
#endif //ARCH_POWERPC
718
719 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
720 397c035e Michael Niedermayer
721 7630f2e0 Michael Niedermayer
//X86 versions
// (the plain X86 variant below is disabled by being commented out)
722
/*
723
#undef RENAME
724
#undef HAVE_MMX
725
#undef HAVE_MMX2
726
#undef HAVE_3DNOW
727
#define ARCH_X86
728
#define RENAME(a) a ## _X86
729
#include "swscale_template.c"
730 1faf0867 Michael Niedermayer
*/
731 7630f2e0 Michael Niedermayer
//MMX versions
732 726a959a Michael Niedermayer
#ifdef COMPILE_MMX
733 7630f2e0 Michael Niedermayer
#undef RENAME
734
#define HAVE_MMX
735
#undef HAVE_MMX2
736
#undef HAVE_3DNOW
737
#define RENAME(a) a ## _MMX
738
#include "swscale_template.c"
739 726a959a Michael Niedermayer
#endif
740 7630f2e0 Michael Niedermayer
741
//MMX2 versions
// (HAVE_MMX stays defined together with HAVE_MMX2)
742 726a959a Michael Niedermayer
#ifdef COMPILE_MMX2
743 7630f2e0 Michael Niedermayer
#undef RENAME
744
#define HAVE_MMX
745
#define HAVE_MMX2
746
#undef HAVE_3DNOW
747
#define RENAME(a) a ## _MMX2
748
#include "swscale_template.c"
749 726a959a Michael Niedermayer
#endif
750 7630f2e0 Michael Niedermayer
751
//3DNOW versions
// (HAVE_MMX stays defined together with HAVE_3DNOW)
752 726a959a Michael Niedermayer
#ifdef COMPILE_3DNOW
753 7630f2e0 Michael Niedermayer
#undef RENAME
754
#define HAVE_MMX
755
#undef HAVE_MMX2
756
#define HAVE_3DNOW
757
#define RENAME(a) a ## _3DNow
758
#include "swscale_template.c"
759 726a959a Michael Niedermayer
#endif
760 7630f2e0 Michael Niedermayer
761 6e1c66bc Aurelien Jacobs
#endif //ARCH_X86 || ARCH_X86_64
762 7630f2e0 Michael Niedermayer
763 77a416e8 Gabucino
// minor note: the HAVE_xyz macros are left in whatever state the last included
// variant set them to, so don't rely on them below this line
764 d604bab9 Michael Niedermayer
765 a86c461c Michael Niedermayer
/**
 * Evaluates a piecewise cubic at distance dist from its origin.
 *
 * The first piece, valid for dist <= 1, is a + b*t + c*t^2 + d*t^3 with t = dist.
 * Each following unit interval uses a new cubic in t = dist - k whose
 * coefficients are derived from the previous piece:
 *   a' = 0
 *   b' =  b + 2c + 3d   (first derivative of the previous piece at t = 1)
 *   c' =      c + 3d    (half its second derivative at t = 1)
 *   d' = -b - 3c - 6d
 *
 * The pieces are walked in a loop rather than by recursion, so stack use does
 * not depend on dist, and a NaN dist falls straight through to the final
 * expression (yielding NaN) instead of recursing without end.
 *
 * @param a,b,c,d coefficients of the first piece
 * @param dist    distance at which to evaluate
 * @return        value of the piece that contains dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
        while(dist > 1.0)
        {
                const double nextB=  b + 2.0*c + 3.0*d;
                const double nextC=      c + 3.0*d;
                const double nextD= -b - 3.0*c - 6.0*d;

                a= 0.0;
                b= nextB;
                c= nextC;
                d= nextD;
                dist-= 1.0;
        }

        return ((d*dist + c)*dist + b)*dist + a;
}
775 6c7506de Michael Niedermayer
776 c7f822d9 Michael Niedermayer
/**
 * Builds the fixed-point filter used to scale one dimension from srcW to dstW
 * samples.
 *
 * Stages:
 *  1. compute a floating point filter (filterSize taps per output sample) and
 *     the index of the first source sample for each output sample, according
 *     to the scaler selected in flags;
 *  2. convolve every row with srcFilter, if one is given;
 *  3. drop near-zero taps on both sides (cumulative magnitude up to
 *     SWS_MAX_REDUCE_CUTOFF) while keeping the positions monotonic;
 *  4. round the remaining tap count up to a multiple of filterAlign;
 *  5. fold taps that would read outside [0, srcW) onto the border taps;
 *  6. scale every row so that it sums to `one` and store it as int16_t,
 *     carrying the rounding error from tap to tap.
 *
 * @param outFilter     receives a newly allocated array of
 *                      (*outFilterSize)*(dstW+1) coefficients; the extra row
 *                      duplicates the last one
 * @param filterPos     receives a newly allocated array of dstW+1 source
 *                      positions; the extra entry duplicates the last one
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per output sample in 16.16 fixed point
 *                      (0x10000 means unscaled)
 * @param srcW          number of source samples
 * @param dstW          number of output samples
 * @param filterAlign   the tap count is rounded up to a multiple of this value;
 *                      the rounding uses a bit mask, so it has to be a power of two
 * @param one           value the coefficients of each output sample sum to
 * @param flags         SWS_* scaler selection, SWS_CPU_CAPS_* and SWS_PRINT_INFO
 * @param srcFilter     optional vector convolved with every row, may be NULL
 * @param dstFilter     not applied (see the FIXME below); only its length
 *                      enters the intermediate row size
 * @param param         scaler tuning values; SWS_PARAM_DEFAULT selects the
 *                      built-in default
 *
 * *outFilter and *filterPos are allocated with memalign() and not released
 * here. Allocation results are not checked.
 */
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                              int srcW, int dstW, int filterAlign, int one, int flags,
                              SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
        int i;
        int filterSize;
        int filter2Size;
        int minFilterSize;
        double *filter=NULL;
        double *filter2=NULL;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(flags & SWS_CPU_CAPS_MMX)
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

        // Note the +1 is for the MMXscaler which reads over the end
        *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));

        if(ABS(xInc - 0x10000) <10) // unscaled
        {
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
                for(i=0; i<dstW*filterSize; i++) filter[i]=0;

                for(i=0; i<dstW; i++)
                {
                        filter[i*filterSize]=1;
                        (*filterPos)[i]=i;
                }

        }
        else if(flags&SWS_POINT) // lame looking point sampling mode
        {
                int xDstInSrc;
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                // centre of the first output sample in 16.16 source coordinates
                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

                        (*filterPos)[i]= xx;
                        filter[i]= 1.0;
                        xDstInSrc+= xInc;
                }
        }
        else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
        {
                int xDstInSrc;
                if     (flags&SWS_BICUBIC) filterSize= 4;
                else if(flags&SWS_X      ) filterSize= 4;
                else                       filterSize= 2; // SWS_BILINEAR / SWS_AREA
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
                        int j;

                        (*filterPos)[i]= xx;
                        //Bilinear upscale / linear interpolate / Area averaging
                        for(j=0; j<filterSize; j++)
                        {
                                double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                                double coeff= 1.0 - d;
                                if(coeff<0) coeff=0;
                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc;
                }
        }
        else
        {
                double xDstInSrc;
                double sizeFactor, filterSizeInSrc;
                const double xInc1= (double)xInc / (double)(1<<16);

                // support of each kernel, in destination samples
                if     (flags&SWS_BICUBIC)  sizeFactor= 4.0;
                else if(flags&SWS_X)        sizeFactor= 8.0;
                else if(flags&SWS_AREA)     sizeFactor= 1.0; //downscale only, for upscale it is bilinear
                else if(flags&SWS_GAUSS)    sizeFactor= 8.0;   // infinite ;)
                else if(flags&SWS_LANCZOS)  sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
                else if(flags&SWS_SINC)     sizeFactor= 20.0; // infinite ;)
                else if(flags&SWS_SPLINE)   sizeFactor= 20.0;  // infinite ;)
                else if(flags&SWS_BILINEAR) sizeFactor= 2.0;
                else {
                        sizeFactor= 0.0; //GCC warning killer
                        ASSERT(0)
                }

                if(xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale
                else             filterSizeInSrc= sizeFactor*srcW / (double)dstW;

                filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
                if(filterSize > srcW-2) filterSize=srcW-2;
                // srcW <= 2 would leave a zero or negative tap count, which then
                // feeds the allocation size and every loop below
                if(filterSize < 1) filterSize= 1;

                filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc1 / 2.0 - 0.5;
                for(i=0; i<dstW; i++)
                {
                        int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
                        int j;
                        (*filterPos)[i]= xx;
                        for(j=0; j<filterSize; j++)
                        {
                                // distance of tap xx from the sample centre, rescaled to kernel units
                                double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                                double coeff;
                                if(flags & SWS_BICUBIC)
                                {
                                        double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                                        double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                                        if(d<1.0)
                                                coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                                        else if(d<2.0)
                                                coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                                        else
                                                coeff=0.0;
                                }
                                else if(flags & SWS_X)
                                {
                                        double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                                        if(d<1.0)
                                                coeff = cos(d*PI);
                                        else
                                                coeff=-1.0;
                                        if(coeff<0.0) coeff= -pow(-coeff, A);
                                        else          coeff=  pow( coeff, A);
                                        coeff= coeff*0.5 + 0.5;
                                }
                                else if(flags & SWS_AREA)
                                {
                                        double srcPixelSize= 1.0/xInc1;
                                        if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
                                        else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                                        else coeff=0.0;
                                }
                                else if(flags & SWS_GAUSS)
                                {
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                                        coeff = pow(2.0, - p*d*d);
                                }
                                else if(flags & SWS_SINC)
                                {
                                        coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                                }
                                else if(flags & SWS_LANCZOS)
                                {
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                                        coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                                        if(d>p) coeff=0;
                                }
                                else if(flags & SWS_BILINEAR)
                                {
                                        coeff= 1.0 - d;
                                        if(coeff<0) coeff=0;
                                }
                                else if(flags & SWS_SPLINE)
                                {
                                        double p=-2.196152422706632;
                                        coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                                }
                                else {
                                        coeff= 0.0; //GCC warning killer
                                        ASSERT(0)
                                }

                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc1;
                }
        }

        /* apply src & dst Filter to filter -> filter2
           free(filter);
        */
        ASSERT(filterSize>0)
        filter2Size= filterSize;
        if(srcFilter) filter2Size+= srcFilter->length - 1;
        if(dstFilter) filter2Size+= dstFilter->length - 1;
        ASSERT(filter2Size>0)
        filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));

        for(i=0; i<dstW; i++)
        {
                int j;
                SwsVector scaleFilter;
                SwsVector *outVec;

                // view of row i of filter as a vector, no copy
                scaleFilter.coeff= filter + i*filterSize;
                scaleFilter.length= filterSize;

                if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
                else          outVec= &scaleFilter;

                // NOTE(review): dstFilter's length is added to filter2Size above
                // but dstFilter is never convolved in, so this only holds when
                // dstFilter is NULL or has length 1 -- confirm with callers
                ASSERT(outVec->length == filter2Size)
                //FIXME dstFilter

                for(j=0; j<outVec->length; j++)
                {
                        filter2[i*filter2Size + j]= outVec->coeff[j];
                }

                // keep the row centred after it grew from filterSize to filter2Size taps
                (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

                if(outVec != &scaleFilter) sws_freeVec(outVec);
        }
        free(filter); filter=NULL;

        /* try to reduce the filter-size (step1 find size and shift left) */
        // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
        minFilterSize= 0;
        for(i=dstW-1; i>=0; i--)
        {
                int min= filter2Size;
                int j;
                double cutOff=0.0;

                /* get rid off near zero elements on the left by shifting left */
                for(j=0; j<filter2Size; j++)
                {
                        int k;
                        // always element 0: the row is shifted left on every iteration
                        cutOff += ABS(filter2[i*filter2Size]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;

                        /* preserve Monotonicity because the core can't handle the filter otherwise */
                        if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

                        // Move filter coeffs left
                        for(k=1; k<filter2Size; k++)
                                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
                        filter2[i*filter2Size + k - 1]= 0.0;
                        (*filterPos)[i]++;
                }

                cutOff=0.0;
                /* count near zeros on the right */
                for(j=filter2Size-1; j>0; j--)
                {
                        cutOff += ABS(filter2[i*filter2Size + j]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
                        min--;
                }

                if(min>minFilterSize) minFilterSize= min;
        }

        if (flags & SWS_CPU_CAPS_ALTIVEC) {
          // we can handle the special case 4,
          // so we don't want to go to the full 8
          if (minFilterSize < 5)
            filterAlign = 4;

          // we really don't want to waste our time
          // doing useless computation, so fall-back on
          // the scalar C code for very small filter.
          // vectorizing is worth it only if you have
          // decent-sized vector.
          if (minFilterSize < 3)
            filterAlign = 1;
        }

        ASSERT(minFilterSize > 0)
        // round up to the next multiple of filterAlign
        filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
        ASSERT(filterSize > 0)
        filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
        *outFilterSize= filterSize;

        if(flags&SWS_PRINT_INFO)
                MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
        /* try to reduce the filter-size (step2 reduce it) */
        for(i=0; i<dstW; i++)
        {
                int j;

                for(j=0; j<filterSize; j++)
                {
                        if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
                        else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
                }
        }
        free(filter2); filter2=NULL;

        //FIXME try to align filterpos if possible

        //fix borders
        for(i=0; i<dstW; i++)
        {
                int j;
                if((*filterPos)[i] < 0)
                {
                        // Move filter coeffs left to compensate for filterPos
                        for(j=1; j<filterSize; j++)
                        {
                                int left= MAX(j + (*filterPos)[i], 0);
                                filter[i*filterSize + left] += filter[i*filterSize + j];
                                filter[i*filterSize + j]=0;
                        }
                        (*filterPos)[i]= 0;
                }

                if((*filterPos)[i] + filterSize > srcW)
                {
                        int shift= (*filterPos)[i] + filterSize - srcW;
                        // Move filter coeffs right to compensate for filterPos
                        for(j=filterSize-2; j>=0; j--)
                        {
                                int right= MIN(j + shift, filterSize-1);
                                filter[i*filterSize +right] += filter[i*filterSize +j];
                                filter[i*filterSize +j]=0;
                        }
                        (*filterPos)[i]= srcW - filterSize;
                }
        }

        // Note the +1 is for the MMXscaler which reads over the end
        *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
        memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

        /* Normalize & Store in outFilter */
        for(i=0; i<dstW; i++)
        {
                int j;
                double error=0;
                double sum=0;
                double scale= one;

                for(j=0; j<filterSize; j++)
                {
                        sum+= filter[i*filterSize + j];
                }
                scale/= sum;
                for(j=0; j<*outFilterSize; j++)
                {
                        // carry the rounding error into the next tap so the row still sums to `one`
                        double v= filter[i*filterSize + j]*scale + error;
                        int intV= floor(v + 0.5);
                        (*outFilter)[i*(*outFilterSize) + j]= intV;
                        error = v - intV;
                }
        }

        (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
        for(i=0; i<*outFilterSize; i++)
        {
                int j= dstW*(*outFilterSize);
                (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
        }

        free(filter);
}
1144 31190492 Arpi
1145 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1146 b7dc6f66 Michael Niedermayer
/**
 * Generates the run-time horizontal scaler code used by the MMX2 path.
 *
 * Two instruction templates ("fragments" A and B) are assembled inline below.
 * Each asm statement jumps over its template, so the template is never
 * executed in place; the instructions after label 9 only measure it.
 * The main loop then copies one template per group of 4 output pixels into
 * funnyCode and patches the two pshufw immediates of that copy.
 *
 * @param dstW      destination width in pixels
 * @param xInc      horizontal step in 16.16 fixed point (xpos>>16 is the source index)
 * @param funnyCode output buffer receiving the generated code
 *                  NOTE(review): size and executability of this buffer are not
 *                  checked here - confirm against the caller
 * @param filter    receives one 7 bit weight per output pixel
 * @param filterPos receives one source start index per group of 4 output pixels
 *                  (written at index i/2), plus a final entry after the loop
 * @param numSplits only dstW/numSplits output pixels are generated
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1147 28bf81c9 Michael Niedermayer
{
1148 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentA;
1149 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1A;
1150
        long imm8OfPShufW2A;
1151
        long fragmentLengthA;
1152 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentB;
1153 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1B;
1154
        long imm8OfPShufW2B;
1155
        long fragmentLengthB;
1156 b7dc6f66 Michael Niedermayer
        int fragmentPos;
1157 28bf81c9 Michael Niedermayer
1158
        int xpos, i;
1159
1160
        // create an optimized horizontal scaling routine
1161
1162
        // code fragment A: performs two movd loads, at offset 0 and at offset 1
1163
1164
        asm volatile(
1165
                "jmp 9f                                \n\t"
1166
        // Begin
1167
                "0:                                \n\t"
1168 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1169
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1170
                "movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
1171 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm1                \n\t"
1172
                "punpcklbw %%mm7, %%mm0                \n\t"
1173 28bf81c9 Michael Niedermayer
                "pshufw $0xFF, %%mm1, %%mm1        \n\t"
1174
                "1:                                \n\t"
1175
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1176
                "2:                                \n\t"
1177
                "psubw %%mm1, %%mm0                \n\t"
1178 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1179 28bf81c9 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1180
                "psllw $7, %%mm1                \n\t"
1181
                "paddw %%mm1, %%mm0                \n\t"
1182
1183 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1184 28bf81c9 Michael Niedermayer
1185 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1186 28bf81c9 Michael Niedermayer
        // End
1187
                "9:                                \n\t"
1188
//                "int $3\n\t"
        // Measure the template: %0 = address of label 0, %1/%2 = offset of the
        // byte just before label 1/2 (the last byte of each pshufw, stored as
        // its imm8 offset), %3 = template length (label 9 - label 0).
1189 6e1c66bc Aurelien Jacobs
                "lea 0b, %0                        \n\t"
1190
                "lea 1b, %1                        \n\t"
1191
                "lea 2b, %2                        \n\t"
1192
                "dec %1                                \n\t"
1193
                "dec %2                                \n\t"
1194
                "sub %0, %1                        \n\t"
1195
                "sub %0, %2                        \n\t"
1196
                "lea 9b, %3                        \n\t"
1197
                "sub %0, %3                        \n\t"
1198 b7dc6f66 Michael Niedermayer
1199
1200
                :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1201
                "=r" (fragmentLengthA)
1202 28bf81c9 Michael Niedermayer
        );
1203
        // code fragment B: performs a single movd load and shuffles both
        // operands (mm0 and mm1) out of that one load
1204 b7dc6f66 Michael Niedermayer
        asm volatile(
1205
                "jmp 9f                                \n\t"
1206
        // Begin
1207
                "0:                                \n\t"
1208 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1209
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1210 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm0                \n\t"
1211
                "pshufw $0xFF, %%mm0, %%mm1        \n\t"
1212
                "1:                                \n\t"
1213
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1214
                "2:                                \n\t"
1215
                "psubw %%mm1, %%mm0                \n\t"
1216 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1217 b7dc6f66 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1218
                "psllw $7, %%mm1                \n\t"
1219
                "paddw %%mm1, %%mm0                \n\t"
1220
1221 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1222 28bf81c9 Michael Niedermayer
1223 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1224 b7dc6f66 Michael Niedermayer
        // End
1225
                "9:                                \n\t"
1226
//                "int $3\n\t"
        // Same measurement sequence as for fragment A.
1227 6e1c66bc Aurelien Jacobs
                "lea 0b, %0                        \n\t"
1228
                "lea 1b, %1                        \n\t"
1229
                "lea 2b, %2                        \n\t"
1230
                "dec %1                                \n\t"
1231
                "dec %2                                \n\t"
1232
                "sub %0, %1                        \n\t"
1233
                "sub %0, %2                        \n\t"
1234
                "lea 9b, %3                        \n\t"
1235
                "sub %0, %3                        \n\t"
1236 b7dc6f66 Michael Niedermayer
1237
1238
                :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1239
                "=r" (fragmentLengthB)
1240
        );
1241
1242
        xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1243
        fragmentPos=0;
1244
        
1245
        for(i=0; i<dstW/numSplits; i++)
1246 28bf81c9 Michael Niedermayer
        {
1247
                int xx=xpos>>16;
1248
1249
                // one code fragment is emitted per group of 4 output pixels
                if((i&3) == 0)
1250
                {
                        // a..d: source index of each of the 4 pixels, relative to xx
1251
                        int a=0;
1252
                        int b=((xpos+xInc)>>16) - xx;
1253
                        int c=((xpos+xInc*2)>>16) - xx;
1254
                        int d=((xpos+xInc*3)>>16) - xx;
1255
                        // weight = bitwise inverted 16 bit fraction of the position, reduced to 7 bits
1256 b7dc6f66 Michael Niedermayer
                        filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1257
                        filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1258
                        filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1259
                        filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1260
                        filterPos[i/2]= xx;
1261
                        // d+1 <= 3: every needed source byte, including the right neighbour of
                        // the last pixel, lies within one 4 byte load -> single-load fragment B
1262
                        if(d+1<4)
1263
                        {
1264
                                int maxShift= 3-(d+1);
1265
                                int shift=0;
1266
1267
                                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1268
                                // first pshufw selects the right neighbours (index+1),
                                // second pshufw selects the pixels themselves
1269
                                funnyCode[fragmentPos + imm8OfPShufW1B]=
1270
                                        (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1271
                                funnyCode[fragmentPos + imm8OfPShufW2B]=
1272
                                        a | (b<<2) | (c<<4) | (d<<6);
1273
1274
                                if(i+3>=dstW) shift=maxShift; //avoid overread
1275
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1276
                                // moving the load back by `shift` bytes is compensated by adding
                                // `shift` to each of the four 2 bit selectors (0x55 == 01010101b)
1277
                                if(shift && i>=shift)
1278
                                {
1279
                                        funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1280
                                        funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1281
                                        filterPos[i/2]-=shift;
1282
                                }
1283
1284
                                fragmentPos+= fragmentLengthB;
1285
                        }
1286
                        else
1287
                        {
1288
                                int maxShift= 3-d;
1289
                                int shift=0;
1290
1291
                                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
                                // fragment A loads the neighbours with its own movd at offset 1,
                                // so both shuffles use the same selectors
1292 28bf81c9 Michael Niedermayer
1293 b7dc6f66 Michael Niedermayer
                                funnyCode[fragmentPos + imm8OfPShufW1A]=
1294
                                funnyCode[fragmentPos + imm8OfPShufW2A]=
1295
                                        a | (b<<2) | (c<<4) | (d<<6);
1296 28bf81c9 Michael Niedermayer
1297 b7dc6f66 Michael Niedermayer
                                if(i+4>=dstW) shift=maxShift; //avoid overread
1298
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1299 28bf81c9 Michael Niedermayer
1300 b7dc6f66 Michael Niedermayer
                                if(shift && i>=shift)
1301
                                {
1302
                                        funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1303
                                        funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1304
                                        filterPos[i/2]-=shift;
1305
                                }
1306
1307
                                fragmentPos+= fragmentLengthA;
1308
                        }
1309
                        // terminate the code generated so far; fragmentPos is not advanced,
                        // so the next fragment (if any) overwrites this RET
1310
                        funnyCode[fragmentPos]= RET;
1311 28bf81c9 Michael Niedermayer
                }
1312
                xpos+=xInc;
1313
        }
1314 b7dc6f66 Michael Niedermayer
        filterPos[i/2]= xpos>>16; // needed to jump to the next part
1315 28bf81c9 Michael Niedermayer
}
1316 6e1c66bc Aurelien Jacobs
#endif // ARCH_X86 || ARCH_X86_64
1317 28bf81c9 Michael Niedermayer
1318
static void globalInit(){
1319 31190492 Arpi
    // generating tables:
1320
    int i;
1321 c1b0bfb4 Michael Niedermayer
    for(i=0; i<768; i++){
1322
        int c= MIN(MAX(i-256, 0), 255);
1323
        clip_table[i]=c;
1324 b18ea156 Michael Niedermayer
    }
1325 516b1f82 Michael Niedermayer
}
1326 c1b0bfb4 Michael Niedermayer
1327 516b1f82 Michael Niedermayer
static SwsFunc getSwsFunc(int flags){
1328
    
1329 28bf81c9 Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
1330 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1331 28bf81c9 Michael Niedermayer
        // ordered per speed fasterst first
1332 516b1f82 Michael Niedermayer
        if(flags & SWS_CPU_CAPS_MMX2)
1333
                return swScale_MMX2;
1334
        else if(flags & SWS_CPU_CAPS_3DNOW)
1335
                return swScale_3DNow;
1336
        else if(flags & SWS_CPU_CAPS_MMX)
1337
                return swScale_MMX;
1338 28bf81c9 Michael Niedermayer
        else
1339 516b1f82 Michael Niedermayer
                return swScale_C;
1340 28bf81c9 Michael Niedermayer
1341
#else
1342 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
1343
        if(flags & SWS_CPU_CAPS_ALTIVEC)
1344
          return swScale_altivec;
1345
        else
1346
          return swScale_C;
1347
#endif
1348 516b1f82 Michael Niedermayer
        return swScale_C;
1349 28bf81c9 Michael Niedermayer
#endif
1350
#else //RUNTIME_CPUDETECT
1351
#ifdef HAVE_MMX2
1352 516b1f82 Michael Niedermayer
        return swScale_MMX2;
1353 28bf81c9 Michael Niedermayer
#elif defined (HAVE_3DNOW)
1354 516b1f82 Michael Niedermayer
        return swScale_3DNow;
1355 28bf81c9 Michael Niedermayer
#elif defined (HAVE_MMX)
1356 516b1f82 Michael Niedermayer
        return swScale_MMX;
1357 a2faa401 Romain Dolbeau
#elif defined (HAVE_ALTIVEC)
1358
        return swScale_altivec;
1359 28bf81c9 Michael Niedermayer
#else
1360 516b1f82 Michael Niedermayer
        return swScale_C;
1361 28bf81c9 Michael Niedermayer
#endif
1362
#endif //!RUNTIME_CPUDETECT
1363 31190492 Arpi
}
1364 7630f2e0 Michael Niedermayer
1365 d4e24275 Michael Niedermayer
/**
 * Unscaled planar YUV -> NV12 conversion of one slice.
 *
 * The Y plane is copied unchanged and the two chroma planes are merged into
 * the second destination plane by interleaveBytes().
 *
 * @param c          scaler context (srcW is read)
 * @param src        source plane pointers, already positioned at the slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first row of the slice within the picture
 * @param srcSliceH  number of rows in the slice
 * @param dstParam   destination plane pointers (start of picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

        /* Copy Y plane */
        /* The single-memcpy shortcut is only valid for equal, positive strides;
           a negative stride would turn the byte count into a huge size_t. */
        if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
                memcpy(dst, src[0], srcSliceH*dstStride[0]);
        else
        {
                int i;
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst;
                for(i=0; i<srcSliceH; i++)
                {
                        /* copy only the visible pixels: copying srcStride[0] bytes
                           would overrun the destination row whenever the source
                           stride is the larger one */
                        memcpy(dstPtr, srcPtr, c->srcW);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }

        /* NOTE(review): the chroma call below is kept as it was. It passes the
           full luma width/height, the luma row offset and dstStride[0] for the
           chroma plane - confirm against interleaveBytes() and the NV12 layout. */
        dst = dstParam[1] + dstStride[1]*srcSliceY;
        interleaveBytes( src[1],src[2],dst,c->srcW,srcSliceH,srcStride[1],srcStride[2],dstStride[0] );

        return srcSliceH;
}
1388
1389 d4e24275 Michael Niedermayer
/**
 * Unscaled planar -> YUY2 conversion of one slice; thin adapter around
 * yv12toyuy2(). Output starts at row srcSliceY of dstParam[0].
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        const int outStride= dstStride[0];
        uint8_t *const out= dstParam[0] + outStride*srcSliceY;

        yv12toyuy2(src[0], src[1], src[2], out,
                   c->srcW, srcSliceH,
                   srcStride[0], srcStride[1], outStride);

        return srcSliceH;
}
1397
1398 caeaabe7 Alex Beregszaszi
/**
 * Unscaled planar -> UYVY conversion of one slice; thin adapter around
 * yv12touyvy(). Output starts at row srcSliceY of dstParam[0].
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        const int outStride= dstStride[0];
        uint8_t *const out= dstParam[0] + outStride*srcSliceY;

        yv12touyvy(src[0], src[1], src[2], out,
                   c->srcW, srcSliceH,
                   srcStride[0], srcStride[1], outStride);

        return srcSliceH;
}
1406
1407 e09d12f4 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1408 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1409 e09d12f4 Michael Niedermayer
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
1410
        const int srcFormat= c->srcFormat;
1411
        const int dstFormat= c->dstFormat;
1412
        const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
1413
        const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
1414
        const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 
1415
        const int dstId= (dstFormat&0xFF)>>2;
1416
        void (*conv)(const uint8_t *src, uint8_t *dst, unsigned src_size)=NULL;
1417
1418
        /* BGR -> BGR */
1419 20380eb8 Michael Niedermayer
        if(   (isBGR(srcFormat) && isBGR(dstFormat))
1420
           || (isRGB(srcFormat) && isRGB(dstFormat))){
1421 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1422
                case 0x34: conv= rgb16to15; break;
1423
                case 0x36: conv= rgb24to15; break;
1424
                case 0x38: conv= rgb32to15; break;
1425
                case 0x43: conv= rgb15to16; break;
1426
                case 0x46: conv= rgb24to16; break;
1427
                case 0x48: conv= rgb32to16; break;
1428
                case 0x63: conv= rgb15to24; break;
1429
                case 0x64: conv= rgb16to24; break;
1430
                case 0x68: conv= rgb32to24; break;
1431
                case 0x83: conv= rgb15to32; break;
1432
                case 0x84: conv= rgb16to32; break;
1433
                case 0x86: conv= rgb24to32; break;
1434
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1435
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1436 b935781b Michael Niedermayer
                }
1437 20380eb8 Michael Niedermayer
        }else if(   (isBGR(srcFormat) && isRGB(dstFormat))
1438
                 || (isRGB(srcFormat) && isBGR(dstFormat))){
1439 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1440
                case 0x33: conv= rgb15tobgr15; break;
1441
                case 0x34: conv= rgb16tobgr15; break;
1442
                case 0x36: conv= rgb24tobgr15; break;
1443
                case 0x38: conv= rgb32tobgr15; break;
1444
                case 0x43: conv= rgb15tobgr16; break;
1445
                case 0x44: conv= rgb16tobgr16; break;
1446
                case 0x46: conv= rgb24tobgr16; break;
1447
                case 0x48: conv= rgb32tobgr16; break;
1448
                case 0x63: conv= rgb15tobgr24; break;
1449
                case 0x64: conv= rgb16tobgr24; break;
1450
                case 0x66: conv= rgb24tobgr24; break;
1451
                case 0x68: conv= rgb32tobgr24; break;
1452
                case 0x83: conv= rgb15tobgr32; break;
1453
                case 0x84: conv= rgb16tobgr32; break;
1454
                case 0x86: conv= rgb24tobgr32; break;
1455
                case 0x88: conv= rgb32tobgr32; break;
1456
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1457
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1458 0d9f3d85 Arpi
                }
1459 20380eb8 Michael Niedermayer
        }else{
1460
                MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1461
                         vo_format_name(srcFormat), vo_format_name(dstFormat));
1462 e09d12f4 Michael Niedermayer
        }
1463 20380eb8 Michael Niedermayer
1464 e09d12f4 Michael Niedermayer
        if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1465
                conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1466 0d9f3d85 Arpi
        else
1467
        {
1468
                int i;
1469
                uint8_t *srcPtr= src[0];
1470
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1471
1472
                for(i=0; i<srcSliceH; i++)
1473
                {
1474 e09d12f4 Michael Niedermayer
                        conv(srcPtr, dstPtr, c->srcW*srcBpp);
1475 0d9f3d85 Arpi
                        srcPtr+= srcStride[0];
1476
                        dstPtr+= dstStride[0];
1477
                }
1478
        }     
1479 d4e24275 Michael Niedermayer
        return srcSliceH;
1480 0d9f3d85 Arpi
}
1481
1482 d4e24275 Michael Niedermayer
/**
 * Unscaled BGR24 -> YV12 conversion of one slice; thin adapter around
 * rgb24toyv12(). Plane 0 is written from row srcSliceY, planes 1 and 2 from
 * row srcSliceY>>1.
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        const int halfRow= srcSliceY>>1;
        uint8_t *const out0= dst[0] + srcSliceY*dstStride[0];
        uint8_t *const out1= dst[1] + halfRow  *dstStride[1];
        uint8_t *const out2= dst[2] + halfRow  *dstStride[2];

        rgb24toyv12(src[0], out0, out1, out2,
                    c->srcW, srcSliceH,
                    dstStride[0], dstStride[1], srcStride[0]);

        return srcSliceH;
}
1494
1495 d4e24275 Michael Niedermayer
/**
 * Unscaled YVU9 -> YV12-like conversion: the Y plane of the slice is copied
 * and both chroma planes are passed through planar2x().
 * When the destination format is not IMGFMT_YV12 the two chroma planes are
 * written to swapped destination planes.
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *yOut= dst[0] + dstStride[0]*srcSliceY;

        /* copy Y */
        if(srcStride[0]!=dstStride[0]){
                uint8_t *yIn= src[0];
                int row;

                for(row=0; row<srcSliceH; row++)
                {
                        memcpy(yOut, yIn, c->srcW);
                        yIn += srcStride[0];
                        yOut+= dstStride[0];
                }
        }else{
                memcpy(yOut, src[0], srcStride[0]*srcSliceH);
        }

        /* chroma: pick the destination plane for each source plane */
        {
                const int keepOrder= (c->dstFormat==IMGFMT_YV12);
                const int to1= keepOrder ? 1 : 2; /* destination plane for src[1] */
                const int to2= keepOrder ? 2 : 1; /* destination plane for src[2] */

                planar2x(src[1], dst[to1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[to1]);
                planar2x(src[2], dst[to2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[to2]);
        }
        return srcSliceH;
}
1523
1524 44c1035c Michael Niedermayer
/**
1525
 * bring pointers in YUV order instead of YVU
1526
 */
1527 fccb9b2b Michael Niedermayer
static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1528
        if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
1529 d80e2fa2 Michael Niedermayer
           || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
1530 44c1035c Michael Niedermayer
                sortedP[0]= p[0];
1531 fccb9b2b Michael Niedermayer
                sortedP[1]= p[2];
1532
                sortedP[2]= p[1];
1533 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1534 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[2];
1535
                sortedStride[2]= stride[1];
1536 44c1035c Michael Niedermayer
        }
1537 a4c90ea3 Michael Niedermayer
        else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
1538 44c1035c Michael Niedermayer
        {
1539
                sortedP[0]= p[0];
1540
                sortedP[1]= 
1541
                sortedP[2]= NULL;
1542 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1543 44c1035c Michael Niedermayer
                sortedStride[1]= 
1544
                sortedStride[2]= 0;
1545
        }
1546 fccb9b2b Michael Niedermayer
        else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
1547 44c1035c Michael Niedermayer
        {
1548
                sortedP[0]= p[0];
1549 fccb9b2b Michael Niedermayer
                sortedP[1]= p[1];
1550
                sortedP[2]= p[2];
1551 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1552 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[1];
1553
                sortedStride[2]= stride[2];
1554 e09d12f4 Michael Niedermayer
        }else{
1555
                MSG_ERR("internal error in orderYUV\n");
1556 44c1035c Michael Niedermayer
        }
1557
}
1558 b935781b Michael Niedermayer
1559 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
1560 3e499f53 Michael Niedermayer
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1561
             int srcSliceH, uint8_t* dst[], int dstStride[]){
1562 b6654a54 Michael Niedermayer
1563
        if(isPacked(c->srcFormat))
1564
        {
1565
                if(dstStride[0]==srcStride[0])
1566
                        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1567
                else
1568
                {
1569
                        int i;
1570
                        uint8_t *srcPtr= src[0];
1571
                        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1572 a861d4d7 Michael Niedermayer
                        int length=0;
1573
1574
                        /* universal length finder */
1575 9bd8bd1a Michael Niedermayer
                        while(length+c->srcW <= ABS(dstStride[0]) 
1576
                           && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1577 a861d4d7 Michael Niedermayer
                        ASSERT(length!=0);
1578 b6654a54 Michael Niedermayer
1579
                        for(i=0; i<srcSliceH; i++)
1580
                        {
1581
                                memcpy(dstPtr, srcPtr, length);
1582
                                srcPtr+= srcStride[0];
1583
                                dstPtr+= dstStride[0];
1584
                        }
1585
                }
1586
        }
1587
        else 
1588 44c1035c Michael Niedermayer
        { /* Planar YUV or gray */
1589 b6654a54 Michael Niedermayer
                int plane;
1590
                for(plane=0; plane<3; plane++)
1591
                {
1592 e616aa93 Michael Niedermayer
                        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1593
                        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1594
                        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1595 44c1035c Michael Niedermayer
1596
                        if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1597 0d9f3d85 Arpi
                        {
1598 44c1035c Michael Niedermayer
                                if(!isGray(c->dstFormat))
1599 e616aa93 Michael Niedermayer
                                        memset(dst[plane], 128, dstStride[plane]*height);
1600 0d9f3d85 Arpi
                        }
1601 b6654a54 Michael Niedermayer
                        else
1602
                        {
1603 44c1035c Michael Niedermayer
                                if(dstStride[plane]==srcStride[plane])
1604
                                        memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1605
                                else
1606 b6654a54 Michael Niedermayer
                                {
1607 44c1035c Michael Niedermayer
                                        int i;
1608
                                        uint8_t *srcPtr= src[plane];
1609
                                        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1610
                                        for(i=0; i<height; i++)
1611
                                        {
1612
                                                memcpy(dstPtr, srcPtr, length);
1613
                                                srcPtr+= srcStride[plane];
1614
                                                dstPtr+= dstStride[plane];
1615
                                        }
1616 b6654a54 Michael Niedermayer
                                }
1617
                        }
1618
                }
1619
        }
1620 d4e24275 Michael Niedermayer
        return srcSliceH;
1621 37079906 Michael Niedermayer
}
1622 28bf81c9 Michael Niedermayer
1623 44c1035c Michael Niedermayer
static int remove_dup_fourcc(int fourcc)
1624 0d9f3d85 Arpi
{
1625
        switch(fourcc)
1626
        {
1627 fccb9b2b Michael Niedermayer
            case IMGFMT_I420:
1628
            case IMGFMT_IYUV: return IMGFMT_YV12;
1629 0d9f3d85 Arpi
            case IMGFMT_Y8  : return IMGFMT_Y800;
1630 0c51ef97 Arpi
            case IMGFMT_IF09: return IMGFMT_YVU9;
1631 0d9f3d85 Arpi
            default: return fourcc;
1632
        }
1633
}
1634
1635 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
1636
        switch(format){
1637 7322a67c Michael Niedermayer
        case IMGFMT_UYVY:
1638 c7a810cc Michael Niedermayer
        case IMGFMT_YUY2:
1639
                *h=1;
1640
                *v=0;
1641
                break;
1642
        case IMGFMT_YV12:
1643 e616aa93 Michael Niedermayer
        case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
1644 c7a810cc Michael Niedermayer
                *h=1;
1645
                *v=1;
1646
                break;
1647
        case IMGFMT_YVU9:
1648
                *h=2;
1649
                *v=2;
1650
                break;
1651 d80e2fa2 Michael Niedermayer
        case IMGFMT_444P:
1652
                *h=0;
1653
                *v=0;
1654
                break;
1655
        case IMGFMT_422P:
1656
                *h=1;
1657
                *v=0;
1658
                break;
1659
        case IMGFMT_411P:
1660
                *h=2;
1661
                *v=0;
1662
                break;
1663 c7a810cc Michael Niedermayer
        default:
1664
                *h=0;
1665
                *v=0;
1666
                break;
1667
        }
1668
}
1669
1670 5427e242 Michael Niedermayer
/**
 * Converts a 16.16 fixed point value to a rounded, saturated 16 bit integer.
 *
 * @param f value in 16.16 fixed point
 * @return the rounded integer part as a 16 bit two's complement pattern;
 *         values above 0x7FFF saturate to 0x7FFF, values below -0x7FFF
 *         saturate to 0x8000
 */
static uint16_t roundToInt16(int64_t f){
        /* Keep the intermediate 64 bit wide: narrowing to int before the range
           check would let large magnitudes wrap around instead of saturating. */
        int64_t r= (f + (1<<15))>>16;
             if(r<-0x7FFF) return 0x8000;
        else if(r> 0x7FFF) return 0x7FFF;
        else               return r;
}
1676
1677
/**
1678 5427e242 Michael Niedermayer
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1679 0481412a Michael Niedermayer
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1680 5427e242 Michael Niedermayer
 * @return -1 if not supported
1681 0481412a Michael Niedermayer
 */
1682 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1683
        int64_t crv =  inv_table[0];
1684
        int64_t cbu =  inv_table[1];
1685
        int64_t cgu = -inv_table[2];
1686
        int64_t cgv = -inv_table[3];
1687
        int64_t cy  = 1<<16;
1688
        int64_t oy  = 0;
1689
1690
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1691
        memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1692
        memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1693
1694
        c->brightness= brightness;
1695
        c->contrast  = contrast;
1696
        c->saturation= saturation;
1697
        c->srcRange  = srcRange;
1698
        c->dstRange  = dstRange;
1699 0481412a Michael Niedermayer
1700
        c->uOffset=   0x0400040004000400LL;
1701
        c->vOffset=   0x0400040004000400LL;
1702
1703 5427e242 Michael Niedermayer
        if(!srcRange){
1704
                cy= (cy*255) / 219;
1705
                oy= 16<<16;
1706 0481412a Michael Niedermayer
        }
1707
1708 5427e242 Michael Niedermayer
        cy = (cy *contrast             )>>16;
1709
        crv= (crv*contrast * saturation)>>32;
1710
        cbu= (cbu*contrast * saturation)>>32;
1711
        cgu= (cgu*contrast * saturation)>>32;
1712
        cgv= (cgv*contrast * saturation)>>32;
1713 0481412a Michael Niedermayer
1714 5427e242 Michael Niedermayer
        oy -= 256*brightness;
1715 0481412a Michael Niedermayer
1716
        c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1717
        c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1718
        c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1719
        c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1720
        c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1721
        c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1722 5427e242 Michael Niedermayer
1723
        yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1724
        //FIXME factorize
1725 a31de956 Michael Niedermayer
1726
#ifdef HAVE_ALTIVEC
1727 582552fb Luca Barbato
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1728 a31de956 Michael Niedermayer
#endif        
1729 5427e242 Michael Niedermayer
        return 0;
1730
}
1731
1732
/**
1733
 * @return -1 if not supported
1734
 */
1735
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1736
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1737
1738
        *inv_table = c->srcColorspaceTable;
1739
        *table     = c->dstColorspaceTable;
1740
        *srcRange  = c->srcRange;
1741
        *dstRange  = c->dstRange;
1742
        *brightness= c->brightness;
1743
        *contrast  = c->contrast;
1744
        *saturation= c->saturation;
1745
        
1746
        return 0;        
1747 0481412a Michael Niedermayer
}
1748
1749 fccb9b2b Michael Niedermayer
/**
 * Allocate and initialise a scaler context.
 *
 * @param srcW,srcH      source size; srcW must be >= 4 and srcH >= 1
 * @param origSrcFormat  source pixel format as given by the caller (it is
 *                       normalised with remove_dup_fourcc() for internal use,
 *                       the original value is kept in the context)
 * @param dstW,dstH      destination size; dstW must be >= 8 and dstH >= 1
 * @param origDstFormat  destination pixel format, handled like origSrcFormat
 * @param flags          SWS_* scaler / CPU capability / option flags; when
 *                       RUNTIME_CPUDETECT is not defined the CPU capability
 *                       bits are replaced by the compiled-in variant
 * @param srcFilter      optional source filter, NULL means no filter
 * @param dstFilter      optional destination filter, NULL means no filter
 * @param param          optional array of two scaler tuning values; NULL
 *                       selects SWS_PARAM_DEFAULT for both
 *
 * If source and destination have the same size and no filter vector longer
 * than one tap is supplied, a matching unscaled special converter is used
 * when one exists. Otherwise the horizontal and vertical filter coefficients
 * and the line buffers are set up and the generic scaler function is used.
 *
 * @return the new context, or NULL if a format is not supported, the
 *         dimensions are invalid or the context could not be allocated
 */
SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
                         SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){

        SwsContext *c;
        int i;
        int usesVFilter, usesHFilter;
        int unscaled, needsDither;
        int srcFormat, dstFormat;
        SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(flags & SWS_CPU_CAPS_MMX)
                asm volatile("emms\n\t"::: "memory");
#endif

#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
        flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
        flags |= SWS_CPU_CAPS_MMX;
#elif defined (HAVE_ALTIVEC)
        flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
        /* one-time global initialisation, detected through not yet filled tables */
        if(clip_table[512] != 255) globalInit();
        if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);

        /* avoid duplicate formats, so we don't need to check too much */
        srcFormat = remove_dup_fourcc(origSrcFormat);
        dstFormat = remove_dup_fourcc(origDstFormat);

        unscaled = (srcW == dstW && srcH == dstH);
        /* dithering is wanted for RGB/BGR output below 24 (low byte of the format)
           when the source has a larger low byte or is not RGB/BGR at all */
        needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
                     && (dstFormat&0xFF)<24
                     && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

        if(!isSupportedIn(srcFormat))
        {
                MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
                return NULL;
        }
        if(!isSupportedOut(dstFormat))
        {
                MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
                return NULL;
        }

        /* sanity check */
        if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
        {
                MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
                        srcW, srcH, dstW, dstH);
                return NULL;
        }

        /* a missing filter behaves like a filter with all vectors NULL */
        if(!dstFilter) dstFilter= &dummyFilter;
        if(!srcFilter) srcFilter= &dummyFilter;

        c= memalign(64, sizeof(SwsContext));
        /* allocation failure is reported like every other setup error */
        if(!c) return NULL;
        memset(c, 0, sizeof(SwsContext));

        c->srcW= srcW;
        c->srcH= srcH;
        c->dstW= dstW;
        c->dstH= dstH;
        /* src/dst ratio scaled by 1<<16, rounded to nearest */
        c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
        c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
        c->flags= flags;
        c->dstFormat= dstFormat;
        c->srcFormat= srcFormat;
        c->origDstFormat= origDstFormat;
        c->origSrcFormat= origSrcFormat;
        /* the value 4 replicated into each 16 bit lane of a 64 bit word */
        c->vRounder= 4* 0x0001000100010001ULL;

        /* a filter vector only counts as "used" if it has more than one tap */
        usesHFilter= usesVFilter= 0;
        if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
        if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
        if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
        if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
        if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
        if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
        if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
        if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;

        getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
        getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

        // reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
        if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;

        // drop some chroma lines if the user wants it
        c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
        c->chrSrcVSubSample+= c->vChrDrop;

        // drop every 2. pixel for chroma calculation unless user wants full chroma
        if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
                c->chrSrcHSubSample=1;

        if(param){
                c->param[0] = param[0];
                c->param[1] = param[1];
        }else{
                c->param[0] =
                c->param[1] = SWS_PARAM_DEFAULT;
        }

        c->chrIntHSubSample= c->chrDstHSubSample;
        c->chrIntVSubSample= c->chrSrcVSubSample;

        // note the -((-x)>>y) is so that we always round toward +inf
        c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
        c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
        c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
        c->chrDstH= -((-dstH) >> c->chrDstVSubSample);

        /* default colorspace setup; the return value is not checked here */
        sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);

        /* unscaled special cases; later matches override earlier ones */
        if(unscaled && !usesHFilter && !usesVFilter)
        {
                /* yv12_to_nv12 */
                if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
                {
                        c->swScale= PlanarToNV12Wrapper;
                }
                /* yuv2bgr */
                if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
                {
                        c->swScale= yuv2rgb_get_func_ptr(c);
                }

                if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
                {
                        c->swScale= yvu9toyv12Wrapper;
                }

                /* bgr24toYV12 */
                if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
                        c->swScale= bgr24toyv12Wrapper;

                /* rgb/bgr -> rgb/bgr (no dither needed forms) */
                if(   (isBGR(srcFormat) || isRGB(srcFormat))
                   && (isBGR(dstFormat) || isRGB(dstFormat))
                   && !needsDither)
                        c->swScale= rgb2rgbWrapper;

                /* LQ converters if -sws 0 or -sws 4*/
                if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
                        /* rgb/bgr -> rgb/bgr (dither needed forms) */
                        if(  (isBGR(srcFormat) || isRGB(srcFormat))
                          && (isBGR(dstFormat) || isRGB(dstFormat))
                          && needsDither)
                                c->swScale= rgb2rgbWrapper;

                        /* yv12_to_yuy2 */
                        if(srcFormat == IMGFMT_YV12 &&
                            (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
                        {
                                if (dstFormat == IMGFMT_YUY2)
                                    c->swScale= PlanarToYuy2Wrapper;
                                else
                                    c->swScale= PlanarToUyvyWrapper;
                        }
                }

#ifdef HAVE_ALTIVEC
                if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
                    ((srcFormat == IMGFMT_YV12 &&
                      (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY)))) {
                  // unscaled YV12 -> packed YUV, we want speed
                  if (dstFormat == IMGFMT_YUY2)
                    c->swScale= yv12toyuy2_unscaled_altivec;
                  else
                    c->swScale= yv12touyvy_unscaled_altivec;
                }
#endif

                /* simple copy */
                if(   srcFormat == dstFormat
                   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
                   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
                  )
                {
                        c->swScale= simpleCopy;
                }

                /* a special converter was found: no filters or buffers are needed */
                if(c->swScale){
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
        }

        if(flags & SWS_CPU_CAPS_MMX2)
        {
                c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
                if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
                {
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
                }
                if(usesHFilter) c->canMMX2BeUsed=0;
        }
        else
                c->canMMX2BeUsed=0;

        c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
        c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;

        // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
        // but only for the FAST_BILINEAR mode otherwise do correct scaling
        // n-2 is the last chrominance sample available
        // this is not perfect, but no one should notice the difference, the more correct variant
        // would be like the vertical one, but that would require some special code for the
        // first and last pixel
        if(flags&SWS_FAST_BILINEAR)
        {
                if(c->canMMX2BeUsed)
                {
                        c->lumXInc+= 20;
                        c->chrXInc+= 20;
                }
                //we don't use the x86asm scaler if mmx is available
                else if(flags & SWS_CPU_CAPS_MMX)
                {
                        c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
                        c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
                }
        }

        /* precalculate horizontal scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_MMX) ? 4 :
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                /* with SWS_BICUBLIN luma is set up as bicubic and chroma as bilinear */
                initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                                 srcW      ,       dstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                 srcFilter->lumH, dstFilter->lumH, c->param);
                initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                                 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                 srcFilter->chrH, dstFilter->chrH, c->param);

#if defined(ARCH_X86) || defined(ARCH_X86_64)
// can't downscale !!!
                if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
                {
                        c->lumMmx2Filter   = (int16_t*)memalign(8, (dstW        /8+8)*sizeof(int16_t));
                        c->chrMmx2Filter   = (int16_t*)memalign(8, (c->chrDstW  /4+8)*sizeof(int16_t));
                        c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW      /2/8+8)*sizeof(int32_t));
                        c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));

                        initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
                        initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
                }
#endif
        } // Init Horizontal stuff



        /* precalculate vertical scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                                srcH      ,        dstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                srcFilter->lumV, dstFilter->lumV, c->param);
                initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                                c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                srcFilter->chrV, dstFilter->chrV, c->param);
        }

        // Calculate Buffer Sizes so that they won't run out while handling these damn slices
        c->vLumBufSize= c->vLumFilterSize;
        c->vChrBufSize= c->vChrFilterSize;
        for(i=0; i<dstH; i++)
        {
                int chrI= i*c->chrDstH / dstH;
                int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
                                 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));

                /* round nextSlice down to a multiple of 1<<chrSrcVSubSample */
                nextSlice>>= c->chrSrcVSubSample;
                nextSlice<<= c->chrSrcVSubSample;
                if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
                        c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
                if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
                        c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
        }

        // allocate pixbufs dynamically; each pointer table has 2*BufSize entries and
        // entry i+BufSize refers to the same line buffer as entry i
        c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
        c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
        //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
        // NOTE(review): the line buffers have a fixed size of 4000/8000 bytes independent of dstW,
        // and these allocations are not checked for failure - confirm the supported maximum width
        for(i=0; i<c->vLumBufSize; i++)
                c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
        for(i=0; i<c->vChrBufSize; i++)
                c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);

        //try to avoid drawing green stuff between the right end and the stride end
        for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
        for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);

        ASSERT(c->chrDstH <= dstH)

        /* summary line: scaler type, formats and CPU variant */
        if(flags&SWS_PRINT_INFO)
        {
#ifdef DITHER1XBPP
                char *dither= " dithered";
#else
                char *dither= "";
#endif
                if(flags&SWS_FAST_BILINEAR)
                        MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
                else if(flags&SWS_BILINEAR)
                        MSG_INFO("\nSwScaler: BILINEAR scaler, ");
                else if(flags&SWS_BICUBIC)
                        MSG_INFO("\nSwScaler: BICUBIC scaler, ");
                else if(flags&SWS_X)
                        MSG_INFO("\nSwScaler: Experimental scaler, ");
                else if(flags&SWS_POINT)
                        MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
                else if(flags&SWS_AREA)
                        MSG_INFO("\nSwScaler: Area Averageing scaler, ");
                else if(flags&SWS_BICUBLIN)
                        MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
                else if(flags&SWS_GAUSS)
                        MSG_INFO("\nSwScaler: Gaussian scaler, ");
                else if(flags&SWS_SINC)
                        MSG_INFO("\nSwScaler: Sinc scaler, ");
                else if(flags&SWS_LANCZOS)
                        MSG_INFO("\nSwScaler: Lanczos scaler, ");
                else if(flags&SWS_SPLINE)
                        MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
                else
                        MSG_INFO("\nSwScaler: ehh flags invalid?! ");

                if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
                        MSG_INFO("from %s to%s %s ",
                                vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
                else
                        MSG_INFO("from %s to %s ",
                                vo_format_name(srcFormat), vo_format_name(dstFormat));

                if(flags & SWS_CPU_CAPS_MMX2)
                        MSG_INFO("using MMX2\n");
                else if(flags & SWS_CPU_CAPS_3DNOW)
                        MSG_INFO("using 3DNOW\n");
                else if(flags & SWS_CPU_CAPS_MMX)
                        MSG_INFO("using MMX\n");
                else if(flags & SWS_CPU_CAPS_ALTIVEC)
                        MSG_INFO("using AltiVec\n");
                else
                        MSG_INFO("using C\n");
        }

        /* verbose details about the selected horizontal / vertical scalers and converters */
        if(flags & SWS_PRINT_INFO)
        {
                if(flags & SWS_CPU_CAPS_MMX)
                {
                        if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                                MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
                        else
                        {
                                if(c->hLumFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
                                else if(c->hLumFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");

                                if(c->hChrFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
                                else if(c->hChrFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
                        }
                }
                else
                {
#if defined(ARCH_X86) || defined(ARCH_X86_64)
                        MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
#else
                        if(flags & SWS_FAST_BILINEAR)
                                MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
                        else
                                MSG_V("SwScaler: using C scaler for horizontal scaling\n");
#endif
                }
                if(isPlanarYUV(dstFormat))
                {
                        if(c->vLumFilterSize==1)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }
                else
                {
                        if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
                                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }

                if(dstFormat==IMGFMT_BGR24)
                        MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
                                (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
                else if(dstFormat==IMGFMT_BGR32)
                        MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR16)
                        MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR15)
                        MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");

                MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
        }
        /* debug dump of the luma and chroma geometry and increments */
        if(flags & SWS_PRINT_INFO)
        {
                MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
                MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
        }

        c->swScale= getSwsFunc(flags);
        return c;
}
2189
2190
/**
2191 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext.
2192 fccb9b2b Michael Niedermayer
 * assumes planar YUV to be in YUV order instead of YVU
2193
 */
2194
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2195
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
2196 46888748 Michael Niedermayer
        //copy strides, so they can safely be modified
2197
        int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]};
2198
        int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]};
2199
        return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2200 fccb9b2b Michael Niedermayer
}
2201
2202
/**
2203 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext
2204 d4e24275 Michael Niedermayer
 */
2205 3e499f53 Michael Niedermayer
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
2206
                           int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
2207
        int srcStride[3];
2208
        int dstStride[3];
2209
        uint8_t *src[3];
2210
        uint8_t *dst[3];
2211 fccb9b2b Michael Niedermayer
        sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
2212
        sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
2213 3e499f53 Michael Niedermayer
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
2214 a4c90ea3 Michael Niedermayer
2215 5bf01354 Michael Niedermayer
        return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2216 d4e24275 Michael Niedermayer
}
2217
2218 e21206a8 Michael Niedermayer
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, 
2219
                                float lumaSharpen, float chromaSharpen,
2220
                                float chromaHShift, float chromaVShift,
2221
                                int verbose)
2222
{
2223
        SwsFilter *filter= malloc(sizeof(SwsFilter));
2224
2225
        if(lumaGBlur!=0.0){
2226
                filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2227
                filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2228
        }else{
2229
                filter->lumH= sws_getIdentityVec();
2230
                filter->lumV= sws_getIdentityVec();
2231
        }
2232
2233
        if(chromaGBlur!=0.0){
2234
                filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2235
                filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2236
        }else{
2237
                filter->chrH= sws_getIdentityVec();
2238
                filter->chrV= sws_getIdentityVec();
2239
        }
2240
2241
        if(chromaSharpen!=0.0){
2242
                SwsVector *g= sws_getConstVec(-1.0, 3);
2243
                SwsVector *id= sws_getConstVec(10.0/chromaSharpen, 1);
2244
                g->coeff[1]=2.0;
2245
                sws_addVec(id, g);
2246
                sws_convVec(filter->chrH, id);
2247
                sws_convVec(filter->chrV, id);
2248
                sws_freeVec(g);
2249
                sws_freeVec(id);
2250
        }
2251
2252
        if(lumaSharpen!=0.0){
2253
                SwsVector *g= sws_getConstVec(-1.0, 3);
2254
                SwsVector *id= sws_getConstVec(10.0/lumaSharpen, 1);
2255
                g->coeff[1]=2.0;
2256
                sws_addVec(id, g);
2257
                sws_convVec(filter->lumH, id);
2258
                sws_convVec(filter->lumV, id);
2259
                sws_freeVec(g);
2260
                sws_freeVec(id);
2261
        }
2262
2263
        if(chromaHShift != 0.0)
2264
                sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2265
2266
        if(chromaVShift != 0.0)
2267
                sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2268
2269
        sws_normalizeVec(filter->chrH, 1.0);
2270
        sws_normalizeVec(filter->chrV, 1.0);
2271
        sws_normalizeVec(filter->lumH, 1.0);
2272
        sws_normalizeVec(filter->lumV, 1.0);
2273
2274
        if(verbose) sws_printVec(filter->chrH);
2275
        if(verbose) sws_printVec(filter->lumH);
2276
2277
        return filter;
2278
}
2279
2280 d4e24275 Michael Niedermayer
/**
2281 28bf81c9 Michael Niedermayer
 * returns a normalized gaussian curve used to filter stuff
2282
 * quality=3 is high quality, lowwer is lowwer quality
2283
 */
2284 d4e24275 Michael Niedermayer
SwsVector *sws_getGaussianVec(double variance, double quality){
2285 28bf81c9 Michael Niedermayer
        const int length= (int)(variance*quality + 0.5) | 1;
2286
        int i;
2287
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2288
        double middle= (length-1)*0.5;
2289 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2290
2291
        vec->coeff= coeff;
2292
        vec->length= length;
2293 28bf81c9 Michael Niedermayer
2294
        for(i=0; i<length; i++)
2295
        {
2296
                double dist= i-middle;
2297
                coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2298
        }
2299
2300 d4e24275 Michael Niedermayer
        sws_normalizeVec(vec, 1.0);
2301 c7f822d9 Michael Niedermayer
2302
        return vec;
2303 28bf81c9 Michael Niedermayer
}
2304
2305 d4e24275 Michael Niedermayer
SwsVector *sws_getConstVec(double c, int length){
2306 5521b193 Michael Niedermayer
        int i;
2307
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2308
        SwsVector *vec= malloc(sizeof(SwsVector));
2309
2310
        vec->coeff= coeff;
2311
        vec->length= length;
2312
2313
        for(i=0; i<length; i++)
2314
                coeff[i]= c;
2315
2316
        return vec;
2317
}
2318
2319
2320 d4e24275 Michael Niedermayer
SwsVector *sws_getIdentityVec(void){
2321 c7f822d9 Michael Niedermayer
        double *coeff= memalign(sizeof(double), sizeof(double));
2322
        SwsVector *vec= malloc(sizeof(SwsVector));
2323
        coeff[0]= 1.0;
2324
2325
        vec->coeff= coeff;
2326
        vec->length= 1;
2327
2328
        return vec;
2329
}
2330
2331 d4e24275 Michael Niedermayer
/**
 * Rescales a in place so that its coefficients add up to "height".
 * Every coefficient is multiplied by height / (sum of all coefficients).
 */
void sws_normalizeVec(SwsVector *a, double height){
        double total= 0;
        double factor;
        int k;

        for(k=0; k<a->length; k++){
                total+= a->coeff[k];
        }

        factor= height/total;

        for(k=0; k<a->length; k++){
                a->coeff[k]*= factor;
        }
}
2344
2345 d4e24275 Michael Niedermayer
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
        int k= 0;

        while(k < a->length){
                a->coeff[k]*= scalar;
                k++;
        }
}
2351
2352 d4e24275 Michael Niedermayer
/**
 * Returns the convolution of a and b as a new vector of length
 * a->length + b->length - 1. Neither input is modified.
 *
 * @return newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
        int length= a->length + b->length - 1;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i, j;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // a NULL coeff is only an error when coefficients were actually requested
        if(!vec || (!coeff && length > 0)){
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        for(i=0; i<a->length; i++)
        {
                for(j=0; j<b->length; j++)
                {
                        coeff[i+j]+= a->coeff[i]*b->coeff[j];
                }
        }

        return vec;
}
2373
2374 d4e24275 Michael Niedermayer
/**
 * Returns a + b as a new vector. The result has the length of the longer
 * input; each input is placed at offset (length-1)/2 - (own length-1)/2,
 * which lines up the centre taps. Neither input is modified.
 *
 * @return newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
        int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // a NULL coeff is only an error when coefficients were actually requested
        if(!vec || (!coeff && length > 0)){
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
        for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

        return vec;
}
2390 c7f822d9 Michael Niedermayer
2391 d4e24275 Michael Niedermayer
/**
 * Returns a - b as a new vector. The result has the length of the longer
 * input; each input is placed at offset (length-1)/2 - (own length-1)/2,
 * which lines up the centre taps. Neither input is modified.
 *
 * @return newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
        int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // a NULL coeff is only an error when coefficients were actually requested
        if(!vec || (!coeff && length > 0)){
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
        for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

        return vec;
}
2407
2408
/* shift left / or right if "shift" is negative */
2409 d4e24275 Michael Niedermayer
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2410 c7f822d9 Michael Niedermayer
        int length= a->length + ABS(shift)*2;
2411
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2412 ff7ba856 Michael Niedermayer
        int i;
2413 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2414
2415
        vec->coeff= coeff;
2416
        vec->length= length;
2417
2418
        for(i=0; i<length; i++) coeff[i]= 0.0;
2419
2420
        for(i=0; i<a->length; i++)
2421
        {
2422
                coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2423
        }
2424
2425
        return vec;
2426
}
2427
2428 d4e24275 Michael Niedermayer
/**
 * Shifts a in place: its coefficient array and length are replaced by those
 * of sws_getShiftedVec(a, shift), and the old array is freed.
 */
void sws_shiftVec(SwsVector *a, int shift){
        SwsVector *result= sws_getShiftedVec(a, shift);
        double *oldCoeff= a->coeff;

        // take over the result's buffer, then drop the old buffer and the shell
        a->length= result->length;
        a->coeff= result->coeff;
        free(oldCoeff);
        free(result);
}
2435
2436 d4e24275 Michael Niedermayer
/**
 * a += b, in place: a's coefficient array and length are replaced by those
 * of sws_sumVec(a, b), and the old array is freed. b is not modified.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_sumVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the result's buffer, then drop the old buffer and the shell
        a->length= result->length;
        a->coeff= result->coeff;
        free(oldCoeff);
        free(result);
}
2443
2444 d4e24275 Michael Niedermayer
/**
 * a -= b, in place: a's coefficient array and length are replaced by those
 * of sws_diffVec(a, b), and the old array is freed. b is not modified.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_diffVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the result's buffer, then drop the old buffer and the shell
        a->length= result->length;
        a->coeff= result->coeff;
        free(oldCoeff);
        free(result);
}
2451
2452 d4e24275 Michael Niedermayer
/**
 * Convolves a with b, in place: a's coefficient array and length are
 * replaced by those of sws_getConvVec(a, b), and the old array is freed.
 * b is not modified.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_getConvVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the result's buffer, then drop the old buffer and the shell
        a->length= result->length;
        a->coeff= result->coeff;
        free(oldCoeff);
        free(result);
}
2459
2460 d4e24275 Michael Niedermayer
/**
 * Returns a deep copy of a (same length, separately allocated coefficients).
 *
 * @return newly allocated vector (free with sws_freeVec), or NULL if an
 *         allocation failed
 */
SwsVector *sws_cloneVec(SwsVector *a){
        double *coeff= memalign(sizeof(double), a->length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // a NULL coeff is only an error when there is something to copy
        if(!vec || (!coeff && a->length > 0)){
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= a->length;

        for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];

        return vec;
}
2472
2473 d4e24275 Michael Niedermayer
/**
 * Prints a through MSG_DBG2, one line per coefficient: the value followed by
 * a '|' marker indented by 0..60 spaces according to where the value lies
 * between min and max. Both min and max start at 0, so 0 is always inside
 * the plotted range.
 */
void sws_printVec(SwsVector *a){
        int i;
        double max=0;
        double min=0;
        double range;

        for(i=0; i<a->length; i++){
                if(a->coeff[i]>max) max= a->coeff[i];
                if(a->coeff[i]<min) min= a->coeff[i];
        }

        range= max - min;

        for(i=0; i<a->length; i++)
        {
                // an all-zero vector has range 0; dividing would give NaN and
                // converting NaN to int is undefined, so plot at column 0 instead
                int x= 0;
                if(range != 0.0) x= (int)((a->coeff[i]-min)*60.0/range +0.5);
                MSG_DBG2("%1.3f ", a->coeff[i]);
                for(;x>0; x--) MSG_DBG2(" ");
                MSG_DBG2("|\n");
        }
}
2495
2496 d4e24275 Michael Niedermayer
/**
 * Frees a vector and its coefficient array. Passing NULL is allowed and
 * does nothing.
 */
void sws_freeVec(SwsVector *a){
        if(a){
                free(a->coeff);         // free(NULL) is a no-op
                a->coeff= NULL;
                a->length= 0;
                free(a);
        }
}
2503
2504 e21206a8 Michael Niedermayer
/**
 * Frees a filter together with its four vectors (lumH, lumV, chrH, chrV).
 * Passing NULL is allowed and does nothing.
 */
void sws_freeFilter(SwsFilter *filter){
        if(filter){
                // sws_freeVec ignores NULL, so unset vectors need no extra test
                sws_freeVec(filter->lumH);
                sws_freeVec(filter->lumV);
                sws_freeVec(filter->chrH);
                sws_freeVec(filter->chrV);
                free(filter);
        }
}
2513
2514
2515 d4e24275 Michael Niedermayer
/**
 * Frees a context and every buffer it owns: the luma / chroma line buffers
 * (vLumBufSize resp. vChrBufSize entries each), the scaling filters and
 * their position tables, the MMX2 filter tables and the yuv table.
 * Each pointer is reset to NULL after being freed.
 * Passing NULL is allowed and does nothing.
 */
void sws_freeContext(SwsContext *c){
        int line;

        if(!c) return;

        // line buffers: free each line, then the array of line pointers
        if(c->lumPixBuf){
                for(line=0; line<c->vLumBufSize; line++){
                        free(c->lumPixBuf[line]);
                        c->lumPixBuf[line]= NULL;
                }
                free(c->lumPixBuf);
                c->lumPixBuf= NULL;
        }

        if(c->chrPixBuf){
                for(line=0; line<c->vChrBufSize; line++){
                        free(c->chrPixBuf[line]);
                        c->chrPixBuf[line]= NULL;
                }
                free(c->chrPixBuf);
                c->chrPixBuf= NULL;
        }

        // free(NULL) is a no-op, so the tables below need no individual tests
        free(c->vLumFilter);        c->vLumFilter= NULL;
        free(c->vChrFilter);        c->vChrFilter= NULL;
        free(c->hLumFilter);        c->hLumFilter= NULL;
        free(c->hChrFilter);        c->hChrFilter= NULL;

        free(c->vLumFilterPos);     c->vLumFilterPos= NULL;
        free(c->vChrFilterPos);     c->vChrFilterPos= NULL;
        free(c->hLumFilterPos);     c->hLumFilterPos= NULL;
        free(c->hChrFilterPos);     c->hChrFilterPos= NULL;

        free(c->lumMmx2Filter);     c->lumMmx2Filter= NULL;
        free(c->chrMmx2Filter);     c->chrMmx2Filter= NULL;
        free(c->lumMmx2FilterPos);  c->lumMmx2FilterPos= NULL;
        free(c->chrMmx2FilterPos);  c->chrMmx2FilterPos= NULL;

        free(c->yuvTable);          c->yuvTable= NULL;

        free(c);
}