Statistics
| Branch: | Revision:

ffmpeg / postproc / swscale.c @ 38d5c282

History | View | Annotate | Download (73.1 KB)

1 fe8054c0 Michael Niedermayer
/*
2 5427e242 Michael Niedermayer
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3 fe8054c0 Michael Niedermayer

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8 31190492 Arpi

9 fe8054c0 Michael Niedermayer
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13 31190492 Arpi

14 fe8054c0 Michael Niedermayer
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18 783e9cc9 Michael Niedermayer
19 28bf81c9 Michael Niedermayer
/*
20 7322a67c Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
21 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
22 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
23 a861d4d7 Michael Niedermayer
  
24 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
25
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
26
  x -> x
27
  YUV9 -> YV12
28
  YUV9/YV12 -> Y800
29
  Y800 -> YUV9/YV12
30 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
31
  BGR32 -> BGR24 & RGB32 -> RGB24
32 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
33 b935781b Michael Niedermayer
*/
34
35
/* 
36 e09d12f4 Michael Niedermayer
tested special converters (most are actually tested, but I didn't write it down ...)
37
 YV12 -> BGR16
38 b935781b Michael Niedermayer
 YV12 -> YV12
39 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
40 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
41 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
42 b935781b Michael Niedermayer

43
untested special converters
44 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
45
  YV12/I420 -> YV12/I420
46
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
47 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
48
  BGR32 -> BGR24 & RGB32 -> RGB24
49 ec22603f Michael Niedermayer
  BGR24 -> YV12
50 28bf81c9 Michael Niedermayer
*/
51
52 d3f41512 Michael Niedermayer
#include <inttypes.h>
53 dda87e9f Pierre Lombard
#include <string.h>
54 077ea8a7 Michael Niedermayer
#include <math.h>
55 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
56 d3f41512 Michael Niedermayer
#include "../config.h"
57 9b464428 Felix Bünemann
#include "../mangle.h"
58 81b7c056 Michael Niedermayer
#include <assert.h>
59 c1b0bfb4 Michael Niedermayer
#ifdef HAVE_MALLOC_H
60
#include <malloc.h>
61 b6acbc3c Björn Sandell
#else
62
#include <stdlib.h>
63 c1b0bfb4 Michael Niedermayer
#endif
64 38d5c282 Aurelien Jacobs
#ifdef HAVE_SYS_MMAN_H
65
#include <sys/mman.h>
66
#endif
67 d604bab9 Michael Niedermayer
#include "swscale.h"
68 5427e242 Michael Niedermayer
#include "swscale_internal.h"
69 7630f2e0 Michael Niedermayer
#include "../cpudetect.h"
70 a861d4d7 Michael Niedermayer
#include "../bswap.h"
71 28bf81c9 Michael Niedermayer
#include "../libvo/img_format.h"
72 37079906 Michael Niedermayer
#include "rgb2rgb.h"
73 b0db4198 Michael Niedermayer
#include "../libvo/fastmemcpy.h"
74 0d9f3d85 Arpi
75 541c4eb9 Michael Niedermayer
#undef MOVNTQ
76 7d7f78b5 Michael Niedermayer
#undef PAVGB
77 d3f41512 Michael Niedermayer
78 783e9cc9 Michael Niedermayer
//#undef HAVE_MMX2
79 7f56a527 Michael Niedermayer
//#define HAVE_3DNOW
80 d3f41512 Michael Niedermayer
//#undef HAVE_MMX
81 783e9cc9 Michael Niedermayer
//#undef ARCH_X86
82 2ba1bff0 Michael Niedermayer
//#define WORDS_BIGENDIAN
83 d604bab9 Michael Niedermayer
#define DITHER1XBPP
84 d3f41512 Michael Niedermayer
85 ac6a2e45 Michael Niedermayer
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
86
87 1e621b18 Michael Niedermayer
#define RET 0xC3 //near return opcode for X86
88 c1b0bfb4 Michael Niedermayer
89 28bf81c9 Michael Niedermayer
#ifdef MP_DEBUG
90 81b7c056 Michael Niedermayer
#define ASSERT(x) assert(x);
91 28bf81c9 Michael Niedermayer
#else
92 c1b0bfb4 Michael Niedermayer
#define ASSERT(x) ;
93 28bf81c9 Michael Niedermayer
#endif
94
95
#ifdef M_PI
96
#define PI M_PI
97
#else
98
#define PI 3.14159265358979323846
99
#endif
100 c1b0bfb4 Michael Niedermayer
101 6c7506de Michael Niedermayer
//FIXME replace this with something faster
/*
 * Pixel-format classification predicates.
 * Every macro expands to a chain of comparisons against IMGFMT_* codes, so
 * the argument is evaluated several times and must be free of side effects.
 */

/* true for the planar YUV layouts: YV12, YVU9, 444P, 422P, 411P */
#define isPlanarYUV(x)  (   (x)==IMGFMT_YV12 \
                         || (x)==IMGFMT_YVU9 \
                         || (x)==IMGFMT_444P \
                         || (x)==IMGFMT_422P \
                         || (x)==IMGFMT_411P )

/* true for the packed YUV formats (UYVY, YUY2) and for every planar YUV format */
#define isYUV(x)        (   (x)==IMGFMT_UYVY \
                         || (x)==IMGFMT_YUY2 \
                         || isPlanarYUV(x) )

/* true for Y800 only */
#define isGray(x)       ( (x)==IMGFMT_Y800 )

/* true when the masked format code equals the RGB / BGR family code */
#define isRGB(x)        ( ((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB )
#define isBGR(x)        ( ((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR )

/* formats accepted as scaler input */
#define isSupportedIn(x)  (   (x)==IMGFMT_YV12 \
                           || (x)==IMGFMT_YUY2 \
                           || (x)==IMGFMT_UYVY \
                           || (x)==IMGFMT_BGR32 \
                           || (x)==IMGFMT_BGR24 \
                           || (x)==IMGFMT_BGR16 \
                           || (x)==IMGFMT_BGR15 \
                           || (x)==IMGFMT_RGB32 \
                           || (x)==IMGFMT_RGB24 \
                           || (x)==IMGFMT_Y800 \
                           || (x)==IMGFMT_YVU9 \
                           || (x)==IMGFMT_444P \
                           || (x)==IMGFMT_422P \
                           || (x)==IMGFMT_411P )

/* formats accepted as scaler output (any RGB/BGR depth is allowed here) */
#define isSupportedOut(x) (   (x)==IMGFMT_YV12 \
                           || (x)==IMGFMT_YUY2 \
                           || (x)==IMGFMT_UYVY \
                           || (x)==IMGFMT_444P \
                           || (x)==IMGFMT_422P \
                           || (x)==IMGFMT_411P \
                           || isRGB(x) \
                           || isBGR(x) \
                           || (x)==IMGFMT_Y800 \
                           || (x)==IMGFMT_YVU9 )

/* true for the packed formats: YUY2, UYVY and all RGB/BGR variants */
#define isPacked(x)     (   (x)==IMGFMT_YUY2 \
                         || (x)==IMGFMT_UYVY \
                         || isRGB(x) \
                         || isBGR(x) )
118 6ff0ad6b Michael Niedermayer
119
#define RGB2YUV_SHIFT 16
/*
 * RGB -> YUV matrix coefficients as fixed-point integers: the real-valued
 * factor is scaled by 2^RGB2YUV_SHIFT and rounded to the nearest integer.
 * NOTE(review): the names appear to read <input channel><output channel>
 * (e.g. BY = weight of B in Y) -- confirm against the users of these macros.
 *
 * The (int) cast truncates toward zero, so adding 0.5 before the cast only
 * rounds to nearest for positive values.  The negative coefficients are
 * therefore computed by rounding their magnitude and negating the result;
 * "(int)(-x + 0.5)" would come out one unit too small in magnitude.
 */
#define BY ( (int)(0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.148*(1<<RGB2YUV_SHIFT)+0.5))
129 6c7506de Michael Niedermayer
130 0481412a Michael Niedermayer
extern const int32_t Inverse_Table_6_9[8][4];
131
132 783e9cc9 Michael Niedermayer
/*
133
NOTES
134 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
135 31190492 Arpi

136 783e9cc9 Michael Niedermayer
TODO
137 d604bab9 Michael Niedermayer
more intelligent misalignment avoidance for the horizontal scaler
138 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
139
Optimize C code (yv12 / minmax)
140 ff7ba856 Michael Niedermayer
add support for packed pixel yuv input & output
141 6ff0ad6b Michael Niedermayer
add support for Y8 output
142
optimize bgr24 & bgr32
143 ff7ba856 Michael Niedermayer
add BGR4 output support
144 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
145 783e9cc9 Michael Niedermayer
*/
146 31190492 Arpi
147 d604bab9 Michael Niedermayer
/*
 * Small arithmetic helpers.  These are plain textual macros: each argument
 * may be evaluated more than once, so do not pass expressions with side
 * effects (i++, function calls that mutate state, ...).
 */
#define ABS(a)    ((a) > 0 ? (a) : (-(a)))
#define MIN(a,b)  ((b) < (a) ? (b) : (a))
#define MAX(a,b)  ((b) > (a) ? (b) : (a))
150 d604bab9 Michael Niedermayer
151 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants and scratch variables, compiled only for
 * x86 / x86-64 builds.
 * NOTE(review): the attribute_used markers suggest most of these are
 * referenced only by name from inline assembly -- confirm against the asm
 * that uses them before renaming or removing anything here.
 */

/* repeated byte / word patterns and byte-lane masks */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=        0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=        0xFCFCFCFCFCFCFCFCLL;
static uint64_t                __attribute__((aligned(8))) w10=        0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=        0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111= 0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111= 0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000= 0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101= 0x00FF00FF00FF00FFLL;

/* writable dither state (zero-initialised, volatile) */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

/* two-entry dither pattern tables */
static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* per-16-bit-word field masks: 5/6/5 layout (bits 0-4, 5-10, 11-15) */
static uint64_t                __attribute__((aligned(8))) b16Mask=    0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=    0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=    0xF800F800F800F800LL;
/* per-16-bit-word field masks: 5/5/5 layout (bits 0-4, 5-9, 10-14) */
static uint64_t                __attribute__((aligned(8))) b15Mask=    0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=    0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=    0x7C007C007C007C00LL;

/* masks selecting every third byte, at three different phases */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=       0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=       0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=       0x0000FF0000FF0000LL;

/* packed 16-bit coefficient words; FAST_BGR2YV12 selects the low-precision set */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
/* 16 resp. 128 replicated into every byte, and 1 replicated into every word */
static const uint64_t bgr2YOffset  attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8))) = 0x8080808080808080ULL;
static const uint64_t w1111        attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif
198 783e9cc9 Michael Niedermayer
199
// clipping helper table for C implementations:
200
static unsigned char clip_table[768];
201
202 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
203
                  
204 cf7d1c1a Michael Niedermayer
extern const uint8_t dither_2x2_4[2][8];
205
extern const uint8_t dither_2x2_8[2][8];
206
extern const uint8_t dither_8x8_32[8][8];
207
extern const uint8_t dither_8x8_73[8][8];
208
extern const uint8_t dither_8x8_220[8][8];
209 5cebb24b Michael Niedermayer
210 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/**
 * Reads each of the x86 helper constants once from C code.
 *
 * The values are summed (the 64-bit sum is truncated when stored into the
 * int) and the result is kept in a volatile local so the reads are not
 * optimised away.  The function has no other effect.
 * NOTE(review): judging by its name, its purpose is to silence "unused
 * variable" warnings for symbols that are otherwise referenced only from
 * inline assembly -- confirm before removing it.
 *
 * The parameter list is spelled (void) so the definition is a real
 * prototype; calls with no arguments are unaffected.
 */
void in_asm_used_var_warning_killer(void)
{
 volatile int i= bF8+bFC+w10+
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
 if(i) i=0; /* consume the value so i itself is not reported as unused */
}
#endif
219 d604bab9 Michael Niedermayer
220 5859233b Michael Niedermayer
/**
 * Plain C vertical filter producing 8-bit planar output.
 *
 * Each output sample is the sum over the filter taps of source sample times
 * coefficient, plus a rounding term of 1<<18, shifted right by 19 and
 * clipped to 0..255.
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize pointers to luma source lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize pointers to chroma source lines; the
 *                      sample for vDest is read 2048 entries after the
 *                      sample for uDest within the same line
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes written
 * @param uDest         first chroma output, chrDstW bytes written; when
 *                      NULL no chroma is written at all
 * @param vDest         second chroma output, chrDstW bytes written (only
 *                      touched when uDest is not NULL)
 * @param dstW          number of luma samples to produce
 * @param chrDstW       number of chroma samples to produce per plane
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
        //FIXME Optimize (just quickly written, not optimized)
        int x;
        int tap;

        /* luma plane */
        for(x=0; x<dstW; x++)
        {
                int acc= 1<<18; /* rounding term: half of the final 1<<19 divisor */

                for(tap=0; tap<lumFilterSize; tap++)
                        acc += lumSrc[tap][x] * lumFilter[tap];

                dest[x]= MIN(MAX(acc>>19, 0), 255);
        }

        /* chroma planes are optional */
        if(uDest == NULL)
                return;

        for(x=0; x<chrDstW; x++)
        {
                int accU= 1<<18;
                int accV= 1<<18;

                for(tap=0; tap<chrFilterSize; tap++)
                {
                        accU += chrSrc[tap][x] * chrFilter[tap];
                        accV += chrSrc[tap][x + 2048] * chrFilter[tap];
                }

                uDest[x]= MIN(MAX(accU>>19, 0), 255);
                vDest[x]= MIN(MAX(accV>>19, 0), 255);
        }
}
252
253 46de8b73 Michael Niedermayer
254 25593e29 Michael Niedermayer
/*
 * Loop prologue for the multi-tap packed-output writers.
 *
 * Expands to the head of "for(i=0; i<(dstW>>1); i++){" and, for every pair
 * of output pixels, computes the vertically filtered values Y1, Y2
 * (pixels i2 and i2+1, with i2 = 2*i) and U, V (chroma sample i; the V
 * sample is read 2048 entries after the U sample), each rounded, shifted
 * down by 19 bits and clipped to 0..255.  It also declares "type *r, *b, *g"
 * for the caller to fill in.
 *
 * The expansion ends INSIDE the for loop: the user of the macro appends the
 * per-pixel statements and the closing brace.
 *
 * Expected in scope at the expansion site: i, dstW, lumSrc, lumFilter,
 * lumFilterSize, chrSrc, chrFilter, chrFilterSize.
 *
 * The clip guard tests every bit above bit 7 ("& ~255"), so it fires for any
 * value outside 0..255.  Testing bit 8 alone would miss values such as 600
 * or -300, which would then be used unclipped.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
                for(i=0; i<(dstW>>1); i++){\
                        int j;\
                        int Y1=1<<18;\
                        int Y2=1<<18;\
                        int U=1<<18;\
                        int V=1<<18;\
                        type *r, *b, *g;\
                        const int i2= 2*i;\
                        \
                        for(j=0; j<lumFilterSize; j++)\
                        {\
                                Y1 += lumSrc[j][i2] * lumFilter[j];\
                                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
                        }\
                        for(j=0; j<chrFilterSize; j++)\
                        {\
                                U += chrSrc[j][i] * chrFilter[j];\
                                V += chrSrc[j][i+2048] * chrFilter[j];\
                        }\
                        Y1>>=19;\
                        Y2>>=19;\
                        U >>=19;\
                        V >>=19;\
                        /* cheap combined range test; the slow path runs only when needed */\
                        if((Y1|Y2|U|V)&~255)\
                        {\
                                if(Y1>255)   Y1=255;\
                                else if(Y1<0)Y1=0;\
                                if(Y2>255)   Y2=255;\
                                else if(Y2<0)Y2=0;\
                                if(U>255)    U=255;\
                                else if(U<0) U=0;\
                                if(V>255)    V=255;\
                                else if(V<0) V=0;\
                        }
289
                        
290
/*
 * Loop prologues for the packed-output writers.
 *
 * Every macro below expands to the head of a "for(i=0; i<(dstW>>1); i++){"
 * loop that leaves Y1, Y2 (pixels i2 and i2+1, i2 = 2*i) and U, V (chroma
 * sample i; V is read 2048 entries after U) in local variables.  The
 * expansion ends INSIDE the loop; the user appends the per-pixel stores and
 * the closing brace.
 *
 * The *_RGB* variants additionally declare "type *r, *b, *g" (the X variant
 * gets the declaration from YSCALE_YUV_2_PACKEDX_C) and load them from the
 * context: r from c->table_rV[V], g from c->table_gU[U] + c->table_gV[V],
 * b from c->table_bU[U].
 */

/* multi-tap vertical filter (see YSCALE_YUV_2_PACKEDX_C) + table lookup */
#define YSCALE_YUV_2_RGBX_C(type) \
        YSCALE_YUV_2_PACKEDX_C(type)\
        r = c->table_rV[V];\
        g = c->table_gU[U] + c->table_gV[V];\
        b = c->table_bU[U];

/* two-line blend: buf0/uvbuf0 weighted by yalpha1/uvalpha1, buf1/uvbuf1 by yalpha/uvalpha */
#define YSCALE_YUV_2_PACKED2_C \
        for(i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
            int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
            int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
            int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

#define YSCALE_YUV_2_RGB2_C(type) \
        YSCALE_YUV_2_PACKED2_C\
        type *r, *b, *g;\
        r = c->table_rV[V];\
        g = c->table_gU[U] + c->table_gV[V];\
        b = c->table_bU[U];

/* single line: luma from buf0, chroma from uvbuf1, both shifted down by 7 */
#define YSCALE_YUV_2_PACKED1_C \
        for(i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= buf0[i2  ]>>7;\
            int Y2= buf0[i2+1]>>7;\
            int U= (uvbuf1[i     ])>>7;\
            int V= (uvbuf1[i+2048])>>7;

#define YSCALE_YUV_2_RGB1_C(type) \
        YSCALE_YUV_2_PACKED1_C\
        type *r, *b, *g;\
        r = c->table_rV[V];\
        g = c->table_gU[U] + c->table_gV[V];\
        b = c->table_bU[U];

/* single luma line, chroma is the sum of uvbuf0 and uvbuf1 shifted down by 8 */
#define YSCALE_YUV_2_PACKED1B_C \
        for(i=0; i<(dstW>>1); i++){\
            const int i2= 2*i;\
            int Y1= buf0[i2  ]>>7;\
            int Y2= buf0[i2+1]>>7;\
            int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
            int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

#define YSCALE_YUV_2_RGB1B_C(type) \
        YSCALE_YUV_2_PACKED1B_C\
        type *r, *b, *g;\
        r = c->table_rV[V];\
        g = c->table_gU[U] + c->table_gV[V];\
        b = c->table_bU[U];
340
341 46de8b73 Michael Niedermayer
/*
 * Expands to the complete "switch(c->dstFormat)" that stores one line of
 * packed output.
 *
 *   func(type) - loop prologue used for the RGB/BGR cases; it must open the
 *                pixel-pair loop and provide i2, Y1, Y2 and the lookup
 *                pointers r, g, b (e.g. one of the YSCALE_YUV_2_RGB*_C macros)
 *   func2      - loop prologue used for YUY2/UYVY; it must open the loop and
 *                provide i2, Y1, Y2, U, V
 *
 * Each case appends its stores and closes the loop opened by the prologue.
 * Expected in scope at the expansion site: c, dest, i, y, dstW and whatever
 * the chosen prologues need.  The 1 bit-per-pixel case does not use func at
 * all: it reads buf0, buf1, yalpha and yalpha1 directly.
 * Formats without a case label fall through the switch untouched.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
    switch(c->dstFormat)\
    {\
    /* 32 bit: one word per pixel, sum of the three table entries */\
    case IMGFMT_BGR32:\
    case IMGFMT_RGB32:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }\
        break;\
    /* 24 bit: three bytes per pixel, dest advances by 6 per pixel pair */\
    case IMGFMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case IMGFMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    /* 16 bit: 2x2 dither offsets added to the table index, selected by line parity */\
    case IMGFMT_RGB16:\
    case IMGFMT_BGR16:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    /* 15 bit: as above, but all three channels use dither_2x2_8 */\
    case IMGFMT_RGB15:\
    case IMGFMT_BGR15:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    /* 8 bit: one byte per pixel, 8x8 dither row selected by y&7 */\
    case IMGFMT_RGB8:\
    case IMGFMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    /* 4 bit packed: two pixels per byte, second pixel in the high nibble */\
    case IMGFMT_RGB4:\
    case IMGFMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    /* 4 bit, one pixel per byte */\
    case IMGFMT_RG4B:\
    case IMGFMT_BG4B:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    /* 1 bit: eight pixels are shifted into acc and stored as one byte; */\
    /* any trailing group of fewer than 8 pixels is not written */\
    case IMGFMT_RGB1:\
    case IMGFMT_BGR1:\
        {\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            uint8_t *g= c->table_gU[128] + c->table_gV[128];\
            for(i=0; i<dstW-7; i+=8){\
                int acc;\
                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                ((uint8_t*)dest)[0]= acc;\
                dest++;\
            }\
            /* a commented-out error-diffusion experiment was removed here */\
        }\
        break;\
    /* packed YUV: 4 bytes per pixel pair, no table lookup */\
    case IMGFMT_YUY2:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }\
        break;\
    case IMGFMT_UYVY:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }\
        break;\
    }
518
519
520 25593e29 Michael Niedermayer
/**
 * Plain C writer for one packed output line, selected by c->dstFormat.
 *
 * Only the 1 bit-per-pixel case is fully spelled out here: it sums
 * lumSrc[j][i]*lumFilter[j] over the lumFilterSize taps for every column i.
 * All other cases delegate the filtering and the loop header to the
 * YSCALE_YUV_2_RGBX_C / YSCALE_YUV_2_PACKEDX_C macros, which are defined
 * outside this view; the names r, g, b, Y1, Y2, U, V and i2 used in those
 * cases are not declared in this function and presumably come from the
 * macros, as does the use of chrFilter/chrSrc/chrFilterSize -- TODO confirm.
 *
 * @param c     scaler context; dstFormat and (for 1bpp) table_gU/table_gV are read
 * @param dest  output line, reinterpreted per format (uint8_t/uint16_t/uint32_t)
 * @param dstW  number of output pixels
 * @param y     output row index, used only to select dither table rows
 *
 * A dstFormat that matches no case falls through the switch and writes nothing.
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
521 e3d2500f Michael Niedermayer
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
522 cf7d1c1a Michael Niedermayer
                                    uint8_t *dest, int dstW, int y)
523 e3d2500f Michael Niedermayer
{
524 cf7d1c1a Michael Niedermayer
        int i;
525
        switch(c->dstFormat)
526 e3d2500f Michael Niedermayer
        {
527 cf7d1c1a Michael Niedermayer
        case IMGFMT_RGB32:
528
        case IMGFMT_BGR32:
529
                // two 32 bit pixels per iteration; each is the sum of three table lookups.
                // NOTE(review): the '}' after the two stores closes a block opened inside the macro.
                YSCALE_YUV_2_RGBX_C(uint32_t)
530
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
531
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
532 e3d2500f Michael Niedermayer
                }
533 cf7d1c1a Michael Niedermayer
                break;
534
        case IMGFMT_RGB24:
535
                // 3 bytes per pixel, two pixels per iteration: dest itself is advanced by 6
                YSCALE_YUV_2_RGBX_C(uint8_t)
536
                        ((uint8_t*)dest)[0]= r[Y1];
537
                        ((uint8_t*)dest)[1]= g[Y1];
538
                        ((uint8_t*)dest)[2]= b[Y1];
539
                        ((uint8_t*)dest)[3]= r[Y2];
540
                        ((uint8_t*)dest)[4]= g[Y2];
541
                        ((uint8_t*)dest)[5]= b[Y2];
542 ae4cffd9 D Richard Felker III
                        dest+=6;
543 cf7d1c1a Michael Niedermayer
                }
544
                break;
545
        case IMGFMT_BGR24:
546
                // same as RGB24 with the r and b bytes swapped
                YSCALE_YUV_2_RGBX_C(uint8_t)
547
                        ((uint8_t*)dest)[0]= b[Y1];
548
                        ((uint8_t*)dest)[1]= g[Y1];
549
                        ((uint8_t*)dest)[2]= r[Y1];
550
                        ((uint8_t*)dest)[3]= b[Y2];
551
                        ((uint8_t*)dest)[4]= g[Y2];
552
                        ((uint8_t*)dest)[5]= r[Y2];
553 ae4cffd9 D Richard Felker III
                        dest+=6;
554 cf7d1c1a Michael Niedermayer
                }
555
                break;
556
        case IMGFMT_RGB16:
557
        case IMGFMT_BGR16:
558
                {
559
                        // dither offsets are picked by row parity (y&1) and by first/second pixel
                        // of the pair; they are added to the luma index before the table lookup.
                        // Green uses the dither_2x2_4 table, red and blue use dither_2x2_8.
                        const int dr1= dither_2x2_8[y&1    ][0];
560
                        const int dg1= dither_2x2_4[y&1    ][0];
561
                        const int db1= dither_2x2_8[(y&1)^1][0];
562
                        const int dr2= dither_2x2_8[y&1    ][1];
563
                        const int dg2= dither_2x2_4[y&1    ][1];
564
                        const int db2= dither_2x2_8[(y&1)^1][1];
565
                        YSCALE_YUV_2_RGBX_C(uint16_t)
566
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
567
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
568 e3d2500f Michael Niedermayer
                        }
569
                }
570 cf7d1c1a Michael Niedermayer
                break;
571
        case IMGFMT_RGB15:
572
        case IMGFMT_BGR15:
573
                {
574
                        // all three channels use dither_2x2_8 here; note that green takes the
                        // opposite column ([1] for the first pixel, [0] for the second) from red.
                        const int dr1= dither_2x2_8[y&1    ][0];
575
                        const int dg1= dither_2x2_8[y&1    ][1];
576
                        const int db1= dither_2x2_8[(y&1)^1][0];
577
                        const int dr2= dither_2x2_8[y&1    ][1];
578
                        const int dg2= dither_2x2_8[y&1    ][0];
579
                        const int db2= dither_2x2_8[(y&1)^1][1];
580
                        YSCALE_YUV_2_RGBX_C(uint16_t)
581
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
582
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
583 e3d2500f Michael Niedermayer
                        }
584 cf7d1c1a Michael Niedermayer
                }
585
                break;
586
        case IMGFMT_RGB8:
587
        case IMGFMT_BGR8:
588
                {
589
                        // 8x8 dither tables: row chosen by y&7, column by the pixel index &7
                        const uint8_t * const d64= dither_8x8_73[y&7];
590
                        const uint8_t * const d32= dither_8x8_32[y&7];
591
                        YSCALE_YUV_2_RGBX_C(uint8_t)
592
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
593
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
594 e3d2500f Michael Niedermayer
                        }
595
                }
596 cf7d1c1a Michael Niedermayer
                break;
597
        case IMGFMT_RGB4:
598
        case IMGFMT_BGR4:
599
                {
600
                        const uint8_t * const d64= dither_8x8_73 [y&7];
601
                        const uint8_t * const d128=dither_8x8_220[y&7];
602
                        // one byte is stored per iteration (index i, not i2): the first pixel's
                        // sum unshifted plus the second pixel's sum shifted left by 4
                        YSCALE_YUV_2_RGBX_C(uint8_t)
603 799fd467 Michael Niedermayer
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
604 f17457ac Michael Niedermayer
                                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
605
                        }
606
                }
607
                break;
608
        case IMGFMT_RG4B:
609
        case IMGFMT_BG4B:
610
                {
611
                        // same lookups as the RGB4/BGR4 case, but each pixel gets its own byte
                        const uint8_t * const d64= dither_8x8_73 [y&7];
612
                        const uint8_t * const d128=dither_8x8_220[y&7];
613
                        YSCALE_YUV_2_RGBX_C(uint8_t)
614 cf7d1c1a Michael Niedermayer
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
615
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
616 e3d2500f Michael Niedermayer
                        }
617 cf7d1c1a Michael Niedermayer
                }
618
                break;
619
        case IMGFMT_RGB1:
620
        case IMGFMT_BGR1:
621
                {
622
                        // 1 bit per pixel: only luma is filtered; here i counts pixels (step 2)
                        const uint8_t * const d128=dither_8x8_220[y&7];
623
                        // lookup table built from the context's gU/gV entries at index 128
                        // (presumably the neutral-chroma entry -- TODO confirm)
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];
624
                        int acc=0;
625
                        for(i=0; i<dstW-1; i+=2){
626
                                int j;
627 379a2036 Michael Niedermayer
                                // start at 1<<18 so that the later >>19 rounds to nearest
                                int Y1=1<<18;
628
                                int Y2=1<<18;
629 cf7d1c1a Michael Niedermayer
630
                                for(j=0; j<lumFilterSize; j++)
631
                                {
632
                                        Y1 += lumSrc[j][i] * lumFilter[j];
633
                                        Y2 += lumSrc[j][i+1] * lumFilter[j];
634
                                }
635
                                Y1>>=19;
636
                                Y2>>=19;
637
                                // bit 8 set in either value is used as the trigger to clamp both to 0..255
                                if((Y1|Y2)&256)
638
                                {
639
                                        if(Y1>255)   Y1=255;
640
                                        else if(Y1<0)Y1=0;
641
                                        if(Y2>255)   Y2=255;
642
                                        else if(Y2<0)Y2=0;
643
                                }
644
                                // acc += acc + x  ==  acc = 2*acc + x : shift in one table value per pixel
                                acc+= acc + g[Y1+d128[(i+0)&7]];
645
                                acc+= acc + g[Y2+d128[(i+1)&7]];
646
                                // after the 8th pixel of a group (i = 6, 14, ...) store the low byte of acc.
                                // NOTE(review): pixels of a trailing group of fewer than 8 are never stored.
                                if((i&7)==6){
647
                                        ((uint8_t*)dest)[0]= acc;
648 ae4cffd9 D Richard Felker III
                                        dest++;
649 cf7d1c1a Michael Niedermayer
                                }
650 e3d2500f Michael Niedermayer
                        }
651
                }
652 cf7d1c1a Michael Niedermayer
                break;
653 46de8b73 Michael Niedermayer
        case IMGFMT_YUY2:
654 25593e29 Michael Niedermayer
                // packed 4:2:2, byte order Y1 U Y2 V (4 bytes per pixel pair)
                YSCALE_YUV_2_PACKEDX_C(void)
655 46de8b73 Michael Niedermayer
                        ((uint8_t*)dest)[2*i2+0]= Y1;
656
                        ((uint8_t*)dest)[2*i2+1]= U;
657
                        ((uint8_t*)dest)[2*i2+2]= Y2;
658
                        ((uint8_t*)dest)[2*i2+3]= V;
659
                }
660
                break;
661 caeaabe7 Alex Beregszaszi
        case IMGFMT_UYVY:
662
                // packed 4:2:2, byte order U Y1 V Y2 (4 bytes per pixel pair)
                YSCALE_YUV_2_PACKEDX_C(void)
663
                        ((uint8_t*)dest)[2*i2+0]= U;
664
                        ((uint8_t*)dest)[2*i2+1]= Y1;
665
                        ((uint8_t*)dest)[2*i2+2]= V;
666
                        ((uint8_t*)dest)[2*i2+3]= Y2;
667
                }
668
                break;
669 e3d2500f Michael Niedermayer
        }
670
}
671
672
673 7630f2e0 Michael Niedermayer
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
674
//Plain C versions
675 726a959a Michael Niedermayer
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
676
#define COMPILE_C
677
#endif
678
679 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
680
#ifdef HAVE_ALTIVEC
681
#define COMPILE_ALTIVEC
682
#endif //HAVE_ALTIVEC
683
#endif //ARCH_POWERPC
684
685 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
686 726a959a Michael Niedermayer
687
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
688
#define COMPILE_MMX
689
#endif
690
691
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
692
#define COMPILE_MMX2
693
#endif
694
695
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
696
#define COMPILE_3DNOW
697
#endif
698 6e1c66bc Aurelien Jacobs
#endif //ARCH_X86 || ARCH_X86_64
699 726a959a Michael Niedermayer
700
#undef HAVE_MMX
701
#undef HAVE_MMX2
702
#undef HAVE_3DNOW
703
704
#ifdef COMPILE_C
705 7630f2e0 Michael Niedermayer
#undef HAVE_MMX
706
#undef HAVE_MMX2
707
#undef HAVE_3DNOW
708 a2faa401 Romain Dolbeau
#undef HAVE_ALTIVEC
709 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _C
710
#include "swscale_template.c"
711 726a959a Michael Niedermayer
#endif
712 397c035e Michael Niedermayer
713 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
714
#ifdef COMPILE_ALTIVEC
715
#undef RENAME
716
#define HAVE_ALTIVEC
717
#define RENAME(a) a ## _altivec
718
#include "swscale_template.c"
719
#endif
720
#endif //ARCH_POWERPC
721
722 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
723 397c035e Michael Niedermayer
724 7630f2e0 Michael Niedermayer
//X86 versions
725
/*
726
#undef RENAME
727
#undef HAVE_MMX
728
#undef HAVE_MMX2
729
#undef HAVE_3DNOW
730
#define ARCH_X86
731
#define RENAME(a) a ## _X86
732
#include "swscale_template.c"
733 1faf0867 Michael Niedermayer
*/
734 7630f2e0 Michael Niedermayer
//MMX versions
735 726a959a Michael Niedermayer
#ifdef COMPILE_MMX
736 7630f2e0 Michael Niedermayer
#undef RENAME
737
#define HAVE_MMX
738
#undef HAVE_MMX2
739
#undef HAVE_3DNOW
740
#define RENAME(a) a ## _MMX
741
#include "swscale_template.c"
742 726a959a Michael Niedermayer
#endif
743 7630f2e0 Michael Niedermayer
744
//MMX2 versions
745 726a959a Michael Niedermayer
#ifdef COMPILE_MMX2
746 7630f2e0 Michael Niedermayer
#undef RENAME
747
#define HAVE_MMX
748
#define HAVE_MMX2
749
#undef HAVE_3DNOW
750
#define RENAME(a) a ## _MMX2
751
#include "swscale_template.c"
752 726a959a Michael Niedermayer
#endif
753 7630f2e0 Michael Niedermayer
754
//3DNOW versions
755 726a959a Michael Niedermayer
#ifdef COMPILE_3DNOW
756 7630f2e0 Michael Niedermayer
#undef RENAME
757
#define HAVE_MMX
758
#undef HAVE_MMX2
759
#define HAVE_3DNOW
760
#define RENAME(a) a ## _3DNow
761
#include "swscale_template.c"
762 726a959a Michael Niedermayer
#endif
763 7630f2e0 Michael Niedermayer
764 6e1c66bc Aurelien Jacobs
#endif //ARCH_X86 || ARCH_X86_64
765 7630f2e0 Michael Niedermayer
766 77a416e8 Gabucino
// minor note: the HAVE_xyz is messed up after that line so don't use it
767 d604bab9 Michael Niedermayer
768 a86c461c Michael Niedermayer
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * On the first unit interval the value is a + b*dist + c*dist^2 + d*dist^3.
 * For every further unit interval the polynomial coefficients are replaced by
 * the recurrence below (constant term reset to 0) and dist is reduced by 1.
 *
 * @param a,b,c,d  coefficients of the cubic on the first interval
 * @param dist     distance at which to evaluate
 * @return the value of the piece that contains dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
        for(;;)
        {
                double nextB, nextC, nextD;

                // same test as the recursive formulation: stop as soon as dist<=1.0
                if(dist<=1.0)
                        return ((d*dist + c)*dist + b)*dist +a;

                // coefficients of the next piece, all computed from the current ones
                nextB=  b+ 2.0*c + 3.0*d;
                nextC=          c + 3.0*d;
                nextD= -b- 3.0*c - 6.0*d;

                a= 0.0;
                b= nextB;
                c= nextC;
                d= nextD;
                dist= dist-1.0;
        }
}
778 6c7506de Michael Niedermayer
779 c7f822d9 Michael Niedermayer
/**
 * Build the filter coefficients and source positions for one scaling direction.
 *
 * For each of the dstW output samples a row of taps is computed in double
 * precision (shape selected by flags), optionally convolved with srcFilter,
 * trimmed and aligned to a common tap count, folded back into the source
 * range [0, srcW) and finally stored as int16_t values scaled so that each
 * row sums to `one` (rounding error is carried from tap to tap).
 *
 * @param outFilter     receives a buffer allocated here: (dstW+1) rows of
 *                      *outFilterSize coefficients; the extra row is a copy of the last one
 * @param filterPos     receives a buffer allocated here: dstW+1 source indices of the
 *                      first tap of each row; the extra entry is a copy of the last one
 * @param outFilterSize receives the final number of taps per row
 * @param xInc          source step per output sample, 16.16 fixed point (0x10000 = 1.0)
 * @param srcW          source length
 * @param dstW          destination length
 * @param filterAlign   the tap count is rounded up with (n + align-1) & ~(align-1), which
 *                      is only a round-up to a multiple when align is a power of two
 * @param one           integer value each coefficient row is normalized to
 * @param flags         SWS_* scaler selection and SWS_CPU_CAPS_* bits
 * @param srcFilter     optional vector convolved into every row (may be NULL)
 * @param dstFilter     optional (may be NULL); only its length enters filter2Size,
 *                      the vector itself is not applied (see the FIXME below)
 * @param param         tuning values; SWS_PARAM_DEFAULT selects the built-in defaults
 *
 * NOTE(review): none of the memalign() results is checked. *outFilter and
 * *filterPos are not freed in this function -- presumably the caller owns them.
 */
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
780
                              int srcW, int dstW, int filterAlign, int one, int flags,
781 66d1cdb6 Michael Niedermayer
                              SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
782 28bf81c9 Michael Niedermayer
{
783
        int i;
784 c7f822d9 Michael Niedermayer
        int filterSize;
785
        int filter2Size;
786
        int minFilterSize;
787
        double *filter=NULL;
788
        double *filter2=NULL;
789 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
790 516b1f82 Michael Niedermayer
        if(flags & SWS_CPU_CAPS_MMX)
791 28bf81c9 Michael Niedermayer
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
792 726a959a Michael Niedermayer
#endif
793 31190492 Arpi
794 adeaecb9 Michael Niedermayer
        // Note the +1 is for the MMXscaler which reads over the end
795 6c7506de Michael Niedermayer
        *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));
796
797 28bf81c9 Michael Niedermayer
        // Step 1: fill filter[dstW][filterSize] (doubles) and (*filterPos)[] by one of four methods.
        // xInc within 10 of 0x10000 is treated as "no scaling": one tap of weight 1 at position i.
        if(ABS(xInc - 0x10000) <10) // unscaled
798
        {
799
                int i;
800 c7f822d9 Michael Niedermayer
                filterSize= 1;
801
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
802
                for(i=0; i<dstW*filterSize; i++) filter[i]=0;
803 28bf81c9 Michael Niedermayer
804
                for(i=0; i<dstW; i++)
805
                {
806 c7f822d9 Michael Niedermayer
                        filter[i*filterSize]=1;
807
                        (*filterPos)[i]=i;
808 28bf81c9 Michael Niedermayer
                }
809
810
        }
811 ff7ba856 Michael Niedermayer
        else if(flags&SWS_POINT) // lame looking point sampling mode
812
        {
813
                int i;
814
                int xDstInSrc;
815
                filterSize= 1;
816
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
817
                
818
                // position of the first output sample in the source, 16.16 fixed point
                xDstInSrc= xInc/2 - 0x8000;
819
                for(i=0; i<dstW; i++)
820
                {
821 8a01d20c Michael Niedermayer
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
822 ff7ba856 Michael Niedermayer
823
                        (*filterPos)[i]= xx;
824
                        filter[i]= 1.0;
825
                        xDstInSrc+= xInc;
826
                }
827
        }
828 a86c461c Michael Niedermayer
        else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
829 28bf81c9 Michael Niedermayer
        {
830
                int i;
831
                int xDstInSrc;
832 c7f822d9 Michael Niedermayer
                // the tap count may be 4, but the weights below are always the linear 1-|distance| form
                if     (flags&SWS_BICUBIC) filterSize= 4;
833
                else if(flags&SWS_X      ) filterSize= 4;
834 d8863d37 Michael Niedermayer
                else                           filterSize= 2; // SWS_BILINEAR / SWS_AREA 
835 c7f822d9 Michael Niedermayer
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
836 28bf81c9 Michael Niedermayer
837
                xDstInSrc= xInc/2 - 0x8000;
838
                for(i=0; i<dstW; i++)
839
                {
840 8a01d20c Michael Niedermayer
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
841 28bf81c9 Michael Niedermayer
                        int j;
842
843 c7f822d9 Michael Niedermayer
                        (*filterPos)[i]= xx;
844 d8863d37 Michael Niedermayer
                                //Bilinear upscale / linear interpolate / Area averaging
845 c7f822d9 Michael Niedermayer
                                for(j=0; j<filterSize; j++)
846 28bf81c9 Michael Niedermayer
                                {
847
                                        double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
848
                                        double coeff= 1.0 - d;
849
                                        if(coeff<0) coeff=0;
850 c7f822d9 Michael Niedermayer
                                        filter[i*filterSize + j]= coeff;
851 28bf81c9 Michael Niedermayer
                                        xx++;
852
                                }
853
                        xDstInSrc+= xInc;
854
                }
855
        }
856 a86c461c Michael Niedermayer
        else
857 28bf81c9 Michael Niedermayer
        {
858 a86c461c Michael Niedermayer
                // general case: floating point positions, kernel chosen by the first matching flag
                double xDstInSrc;
859
                double sizeFactor, filterSizeInSrc;
860
                const double xInc1= (double)xInc / (double)(1<<16);
861
862
                // sizeFactor: width of the kernel support, in samples, when upscaling
                if     (flags&SWS_BICUBIC)        sizeFactor= 4.0;
863
                else if(flags&SWS_X)                sizeFactor= 8.0;
864
                else if(flags&SWS_AREA)                sizeFactor= 1.0; //downscale only, for upscale it is bilinear
865
                else if(flags&SWS_GAUSS)        sizeFactor= 8.0;   // infinite ;)
866 66d1cdb6 Michael Niedermayer
                else if(flags&SWS_LANCZOS)        sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
867 93768378 Michael Niedermayer
                else if(flags&SWS_SINC)                sizeFactor= 20.0; // infinite ;)
868 a86c461c Michael Niedermayer
                else if(flags&SWS_SPLINE)        sizeFactor= 20.0;  // infinite ;)
869
                else if(flags&SWS_BILINEAR)        sizeFactor= 2.0;
870 93768378 Michael Niedermayer
                else {
871
                        sizeFactor= 0.0; //GCC warning killer
872
                        ASSERT(0)
873
                }
874 a86c461c Michael Niedermayer
                
875
                // when downscaling the support is stretched by srcW/dstW
                if(xInc1 <= 1.0)        filterSizeInSrc= sizeFactor; // upscale
876
                else                        filterSizeInSrc= sizeFactor*srcW / (double)dstW;
877 81b7c056 Michael Niedermayer
878 a86c461c Michael Niedermayer
                filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
879
                // NOTE(review): for srcW <= 2 this makes filterSize <= 0; only the ASSERT further down guards it
                if(filterSize > srcW-2) filterSize=srcW-2;
880 28bf81c9 Michael Niedermayer
881 a86c461c Michael Niedermayer
                filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);
882
883
                xDstInSrc= xInc1 / 2.0 - 0.5;
884 28bf81c9 Michael Niedermayer
                for(i=0; i<dstW; i++)
885
                {
886 a86c461c Michael Niedermayer
                        int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
887 28bf81c9 Michael Niedermayer
                        int j;
888 c7f822d9 Michael Niedermayer
                        (*filterPos)[i]= xx;
889
                        for(j=0; j<filterSize; j++)
890 28bf81c9 Michael Niedermayer
                        {
891 a86c461c Michael Niedermayer
                                // distance of tap xx from the output sample, rescaled into the kernel's own units
                                double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
892 28bf81c9 Michael Niedermayer
                                double coeff;
893 a86c461c Michael Niedermayer
                                if(flags & SWS_BICUBIC)
894 28bf81c9 Michael Niedermayer
                                {
895 66d1cdb6 Michael Niedermayer
                                        // two-parameter cubic; defaults are B=0.0, C=0.6 (result is not normalized here)
                                        double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
896
                                        double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
897
898
                                        if(d<1.0) 
899
                                                coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
900 28bf81c9 Michael Niedermayer
                                        else if(d<2.0)
901 66d1cdb6 Michael Niedermayer
                                                coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
902 28bf81c9 Michael Niedermayer
                                        else
903
                                                coeff=0.0;
904
                                }
905 a86c461c Michael Niedermayer
/*                                else if(flags & SWS_X)
906
                                {
907
                                        double p= param ? param*0.01 : 0.3;
908
                                        coeff = d ? sin(d*PI)/(d*PI) : 1.0;
909
                                        coeff*= pow(2.0, - p*d*d);
910
                                }*/
911
                                else if(flags & SWS_X)
912
                                {
913 66d1cdb6 Michael Niedermayer
                                        double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
914 a86c461c Michael Niedermayer
                                        
915
                                        if(d<1.0)
916
                                                coeff = cos(d*PI);
917
                                        else
918
                                                coeff=-1.0;
919
                                        // sign-preserving power, then map the range [-1,1] to [0,1]
                                        if(coeff<0.0)         coeff= -pow(-coeff, A);
920
                                        else                coeff=  pow( coeff, A);
921
                                        coeff= coeff*0.5 + 0.5;
922
                                }
923 d8863d37 Michael Niedermayer
                                else if(flags & SWS_AREA)
924 28bf81c9 Michael Niedermayer
                                {
925 a86c461c Michael Niedermayer
                                        double srcPixelSize= 1.0/xInc1;
926 d8863d37 Michael Niedermayer
                                        if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
927
                                        else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
928
                                        else coeff=0.0;
929
                                }
930 a86c461c Michael Niedermayer
                                else if(flags & SWS_GAUSS)
931
                                {
932 66d1cdb6 Michael Niedermayer
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
933 a86c461c Michael Niedermayer
                                        coeff = pow(2.0, - p*d*d);
934
                                }
935
                                else if(flags & SWS_SINC)
936
                                {
937
                                        coeff = d ? sin(d*PI)/(d*PI) : 1.0;
938
                                }
939
                                else if(flags & SWS_LANCZOS)
940
                                {
941 66d1cdb6 Michael Niedermayer
                                        double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; 
942 a86c461c Michael Niedermayer
                                        coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
943
                                        if(d>p) coeff=0;
944
                                }
945
                                else if(flags & SWS_BILINEAR)
946 28bf81c9 Michael Niedermayer
                                {
947
                                        coeff= 1.0 - d;
948
                                        if(coeff<0) coeff=0;
949
                                }
950 a86c461c Michael Niedermayer
                                else if(flags & SWS_SPLINE)
951
                                {
952
                                        double p=-2.196152422706632;
953
                                        coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
954
                                }
955 93768378 Michael Niedermayer
                                else {
956
                                        coeff= 0.0; //GCC warning killer
957
                                        ASSERT(0)
958
                                }
959 a86c461c Michael Niedermayer
960 c7f822d9 Michael Niedermayer
                                filter[i*filterSize + j]= coeff;
961 28bf81c9 Michael Niedermayer
                                xx++;
962
                        }
963 a86c461c Michael Niedermayer
                        xDstInSrc+= xInc1;
964 28bf81c9 Michael Niedermayer
                }
965
        }
966
967 c7f822d9 Michael Niedermayer
        /* apply src & dst Filter to filter -> filter2
968
           free(filter);
969
        */
970 81b7c056 Michael Niedermayer
        ASSERT(filterSize>0)
971 c7f822d9 Michael Niedermayer
        // Step 2: filter2 rows are wide enough for the convolution with srcFilter (and dstFilter's length)
        filter2Size= filterSize;
972
        if(srcFilter) filter2Size+= srcFilter->length - 1;
973
        if(dstFilter) filter2Size+= dstFilter->length - 1;
974 81b7c056 Michael Niedermayer
        ASSERT(filter2Size>0)
975 c7f822d9 Michael Niedermayer
        filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));
976
977
        for(i=0; i<dstW; i++)
978
        {
979
                int j;
980
                SwsVector scaleFilter;
981
                SwsVector *outVec;
982
983
                // wrap row i of filter[] in a SwsVector without copying
                scaleFilter.coeff= filter + i*filterSize;
984
                scaleFilter.length= filterSize;
985
986 d4e24275 Michael Niedermayer
                if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
987 c7f822d9 Michael Niedermayer
                else              outVec= &scaleFilter;
988
989
                ASSERT(outVec->length == filter2Size)
990
                //FIXME dstFilter
991
992
                for(j=0; j<outVec->length; j++)
993
                {
994
                        filter2[i*filter2Size + j]= outVec->coeff[j];
995
                }
996
997
                // keep the row centred: move the start left by the growth in half-width
                (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
998
999 d4e24275 Michael Niedermayer
                if(outVec != &scaleFilter) sws_freeVec(outVec);
1000 c7f822d9 Michael Niedermayer
        }
1001
        free(filter); filter=NULL;
1002
1003
        /* try to reduce the filter-size (step1 find size and shift left) */
1004
        // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
1005
        minFilterSize= 0;
1006
        // rows are visited from last to first so that row i+1 is already final when row i is compared to it
        for(i=dstW-1; i>=0; i--)
1007
        {
1008
                int min= filter2Size;
1009
                int j;
1010
                double cutOff=0.0;
1011
1012
                /* get rid off near zero elements on the left by shifting left */
1013
                for(j=0; j<filter2Size; j++)
1014
                {
1015
                        int k;
1016
                        // always element 0: the row is shifted left by one on every pass
                        cutOff += ABS(filter2[i*filter2Size]);
1017
1018
                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1019
1020 77a416e8 Gabucino
                        /* preserve Monotonicity because the core can't handle the filter otherwise */
1021 c7f822d9 Michael Niedermayer
                        if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1022
1023
                        // Move filter coeffs left
1024
                        for(k=1; k<filter2Size; k++)
1025
                                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1026
                        filter2[i*filter2Size + k - 1]= 0.0;
1027
                        (*filterPos)[i]++;
1028
                }
1029
1030
                cutOff=0.0;
1031
                /* count near zeros on the right */
1032
                for(j=filter2Size-1; j>0; j--)
1033
                {
1034
                        cutOff += ABS(filter2[i*filter2Size + j]);
1035
1036
                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1037
                        min--;
1038
                }
1039
1040
                // the common tap count is the largest one any row still needs
                if(min>minFilterSize) minFilterSize= min;
1041
        }
1042
1043 8c266f0c Romain Dolbeau
        if (flags & SWS_CPU_CAPS_ALTIVEC) {
1044
          // we can handle the special case 4,
1045
          // so we don't want to go to the full 8
1046
          if (minFilterSize < 5)
1047
            filterAlign = 4;
1048
1049
          // we really don't want to waste our time
1050
          // doing useless computation, so fall-back on
1051
          // the scalar C code for very small filter.
1052
          // vectorizing is worth it only if you have
1053
          // decent-sized vector.
1054
          if (minFilterSize < 3)
1055
            filterAlign = 1;
1056
        }
1057
1058 81b7c056 Michael Niedermayer
        ASSERT(minFilterSize > 0)
1059 6c7506de Michael Niedermayer
        // round minFilterSize up to filterAlign
        filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
1060 81b7c056 Michael Niedermayer
        ASSERT(filterSize > 0)
1061 6c7506de Michael Niedermayer
        filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
1062
        *outFilterSize= filterSize;
1063
1064 4a53a912 Alban Bedel
        if(flags&SWS_PRINT_INFO)
1065 0d9f3d85 Arpi
                MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1066 c7f822d9 Michael Niedermayer
        /* try to reduce the filter-size (step2 reduce it) */
1067
        // copy the first filterSize taps of each filter2 row, zero-padding if the aligned size is larger
        for(i=0; i<dstW; i++)
1068
        {
1069
                int j;
1070
1071 6c7506de Michael Niedermayer
                for(j=0; j<filterSize; j++)
1072
                {
1073
                        if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
1074
                        else                   filter[i*filterSize + j]= filter2[i*filter2Size + j];
1075
                }
1076 c7f822d9 Michael Niedermayer
        }
1077 6c7506de Michael Niedermayer
        free(filter2); filter2=NULL;
1078
        
1079 c7f822d9 Michael Niedermayer
1080
        //FIXME try to align filterpos if possible
1081
1082 28bf81c9 Michael Niedermayer
        //fix borders
1083
        for(i=0; i<dstW; i++)
1084
        {
1085
                int j;
1086 c7f822d9 Michael Niedermayer
                // row starts before the source: add each tap into the slot it would have with position 0
                // (taps that would land left of the source are accumulated into slot 0)
                if((*filterPos)[i] < 0)
1087 28bf81c9 Michael Niedermayer
                {
1088
                        // Move filter coeffs left to compensate for filterPos
1089 6c7506de Michael Niedermayer
                        for(j=1; j<filterSize; j++)
1090 28bf81c9 Michael Niedermayer
                        {
1091 c7f822d9 Michael Niedermayer
                                int left= MAX(j + (*filterPos)[i], 0);
1092 6c7506de Michael Niedermayer
                                filter[i*filterSize + left] += filter[i*filterSize + j];
1093
                                filter[i*filterSize + j]=0;
1094 28bf81c9 Michael Niedermayer
                        }
1095 c7f822d9 Michael Niedermayer
                        (*filterPos)[i]= 0;
1096 28bf81c9 Michael Niedermayer
                }
1097
1098 6c7506de Michael Niedermayer
                // row runs past the source end: mirror image of the case above, accumulating into the last slot
                if((*filterPos)[i] + filterSize > srcW)
1099 28bf81c9 Michael Niedermayer
                {
1100 6c7506de Michael Niedermayer
                        int shift= (*filterPos)[i] + filterSize - srcW;
1101 28bf81c9 Michael Niedermayer
                        // Move filter coeffs right to compensate for filterPos
1102 6c7506de Michael Niedermayer
                        for(j=filterSize-2; j>=0; j--)
1103 28bf81c9 Michael Niedermayer
                        {
1104 6c7506de Michael Niedermayer
                                int right= MIN(j + shift, filterSize-1);
1105
                                filter[i*filterSize +right] += filter[i*filterSize +j];
1106
                                filter[i*filterSize +j]=0;
1107 28bf81c9 Michael Niedermayer
                        }
1108 6c7506de Michael Niedermayer
                        (*filterPos)[i]= srcW - filterSize;
1109 28bf81c9 Michael Niedermayer
                }
1110
        }
1111
1112 6c7506de Michael Niedermayer
        // Note the +1 is for the MMXscaler which reads over the end
1113
        *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
1114
        memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));
1115 c7f822d9 Michael Niedermayer
1116
        /* Normalize & Store in outFilter */
1117 28bf81c9 Michael Niedermayer
        for(i=0; i<dstW; i++)
1118
        {
1119
                int j;
1120 ff490720 Michael Niedermayer
                double error=0;
1121 28bf81c9 Michael Niedermayer
                double sum=0;
1122
                double scale= one;
1123 ff490720 Michael Niedermayer
1124 6c7506de Michael Niedermayer
                for(j=0; j<filterSize; j++)
1125 28bf81c9 Michael Niedermayer
                {
1126 6c7506de Michael Niedermayer
                        sum+= filter[i*filterSize + j];
1127 28bf81c9 Michael Niedermayer
                }
1128
                // NOTE(review): a row whose taps sum to 0 makes this a division by zero
                scale/= sum;
1129 93768378 Michael Niedermayer
                for(j=0; j<*outFilterSize; j++)
1130 28bf81c9 Michael Niedermayer
                {
1131 ff490720 Michael Niedermayer
                        // round to nearest and carry the rounding error into the next tap
                        double v= filter[i*filterSize + j]*scale + error;
1132
                        int intV= floor(v + 0.5);
1133
                        (*outFilter)[i*(*outFilterSize) + j]= intV;
1134
                        error = v - intV;
1135 28bf81c9 Michael Niedermayer
                }
1136
        }
1137 adeaecb9 Michael Niedermayer
        
1138
        (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1139
        // likewise duplicate the last coefficient row into the extra row dstW
        for(i=0; i<*outFilterSize; i++)
1140
        {
1141
                int j= dstW*(*outFilterSize);
1142
                (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1143
        }
1144 c7f822d9 Michael Niedermayer
1145 6c7506de Michael Niedermayer
        free(filter);
1146 7630f2e0 Michael Niedermayer
}
1147 31190492 Arpi
1148 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1149 b7dc6f66 Michael Niedermayer
/*
 * Builds the run-time generated horizontal scaling code and its tables.
 *
 * Two template code fragments are defined below as inline asm. Fragment A
 * loads source bytes with two movd (at offset 0 and offset 1), fragment B
 * with a single movd. For every group of 4 output pixels one fragment is
 * copied into funnyCode and the immediate bytes of its two pshufw
 * instructions are patched with the per-pixel source offsets. A RET opcode
 * is written behind the most recently copied fragment.
 *
 * dstW       number of output pixels; the loop covers dstW/numSplits of them
 * xInc       source step per output pixel in 16.16 fixed point (xpos>>16 is
 *            the integer source position, xpos&0xFFFF the fraction)
 * funnyCode  receives the generated machine code
 * filter     receives one 7 bit weight per output pixel (inverted fraction >>9)
 * filterPos  receives the source position of each group of 4 pixels at index
 *            i/2, plus one more entry written after the loop
 * numSplits  divisor applied to dstW
 *
 * NOTE(review): nothing in this function bounds the writes to funnyCode,
 * filter or filterPos; presumably the caller sizes them - confirm.
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1150 28bf81c9 Michael Niedermayer
{
1151 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentA;
1152 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1A;
1153
        long imm8OfPShufW2A;
1154
        long fragmentLengthA;
1155 b7dc6f66 Michael Niedermayer
        uint8_t *fragmentB;
1156 6e1c66bc Aurelien Jacobs
        long imm8OfPShufW1B;
1157
        long imm8OfPShufW2B;
1158
        long fragmentLengthB;
1159 b7dc6f66 Michael Niedermayer
        int fragmentPos;
1160 28bf81c9 Michael Niedermayer
1161
        int xpos, i;
1162
1163
        // create an optimized horizontal scaling routine
1164
1165
        //code fragment
1166
1167
        /*
         * Template A. The leading jmp skips the template, so it is never
         * executed here; only the code after label 9 runs. That code yields:
         *   %0 = address of label 0 (start of the template)
         *   %1 = (label 1 - 1) - label 0, offset of the last byte before label 1
         *   %2 = (label 2 - 1) - label 0, offset of the last byte before label 2
         *   %3 = label 9 - label 0, the template length in bytes
         * The bytes before labels 1 and 2 are the trailing bytes of the two
         * pshufw instructions, i.e. their immediates (hence the variable names).
         */
        asm volatile(
1168
                "jmp 9f                                \n\t"
1169
        // Begin
1170
                "0:                                \n\t"
1171 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1172
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1173
                "movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
1174 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm1                \n\t"
1175
                "punpcklbw %%mm7, %%mm0                \n\t"
1176 28bf81c9 Michael Niedermayer
                "pshufw $0xFF, %%mm1, %%mm1        \n\t"
1177
                "1:                                \n\t"
1178
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1179
                "2:                                \n\t"
1180
                "psubw %%mm1, %%mm0                \n\t"
1181 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1182 28bf81c9 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1183
                "psllw $7, %%mm1                \n\t"
1184
                "paddw %%mm1, %%mm0                \n\t"
1185
1186 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1187 28bf81c9 Michael Niedermayer
1188 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1189 28bf81c9 Michael Niedermayer
        // End
1190
                "9:                                \n\t"
1191
//                "int $3\n\t"
1192 6e1c66bc Aurelien Jacobs
                "lea 0b, %0                        \n\t"
1193
                "lea 1b, %1                        \n\t"
1194
                "lea 2b, %2                        \n\t"
1195
                "dec %1                                \n\t"
1196
                "dec %2                                \n\t"
1197
                "sub %0, %1                        \n\t"
1198
                "sub %0, %2                        \n\t"
1199
                "lea 9b, %3                        \n\t"
1200
                "sub %0, %3                        \n\t"
1201 b7dc6f66 Michael Niedermayer
1202
1203
                :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1204
                "=r" (fragmentLengthA)
1205 28bf81c9 Michael Niedermayer
        );
1206
1207 b7dc6f66 Michael Niedermayer
        /*
         * Template B. Same layout and same address/offset/length computation
         * as template A, but it loads only one dword and derives mm1 from mm0
         * with the first pshufw instead of a second movd.
         */
        asm volatile(
1208
                "jmp 9f                                \n\t"
1209
        // Begin
1210
                "0:                                \n\t"
1211 6e1c66bc Aurelien Jacobs
                "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1212
                "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1213 b7dc6f66 Michael Niedermayer
                "punpcklbw %%mm7, %%mm0                \n\t"
1214
                "pshufw $0xFF, %%mm0, %%mm1        \n\t"
1215
                "1:                                \n\t"
1216
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1217
                "2:                                \n\t"
1218
                "psubw %%mm1, %%mm0                \n\t"
1219 6d606c4f Aurelien Jacobs
                "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t"
1220 b7dc6f66 Michael Niedermayer
                "pmullw %%mm3, %%mm0                \n\t"
1221
                "psllw $7, %%mm1                \n\t"
1222
                "paddw %%mm1, %%mm0                \n\t"
1223
1224 6e1c66bc Aurelien Jacobs
                "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1225 28bf81c9 Michael Niedermayer
1226 6e1c66bc Aurelien Jacobs
                "add $8, %%"REG_a"                \n\t"
1227 b7dc6f66 Michael Niedermayer
        // End
1228
                "9:                                \n\t"
1229
//                "int $3\n\t"
1230 6e1c66bc Aurelien Jacobs
                "lea 0b, %0                        \n\t"
1231
                "lea 1b, %1                        \n\t"
1232
                "lea 2b, %2                        \n\t"
1233
                "dec %1                                \n\t"
1234
                "dec %2                                \n\t"
1235
                "sub %0, %1                        \n\t"
1236
                "sub %0, %2                        \n\t"
1237
                "lea 9b, %3                        \n\t"
1238
                "sub %0, %3                        \n\t"
1239 b7dc6f66 Michael Niedermayer
1240
1241
                :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1242
                "=r" (fragmentLengthB)
1243
        );
1244
1245
        xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1246
        fragmentPos=0;
1247
        
1248
        /* xpos advances by xInc for every output pixel; code and table
           entries are only emitted on every 4th pixel (i&3 == 0) */
        for(i=0; i<dstW/numSplits; i++)
1249 28bf81c9 Michael Niedermayer
        {
1250
                int xx=xpos>>16;
1251
1252
                if((i&3) == 0)
1253
                {
1254
                        /* a..d: integer source offsets of the 4 output pixels
                           of this group, relative to xx */
                        int a=0;
1255
                        int b=((xpos+xInc)>>16) - xx;
1256
                        int c=((xpos+xInc*2)>>16) - xx;
1257
                        int d=((xpos+xInc*3)>>16) - xx;
1258
1259 b7dc6f66 Michael Niedermayer
                        /* weight = inverted 16 bit fraction reduced to 7 bits */
                        filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1260
                        filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1261
                        filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1262
                        filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1263
                        filterPos[i/2]= xx;
1264
1265
                        /* d+1 < 4: the last pixel's offset plus one still fits
                           a 2 bit selector, so the single-load template B is used */
                        if(d+1<4)
1266
                        {
1267
                                int maxShift= 3-(d+1);
1268
                                int shift=0;
1269
1270
                                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1271
1272
                                /* patch the two pshufw immediates of the copy:
                                   four 2 bit fields, one per output pixel */
                                funnyCode[fragmentPos + imm8OfPShufW1B]=
1273
                                        (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1274
                                funnyCode[fragmentPos + imm8OfPShufW2B]=
1275
                                        a | (b<<2) | (c<<4) | (d<<6);
1276
1277
                                if(i+3>=dstW) shift=maxShift; //avoid overread
1278
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1279
1280
                                /* 0x55*shift adds shift to each 2 bit field;
                                   the load position moves back by the same amount */
                                if(shift && i>=shift)
1281
                                {
1282
                                        funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1283
                                        funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1284
                                        filterPos[i/2]-=shift;
1285
                                }
1286
1287
                                fragmentPos+= fragmentLengthB;
1288
                        }
1289
                        else
1290
                        {
1291
                                int maxShift= 3-d;
1292
                                int shift=0;
1293
1294
                                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1295 28bf81c9 Michael Niedermayer
1296 b7dc6f66 Michael Niedermayer
                                /* template A: both pshufw get the same immediate */
                                funnyCode[fragmentPos + imm8OfPShufW1A]=
1297
                                funnyCode[fragmentPos + imm8OfPShufW2A]=
1298
                                        a | (b<<2) | (c<<4) | (d<<6);
1299 28bf81c9 Michael Niedermayer
1300 b7dc6f66 Michael Niedermayer
                                if(i+4>=dstW) shift=maxShift; //avoid overread
1301
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1302 28bf81c9 Michael Niedermayer
1303 b7dc6f66 Michael Niedermayer
                                if(shift && i>=shift)
1304
                                {
1305
                                        funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1306
                                        funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1307
                                        filterPos[i/2]-=shift;
1308
                                }
1309
1310
                                fragmentPos+= fragmentLengthA;
1311
                        }
1312
1313
                        /* terminate the code generated so far; the next group's
                           memcpy starts at this same position and overwrites it */
                        funnyCode[fragmentPos]= RET;
1314 28bf81c9 Michael Niedermayer
                }
1315
                xpos+=xInc;
1316
        }
1317 b7dc6f66 Michael Niedermayer
        filterPos[i/2]= xpos>>16; // needed to jump to the next part
1318 28bf81c9 Michael Niedermayer
}
1319 6e1c66bc Aurelien Jacobs
#endif // ARCH_X86 || ARCH_X86_64
1320 28bf81c9 Michael Niedermayer
1321
static void globalInit(){
1322 31190492 Arpi
    // generating tables:
1323
    int i;
1324 c1b0bfb4 Michael Niedermayer
    for(i=0; i<768; i++){
1325
        int c= MIN(MAX(i-256, 0), 255);
1326
        clip_table[i]=c;
1327 b18ea156 Michael Niedermayer
    }
1328 516b1f82 Michael Niedermayer
}
1329 c1b0bfb4 Michael Niedermayer
1330 516b1f82 Michael Niedermayer
static SwsFunc getSwsFunc(int flags){
1331
    
1332 28bf81c9 Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
1333 6e1c66bc Aurelien Jacobs
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1334 28bf81c9 Michael Niedermayer
        // ordered per speed fasterst first
1335 516b1f82 Michael Niedermayer
        if(flags & SWS_CPU_CAPS_MMX2)
1336
                return swScale_MMX2;
1337
        else if(flags & SWS_CPU_CAPS_3DNOW)
1338
                return swScale_3DNow;
1339
        else if(flags & SWS_CPU_CAPS_MMX)
1340
                return swScale_MMX;
1341 28bf81c9 Michael Niedermayer
        else
1342 516b1f82 Michael Niedermayer
                return swScale_C;
1343 28bf81c9 Michael Niedermayer
1344
#else
1345 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
1346
        if(flags & SWS_CPU_CAPS_ALTIVEC)
1347
          return swScale_altivec;
1348
        else
1349
          return swScale_C;
1350
#endif
1351 516b1f82 Michael Niedermayer
        return swScale_C;
1352 28bf81c9 Michael Niedermayer
#endif
1353
#else //RUNTIME_CPUDETECT
1354
#ifdef HAVE_MMX2
1355 516b1f82 Michael Niedermayer
        return swScale_MMX2;
1356 28bf81c9 Michael Niedermayer
#elif defined (HAVE_3DNOW)
1357 516b1f82 Michael Niedermayer
        return swScale_3DNow;
1358 28bf81c9 Michael Niedermayer
#elif defined (HAVE_MMX)
1359 516b1f82 Michael Niedermayer
        return swScale_MMX;
1360 a2faa401 Romain Dolbeau
#elif defined (HAVE_ALTIVEC)
1361
        return swScale_altivec;
1362 28bf81c9 Michael Niedermayer
#else
1363 516b1f82 Michael Niedermayer
        return swScale_C;
1364 28bf81c9 Michael Niedermayer
#endif
1365
#endif //!RUNTIME_CPUDETECT
1366 31190492 Arpi
}
1367 7630f2e0 Michael Niedermayer
1368 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice from planar YUV to NV12
 * (Y plane followed by one plane of interleaved chroma bytes).
 *
 * NV12 chroma is subsampled by 2 in both directions, so the interleaved
 * plane is written at row srcSliceY/2 and covers half the luma width and
 * half the slice height (both rounded up). The source chroma planes
 * src[1]/src[2] are assumed to use the same 2x2 subsampling - TODO confirm
 * against the caller that selects this wrapper.
 *
 * @param c          context, only srcW is read
 * @param src        source planes, src[0] luma, src[1]/src[2] chroma
 * @param srcStride  byte strides of the source planes
 * @param srcSliceY  first luma row of the slice
 * @param srcSliceH  number of luma rows in the slice
 * @param dstParam   destination planes, [0] luma, [1] interleaved chroma
 * @param dstStride  byte strides of the destination planes
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst= dstParam[0] + dstStride[0]*srcSliceY;
    const int chrW= (c->srcW   + 1)>>1;
    const int chrH= (srcSliceH + 1)>>1;

    /* Copy Y plane */
    /* the one-shot copy is only valid for equal, positive strides */
    if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for(i=0; i<srcSliceH; i++)
        {
            /* copy the visible pixels only; copying srcStride[0] bytes could
               overrun a narrower destination row or be negative */
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    /* Interleave the chroma planes, using chroma-plane geometry throughout */
    dst= dstParam[1] + dstStride[1]*(srcSliceY>>1);
    interleaveBytes( src[1],src[2],dst,chrW,chrH,srcStride[1],srcStride[2],dstStride[1] );

    return srcSliceH;
}
1391
1392 d4e24275 Michael Niedermayer
/**
 * Unscaled planar -> YUY2 conversion of one slice; thin wrapper around
 * yv12toyuy2(). The output starts at row srcSliceY of dstParam[0].
 * Only srcStride[0] and srcStride[1] are passed on; srcStride[2] is not used.
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride= dstStride[0];
    uint8_t * const out= dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1400
1401 caeaabe7 Alex Beregszaszi
/**
 * Unscaled planar -> UYVY conversion of one slice; thin wrapper around
 * yv12touyvy(). The output starts at row srcSliceY of dstParam[0].
 * Only srcStride[0] and srcStride[1] are passed on; srcStride[2] is not used.
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride= dstStride[0];
    uint8_t * const out= dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1409
1410 e09d12f4 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1411 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1412 e09d12f4 Michael Niedermayer
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
1413
        const int srcFormat= c->srcFormat;
1414
        const int dstFormat= c->dstFormat;
1415
        const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
1416
        const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
1417
        const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 
1418
        const int dstId= (dstFormat&0xFF)>>2;
1419
        void (*conv)(const uint8_t *src, uint8_t *dst, unsigned src_size)=NULL;
1420
1421
        /* BGR -> BGR */
1422 20380eb8 Michael Niedermayer
        if(   (isBGR(srcFormat) && isBGR(dstFormat))
1423
           || (isRGB(srcFormat) && isRGB(dstFormat))){
1424 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1425
                case 0x34: conv= rgb16to15; break;
1426
                case 0x36: conv= rgb24to15; break;
1427
                case 0x38: conv= rgb32to15; break;
1428
                case 0x43: conv= rgb15to16; break;
1429
                case 0x46: conv= rgb24to16; break;
1430
                case 0x48: conv= rgb32to16; break;
1431
                case 0x63: conv= rgb15to24; break;
1432
                case 0x64: conv= rgb16to24; break;
1433
                case 0x68: conv= rgb32to24; break;
1434
                case 0x83: conv= rgb15to32; break;
1435
                case 0x84: conv= rgb16to32; break;
1436
                case 0x86: conv= rgb24to32; break;
1437
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1438
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1439 b935781b Michael Niedermayer
                }
1440 20380eb8 Michael Niedermayer
        }else if(   (isBGR(srcFormat) && isRGB(dstFormat))
1441
                 || (isRGB(srcFormat) && isBGR(dstFormat))){
1442 e09d12f4 Michael Niedermayer
                switch(srcId | (dstId<<4)){
1443
                case 0x33: conv= rgb15tobgr15; break;
1444
                case 0x34: conv= rgb16tobgr15; break;
1445
                case 0x36: conv= rgb24tobgr15; break;
1446
                case 0x38: conv= rgb32tobgr15; break;
1447
                case 0x43: conv= rgb15tobgr16; break;
1448
                case 0x44: conv= rgb16tobgr16; break;
1449
                case 0x46: conv= rgb24tobgr16; break;
1450
                case 0x48: conv= rgb32tobgr16; break;
1451
                case 0x63: conv= rgb15tobgr24; break;
1452
                case 0x64: conv= rgb16tobgr24; break;
1453
                case 0x66: conv= rgb24tobgr24; break;
1454
                case 0x68: conv= rgb32tobgr24; break;
1455
                case 0x83: conv= rgb15tobgr32; break;
1456
                case 0x84: conv= rgb16tobgr32; break;
1457
                case 0x86: conv= rgb24tobgr32; break;
1458
                case 0x88: conv= rgb32tobgr32; break;
1459
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1460
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1461 0d9f3d85 Arpi
                }
1462 20380eb8 Michael Niedermayer
        }else{
1463
                MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1464
                         vo_format_name(srcFormat), vo_format_name(dstFormat));
1465 e09d12f4 Michael Niedermayer
        }
1466 20380eb8 Michael Niedermayer
1467 e09d12f4 Michael Niedermayer
        if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1468
                conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1469 0d9f3d85 Arpi
        else
1470
        {
1471
                int i;
1472
                uint8_t *srcPtr= src[0];
1473
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1474
1475
                for(i=0; i<srcSliceH; i++)
1476
                {
1477 e09d12f4 Michael Niedermayer
                        conv(srcPtr, dstPtr, c->srcW*srcBpp);
1478 0d9f3d85 Arpi
                        srcPtr+= srcStride[0];
1479
                        dstPtr+= dstStride[0];
1480
                }
1481
        }     
1482 d4e24275 Michael Niedermayer
        return srcSliceH;
1483 0d9f3d85 Arpi
}
1484
1485 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice through rgb24toyv12().
 * Plane 0 of the destination is addressed at row srcSliceY, planes 1 and 2
 * at row srcSliceY>>1. dstStride[0] and dstStride[1] are passed on as the
 * two destination strides.
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int chromaRow= srcSliceY>>1;
    uint8_t * const plane0= dst[0] + srcSliceY*dstStride[0];
    uint8_t * const plane1= dst[1] + chromaRow*dstStride[1];
    uint8_t * const plane2= dst[2] + chromaRow*dstStride[2];

    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1497
1498 d4e24275 Michael Niedermayer
/**
 * Unscaled YVU9 -> YV12 (or plane-swapped) conversion.
 * The luma slice is copied as is; each chroma plane is doubled in size with
 * planar2x(). The chroma calls always cover c->chrSrcW x c->chrSrcH starting
 * at the plane base, independent of srcSliceY/srcSliceH.
 * When the destination is not IMGFMT_YV12 the two chroma planes are swapped.
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
    uint8_t *lumaDst= dst[0] + srcSliceY*dstStride[0];
    int firstOut, secondOut;

    /* copy Y */
    if(srcStride[0]!=dstStride[0]){
        const uint8_t *lumaSrc= src[0];
        int row;

        for(row=0; row<srcSliceH; row++)
        {
            memcpy(lumaDst, lumaSrc, c->srcW);
            lumaSrc+= srcStride[0];
            lumaDst+= dstStride[0];
        }
    }else{
        memcpy(lumaDst, src[0], srcStride[0]*srcSliceH);
    }

    /* destination plane index for src[1] and src[2] respectively */
    if(c->dstFormat==IMGFMT_YV12){
        firstOut = 1;
        secondOut= 2;
    }else{
        firstOut = 2;
        secondOut= 1;
    }

    planar2x(src[1], dst[firstOut ], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[firstOut ]);
    planar2x(src[2], dst[secondOut], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[secondOut]);

    return srcSliceH;
}
1526
1527 44c1035c Michael Niedermayer
/**
1528
 * bring pointers in YUV order instead of YVU
1529
 */
1530 fccb9b2b Michael Niedermayer
static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1531
        if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
1532 d80e2fa2 Michael Niedermayer
           || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
1533 44c1035c Michael Niedermayer
                sortedP[0]= p[0];
1534 fccb9b2b Michael Niedermayer
                sortedP[1]= p[2];
1535
                sortedP[2]= p[1];
1536 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1537 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[2];
1538
                sortedStride[2]= stride[1];
1539 44c1035c Michael Niedermayer
        }
1540 a4c90ea3 Michael Niedermayer
        else if(isPacked(format) || isGray(format) || format == IMGFMT_Y8)
1541 44c1035c Michael Niedermayer
        {
1542
                sortedP[0]= p[0];
1543
                sortedP[1]= 
1544
                sortedP[2]= NULL;
1545 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1546 44c1035c Michael Niedermayer
                sortedStride[1]= 
1547
                sortedStride[2]= 0;
1548
        }
1549 fccb9b2b Michael Niedermayer
        else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
1550 44c1035c Michael Niedermayer
        {
1551
                sortedP[0]= p[0];
1552 fccb9b2b Michael Niedermayer
                sortedP[1]= p[1];
1553
                sortedP[2]= p[2];
1554 c7a810cc Michael Niedermayer
                sortedStride[0]= stride[0];
1555 fccb9b2b Michael Niedermayer
                sortedStride[1]= stride[1];
1556
                sortedStride[2]= stride[2];
1557 e09d12f4 Michael Niedermayer
        }else{
1558
                MSG_ERR("internal error in orderYUV\n");
1559 44c1035c Michael Niedermayer
        }
1560
}
1561 b935781b Michael Niedermayer
1562 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
1563 3e499f53 Michael Niedermayer
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1564
             int srcSliceH, uint8_t* dst[], int dstStride[]){
1565 b6654a54 Michael Niedermayer
1566
        if(isPacked(c->srcFormat))
1567
        {
1568
                if(dstStride[0]==srcStride[0])
1569
                        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1570
                else
1571
                {
1572
                        int i;
1573
                        uint8_t *srcPtr= src[0];
1574
                        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1575 a861d4d7 Michael Niedermayer
                        int length=0;
1576
1577
                        /* universal length finder */
1578 9bd8bd1a Michael Niedermayer
                        while(length+c->srcW <= ABS(dstStride[0]) 
1579
                           && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1580 a861d4d7 Michael Niedermayer
                        ASSERT(length!=0);
1581 b6654a54 Michael Niedermayer
1582
                        for(i=0; i<srcSliceH; i++)
1583
                        {
1584
                                memcpy(dstPtr, srcPtr, length);
1585
                                srcPtr+= srcStride[0];
1586
                                dstPtr+= dstStride[0];
1587
                        }
1588
                }
1589
        }
1590
        else 
1591 44c1035c Michael Niedermayer
        { /* Planar YUV or gray */
1592 b6654a54 Michael Niedermayer
                int plane;
1593
                for(plane=0; plane<3; plane++)
1594
                {
1595 e616aa93 Michael Niedermayer
                        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1596
                        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1597
                        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1598 44c1035c Michael Niedermayer
1599
                        if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1600 0d9f3d85 Arpi
                        {
1601 44c1035c Michael Niedermayer
                                if(!isGray(c->dstFormat))
1602 e616aa93 Michael Niedermayer
                                        memset(dst[plane], 128, dstStride[plane]*height);
1603 0d9f3d85 Arpi
                        }
1604 b6654a54 Michael Niedermayer
                        else
1605
                        {
1606 44c1035c Michael Niedermayer
                                if(dstStride[plane]==srcStride[plane])
1607
                                        memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1608
                                else
1609 b6654a54 Michael Niedermayer
                                {
1610 44c1035c Michael Niedermayer
                                        int i;
1611
                                        uint8_t *srcPtr= src[plane];
1612
                                        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1613
                                        for(i=0; i<height; i++)
1614
                                        {
1615
                                                memcpy(dstPtr, srcPtr, length);
1616
                                                srcPtr+= srcStride[plane];
1617
                                                dstPtr+= dstStride[plane];
1618
                                        }
1619 b6654a54 Michael Niedermayer
                                }
1620
                        }
1621
                }
1622
        }
1623 d4e24275 Michael Niedermayer
        return srcSliceH;
1624 37079906 Michael Niedermayer
}
1625 28bf81c9 Michael Niedermayer
1626 44c1035c Michael Niedermayer
static int remove_dup_fourcc(int fourcc)
1627 0d9f3d85 Arpi
{
1628
        switch(fourcc)
1629
        {
1630 fccb9b2b Michael Niedermayer
            case IMGFMT_I420:
1631
            case IMGFMT_IYUV: return IMGFMT_YV12;
1632 0d9f3d85 Arpi
            case IMGFMT_Y8  : return IMGFMT_Y800;
1633 0c51ef97 Arpi
            case IMGFMT_IF09: return IMGFMT_YVU9;
1634 0d9f3d85 Arpi
            default: return fourcc;
1635
        }
1636
}
1637
1638 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
1639
        switch(format){
1640 7322a67c Michael Niedermayer
        case IMGFMT_UYVY:
1641 c7a810cc Michael Niedermayer
        case IMGFMT_YUY2:
1642
                *h=1;
1643
                *v=0;
1644
                break;
1645
        case IMGFMT_YV12:
1646 e616aa93 Michael Niedermayer
        case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
1647 c7a810cc Michael Niedermayer
                *h=1;
1648
                *v=1;
1649
                break;
1650
        case IMGFMT_YVU9:
1651
                *h=2;
1652
                *v=2;
1653
                break;
1654 d80e2fa2 Michael Niedermayer
        case IMGFMT_444P:
1655
                *h=0;
1656
                *v=0;
1657
                break;
1658
        case IMGFMT_422P:
1659
                *h=1;
1660
                *v=0;
1661
                break;
1662
        case IMGFMT_411P:
1663
                *h=2;
1664
                *v=0;
1665
                break;
1666 c7a810cc Michael Niedermayer
        default:
1667
                *h=0;
1668
                *v=0;
1669
                break;
1670
        }
1671
}
1672
1673 5427e242 Michael Niedermayer
/**
 * Converts a 16.16 fixed point value to a rounded, saturated 16 bit integer.
 *
 * The rounded value is kept in 64 bits until after the range check, so large
 * magnitudes saturate instead of being truncated to int first.
 *
 * @param f value in 16.16 fixed point
 * @return the rounded value as the bit pattern of a signed 16 bit integer;
 *         0x7FFF for anything above 0x7FFF, 0x8000 for anything below -0x7FFF
 */
static uint16_t roundToInt16(int64_t f){
    const int64_t r= (f + (1<<15))>>16;

    if(r < -0x7FFF)
        return 0x8000;
    if(r >  0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
1679
1680
/**
1681 5427e242 Michael Niedermayer
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1682 0481412a Michael Niedermayer
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1683 5427e242 Michael Niedermayer
 * @return -1 if not supported
1684 0481412a Michael Niedermayer
 */
1685 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1686
        int64_t crv =  inv_table[0];
1687
        int64_t cbu =  inv_table[1];
1688
        int64_t cgu = -inv_table[2];
1689
        int64_t cgv = -inv_table[3];
1690
        int64_t cy  = 1<<16;
1691
        int64_t oy  = 0;
1692
1693
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1694
        memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1695
        memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1696
1697
        c->brightness= brightness;
1698
        c->contrast  = contrast;
1699
        c->saturation= saturation;
1700
        c->srcRange  = srcRange;
1701
        c->dstRange  = dstRange;
1702 0481412a Michael Niedermayer
1703
        c->uOffset=   0x0400040004000400LL;
1704
        c->vOffset=   0x0400040004000400LL;
1705
1706 5427e242 Michael Niedermayer
        if(!srcRange){
1707
                cy= (cy*255) / 219;
1708
                oy= 16<<16;
1709 0481412a Michael Niedermayer
        }
1710
1711 5427e242 Michael Niedermayer
        cy = (cy *contrast             )>>16;
1712
        crv= (crv*contrast * saturation)>>32;
1713
        cbu= (cbu*contrast * saturation)>>32;
1714
        cgu= (cgu*contrast * saturation)>>32;
1715
        cgv= (cgv*contrast * saturation)>>32;
1716 0481412a Michael Niedermayer
1717 5427e242 Michael Niedermayer
        oy -= 256*brightness;
1718 0481412a Michael Niedermayer
1719
        c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1720
        c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1721
        c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1722
        c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1723
        c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1724
        c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1725 5427e242 Michael Niedermayer
1726
        yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1727
        //FIXME factorize
1728 a31de956 Michael Niedermayer
1729
#ifdef HAVE_ALTIVEC
1730 582552fb Luca Barbato
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1731 a31de956 Michael Niedermayer
#endif        
1732 5427e242 Michael Niedermayer
        return 0;
1733
}
1734
1735
/**
1736
 * @return -1 if not supported
1737
 */
1738
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1739
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1740
1741
        *inv_table = c->srcColorspaceTable;
1742
        *table     = c->dstColorspaceTable;
1743
        *srcRange  = c->srcRange;
1744
        *dstRange  = c->dstRange;
1745
        *brightness= c->brightness;
1746
        *contrast  = c->contrast;
1747
        *saturation= c->saturation;
1748
        
1749
        return 0;        
1750 0481412a Michael Niedermayer
}
1751
1752 fccb9b2b Michael Niedermayer
/**
 * Allocates and initializes a scaler context for converting srcW x srcH
 * pictures in origSrcFormat into dstW x dstH pictures in origDstFormat.
 *
 * If no scaling and no filtering is requested and a matching unscaled special
 * converter exists, the returned context only has that converter installed in
 * c->swScale and none of the filter tables / line buffers are set up.
 * Otherwise the horizontal and vertical filter coefficients and the
 * intermediate line buffers are allocated and c->swScale is taken from
 * getSwsFunc().
 *
 * @param srcW,srcH      source dimensions (srcW>=4, srcH>=1 required)
 * @param origSrcFormat  source format, duplicates are folded via remove_dup_fourcc()
 * @param dstW,dstH      destination dimensions (dstW>=8, dstH>=1 required)
 * @param origDstFormat  destination format, duplicates are folded via remove_dup_fourcc()
 * @param flags          SWS_* scaler selection, option and CPU capability flags
 * @param srcFilter      optional source filter, NULL means none
 * @param dstFilter      optional destination filter, NULL means none
 * @param param          optional pair of scaler parameters, NULL selects SWS_PARAM_DEFAULT
 * @return the new context, or NULL if a format is unsupported, the dimensions
 *         are invalid or the context itself could not be allocated
 */
SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
                         SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){

        SwsContext *c;
        int i;
        int usesVFilter, usesHFilter;
        int unscaled, needsDither;
        int srcFormat, dstFormat;
        SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if defined(ARCH_X86) || defined(ARCH_X86_64)
        if(flags & SWS_CPU_CAPS_MMX)
                asm volatile("emms\n\t"::: "memory");
#endif

#ifndef RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
        flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
#ifdef HAVE_MMX2
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif defined (HAVE_3DNOW)
        flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif defined (HAVE_MMX)
        flags |= SWS_CPU_CAPS_MMX;
#elif defined (HAVE_ALTIVEC)
        flags |= SWS_CPU_CAPS_ALTIVEC;
#endif
#endif
        if(clip_table[512] != 255) globalInit();
        if(rgb15to16 == NULL) sws_rgb2rgb_init(flags);

        /* avoid duplicate formats, so we don't need to check too much */
        srcFormat = remove_dup_fourcc(origSrcFormat);
        dstFormat = remove_dup_fourcc(origDstFormat);

        unscaled = (srcW == dstW && srcH == dstH);
        needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
                     && (dstFormat&0xFF)<24
                     && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

        if(!isSupportedIn(srcFormat))
        {
                MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
                return NULL;
        }
        if(!isSupportedOut(dstFormat))
        {
                MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
                return NULL;
        }

        /* sanity check */
        if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
        {
                MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
                        srcW, srcH, dstW, dstH);
                return NULL;
        }

        if(!dstFilter) dstFilter= &dummyFilter;
        if(!srcFilter) srcFilter= &dummyFilter;

        c= memalign(64, sizeof(SwsContext));
        if(!c)
        {
                // nothing else has been allocated yet, so bailing out here leaks nothing
                MSG_ERR("swScaler: out of memory while allocating the context\n");
                return NULL;
        }
        memset(c, 0, sizeof(SwsContext));

        c->srcW= srcW;
        c->srcH= srcH;
        c->dstW= dstW;
        c->dstH= dstH;
        c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
        c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
        c->flags= flags;
        c->dstFormat= dstFormat;
        c->srcFormat= srcFormat;
        c->origDstFormat= origDstFormat;
        c->origSrcFormat= origSrcFormat;
        c->vRounder= 4* 0x0001000100010001ULL;

        // a filter only counts as "used" if it has more than one coefficient
        usesHFilter= usesVFilter= 0;
        if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
        if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
        if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
        if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
        if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
        if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
        if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
        if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;

        getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
        getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

        // reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
        if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;

        // drop some chroma lines if the user wants it
        c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
        c->chrSrcVSubSample+= c->vChrDrop;

        // drop every 2. pixel for chroma calculation unless user wants full chroma
        if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP))
                c->chrSrcHSubSample=1;

        if(param){
                c->param[0] = param[0];
                c->param[1] = param[1];
        }else{
                c->param[0] =
                c->param[1] = SWS_PARAM_DEFAULT;
        }

        c->chrIntHSubSample= c->chrDstHSubSample;
        c->chrIntVSubSample= c->chrSrcVSubSample;

        // note the -((-x)>>y) is so that we always round toward +inf
        c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
        c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
        c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
        c->chrDstH= -((-dstH) >> c->chrDstVSubSample);

        sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16);

        /* unscaled special cases */
        // the checks below are not exclusive: a later match overrides an earlier one
        if(unscaled && !usesHFilter && !usesVFilter)
        {
                /* yv12_to_nv12 */
                if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
                {
                        c->swScale= PlanarToNV12Wrapper;
                }
                /* yuv2bgr */
                if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
                {
                        c->swScale= yuv2rgb_get_func_ptr(c);
                }

                if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
                {
                        c->swScale= yvu9toyv12Wrapper;
                }

                /* bgr24toYV12 */
                if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
                        c->swScale= bgr24toyv12Wrapper;

                /* rgb/bgr -> rgb/bgr (no dither needed forms) */
                if(   (isBGR(srcFormat) || isRGB(srcFormat))
                   && (isBGR(dstFormat) || isRGB(dstFormat))
                   && !needsDither)
                        c->swScale= rgb2rgbWrapper;

                /* LQ converters if -sws 0 or -sws 4*/
                if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
                        /* rgb/bgr -> rgb/bgr (dither needed forms) */
                        if(  (isBGR(srcFormat) || isRGB(srcFormat))
                          && (isBGR(dstFormat) || isRGB(dstFormat))
                          && needsDither)
                                c->swScale= rgb2rgbWrapper;

                        /* yv12_to_yuy2 */
                        if(srcFormat == IMGFMT_YV12 &&
                            (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY))
                        {
                                if (dstFormat == IMGFMT_YUY2)
                                    c->swScale= PlanarToYuy2Wrapper;
                                else
                                    c->swScale= PlanarToUyvyWrapper;
                        }
                }

#ifdef HAVE_ALTIVEC
                if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
                    ((srcFormat == IMGFMT_YV12 &&
                      (dstFormat == IMGFMT_YUY2 || dstFormat == IMGFMT_UYVY)))) {
                  // unscaled YV12 -> packed YUV, we want speed
                  if (dstFormat == IMGFMT_YUY2)
                    c->swScale= yv12toyuy2_unscaled_altivec;
                  else
                    c->swScale= yv12touyvy_unscaled_altivec;
                }
#endif

                /* simple copy */
                if(   srcFormat == dstFormat
                   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
                   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
                  )
                {
                        c->swScale= simpleCopy;
                }

                // a special converter was found: skip all scaler setup
                if(c->swScale){
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
                        return c;
                }
        }

        if(flags & SWS_CPU_CAPS_MMX2)
        {
                c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
                if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
                {
                        if(flags&SWS_PRINT_INFO)
                                MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
                }
                if(usesHFilter) c->canMMX2BeUsed=0;
        }
        else
                c->canMMX2BeUsed=0;

        c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
        c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;

        // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
        // but only for the FAST_BILINEAR mode otherwise do correct scaling
        // n-2 is the last chrominance sample available
        // this is not perfect, but no one should notice the difference, the more correct variant
        // would be like the vertical one, but that would require some special code for the
        // first and last pixel
        if(flags&SWS_FAST_BILINEAR)
        {
                if(c->canMMX2BeUsed)
                {
                        c->lumXInc+= 20;
                        c->chrXInc+= 20;
                }
                //we don't use the x86asm scaler if mmx is available
                else if(flags & SWS_CPU_CAPS_MMX)
                {
                        // dstW>=8 is guaranteed by the sanity check, so dstW-2 is never 0
                        c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
                        // chrDstW may be as small as 2 (e.g. dstW==8 with a horizontal
                        // chroma shift of 2); keep the rounded increment computed above
                        // instead of dividing by zero
                        if(c->chrDstW > 2)
                                c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
                }
        }

        /* precalculate horizontal scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_MMX) ? 4 :
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                                 srcW      ,       dstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                 srcFilter->lumH, dstFilter->lumH, c->param);
                initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                                 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                 srcFilter->chrH, dstFilter->chrH, c->param);

#if defined(ARCH_X86) || defined(ARCH_X86_64)
// can't downscale !!!
                if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
                {
#define MAX_FUNNY_CODE_SIZE 10000
#ifdef HAVE_SYS_MMAN_H
                        // anonymous mappings take fd -1; some implementations reject any other value
                        // NOTE(review): MAP_FAILED is not handled here; a failure path would need
                        // to tear down the partially built context
                        c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                        c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#else
                        c->funnyYCode = (uint8_t*)memalign(32, MAX_FUNNY_CODE_SIZE);
                        c->funnyUVCode = (uint8_t*)memalign(32, MAX_FUNNY_CODE_SIZE);
#endif

                        c->lumMmx2Filter   = (int16_t*)memalign(8, (dstW        /8+8)*sizeof(int16_t));
                        c->chrMmx2Filter   = (int16_t*)memalign(8, (c->chrDstW  /4+8)*sizeof(int16_t));
                        c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW      /2/8+8)*sizeof(int32_t));
                        c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));

                        initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
                        initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
                }
#endif
        } // Init Horizontal stuff



        /* precalculate vertical scaler filter coefficients */
        {
                const int filterAlign=
                  (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
                  1;

                initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                                srcH      ,        dstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                                srcFilter->lumV, dstFilter->lumV, c->param);
                initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                                c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
                                (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                                srcFilter->chrV, dstFilter->chrV, c->param);
        }

        // Calculate Buffer Sizes so that they won't run out while handling these damn slices
        c->vLumBufSize= c->vLumFilterSize;
        c->vChrBufSize= c->vChrFilterSize;
        for(i=0; i<dstH; i++)
        {
                int chrI= i*c->chrDstH / dstH;
                int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
                                 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));

                // round nextSlice down to a multiple of the vertical chroma subsampling
                nextSlice>>= c->chrSrcVSubSample;
                nextSlice<<= c->chrSrcVSubSample;
                if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
                        c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
                if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
                        c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
        }

        // allocate pixbufs dynamically; every line pointer is stored twice (at i and at
        // i+BufSize), so each pointer array holds 2*BufSize entries
        // NOTE(review): none of the allocations below are checked for failure
        c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
        c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
        //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
        for(i=0; i<c->vLumBufSize; i++)
                c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (int16_t*)memalign(8, 4000);
        for(i=0; i<c->vChrBufSize; i++)
                c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (int16_t*)memalign(8, 8000);

        //try to avoid drawing green stuff between the right end and the stride end
        for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
        for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);

        ASSERT(c->chrDstH <= dstH)

        if(flags&SWS_PRINT_INFO)
        {
#ifdef DITHER1XBPP
                char *dither= " dithered";
#else
                char *dither= "";
#endif
                if(flags&SWS_FAST_BILINEAR)
                        MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
                else if(flags&SWS_BILINEAR)
                        MSG_INFO("\nSwScaler: BILINEAR scaler, ");
                else if(flags&SWS_BICUBIC)
                        MSG_INFO("\nSwScaler: BICUBIC scaler, ");
                else if(flags&SWS_X)
                        MSG_INFO("\nSwScaler: Experimental scaler, ");
                else if(flags&SWS_POINT)
                        MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
                else if(flags&SWS_AREA)
                        MSG_INFO("\nSwScaler: Area Averageing scaler, ");
                else if(flags&SWS_BICUBLIN)
                        MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
                else if(flags&SWS_GAUSS)
                        MSG_INFO("\nSwScaler: Gaussian scaler, ");
                else if(flags&SWS_SINC)
                        MSG_INFO("\nSwScaler: Sinc scaler, ");
                else if(flags&SWS_LANCZOS)
                        MSG_INFO("\nSwScaler: Lanczos scaler, ");
                else if(flags&SWS_SPLINE)
                        MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
                else
                        MSG_INFO("\nSwScaler: ehh flags invalid?! ");

                if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
                        MSG_INFO("from %s to%s %s ",
                                vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
                else
                        MSG_INFO("from %s to %s ",
                                vo_format_name(srcFormat), vo_format_name(dstFormat));

                if(flags & SWS_CPU_CAPS_MMX2)
                        MSG_INFO("using MMX2\n");
                else if(flags & SWS_CPU_CAPS_3DNOW)
                        MSG_INFO("using 3DNOW\n");
                else if(flags & SWS_CPU_CAPS_MMX)
                        MSG_INFO("using MMX\n");
                else if(flags & SWS_CPU_CAPS_ALTIVEC)
                        MSG_INFO("using AltiVec\n");
                else
                        MSG_INFO("using C\n");
        }

        if(flags & SWS_PRINT_INFO)
        {
                if(flags & SWS_CPU_CAPS_MMX)
                {
                        if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                                MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
                        else
                        {
                                if(c->hLumFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
                                else if(c->hLumFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");

                                if(c->hChrFilterSize==4)
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
                                else if(c->hChrFilterSize==8)
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
                                else
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
                        }
                }
                else
                {
#if defined(ARCH_X86) || defined(ARCH_X86_64)
                        MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
#else
                        if(flags & SWS_FAST_BILINEAR)
                                MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
                        else
                                MSG_V("SwScaler: using C scaler for horizontal scaling\n");
#endif
                }
                if(isPlanarYUV(dstFormat))
                {
                        if(c->vLumFilterSize==1)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }
                else
                {
                        if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
                                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
                                MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                        else
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                }

                if(dstFormat==IMGFMT_BGR24)
                        MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
                                (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
                else if(dstFormat==IMGFMT_BGR32)
                        MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR16)
                        MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
                else if(dstFormat==IMGFMT_BGR15)
                        MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");

                MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
        }
        if(flags & SWS_PRINT_INFO)
        {
                MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
                MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
                        c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
        }

        c->swScale= getSwsFunc(flags);
        return c;
}
2201
2202
/**
2203 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext.
2204 fccb9b2b Michael Niedermayer
 * assumes planar YUV to be in YUV order instead of YVU
2205
 */
2206
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2207
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
2208 46888748 Michael Niedermayer
        //copy strides, so they can safely be modified
2209
        int srcStride2[3]= {srcStride[0], srcStride[1], srcStride[2]};
2210
        int dstStride2[3]= {dstStride[0], dstStride[1], dstStride[2]};
2211
        return c->swScale(c, src, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2212 fccb9b2b Michael Niedermayer
}
2213
2214
/**
2215 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext
2216 d4e24275 Michael Niedermayer
 */
2217 3e499f53 Michael Niedermayer
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
2218
                           int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
2219
        int srcStride[3];
2220
        int dstStride[3];
2221
        uint8_t *src[3];
2222
        uint8_t *dst[3];
2223 fccb9b2b Michael Niedermayer
        sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
2224
        sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
2225 3e499f53 Michael Niedermayer
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
2226 a4c90ea3 Michael Niedermayer
2227 5bf01354 Michael Niedermayer
        return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2228 d4e24275 Michael Niedermayer
}
2229
2230 e21206a8 Michael Niedermayer
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, 
2231
                                float lumaSharpen, float chromaSharpen,
2232
                                float chromaHShift, float chromaVShift,
2233
                                int verbose)
2234
{
2235
        SwsFilter *filter= malloc(sizeof(SwsFilter));
2236
2237
        if(lumaGBlur!=0.0){
2238
                filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2239
                filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2240
        }else{
2241
                filter->lumH= sws_getIdentityVec();
2242
                filter->lumV= sws_getIdentityVec();
2243
        }
2244
2245
        if(chromaGBlur!=0.0){
2246
                filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2247
                filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2248
        }else{
2249
                filter->chrH= sws_getIdentityVec();
2250
                filter->chrV= sws_getIdentityVec();
2251
        }
2252
2253
        if(chromaSharpen!=0.0){
2254
                SwsVector *g= sws_getConstVec(-1.0, 3);
2255
                SwsVector *id= sws_getConstVec(10.0/chromaSharpen, 1);
2256
                g->coeff[1]=2.0;
2257
                sws_addVec(id, g);
2258
                sws_convVec(filter->chrH, id);
2259
                sws_convVec(filter->chrV, id);
2260
                sws_freeVec(g);
2261
                sws_freeVec(id);
2262
        }
2263
2264
        if(lumaSharpen!=0.0){
2265
                SwsVector *g= sws_getConstVec(-1.0, 3);
2266
                SwsVector *id= sws_getConstVec(10.0/lumaSharpen, 1);
2267
                g->coeff[1]=2.0;
2268
                sws_addVec(id, g);
2269
                sws_convVec(filter->lumH, id);
2270
                sws_convVec(filter->lumV, id);
2271
                sws_freeVec(g);
2272
                sws_freeVec(id);
2273
        }
2274
2275
        if(chromaHShift != 0.0)
2276
                sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2277
2278
        if(chromaVShift != 0.0)
2279
                sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2280
2281
        sws_normalizeVec(filter->chrH, 1.0);
2282
        sws_normalizeVec(filter->chrV, 1.0);
2283
        sws_normalizeVec(filter->lumH, 1.0);
2284
        sws_normalizeVec(filter->lumV, 1.0);
2285
2286
        if(verbose) sws_printVec(filter->chrH);
2287
        if(verbose) sws_printVec(filter->lumH);
2288
2289
        return filter;
2290
}
2291
2292 d4e24275 Michael Niedermayer
/**
2293 28bf81c9 Michael Niedermayer
 * returns a normalized gaussian curve used to filter stuff
2294
 * quality=3 is high quality, lowwer is lowwer quality
2295
 */
2296 d4e24275 Michael Niedermayer
SwsVector *sws_getGaussianVec(double variance, double quality){
2297 28bf81c9 Michael Niedermayer
        const int length= (int)(variance*quality + 0.5) | 1;
2298
        int i;
2299
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2300
        double middle= (length-1)*0.5;
2301 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2302
2303
        vec->coeff= coeff;
2304
        vec->length= length;
2305 28bf81c9 Michael Niedermayer
2306
        for(i=0; i<length; i++)
2307
        {
2308
                double dist= i-middle;
2309
                coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2310
        }
2311
2312 d4e24275 Michael Niedermayer
        sws_normalizeVec(vec, 1.0);
2313 c7f822d9 Michael Niedermayer
2314
        return vec;
2315 28bf81c9 Michael Niedermayer
}
2316
2317 d4e24275 Michael Niedermayer
SwsVector *sws_getConstVec(double c, int length){
2318 5521b193 Michael Niedermayer
        int i;
2319
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2320
        SwsVector *vec= malloc(sizeof(SwsVector));
2321
2322
        vec->coeff= coeff;
2323
        vec->length= length;
2324
2325
        for(i=0; i<length; i++)
2326
                coeff[i]= c;
2327
2328
        return vec;
2329
}
2330
2331
2332 d4e24275 Michael Niedermayer
SwsVector *sws_getIdentityVec(void){
2333 c7f822d9 Michael Niedermayer
        double *coeff= memalign(sizeof(double), sizeof(double));
2334
        SwsVector *vec= malloc(sizeof(SwsVector));
2335
        coeff[0]= 1.0;
2336
2337
        vec->coeff= coeff;
2338
        vec->length= 1;
2339
2340
        return vec;
2341
}
2342
2343 d4e24275 Michael Niedermayer
/**
 * Scales the vector in place so that its coefficients sum to "height".
 * Every coefficient is multiplied by height / (sum of all coefficients).
 * NOTE(review): no special handling when the coefficients sum to zero.
 */
void sws_normalizeVec(SwsVector *a, double height){
        double total=0;
        double factor;
        int k;

        for(k=0; k<a->length; k++)
                total+= a->coeff[k];

        factor= height/total;

        for(k=0; k<a->length; k++)
                a->coeff[k]*= factor;
}
2356
2357 d4e24275 Michael Niedermayer
/**
 * Multiplies every coefficient of the vector by "scalar", in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
        int k= 0;

        while(k<a->length){
                a->coeff[k]*= scalar;
                k++;
        }
}
2363
2364 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding the convolution of a and b.
 * The result has a->length + b->length - 1 coefficients; neither input is modified.
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
        const int length= a->length + b->length - 1;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        SwsVector *vec= malloc(sizeof(SwsVector));
        int ai, bi, k;

        vec->length= length;
        vec->coeff= coeff;

        for(k=0; k<length; k++)
                coeff[k]= 0.0;

        // every pair of taps contributes its product at the sum of their indices
        for(ai=0; ai<a->length; ai++)
                for(bi=0; bi<b->length; bi++)
                        coeff[ai+bi]+= a->coeff[ai]*b->coeff[bi];

        return vec;
}
2385
2386 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding a + b.
 * The result is as long as the longer input; each input is added starting at
 * offset (length-1)/2 - (its own length-1)/2 of the result.
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
        const int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        SwsVector *vec= malloc(sizeof(SwsVector));
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;
        int k;

        vec->length= length;
        vec->coeff= coeff;

        for(k=0; k<length; k++)
                coeff[k]= 0.0;

        for(k=0; k<a->length; k++)
                coeff[k + aOffset]+= a->coeff[k];

        for(k=0; k<b->length; k++)
                coeff[k + bOffset]+= b->coeff[k];

        return vec;
}
2402 c7f822d9 Michael Niedermayer
2403 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated vector holding a - b.
 * The result is as long as the longer input; each input is applied starting at
 * offset (length-1)/2 - (its own length-1)/2 of the result.
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
        const int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        SwsVector *vec= malloc(sizeof(SwsVector));
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;
        int k;

        vec->length= length;
        vec->coeff= coeff;

        for(k=0; k<length; k++)
                coeff[k]= 0.0;

        for(k=0; k<a->length; k++)
                coeff[k + aOffset]+= a->coeff[k];

        for(k=0; k<b->length; k++)
                coeff[k + bOffset]-= b->coeff[k];

        return vec;
}
2419
2420
/* shift left / or right if "shift" is negative */
2421 d4e24275 Michael Niedermayer
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2422 c7f822d9 Michael Niedermayer
        int length= a->length + ABS(shift)*2;
2423
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2424 ff7ba856 Michael Niedermayer
        int i;
2425 c7f822d9 Michael Niedermayer
        SwsVector *vec= malloc(sizeof(SwsVector));
2426
2427
        vec->coeff= coeff;
2428
        vec->length= length;
2429
2430
        for(i=0; i<length; i++) coeff[i]= 0.0;
2431
2432
        for(i=0; i<a->length; i++)
2433
        {
2434
                coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2435
        }
2436
2437
        return vec;
2438
}
2439
2440 d4e24275 Michael Niedermayer
/**
 * Shifts the vector in place: the result of sws_getShiftedVec(a, shift)
 * replaces a's coefficient array and length.
 */
void sws_shiftVec(SwsVector *a, int shift){
        SwsVector *tmp= sws_getShiftedVec(a, shift);

        // take over the new coefficient array, then drop the old array and the empty shell
        free(a->coeff);
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
}
2447
2448 d4e24275 Michael Niedermayer
/**
 * Adds b to a in place: the result of sws_sumVec(a, b) replaces a's
 * coefficient array and length. b is left untouched.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_sumVec(a, b);

        // take over the new coefficient array, then drop the old array and the empty shell
        free(a->coeff);
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
}
2455
2456 d4e24275 Michael Niedermayer
/**
 * Subtracts b from a in place: the result of sws_diffVec(a, b) replaces a's
 * coefficient array and length. b is left untouched.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_diffVec(a, b);

        // take over the new coefficient array, then drop the old array and the empty shell
        free(a->coeff);
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
}
2463
2464 d4e24275 Michael Niedermayer
/**
 * Convolves a with b in place: the result of sws_getConvVec(a, b) replaces
 * a's coefficient array and length. b is left untouched.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sws_getConvVec(a, b);

        // take over the new coefficient array, then drop the old array and the empty shell
        free(a->coeff);
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(tmp);
}
2471
2472 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated copy of a, with its own coefficient array.
 */
SwsVector *sws_cloneVec(SwsVector *a){
        double *copy= memalign(sizeof(double), a->length*sizeof(double));
        SwsVector *vec= malloc(sizeof(SwsVector));
        int k= 0;

        vec->length= a->length;
        vec->coeff= copy;

        while(k<a->length){
                copy[k]= a->coeff[k];
                k++;
        }

        return vec;
}
2484
2485 d4e24275 Michael Niedermayer
/**
 * Prints the vector through MSG_DBG2, one coefficient per line: the value
 * followed by a '|' placed at a column proportional to the value.
 * The column range spans 60 characters between min and max, where both min
 * and max start at 0 (so 0 is always inside the plotted range).
 */
void sws_printVec(SwsVector *a){
        int i;
        double max=0;
        double min=0;
        double range;

        for(i=0; i<a->length; i++)
                if(a->coeff[i]>max) max= a->coeff[i];

        for(i=0; i<a->length; i++)
                if(a->coeff[i]<min) min= a->coeff[i];

        range= max - min;

        for(i=0; i<a->length; i++)
        {
                // range is 0 for an all-zero vector; dividing by it would give NaN,
                // and converting NaN (or any out-of-range value) to int is undefined
                double pos= range > 0.0 ? (a->coeff[i]-min)*60.0/range +0.5 : 0.0;
                int x= (pos >= 0.0 && pos < 61.0) ? (int)pos : 0;
                MSG_DBG2("%1.3f ", a->coeff[i]);
                for(;x>0; x--) MSG_DBG2(" ");
                MSG_DBG2("|\n");
        }
}
2507
2508 d4e24275 Michael Niedermayer
/**
 * Releases a vector together with its coefficient array.
 * A NULL vector is ignored.
 */
void sws_freeVec(SwsVector *a){
        if(a){
                free(a->coeff);         // free(NULL) is a no-op, no guard needed
                a->coeff=NULL;
                a->length=0;
                free(a);
        }
}
2515
2516 e21206a8 Michael Niedermayer
/**
 * Releases a filter and its four vectors (lumH, lumV, chrH, chrV).
 * A NULL filter is ignored.
 */
void sws_freeFilter(SwsFilter *filter){
        if(filter){
                // sws_freeVec returns immediately when handed NULL
                sws_freeVec(filter->lumH);
                sws_freeVec(filter->lumV);
                sws_freeVec(filter->chrH);
                sws_freeVec(filter->chrV);
                free(filter);
        }
}
2525
2526
2527 d4e24275 Michael Niedermayer
/**
 * Releases the context and the buffers it points to: the luma / chroma line
 * buffers, the filter coefficient and position tables, the funnyYCode /
 * funnyUVCode areas, the MMX2 filter tables and the yuv table.
 * Each pointer is reset to NULL after being released. A NULL context is ignored.
 */
void sws_freeContext(SwsContext *c){
        int row;

        if(!c) return;

        // free(NULL) is a no-op, so the individual pointers need no guard
        if(c->lumPixBuf)
        {
                for(row=0; row<c->vLumBufSize; row++)
                {
                        free(c->lumPixBuf[row]);
                        c->lumPixBuf[row]=NULL;
                }
                free(c->lumPixBuf);
                c->lumPixBuf=NULL;
        }

        if(c->chrPixBuf)
        {
                for(row=0; row<c->vChrBufSize; row++)
                {
                        free(c->chrPixBuf[row]);
                        c->chrPixBuf[row]=NULL;
                }
                free(c->chrPixBuf);
                c->chrPixBuf=NULL;
        }

        // filter coefficients
        free(c->vLumFilter);
        c->vLumFilter = NULL;
        free(c->vChrFilter);
        c->vChrFilter = NULL;
        free(c->hLumFilter);
        c->hLumFilter = NULL;
        free(c->hChrFilter);
        c->hChrFilter = NULL;

        // filter positions
        free(c->vLumFilterPos);
        c->vLumFilterPos = NULL;
        free(c->vChrFilterPos);
        c->vChrFilterPos = NULL;
        free(c->hLumFilterPos);
        c->hLumFilterPos = NULL;
        free(c->hChrFilterPos);
        c->hChrFilterPos = NULL;

        // released with munmap when HAVE_SYS_MMAN_H is defined; munmap keeps its NULL guard
#ifdef HAVE_SYS_MMAN_H
        if(c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
        if(c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
        free(c->funnyYCode);
        free(c->funnyUVCode);
#endif
        c->funnyYCode=NULL;
        c->funnyUVCode=NULL;

        free(c->lumMmx2Filter);
        c->lumMmx2Filter=NULL;
        free(c->chrMmx2Filter);
        c->chrMmx2Filter=NULL;
        free(c->lumMmx2FilterPos);
        c->lumMmx2FilterPos=NULL;
        free(c->chrMmx2FilterPos);
        c->chrMmx2FilterPos=NULL;
        free(c->yuvTable);
        c->yuvTable=NULL;

        free(c);
}