Statistics
| Branch: | Revision:

ffmpeg / postproc / swscale.c @ 5bf01354

History | View | Annotate | Download (74.4 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
21
  supported output formats: YV12, I420/IYUV, YUY2, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
22
  {BGR,RGB}{1,4,8,15,16} support dithering
23
  
24
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
25
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
26
  x -> x
27
  YUV9 -> YV12
28
  YUV9/YV12 -> Y800
29
  Y800 -> YUV9/YV12
30
  BGR24 -> BGR32 & RGB24 -> RGB32
31
  BGR32 -> BGR24 & RGB32 -> RGB24
32
  BGR15 -> BGR16
33
*/
34

    
35
/* 
36
tested special converters (most are actually tested, but I didn't write it down ...)
37
 YV12 -> BGR16
38
 YV12 -> YV12
39
 BGR15 -> BGR16
40
 BGR16 -> BGR16
41
 YVU9 -> YV12
42

43
untested special converters
44
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
45
  YV12/I420 -> YV12/I420
46
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
47
  BGR24 -> BGR32 & RGB24 -> RGB32
48
  BGR32 -> BGR24 & RGB32 -> RGB24
49
  BGR24 -> YV12
50
*/
51

    
52
#include <inttypes.h>
53
#include <string.h>
54
#include <math.h>
55
#include <stdio.h>
56
#include "../config.h"
57
#include "../mangle.h"
58
#include <assert.h>
59
#ifdef HAVE_MALLOC_H
60
#include <malloc.h>
61
#else
62
#include <stdlib.h>
63
#endif
64
#include "swscale.h"
65
#include "swscale_internal.h"
66
#include "../cpudetect.h"
67
#include "../bswap.h"
68
#include "../libvo/img_format.h"
69
#include "rgb2rgb.h"
70
#include "../libvo/fastmemcpy.h"
71
#include "../mp_msg.h"
72

    
73
/* Logging shorthands: each one forwards its printf-style arguments to
   mp_msg() with the MSGT_SWS module tag and the severity in its name. */
#define MSG_FATAL(args...) mp_msg(MSGT_SWS,MSGL_FATAL, ##args )
#define MSG_ERR(args...) mp_msg(MSGT_SWS,MSGL_ERR, ##args )
#define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args )
#define MSG_INFO(args...) mp_msg(MSGT_SWS,MSGL_INFO, ##args )
#define MSG_V(args...) mp_msg(MSGT_SWS,MSGL_V, ##args )
#define MSG_DBG2(args...) mp_msg(MSGT_SWS,MSGL_DBG2, ##args )

/* Make sure these two names start out undefined in this file. */
#undef MOVNTQ
#undef PAVGB

/* Manual build-configuration overrides, disabled by default. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/* ASSERT(x) expands to a complete statement: assert(x); when MP_DEBUG is
   defined, an empty statement otherwise.
   NOTE(review): the ';' is part of the expansion, so call sites outside this
   view may be written without one -- check them before changing this. */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: reuse M_PI when <math.h> provides it, otherwise a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
105

    
106
/* Pixel-format predicates. Each takes an IMGFMT_* value and yields non-zero
   when the format belongs to the named class. */
//FIXME replace this with something faster
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YVU9 \
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)
#define isYUV(x)       ((x)==IMGFMT_UYVY || (x)==IMGFMT_YUY2 || isPlanarYUV(x))
#define isGray(x)      ((x)==IMGFMT_Y800)
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY ||isRGB(x) || isBGR(x))

/* Formats accepted as scaler input. */
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_UYVY\
                        || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
                        || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P)

/* Formats accepted as scaler output (every RGB/BGR depth qualifies). */
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_YUY2\
                        || (x)==IMGFMT_444P || (x)==IMGFMT_422P || (x)==IMGFMT_411P\
                        || isRGB(x) || isBGR(x)\
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
123

    
124
/* RGB -> YUV weights as fixed-point integers with RGB2YUV_SHIFT fractional
   bits. The first letter is the input channel (R/G/B), the second the output
   channel (Y/U/V). */
#define RGB2YUV_SHIFT 16

/* contributions to Y */
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))

/* contributions to U */
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))

/* contributions to V */
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
134

    
135
/* Symbols provided by other translation units. */
extern const int32_t Inverse_Table_6_9[8][4];
extern int verbose; // defined in mplayer.c
137

    
138
/*
139
NOTES
140
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
141

142
TODO
143
more intelligent misalignment avoidance for the horizontal scaler
144
write special vertical cubic upscale version
145
Optimize C code (yv12 / minmax)
146
add support for packed pixel yuv input & output
147
add support for Y8 output
148
optimize bgr24 & bgr32
149
add BGR4 output support
150
write special BGR->BGR scaler
151
*/
152

    
153
/* Small arithmetic helpers. Being macros, they may evaluate an argument more
   than once, so do not pass expressions with side effects. */
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))

/* The x86 inline-assembly paths are compiled only on ARCH_X86 builds. */
#ifdef ARCH_X86
#define CAN_COMPILE_X86_ASM
#endif
160

    
161
#ifdef CAN_COMPILE_X86_ASM
/* 8-byte aligned 64-bit constants, present only in x86 asm builds.
   NOTE(review): presumably operands of the inline assembly elsewhere in this
   file -- their users are outside this view. */

/* byte / word replicated patterns */
static uint64_t __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t __attribute__((aligned(8))) w02=       0x0002000200020002LL;

/* byte-select masks; the digits in the name spell the selected bytes */
static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* dither values; no initializer here, so they are written elsewhere */
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
static volatile uint64_t __attribute__((aligned(8))) g6Dither;
static volatile uint64_t __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* per-16-bit-word channel masks: 5/6/5 bit split, then 5/5/5 bit split */
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* every-third-byte masks */
static uint64_t __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
static uint64_t __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
static uint64_t __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;

/* BGR -> YUV coefficient words; FAST_BGR2YV12 selects the low-precision set */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff  __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff  __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff  __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff  __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff  __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff  __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif
static const uint64_t bgr2YOffset __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset __attribute__((aligned(8)))= 0x8080808080808080ULL;
static const uint64_t w1111       __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif
208

    
209
// clipping helper table for C implementations:
210
static unsigned char clip_table[768];
211

    
212
//global sws_flags from the command line
213
int sws_flags=2;
214

    
215
//global srcFilter
216
SwsFilter src_filter= {NULL, NULL, NULL, NULL};
217

    
218
float sws_lum_gblur= 0.0;
219
float sws_chr_gblur= 0.0;
220
int sws_chr_vshift= 0;
221
int sws_chr_hshift= 0;
222
float sws_chr_sharpen= 0.0;
223
float sws_lum_sharpen= 0.0;
224

    
225
/* cpuCaps combined from cpudetect and whats actually compiled in
226
   (if there is no support for something compiled in it wont appear here) */
227
static CpuCaps cpuCaps;
228

    
229
int (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
230
             int srcSliceH, uint8_t* dst[], int dstStride[])=NULL;
231

    
232
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
233
                  
234
extern const uint8_t dither_2x2_4[2][8];
235
extern const uint8_t dither_2x2_8[2][8];
236
extern const uint8_t dither_8x8_32[8][8];
237
extern const uint8_t dither_8x8_73[8][8];
238
extern const uint8_t dither_8x8_220[8][8];
239

    
240
#ifdef CAN_COMPILE_X86_ASM
/*
 * Never meant to do useful work: it reads every asm-only constant once so
 * the compiler sees a C-level use of each and does not warn about unused
 * variables. The sum is stored in a volatile so the reads are not discarded.
 * Deliberately left non-static so the function itself is not reported as unused.
 */
void in_asm_used_var_warning_killer(void)
{
 volatile int i= bF8+bFC+w10+
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
 if(i) i=0;
}
#endif
249

    
250
static int testFormat[]={
251
IMGFMT_YVU9,
252
IMGFMT_YV12,
253
//IMGFMT_IYUV,
254
IMGFMT_I420,
255
IMGFMT_BGR15,
256
IMGFMT_BGR16,
257
IMGFMT_BGR24,
258
IMGFMT_BGR32,
259
IMGFMT_RGB24,
260
IMGFMT_RGB32,
261
//IMGFMT_Y8,
262
IMGFMT_Y800,
263
//IMGFMT_YUY2,
264
0
265
};
266

    
267
/**
 * Sum of squared differences between two 8-bit planes.
 *
 * @param src1    first plane (read only)
 * @param src2    second plane (read only)
 * @param stride1 distance in bytes between row starts of src1
 * @param stride2 distance in bytes between row starts of src2
 * @param w       number of samples compared per row
 * @param h       number of rows compared
 * @return        sum over all w*h positions of (src1 - src2)^2; 0 when w or h is <= 0
 */
static uint64_t getSSD(const uint8_t *src1, const uint8_t *src2, int stride1, int stride2, int w, int h){
        uint64_t ssd=0;
        int x,y;

        for(y=0; y<h; y++){
                for(x=0; x<w; x++){
                        // difference of two bytes lies in [-255,255], so d*d fits an int
                        const int d= src1[x + y*stride1] - src2[x + y*stride2];
                        ssd+= (uint64_t)(d*d);
                }
        }
        return ssd;
}
279

    
280
// test by ref -> src -> dst -> out & compare out against ref
281
// ref & out are YV12
282
static void doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat, 
283
                   int srcW, int srcH, int dstW, int dstH, int flags){
284
        uint8_t *src[3];
285
        uint8_t *dst[3];
286
        uint8_t *out[3];
287
        int srcStride[3], dstStride[3];
288
        int i;
289
        uint64_t ssdY, ssdU, ssdV;
290
        SwsContext *srcContext, *dstContext, *outContext;
291
        
292
        for(i=0; i<3; i++){
293
                // avoid stride % bpp != 0
294
                if(srcFormat==IMGFMT_RGB24 || srcFormat==IMGFMT_BGR24)
295
                        srcStride[i]= srcW*3;
296
                else
297
                        srcStride[i]= srcW*4;
298
                
299
                if(dstFormat==IMGFMT_RGB24 || dstFormat==IMGFMT_BGR24)
300
                        dstStride[i]= dstW*3;
301
                else
302
                        dstStride[i]= dstW*4;
303
        
304
                src[i]= malloc(srcStride[i]*srcH);
305
                dst[i]= malloc(dstStride[i]*dstH);
306
                out[i]= malloc(refStride[i]*h);
307
        }
308

    
309
        srcContext= sws_getContext(w, h, IMGFMT_YV12, srcW, srcH, srcFormat, flags, NULL, NULL);
310
        dstContext= sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL);
311
        outContext= sws_getContext(dstW, dstH, dstFormat, w, h, IMGFMT_YV12, flags, NULL, NULL);
312
        if(srcContext==NULL ||dstContext==NULL ||outContext==NULL){
313
                printf("Failed allocating swsContext\n");
314
                goto end;
315
        }
316
//        printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
317
//                (int)src[0], (int)src[1], (int)src[2]);
318

    
319
        srcContext->swScale(srcContext, ref, refStride, 0, h   , src, srcStride);
320
        dstContext->swScale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
321
        outContext->swScale(outContext, dst, dstStride, 0, dstH, out, refStride);
322
             
323
        ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
324
        ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
325
        ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
326
        
327
        if(isGray(srcFormat) || isGray(dstFormat)) ssdU=ssdV=0; //FIXME check that output is really gray
328
        
329
        ssdY/= w*h;
330
        ssdU/= w*h/4;
331
        ssdV/= w*h/4;
332
        
333
        if(ssdY>100 || ssdU>50 || ssdV>50){
334
                printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n", 
335
                        vo_format_name(srcFormat), srcW, srcH, 
336
                        vo_format_name(dstFormat), dstW, dstH,
337
                        flags,
338
                        ssdY, ssdU, ssdV);
339
        }
340

    
341
        end:
342
        
343
        sws_freeContext(srcContext);
344
        sws_freeContext(dstContext);
345
        sws_freeContext(outContext);
346

    
347
        for(i=0; i<3; i++){
348
                free(src[i]);
349
                free(dst[i]);
350
                free(out[i]);
351
        }
352
}
353

    
354
static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
355
        int srcFormat, dstFormat, srcFormatIndex, dstFormatIndex;
356
        int srcW, srcH, dstW, dstH;
357
        int flags;
358

    
359
        for(srcFormatIndex=0; ;srcFormatIndex++){
360
                srcFormat= testFormat[srcFormatIndex];
361
                if(!srcFormat) break;
362
                for(dstFormatIndex=0; ;dstFormatIndex++){
363
                        dstFormat= testFormat[dstFormatIndex];
364
                        if(!dstFormat) break;
365
                        if(!isSupportedOut(dstFormat)) continue;
366
printf("%s -> %s\n", 
367
        vo_format_name(srcFormat),
368
        vo_format_name(dstFormat));
369

    
370
                        srcW= w+w/3;
371
                        srcH= h+h/3;
372
                        for(dstW=w; dstW<w*2; dstW+= dstW/3){
373
                                for(dstH=h; dstH<h*2; dstH+= dstH/3){
374
                                        for(flags=1; flags<33; flags*=2)
375
                                                doTest(src, stride, w, h, srcFormat, dstFormat,
376
                                                        srcW, srcH, dstW, dstH, flags);
377
                                }
378
                        }
379
                }
380
        }
381
}
382

    
383
/*
 * Vertical filter producing one row of planar output (C reference version).
 *
 * Each output sample is the sum over j of src[j][i] * filter[j], shifted
 * right by 19 bits and clamped to 0..255.
 *
 * lumFilter/lumSrc/lumFilterSize  luma coefficients and the lumFilterSize input rows
 * chrFilter/chrSrc/chrFilterSize  chroma coefficients and input rows; in each
 *                                 chroma row the V sample of position i is
 *                                 stored 2048 entries after its U sample
 * dest            luma output, dstW bytes are written
 * uDest, vDest    chroma outputs, chrDstW bytes each; chroma is skipped
 *                 entirely when uDest is NULL
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
        //FIXME Optimize (just quickly written not opti..)
        const int vOffset= 2048; // distance from a U sample to its V sample inside a chrSrc row
        int i;

        for(i=0; i<dstW; i++)
        {
                int val=0;
                int j;
                for(j=0; j<lumFilterSize; j++)
                        val += lumSrc[j][i] * lumFilter[j];

                // shift once, then clamp the result to the 8-bit range
                val >>= 19;
                if(val<0)        val=0;
                else if(val>255) val=255;
                dest[i]= val;
        }

        if(uDest == NULL)
                return;

        for(i=0; i<chrDstW; i++)
        {
                int u=0;
                int v=0;
                int j;
                for(j=0; j<chrFilterSize; j++)
                {
                        u += chrSrc[j][i] * chrFilter[j];
                        v += chrSrc[j][i + vOffset] * chrFilter[j];
                }

                u >>= 19;
                if(u<0)        u=0;
                else if(u>255) u=255;
                v >>= 19;
                if(v<0)        v=0;
                else if(v>255) v=255;

                uDest[i]= u;
                vDest[i]= v;
        }
}
415

    
416

    
417
/*
 * Per-pixel-pair prologue for the multi-tap vertical filter.
 *
 * Opens a loop over i = 0 .. (dstW>>1)-1 and leaves the loop body OPEN: the
 * code following the macro is the rest of the body and must supply the
 * closing '}'. Inside the body these locals are available:
 *   i2        = 2*i, index of the first luma sample of the pair
 *   Y1, Y2    filtered luma for samples i2 and i2+1
 *   U, V      filtered chroma for the pair (V is read 2048 entries after U)
 *   r, g, b   uninitialised pointers of type 'type *'
 * All four values are shifted right by 19 and clamped to 0..255.
 *
 * Needs at the expansion site: i, dstW, lumSrc, lumFilter, lumFilterSize,
 * chrSrc, chrFilter, chrFilterSize.
 *
 * The range test uses ~255 so that every value outside 0..255 is clamped.
 * Testing bit 8 alone lets values such as 600 or -300 through, and they would
 * then be used as table indices.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
                for(i=0; i<(dstW>>1); i++){\
                        int j;\
                        int Y1=0;\
                        int Y2=0;\
                        int U=0;\
                        int V=0;\
                        type *r, *b, *g;\
                        const int i2= 2*i;\
                        \
                        for(j=0; j<lumFilterSize; j++)\
                        {\
                                Y1 += lumSrc[j][i2] * lumFilter[j];\
                                Y2 += lumSrc[j][i2+1] * lumFilter[j];\
                        }\
                        for(j=0; j<chrFilterSize; j++)\
                        {\
                                U += chrSrc[j][i] * chrFilter[j];\
                                V += chrSrc[j][i+2048] * chrFilter[j];\
                        }\
                        Y1>>=19;\
                        Y2>>=19;\
                        U >>=19;\
                        V >>=19;\
                        if((Y1|Y2|U|V)&~255)\
                        {\
                                if(Y1>255)   Y1=255;\
                                else if(Y1<0)Y1=0;\
                                if(Y2>255)   Y2=255;\
                                else if(Y2<0)Y2=0;\
                                if(U>255)    U=255;\
                                else if(U<0) U=0;\
                                if(V>255)    V=255;\
                                else if(V<0) V=0;\
                        }
452
                        
453
/*
 * YSCALE_YUV_2_PACKEDX_C(type) followed by the colour-table lookups:
 * r, g and b are pointed at the c->table_* entries selected by U and V and
 * are then indexed by luma in the code that follows. The loop opened by the
 * prologue is still open after this macro. Needs 'c' at the expansion site.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
                        YSCALE_YUV_2_PACKEDX_C(type)\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
458

    
459
/*
 * Per-pixel-pair prologue blending two input rows.
 *
 * Opens a loop over i = 0 .. (dstW>>1)-1 and leaves it OPEN (the user closes
 * it). Defines i2 = 2*i and
 *   Y1, Y2 = (buf0[k]*yalpha1 + buf1[k]*yalpha) >> 19   for k = i2, i2+1
 *   U, V   = the same blend of uvbuf0/uvbuf1 with uvalpha1/uvalpha, V being
 *            read 2048 entries after U
 * No clamping is applied here.
 * Needs at the expansion site: i, dstW, buf0, buf1, uvbuf0, uvbuf1, yalpha,
 * yalpha1, uvalpha, uvalpha1.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_PACKED2_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;\
                        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\
                        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;\
                        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
466

    
467
/*
 * YSCALE_YUV_2_PACKED2_C followed by the colour-table lookups: declares
 * 'type *r, *b, *g' and points them at the c->table_* entries selected by U
 * and V. The loop opened by the prologue is still open after this macro.
 * Needs 'c' at the expansion site.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
                        YSCALE_YUV_2_PACKED2_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
473

    
474
/*
 * Per-pixel-pair prologue for a single input row (no vertical blending).
 *
 * Opens a loop over i = 0 .. (dstW>>1)-1 and leaves it OPEN (the user closes
 * it). Defines i2 = 2*i and
 *   Y1, Y2 = buf0[i2] >> 7, buf0[i2+1] >> 7
 *   U, V   = uvbuf1[i] >> 7, uvbuf1[i+2048] >> 7   (only uvbuf1 is read)
 * Needs at the expansion site: i, dstW, buf0, uvbuf1.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_PACKED1_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf1[i     ])>>7;\
                        int V= (uvbuf1[i+2048])>>7;
481

    
482
/*
 * YSCALE_YUV_2_PACKED1_C followed by the colour-table lookups: declares
 * 'type *r, *b, *g' and points them at the c->table_* entries selected by U
 * and V. The loop opened by the prologue is still open after this macro.
 * Needs 'c' at the expansion site.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
                        YSCALE_YUV_2_PACKED1_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
488

    
489
/*
 * Like YSCALE_YUV_2_PACKED1_C for luma, but chroma is the average of the two
 * chroma rows.
 *
 * Opens a loop over i = 0 .. (dstW>>1)-1 and leaves it OPEN (the user closes
 * it). Defines i2 = 2*i and
 *   Y1, Y2 = buf0[i2] >> 7, buf0[i2+1] >> 7
 *   U, V   = (uvbuf0[k] + uvbuf1[k]) >> 8   for k = i and k = i+2048
 * Needs at the expansion site: i, dstW, buf0, uvbuf0, uvbuf1.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_PACKED1B_C \
                for(i=0; i<(dstW>>1); i++){\
                        const int i2= 2*i;\
                        int Y1= buf0[i2  ]>>7;\
                        int Y2= buf0[i2+1]>>7;\
                        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
                        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;
496

    
497
/*
 * YSCALE_YUV_2_PACKED1B_C followed by the colour-table lookups: declares
 * 'type *r, *b, *g' and points them at the c->table_* entries selected by U
 * and V. The loop opened by the prologue is still open after this macro.
 * Needs 'c' at the expansion site.
 * The last line carries no line continuation, so the macro cannot absorb
 * whatever line follows it.
 */
#define YSCALE_YUV_2_RGB1B_C(type) \
                        YSCALE_YUV_2_PACKED1B_C\
                        type *r, *b, *g;\
                        r = c->table_rV[V];\
                        g = c->table_gU[U] + c->table_gV[V];\
                        b = c->table_bU[U];
503

    
504
/*
 * Writes one output row in the packed format selected by c->dstFormat.
 *
 *   func(type)  one of the YSCALE_YUV_2_RGB*_C prologues: it opens the
 *               per-pixel-pair loop and provides i2, Y1, Y2 and the r/g/b
 *               table pointers; every case closes that loop itself
 *   func2       one of the YSCALE_YUV_2_PACKED*_C prologues (no tables),
 *               used for the YUY2 case
 *
 * Needs at the expansion site: c, dest, y, i, dstW and whatever the chosen
 * prologue needs. The 1 bpp case does not use func: it reads buf0, buf1,
 * yalpha and yalpha1 directly and packs 8 pixels per output byte.
 *
 * The 16/15/8/4/1 bpp cases add ordered-dither offsets, chosen by y and the
 * pixel position, to the luma index before the table lookup.
 *
 * 'dest' is advanced in the 24 bpp and 1 bpp cases. The advance is written as
 * a plain assignment, because using a cast as an lvalue
 * ("((uint8_t*)dest)+=6") is not valid C. The assignment has the same effect
 * whatever pointer type dest has at the expansion site.
 *
 * An experimental error-diffusion variant of the 1 bpp case that used to sit
 * here as commented-out code was removed; see the revision history.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
        switch(c->dstFormat)\
        {\
        case IMGFMT_BGR32:\
        case IMGFMT_RGB32:\
                func(uint32_t)\
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }                \
                break;\
        case IMGFMT_RGB24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= r[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= b[Y1];\
                        ((uint8_t*)dest)[3]= r[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= b[Y2];\
                        dest= (void*)((uint8_t*)dest + 6);\
                }\
                break;\
        case IMGFMT_BGR24:\
                func(uint8_t)\
                        ((uint8_t*)dest)[0]= b[Y1];\
                        ((uint8_t*)dest)[1]= g[Y1];\
                        ((uint8_t*)dest)[2]= r[Y1];\
                        ((uint8_t*)dest)[3]= b[Y2];\
                        ((uint8_t*)dest)[4]= g[Y2];\
                        ((uint8_t*)dest)[5]= r[Y2];\
                        dest= (void*)((uint8_t*)dest + 6);\
                }\
                break;\
        case IMGFMT_RGB16:\
        case IMGFMT_BGR16:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_4[y&1    ][0];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_4[y&1    ][1];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB15:\
        case IMGFMT_BGR15:\
                {\
                        const int dr1= dither_2x2_8[y&1    ][0];\
                        const int dg1= dither_2x2_8[y&1    ][1];\
                        const int db1= dither_2x2_8[(y&1)^1][0];\
                        const int dr2= dither_2x2_8[y&1    ][1];\
                        const int dg2= dither_2x2_8[y&1    ][0];\
                        const int db2= dither_2x2_8[(y&1)^1][1];\
                        func(uint16_t)\
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
                        }\
                }\
                break;\
        case IMGFMT_RGB8:\
        case IMGFMT_BGR8:\
                {\
                        const uint8_t * const d64= dither_8x8_73[y&7];\
                        const uint8_t * const d32= dither_8x8_32[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB4:\
        case IMGFMT_BGR4:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
                        }\
                }\
                break;\
        case IMGFMT_RG4B:\
        case IMGFMT_BG4B:\
                {\
                        const uint8_t * const d64= dither_8x8_73 [y&7];\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        func(uint8_t)\
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
                        }\
                }\
                break;\
        case IMGFMT_RGB1:\
        case IMGFMT_BGR1:\
                {\
                        const uint8_t * const d128=dither_8x8_220[y&7];\
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];\
                        for(i=0; i<dstW-7; i+=8){\
                                int acc;\
                                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                                ((uint8_t*)dest)[0]= acc;\
                                dest= (void*)((uint8_t*)dest + 1);\
                        }\
                }\
                break;\
        case IMGFMT_YUY2:\
                func2\
                        ((uint8_t*)dest)[2*i2+0]= Y1;\
                        ((uint8_t*)dest)[2*i2+1]= U;\
                        ((uint8_t*)dest)[2*i2+2]= Y2;\
                        ((uint8_t*)dest)[2*i2+3]= V;\
                }                \
                break;\
        }
673

    
674

    
675
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
676
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
677
                                    uint8_t *dest, int dstW, int y)
678
{
679
        int i;
680
        switch(c->dstFormat)
681
        {
682
        case IMGFMT_RGB32:
683
        case IMGFMT_BGR32:
684
                YSCALE_YUV_2_RGBX_C(uint32_t)
685
                        ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
686
                        ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
687
                }
688
                break;
689
        case IMGFMT_RGB24:
690
                YSCALE_YUV_2_RGBX_C(uint8_t)
691
                        ((uint8_t*)dest)[0]= r[Y1];
692
                        ((uint8_t*)dest)[1]= g[Y1];
693
                        ((uint8_t*)dest)[2]= b[Y1];
694
                        ((uint8_t*)dest)[3]= r[Y2];
695
                        ((uint8_t*)dest)[4]= g[Y2];
696
                        ((uint8_t*)dest)[5]= b[Y2];
697
                        ((uint8_t*)dest)+=6;
698
                }
699
                break;
700
        case IMGFMT_BGR24:
701
                YSCALE_YUV_2_RGBX_C(uint8_t)
702
                        ((uint8_t*)dest)[0]= b[Y1];
703
                        ((uint8_t*)dest)[1]= g[Y1];
704
                        ((uint8_t*)dest)[2]= r[Y1];
705
                        ((uint8_t*)dest)[3]= b[Y2];
706
                        ((uint8_t*)dest)[4]= g[Y2];
707
                        ((uint8_t*)dest)[5]= r[Y2];
708
                        ((uint8_t*)dest)+=6;
709
                }
710
                break;
711
        case IMGFMT_RGB16:
712
        case IMGFMT_BGR16:
713
                {
714
                        const int dr1= dither_2x2_8[y&1    ][0];
715
                        const int dg1= dither_2x2_4[y&1    ][0];
716
                        const int db1= dither_2x2_8[(y&1)^1][0];
717
                        const int dr2= dither_2x2_8[y&1    ][1];
718
                        const int dg2= dither_2x2_4[y&1    ][1];
719
                        const int db2= dither_2x2_8[(y&1)^1][1];
720
                        YSCALE_YUV_2_RGBX_C(uint16_t)
721
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
722
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
723
                        }
724
                }
725
                break;
726
        case IMGFMT_RGB15:
727
        case IMGFMT_BGR15:
728
                {
729
                        const int dr1= dither_2x2_8[y&1    ][0];
730
                        const int dg1= dither_2x2_8[y&1    ][1];
731
                        const int db1= dither_2x2_8[(y&1)^1][0];
732
                        const int dr2= dither_2x2_8[y&1    ][1];
733
                        const int dg2= dither_2x2_8[y&1    ][0];
734
                        const int db2= dither_2x2_8[(y&1)^1][1];
735
                        YSCALE_YUV_2_RGBX_C(uint16_t)
736
                                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
737
                                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
738
                        }
739
                }
740
                break;
741
        case IMGFMT_RGB8:
742
        case IMGFMT_BGR8:
743
                {
744
                        const uint8_t * const d64= dither_8x8_73[y&7];
745
                        const uint8_t * const d32= dither_8x8_32[y&7];
746
                        YSCALE_YUV_2_RGBX_C(uint8_t)
747
                                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
748
                                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
749
                        }
750
                }
751
                break;
752
        case IMGFMT_RGB4:
753
        case IMGFMT_BGR4:
754
                {
755
                        const uint8_t * const d64= dither_8x8_73 [y&7];
756
                        const uint8_t * const d128=dither_8x8_220[y&7];
757
                        YSCALE_YUV_2_RGBX_C(uint8_t)
758
                                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
759
                                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
760
                        }
761
                }
762
                break;
763
        case IMGFMT_RG4B:
764
        case IMGFMT_BG4B:
765
                {
766
                        const uint8_t * const d64= dither_8x8_73 [y&7];
767
                        const uint8_t * const d128=dither_8x8_220[y&7];
768
                        YSCALE_YUV_2_RGBX_C(uint8_t)
769
                                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
770
                                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
771
                        }
772
                }
773
                break;
774
        case IMGFMT_RGB1:
775
        case IMGFMT_BGR1:
776
                {
777
                        const uint8_t * const d128=dither_8x8_220[y&7];
778
                        uint8_t *g= c->table_gU[128] + c->table_gV[128];
779
                        int acc=0;
780
                        for(i=0; i<dstW-1; i+=2){
781
                                int j;
782
                                int Y1=0;
783
                                int Y2=0;
784

    
785
                                for(j=0; j<lumFilterSize; j++)
786
                                {
787
                                        Y1 += lumSrc[j][i] * lumFilter[j];
788
                                        Y2 += lumSrc[j][i+1] * lumFilter[j];
789
                                }
790
                                Y1>>=19;
791
                                Y2>>=19;
792
                                if((Y1|Y2)&256)
793
                                {
794
                                        if(Y1>255)   Y1=255;
795
                                        else if(Y1<0)Y1=0;
796
                                        if(Y2>255)   Y2=255;
797
                                        else if(Y2<0)Y2=0;
798
                                }
799
                                acc+= acc + g[Y1+d128[(i+0)&7]];
800
                                acc+= acc + g[Y2+d128[(i+1)&7]];
801
                                if((i&7)==6){
802
                                        ((uint8_t*)dest)[0]= acc;
803
                                        ((uint8_t*)dest)++;
804
                                }
805
                        }
806
                }
807
                break;
808
        case IMGFMT_YUY2:
809
                YSCALE_YUV_2_PACKEDX_C(void)
810
                        ((uint8_t*)dest)[2*i2+0]= Y1;
811
                        ((uint8_t*)dest)[2*i2+1]= U;
812
                        ((uint8_t*)dest)[2*i2+2]= Y2;
813
                        ((uint8_t*)dest)[2*i2+3]= V;
814
                }
815
                break;
816
        }
817
}
818

    
819

    
820
// Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one.
// swscale_template.c is included once per enabled variant, with RENAME()
// appending a per-variant suffix and HAVE_* set to match that variant.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef CAN_COMPILE_X86_ASM

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //CAN_COMPILE_X86_ASM

// From here on the HAVE_* macros are redefined per template inclusion.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef CAN_COMPILE_X86_ASM

//X86 versions (disabled)
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //CAN_COMPILE_X86_ASM

// Minor note: the HAVE_xyz macros no longer reflect the build configuration
// after this point, so do not use them below.
898

    
899

    
900
// old global scaler, dont use for new code
901
// will use sws_flags from the command line
902
void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
903
                             int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
904
                             int srcW, int srcH, int dstW, int dstH){
905

    
906
        static SwsContext *context=NULL;
907
        int dstFormat;
908
        int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1};
909

    
910
        switch(dstbpp)
911
        {
912
                case 8 : dstFormat= IMGFMT_Y8;                break;
913
                case 12: dstFormat= IMGFMT_YV12;        break;
914
                case 15: dstFormat= IMGFMT_BGR15;        break;
915
                case 16: dstFormat= IMGFMT_BGR16;        break;
916
                case 24: dstFormat= IMGFMT_BGR24;        break;
917
                case 32: dstFormat= IMGFMT_BGR32;        break;
918
                default: return;
919
        }
920

    
921
        if(!context) context=sws_getContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat);
922

    
923
        context->swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
924
}
925

    
926
/**
 * Build the scaler flags and the source filter from the command-line globals.
 *
 * Reads sws_flags, sws_lum_gblur, sws_chr_gblur, sws_lum_sharpen,
 * sws_chr_sharpen, sws_chr_hshift, sws_chr_vshift and verbose, and rebuilds
 * the four vectors of the global src_filter (any previous vectors are freed).
 *
 * @param flags           receives the scaler algorithm flag, plus
 *                        SWS_PRINT_INFO on the first call or when verbose>1
 * @param srcFilterParam  receives the address of the global src_filter
 * @param dstFilterParam  always receives NULL
 */
void sws_getFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)
{
        static int firstTime=1;
        *flags=0;

#ifdef ARCH_X86
        if(gCpuCaps.hasMMX)
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif
        // print the scaler info once, and always in very verbose mode
        if(firstTime || verbose>1) *flags= SWS_PRINT_INFO;
        firstTime=0;

        // drop the vectors left over from a previous call
        if(src_filter.lumH) sws_freeVec(src_filter.lumH);
        if(src_filter.lumV) sws_freeVec(src_filter.lumV);
        if(src_filter.chrH) sws_freeVec(src_filter.chrH);
        if(src_filter.chrV) sws_freeVec(src_filter.chrV);

        // luma: gaussian blur if requested, identity otherwise
        if(sws_lum_gblur!=0.0){
                src_filter.lumH= sws_getGaussianVec(sws_lum_gblur, 3.0);
                src_filter.lumV= sws_getGaussianVec(sws_lum_gblur, 3.0);
        }else{
                src_filter.lumH= sws_getIdentityVec();
                src_filter.lumV= sws_getIdentityVec();
        }

        // chroma: gaussian blur if requested, identity otherwise
        if(sws_chr_gblur!=0.0){
                src_filter.chrH= sws_getGaussianVec(sws_chr_gblur, 3.0);
                src_filter.chrV= sws_getGaussianVec(sws_chr_gblur, 3.0);
        }else{
                src_filter.chrH= sws_getIdentityVec();
                src_filter.chrV= sws_getIdentityVec();
        }

        // chroma sharpening: convolve with (-1, 2, -1) added to a 1-tap vector of 10/amount
        if(sws_chr_sharpen!=0.0){
                SwsVector *g= sws_getConstVec(-1.0, 3);
                SwsVector *id= sws_getConstVec(10.0/sws_chr_sharpen, 1);
                g->coeff[1]=2.0;
                sws_addVec(id, g);
                sws_convVec(src_filter.chrH, id);
                sws_convVec(src_filter.chrV, id);
                sws_freeVec(g);
                sws_freeVec(id);
        }

        // luma sharpening: same kernel construction as for chroma
        if(sws_lum_sharpen!=0.0){
                SwsVector *g= sws_getConstVec(-1.0, 3);
                SwsVector *id= sws_getConstVec(10.0/sws_lum_sharpen, 1);
                g->coeff[1]=2.0;
                sws_addVec(id, g);
                sws_convVec(src_filter.lumH, id);
                sws_convVec(src_filter.lumV, id);
                sws_freeVec(g);
                sws_freeVec(id);
        }

        if(sws_chr_hshift)
                sws_shiftVec(src_filter.chrH, sws_chr_hshift);

        if(sws_chr_vshift)
                sws_shiftVec(src_filter.chrV, sws_chr_vshift);

        sws_normalizeVec(src_filter.chrH, 1.0);
        sws_normalizeVec(src_filter.chrV, 1.0);
        sws_normalizeVec(src_filter.lumH, 1.0);
        sws_normalizeVec(src_filter.lumV, 1.0);

        if(verbose > 1){
                sws_printVec(src_filter.chrH);
                sws_printVec(src_filter.lumH);
        }

        // map the numeric command-line value to a scaler algorithm;
        // unknown values fall back to bilinear
        switch(sws_flags)
        {
                case 0: *flags|= SWS_FAST_BILINEAR; break;
                case 2: *flags|= SWS_BICUBIC; break;
                case 3: *flags|= SWS_X; break;
                case 4: *flags|= SWS_POINT; break;
                case 5: *flags|= SWS_AREA; break;
                case 6: *flags|= SWS_BICUBLIN; break;
                case 7: *flags|= SWS_GAUSS; break;
                case 8: *flags|= SWS_SINC; break;
                case 9: *flags|= SWS_LANCZOS; break;
                case 10:*flags|= SWS_SPLINE; break;
                case 1:
                default:*flags|= SWS_BILINEAR; break;
        }

        *srcFilterParam= &src_filter;
        *dstFilterParam= NULL;
}
1018

    
1019
// will use sws_flags & src_filter (from cmd line)
1020
SwsContext *sws_getContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat)
1021
{
1022
        int flags;
1023
        SwsFilter *dstFilterParam, *srcFilterParam;
1024
        sws_getFlagsAndFilterFromCmdLine(&flags, &srcFilterParam, &dstFilterParam);
1025

    
1026
        return sws_getContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, srcFilterParam, dstFilterParam);
1027
}
1028

    
1029
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the result is a + b*dist + c*dist^2 + d*dist^3. For larger
 * distances the function steps to the following unit segment: the new segment
 * starts at value 0, with linear term b+2c+3d (the slope of the current cubic
 * at 1), quadratic term c+3d and cubic term -b-3c-6d, and dist is reduced by 1.
 * This is repeated until dist <= 1.
 *
 * Implemented as a loop rather than recursion, so the stack does not grow with
 * dist and a NaN distance simply produces NaN instead of recursing forever.
 * dist is expected to be a finite, moderately small value.
 *
 * @param a,b,c,d  coefficients of the first segment (constant .. cubic)
 * @param dist     distance at which to evaluate
 * @return the spline value at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
        while(dist > 1.0)
        {
                // coefficients of the next segment, all derived from the current ones
                const double nextB=  b + 2.0*c + 3.0*d;
                const double nextC=      c + 3.0*d;
                const double nextD= -b - 3.0*c - 6.0*d;

                a= 0.0;
                b= nextB;
                c= nextC;
                d= nextD;
                dist-= 1.0;
        }

        return ((d*dist + c)*dist + b)*dist + a;
}
1039

    
1040
/**
 * Build the integer filter coefficients and source positions for scaling
 * srcW samples to dstW samples along one dimension.
 *
 * Steps: compute double-precision coefficients for the algorithm selected in
 * flags, convolve them with srcFilter (if given), trim near-zero taps, round
 * the tap count up to filterAlign, fold taps that fall outside the source
 * into the border taps, then scale every filter so its coefficients sum to
 * `one` and store them as int16_t.
 *
 * @param outFilter     receives a newly allocated table of
 *                      *outFilterSize * (dstW+1) coefficients; the extra entry
 *                      duplicates the last filter for the MMX scaler, which
 *                      reads over the end
 * @param filterPos     receives a newly allocated array of dstW+1 source
 *                      start positions (last entry duplicated as above)
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per destination sample, 16.16 fixed point
 * @param srcW          source size
 * @param dstW          destination size
 * @param filterAlign   the tap count is rounded up to a multiple of this; the
 *                      rounding expression assumes a power of two
 * @param one           value the coefficients of each filter are scaled to
 * @param flags         SWS_* algorithm flag, optional parameter bits
 *                      (SWS_PARAM_MASK) and SWS_PRINT_INFO
 * @param srcFilter     optional vector convolved with every scale filter
 * @param dstFilter     not applied yet (FIXME); only its length is added to
 *                      the intermediate filter size
 *
 * Allocation failures are not detected (the function has no way to report
 * them).
 */
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                              int srcW, int dstW, int filterAlign, int one, int flags,
                              SwsVector *srcFilter, SwsVector *dstFilter)
{
        int i;
        int filterSize;
        int filter2Size;
        int minFilterSize;
        double *filter=NULL;
        double *filter2=NULL;
#ifdef ARCH_X86
        if(gCpuCaps.hasMMX)
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

        // Note the +1 is for the MMXscaler which reads over the end
        *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));

        if(ABS(xInc - 0x10000) <10) // unscaled
        {
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
                for(i=0; i<dstW*filterSize; i++) filter[i]=0;

                for(i=0; i<dstW; i++)
                {
                        filter[i*filterSize]=1;
                        (*filterPos)[i]=i;
                }

        }
        else if(flags&SWS_POINT) // lame looking point sampling mode
        {
                int xDstInSrc;
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

                        (*filterPos)[i]= xx;
                        filter[i]= 1.0;
                        xDstInSrc+= xInc;
                }
        }
        else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
        {
                int xDstInSrc;
                if     (flags&SWS_BICUBIC) filterSize= 4;
                else if(flags&SWS_X      ) filterSize= 4;
                else                       filterSize= 2; // SWS_BILINEAR / SWS_AREA
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
                        int j;

                        (*filterPos)[i]= xx;
                        //Bilinear upscale / linear interpolate / Area averaging
                        for(j=0; j<filterSize; j++)
                        {
                                double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                                double coeff= 1.0 - d;
                                if(coeff<0) coeff=0;
                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc;
                }
        }
        else
        {
                double xDstInSrc;
                double sizeFactor, filterSizeInSrc;
                const double xInc1= (double)xInc / (double)(1<<16);
                int param= (flags&SWS_PARAM_MASK)>>SWS_PARAM_SHIFT;

                // support of each kernel, in destination samples
                if     (flags&SWS_BICUBIC)      sizeFactor= 4.0;
                else if(flags&SWS_X)            sizeFactor= 8.0;
                else if(flags&SWS_AREA)         sizeFactor= 1.0; //downscale only, for upscale it is bilinear
                else if(flags&SWS_GAUSS)        sizeFactor= 8.0;   // infinite ;)
                else if(flags&SWS_LANCZOS)      sizeFactor= param ? 2.0*param : 6.0;
                else if(flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
                else if(flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
                else if(flags&SWS_BILINEAR)     sizeFactor= 2.0;
                else {
                        sizeFactor= 0.0; //GCC warning killer
                        ASSERT(0)
                }

                if(xInc1 <= 1.0)        filterSizeInSrc= sizeFactor; // upscale
                else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

                filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
                if(filterSize > srcW-2) filterSize=srcW-2;
                // srcW-2 is <= 0 for very narrow sources; keep at least one tap
                // so the allocation size and the asserts below stay valid
                if(filterSize < 1) filterSize=1;

                filter= (double*)memalign(16, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc1 / 2.0 - 0.5;
                for(i=0; i<dstW; i++)
                {
                        int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
                        int j;
                        (*filterPos)[i]= xx;
                        for(j=0; j<filterSize; j++)
                        {
                                // distance from the tap to the sampling point, in kernel units
                                double d= ABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                                double coeff;
                                if(flags & SWS_BICUBIC)
                                {
                                        double A= param ? -param*0.01 : -0.60;

                                        // Equation is from VirtualDub
                                        if(d<1.0)
                                                coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
                                        else if(d<2.0)
                                                coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d);
                                        else
                                                coeff=0.0;
                                }
                                else if(flags & SWS_X)
                                {
                                        double A= param ? param*0.1 : 1.0;

                                        if(d<1.0)
                                                coeff = cos(d*PI);
                                        else
                                                coeff=-1.0;
                                        if(coeff<0.0)   coeff= -pow(-coeff, A);
                                        else            coeff=  pow( coeff, A);
                                        coeff= coeff*0.5 + 0.5;
                                }
                                else if(flags & SWS_AREA)
                                {
                                        double srcPixelSize= 1.0/xInc1;
                                        if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
                                        else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                                        else coeff=0.0;
                                }
                                else if(flags & SWS_GAUSS)
                                {
                                        double p= param ? param*0.1 : 3.0;
                                        coeff = pow(2.0, - p*d*d);
                                }
                                else if(flags & SWS_SINC)
                                {
                                        coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                                }
                                else if(flags & SWS_LANCZOS)
                                {
                                        double p= param ? param : 3.0;
                                        coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                                        if(d>p) coeff=0;
                                }
                                else if(flags & SWS_BILINEAR)
                                {
                                        coeff= 1.0 - d;
                                        if(coeff<0) coeff=0;
                                }
                                else if(flags & SWS_SPLINE)
                                {
                                        double p=-2.196152422706632;
                                        coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                                }
                                else {
                                        coeff= 0.0; //GCC warning killer
                                        ASSERT(0)
                                }

                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc1;
                }
        }

        /* apply src & dst Filter to filter -> filter2, then free filter */
        ASSERT(filterSize>0)
        filter2Size= filterSize;
        if(srcFilter) filter2Size+= srcFilter->length - 1;
        if(dstFilter) filter2Size+= dstFilter->length - 1;
        ASSERT(filter2Size>0)
        filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));

        for(i=0; i<dstW; i++)
        {
                int j;
                SwsVector scaleFilter;
                SwsVector *outVec;

                scaleFilter.coeff= filter + i*filterSize;
                scaleFilter.length= filterSize;

                if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
                else          outVec= &scaleFilter;

                ASSERT(outVec->length == filter2Size)
                //FIXME dstFilter

                for(j=0; j<outVec->length; j++)
                {
                        filter2[i*filter2Size + j]= outVec->coeff[j];
                }
                // dstFilter is not applied, so outVec can be shorter than
                // filter2Size; zero the rest instead of leaving it uninitialized
                for(; j<filter2Size; j++)
                {
                        filter2[i*filter2Size + j]= 0.0;
                }

                (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

                if(outVec != &scaleFilter) sws_freeVec(outVec);
        }
        free(filter); filter=NULL;

        /* try to reduce the filter-size (step1 find size and shift left) */
        // Assume it is near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
        minFilterSize= 0;
        for(i=dstW-1; i>=0; i--)
        {
                int min= filter2Size;
                int j;
                double cutOff=0.0;

                /* get rid of near zero elements on the left by shifting left */
                for(j=0; j<filter2Size; j++)
                {
                        int k;
                        // element 0 is always the current leftmost tap because of the shifting below
                        cutOff += ABS(filter2[i*filter2Size]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;

                        /* preserve monotonicity because the core cannot handle the filter otherwise */
                        if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

                        // Move filter coeffs left
                        for(k=1; k<filter2Size; k++)
                                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
                        filter2[i*filter2Size + k - 1]= 0.0;
                        (*filterPos)[i]++;
                }

                cutOff=0.0;
                /* count near zeros on the right */
                for(j=filter2Size-1; j>0; j--)
                {
                        cutOff += ABS(filter2[i*filter2Size + j]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
                        min--;
                }

                if(min>minFilterSize) minFilterSize= min;
        }

        ASSERT(minFilterSize > 0)
        // round the tap count up to a multiple of filterAlign
        filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
        ASSERT(filterSize > 0)
        filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
        *outFilterSize= filterSize;

        if(flags&SWS_PRINT_INFO)
                MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
        /* try to reduce the filter-size (step2 reduce it) */
        for(i=0; i<dstW; i++)
        {
                int j;

                for(j=0; j<filterSize; j++)
                {
                        if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
                        else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
                }
        }
        free(filter2); filter2=NULL;

        //FIXME try to align filterpos if possible

        //fix borders
        for(i=0; i<dstW; i++)
        {
                int j;
                if((*filterPos)[i] < 0)
                {
                        // Move filter coeffs left to compensate for filterPos
                        for(j=1; j<filterSize; j++)
                        {
                                int left= MAX(j + (*filterPos)[i], 0);
                                filter[i*filterSize + left] += filter[i*filterSize + j];
                                filter[i*filterSize + j]=0;
                        }
                        (*filterPos)[i]= 0;
                }

                if((*filterPos)[i] + filterSize > srcW)
                {
                        int shift= (*filterPos)[i] + filterSize - srcW;
                        // Move filter coeffs right to compensate for filterPos
                        for(j=filterSize-2; j>=0; j--)
                        {
                                int right= MIN(j + shift, filterSize-1);
                                filter[i*filterSize +right] += filter[i*filterSize +j];
                                filter[i*filterSize +j]=0;
                        }
                        (*filterPos)[i]= srcW - filterSize;
                }
        }

        // Note the +1 is for the MMXscaler which reads over the end
        *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
        memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

        /* Normalize & Store in outFilter */
        for(i=0; i<dstW; i++)
        {
                int j;
                double sum=0;
                double scale= one;
                for(j=0; j<filterSize; j++)
                {
                        sum+= filter[i*filterSize + j];
                }
                scale/= sum;
                for(j=0; j<*outFilterSize; j++)
                {
                        (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
                }
        }

        (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
        for(i=0; i<*outFilterSize; i++)
        {
                // duplicate the last filter into the extra trailing slot
                int j= dstW*(*outFilterSize);
                (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
        }

        free(filter);
}
1389

    
1390
#ifdef ARCH_X86
1391
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1392
{
1393
        uint8_t *fragmentA;
1394
        int imm8OfPShufW1A;
1395
        int imm8OfPShufW2A;
1396
        int fragmentLengthA;
1397
        uint8_t *fragmentB;
1398
        int imm8OfPShufW1B;
1399
        int imm8OfPShufW2B;
1400
        int fragmentLengthB;
1401
        int fragmentPos;
1402

    
1403
        int xpos, i;
1404

    
1405
        // create an optimized horizontal scaling routine
1406

    
1407
        //code fragment
1408

    
1409
        asm volatile(
1410
                "jmp 9f                                \n\t"
1411
        // Begin
1412
                "0:                                \n\t"
1413
                "movq (%%edx, %%eax), %%mm3        \n\t" 
1414
                "movd (%%ecx, %%esi), %%mm0        \n\t" 
1415
                "movd 1(%%ecx, %%esi), %%mm1        \n\t"
1416
                "punpcklbw %%mm7, %%mm1                \n\t"
1417
                "punpcklbw %%mm7, %%mm0                \n\t"
1418
                "pshufw $0xFF, %%mm1, %%mm1        \n\t"
1419
                "1:                                \n\t"
1420
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1421
                "2:                                \n\t"
1422
                "psubw %%mm1, %%mm0                \n\t"
1423
                "movl 8(%%ebx, %%eax), %%esi        \n\t"
1424
                "pmullw %%mm3, %%mm0                \n\t"
1425
                "psllw $7, %%mm1                \n\t"
1426
                "paddw %%mm1, %%mm0                \n\t"
1427

    
1428
                "movq %%mm0, (%%edi, %%eax)        \n\t"
1429

    
1430
                "addl $8, %%eax                        \n\t"
1431
        // End
1432
                "9:                                \n\t"
1433
//                "int $3\n\t"
1434
                "leal 0b, %0                        \n\t"
1435
                "leal 1b, %1                        \n\t"
1436
                "leal 2b, %2                        \n\t"
1437
                "decl %1                        \n\t"
1438
                "decl %2                        \n\t"
1439
                "subl %0, %1                        \n\t"
1440
                "subl %0, %2                        \n\t"
1441
                "leal 9b, %3                        \n\t"
1442
                "subl %0, %3                        \n\t"
1443

    
1444

    
1445
                :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1446
                "=r" (fragmentLengthA)
1447
        );
1448

    
1449
        asm volatile(
1450
                "jmp 9f                                \n\t"
1451
        // Begin
1452
                "0:                                \n\t"
1453
                "movq (%%edx, %%eax), %%mm3        \n\t" 
1454
                "movd (%%ecx, %%esi), %%mm0        \n\t" 
1455
                "punpcklbw %%mm7, %%mm0                \n\t"
1456
                "pshufw $0xFF, %%mm0, %%mm1        \n\t"
1457
                "1:                                \n\t"
1458
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
1459
                "2:                                \n\t"
1460
                "psubw %%mm1, %%mm0                \n\t"
1461
                "movl 8(%%ebx, %%eax), %%esi        \n\t"
1462
                "pmullw %%mm3, %%mm0                \n\t"
1463
                "psllw $7, %%mm1                \n\t"
1464
                "paddw %%mm1, %%mm0                \n\t"
1465

    
1466
                "movq %%mm0, (%%edi, %%eax)        \n\t"
1467

    
1468
                "addl $8, %%eax                        \n\t"
1469
        // End
1470
                "9:                                \n\t"
1471
//                "int $3\n\t"
1472
                "leal 0b, %0                        \n\t"
1473
                "leal 1b, %1                        \n\t"
1474
                "leal 2b, %2                        \n\t"
1475
                "decl %1                        \n\t"
1476
                "decl %2                        \n\t"
1477
                "subl %0, %1                        \n\t"
1478
                "subl %0, %2                        \n\t"
1479
                "leal 9b, %3                        \n\t"
1480
                "subl %0, %3                        \n\t"
1481

    
1482

    
1483
                :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1484
                "=r" (fragmentLengthB)
1485
        );
1486

    
1487
        xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1488
        fragmentPos=0;
1489
        
1490
        for(i=0; i<dstW/numSplits; i++)
1491
        {
1492
                int xx=xpos>>16;
1493

    
1494
                if((i&3) == 0)
1495
                {
1496
                        int a=0;
1497
                        int b=((xpos+xInc)>>16) - xx;
1498
                        int c=((xpos+xInc*2)>>16) - xx;
1499
                        int d=((xpos+xInc*3)>>16) - xx;
1500

    
1501
                        filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1502
                        filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1503
                        filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1504
                        filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1505
                        filterPos[i/2]= xx;
1506

    
1507
                        if(d+1<4)
1508
                        {
1509
                                int maxShift= 3-(d+1);
1510
                                int shift=0;
1511

    
1512
                                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1513

    
1514
                                funnyCode[fragmentPos + imm8OfPShufW1B]=
1515
                                        (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1516
                                funnyCode[fragmentPos + imm8OfPShufW2B]=
1517
                                        a | (b<<2) | (c<<4) | (d<<6);
1518

    
1519
                                if(i+3>=dstW) shift=maxShift; //avoid overread
1520
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1521

    
1522
                                if(shift && i>=shift)
1523
                                {
1524
                                        funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1525
                                        funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1526
                                        filterPos[i/2]-=shift;
1527
                                }
1528

    
1529
                                fragmentPos+= fragmentLengthB;
1530
                        }
1531
                        else
1532
                        {
1533
                                int maxShift= 3-d;
1534
                                int shift=0;
1535

    
1536
                                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1537

    
1538
                                funnyCode[fragmentPos + imm8OfPShufW1A]=
1539
                                funnyCode[fragmentPos + imm8OfPShufW2A]=
1540
                                        a | (b<<2) | (c<<4) | (d<<6);
1541

    
1542
                                if(i+4>=dstW) shift=maxShift; //avoid overread
1543
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1544

    
1545
                                if(shift && i>=shift)
1546
                                {
1547
                                        funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1548
                                        funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1549
                                        filterPos[i/2]-=shift;
1550
                                }
1551

    
1552
                                fragmentPos+= fragmentLengthA;
1553
                        }
1554

    
1555
                        funnyCode[fragmentPos]= RET;
1556
                }
1557
                xpos+=xInc;
1558
        }
1559
        filterPos[i/2]= xpos>>16; // needed to jump to the next part
1560
}
1561
#endif // ARCH_X86
1562

    
1563
//FIXME remove
1564
/* Intentionally empty; see the FIXME above, initialization happens in globalInit(). */
void SwScale_Init(){
}
1566

    
1567
static void globalInit(){
1568
    // generating tables:
1569
    int i;
1570
    for(i=0; i<768; i++){
1571
        int c= MIN(MAX(i-256, 0), 255);
1572
        clip_table[i]=c;
1573
    }
1574

    
1575
cpuCaps= gCpuCaps;
1576

    
1577
#ifdef RUNTIME_CPUDETECT
1578
#ifdef CAN_COMPILE_X86_ASM
1579
        // ordered per speed fasterst first
1580
        if(gCpuCaps.hasMMX2)
1581
                swScale= swScale_MMX2;
1582
        else if(gCpuCaps.has3DNow)
1583
                swScale= swScale_3DNow;
1584
        else if(gCpuCaps.hasMMX)
1585
                swScale= swScale_MMX;
1586
        else
1587
                swScale= swScale_C;
1588

    
1589
#else
1590
        swScale= swScale_C;
1591
        cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1592
#endif
1593
#else //RUNTIME_CPUDETECT
1594
#ifdef HAVE_MMX2
1595
        swScale= swScale_MMX2;
1596
        cpuCaps.has3DNow = 0;
1597
#elif defined (HAVE_3DNOW)
1598
        swScale= swScale_3DNow;
1599
        cpuCaps.hasMMX2 = 0;
1600
#elif defined (HAVE_MMX)
1601
        swScale= swScale_MMX;
1602
        cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0;
1603
#else
1604
        swScale= swScale_C;
1605
        cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1606
#endif
1607
#endif //!RUNTIME_CPUDETECT
1608
}
1609

    
1610
/**
 * Unscaled planar 4:2:0 -> NV12 conversion of one slice.
 * The luma plane is copied, the two source chroma planes are interleaved
 * into the second destination plane.
 *
 * @param c         scaler context, only srcW is read
 * @param src       source planes, src[1] and src[2] are interleaved in that order
 * @param srcStride bytes per line of each source plane
 * @param srcSliceY first luma line of the slice
 * @param srcSliceH number of luma lines in the slice
 * @param dstParam  destination planes (0: luma, 1: interleaved chroma)
 * @param dstStride bytes per line of each destination plane
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
        /* chroma is subsampled by 2 in both directions, rounded toward +inf */
        const int chrW= -((-c->srcW  )>>1);
        const int chrH= -((-srcSliceH)>>1);

        /* Copy Y plane */
        /* one big memcpy is only valid for identical, positive strides */
        if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
                memcpy(dst, src[0], srcSliceH*dstStride[0]);
        else
        {
                int i;
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst;
                for(i=0; i<srcSliceH; i++)
                {
                        /* copy only the visible pixels, a stride may be larger than the
                           other plane's line or negative */
                        memcpy(dstPtr, srcPtr, c->srcW);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }

        /* the chroma slice starts at half the luma line and uses the stride of its own plane */
        dst = dstParam[1] + dstStride[1]*(srcSliceY>>1);
        interleaveBytes( src[1],src[2],dst,chrW,chrH,srcStride[1],srcStride[2],dstStride[1] );

        return srcSliceH;
}
1633

    
1634
/**
 * Unscaled planar -> YUY2 conversion of one slice, done by yv12toyuy2().
 * The destination pointer is advanced to line srcSliceY of dstParam[0].
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

        /* srcStride[1] is passed as the stride of both chroma planes */
        yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );

        return srcSliceH;
}
1642

    
1643
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1644
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1645
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
1646
        const int srcFormat= c->srcFormat;
1647
        const int dstFormat= c->dstFormat;
1648
        const int srcBpp= ((srcFormat&0xFF) + 7)>>3;
1649
        const int dstBpp= ((dstFormat&0xFF) + 7)>>3;
1650
        const int srcId= (srcFormat&0xFF)>>2; // 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 
1651
        const int dstId= (dstFormat&0xFF)>>2;
1652
        void (*conv)(const uint8_t *src, uint8_t *dst, unsigned src_size)=NULL;
1653

    
1654
        /* BGR -> BGR */
1655
        if(   (isBGR(srcFormat) && isBGR(dstFormat))
1656
           || (isRGB(srcFormat) && isRGB(dstFormat))){
1657
                switch(srcId | (dstId<<4)){
1658
                case 0x34: conv= rgb16to15; break;
1659
                case 0x36: conv= rgb24to15; break;
1660
                case 0x38: conv= rgb32to15; break;
1661
                case 0x43: conv= rgb15to16; break;
1662
                case 0x46: conv= rgb24to16; break;
1663
                case 0x48: conv= rgb32to16; break;
1664
                case 0x63: conv= rgb15to24; break;
1665
                case 0x64: conv= rgb16to24; break;
1666
                case 0x68: conv= rgb32to24; break;
1667
                case 0x83: conv= rgb15to32; break;
1668
                case 0x84: conv= rgb16to32; break;
1669
                case 0x86: conv= rgb24to32; break;
1670
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1671
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1672
                }
1673
        }else if(   (isBGR(srcFormat) && isRGB(dstFormat))
1674
                 || (isRGB(srcFormat) && isBGR(dstFormat))){
1675
                switch(srcId | (dstId<<4)){
1676
                case 0x33: conv= rgb15tobgr15; break;
1677
                case 0x34: conv= rgb16tobgr15; break;
1678
                case 0x36: conv= rgb24tobgr15; break;
1679
                case 0x38: conv= rgb32tobgr15; break;
1680
                case 0x43: conv= rgb15tobgr16; break;
1681
                case 0x44: conv= rgb16tobgr16; break;
1682
                case 0x46: conv= rgb24tobgr16; break;
1683
                case 0x48: conv= rgb32tobgr16; break;
1684
                case 0x63: conv= rgb15tobgr24; break;
1685
                case 0x64: conv= rgb16tobgr24; break;
1686
                case 0x66: conv= rgb24tobgr24; break;
1687
                case 0x68: conv= rgb32tobgr24; break;
1688
                case 0x83: conv= rgb15tobgr32; break;
1689
                case 0x84: conv= rgb16tobgr32; break;
1690
                case 0x86: conv= rgb24tobgr32; break;
1691
                case 0x88: conv= rgb32tobgr32; break;
1692
                default: MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1693
                                 vo_format_name(srcFormat), vo_format_name(dstFormat)); break;
1694
                }
1695
        }else{
1696
                MSG_ERR("swScaler: internal error %s -> %s converter\n", 
1697
                         vo_format_name(srcFormat), vo_format_name(dstFormat));
1698
        }
1699

    
1700
        if(dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1701
                conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1702
        else
1703
        {
1704
                int i;
1705
                uint8_t *srcPtr= src[0];
1706
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1707

    
1708
                for(i=0; i<srcSliceH; i++)
1709
                {
1710
                        conv(srcPtr, dstPtr, c->srcW*srcBpp);
1711
                        srcPtr+= srcStride[0];
1712
                        dstPtr+= dstStride[0];
1713
                }
1714
        }     
1715
        return srcSliceH;
1716
}
1717

    
1718
/**
 * Unscaled BGR24 -> YV12 conversion of one slice, done by rgb24toyv12().
 * The luma destination starts at line srcSliceY, both chroma destinations at
 * line srcSliceY>>1.
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){

        /* dstStride[1] is passed as the stride of both chroma planes */
        rgb24toyv12(
                src[0],
                dst[0]+ srcSliceY    *dstStride[0],
                dst[1]+(srcSliceY>>1)*dstStride[1],
                dst[2]+(srcSliceY>>1)*dstStride[2],
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);
        return srcSliceH;
}
1730

    
1731
/**
 * Unscaled YVU9 -> YV12 conversion: the luma plane is copied, both chroma
 * planes are enlarged with planar2x(). If the destination format is not
 * IMGFMT_YV12 the two chroma planes are written swapped.
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        int i;

        /* copy Y */
        if(srcStride[0]==dstStride[0])
                memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
        else{
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

                for(i=0; i<srcSliceH; i++)
                {
                        memcpy(dstPtr, srcPtr, c->srcW);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }

        /* NOTE(review): the chroma planes are processed over the full chrSrcH and
           written to the start of the destination planes regardless of srcSliceY /
           srcSliceH; this looks like it assumes the whole picture is passed as a
           single slice - confirm against the callers */
        if(c->dstFormat==IMGFMT_YV12){
                planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
                planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
        }else{
                planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
                planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
        }
        return srcSliceH;
}
1759

    
1760
/**
1761
 * bring pointers in YUV order instead of YVU
1762
 */
1763
static inline void sws_orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1764
        if(format == IMGFMT_YV12 || format == IMGFMT_YVU9
1765
           || format == IMGFMT_444P || format == IMGFMT_422P || format == IMGFMT_411P){
1766
                sortedP[0]= p[0];
1767
                sortedP[1]= p[2];
1768
                sortedP[2]= p[1];
1769
                sortedStride[0]= stride[0];
1770
                sortedStride[1]= stride[2];
1771
                sortedStride[2]= stride[1];
1772
        }
1773
        else if(isPacked(format) || isGray(format))
1774
        {
1775
                sortedP[0]= p[0];
1776
                sortedP[1]= 
1777
                sortedP[2]= NULL;
1778
                sortedStride[0]= stride[0];
1779
                sortedStride[1]= 
1780
                sortedStride[2]= 0;
1781
        }
1782
        else if(format == IMGFMT_I420 || format == IMGFMT_IYUV)
1783
        {
1784
                sortedP[0]= p[0];
1785
                sortedP[1]= p[1];
1786
                sortedP[2]= p[2];
1787
                sortedStride[0]= stride[0];
1788
                sortedStride[1]= stride[1];
1789
                sortedStride[2]= stride[2];
1790
        }else{
1791
                MSG_ERR("internal error in orderYUV\n");
1792
        }
1793
}
1794

    
1795
/* unscaled copy like stuff (assumes nearly identical formats) */
1796
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1797
             int srcSliceH, uint8_t* dst[], int dstStride[]){
1798

    
1799
        if(isPacked(c->srcFormat))
1800
        {
1801
                if(dstStride[0]==srcStride[0])
1802
                        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1803
                else
1804
                {
1805
                        int i;
1806
                        uint8_t *srcPtr= src[0];
1807
                        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1808
                        int length=0;
1809

    
1810
                        /* universal length finder */
1811
                        while(length+c->srcW <= ABS(dstStride[0]) 
1812
                           && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1813
                        ASSERT(length!=0);
1814

    
1815
                        for(i=0; i<srcSliceH; i++)
1816
                        {
1817
                                memcpy(dstPtr, srcPtr, length);
1818
                                srcPtr+= srcStride[0];
1819
                                dstPtr+= dstStride[0];
1820
                        }
1821
                }
1822
        }
1823
        else 
1824
        { /* Planar YUV or gray */
1825
                int plane;
1826
                for(plane=0; plane<3; plane++)
1827
                {
1828
                        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1829
                        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1830
                        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1831

    
1832
                        if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1833
                        {
1834
                                if(!isGray(c->dstFormat))
1835
                                        memset(dst[plane], 128, dstStride[plane]*height);
1836
                        }
1837
                        else
1838
                        {
1839
                                if(dstStride[plane]==srcStride[plane])
1840
                                        memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1841
                                else
1842
                                {
1843
                                        int i;
1844
                                        uint8_t *srcPtr= src[plane];
1845
                                        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1846
                                        for(i=0; i<height; i++)
1847
                                        {
1848
                                                memcpy(dstPtr, srcPtr, length);
1849
                                                srcPtr+= srcStride[plane];
1850
                                                dstPtr+= dstStride[plane];
1851
                                        }
1852
                                }
1853
                        }
1854
                }
1855
        }
1856
        return srcSliceH;
1857
}
1858

    
1859
static int remove_dup_fourcc(int fourcc)
1860
{
1861
        switch(fourcc)
1862
        {
1863
            case IMGFMT_I420:
1864
            case IMGFMT_IYUV: return IMGFMT_YV12;
1865
            case IMGFMT_Y8  : return IMGFMT_Y800;
1866
            case IMGFMT_IF09: return IMGFMT_YVU9;
1867
            default: return fourcc;
1868
        }
1869
}
1870

    
1871
static void getSubSampleFactors(int *h, int *v, int format){
1872
        switch(format){
1873
        case IMGFMT_UYVY:
1874
        case IMGFMT_YUY2:
1875
                *h=1;
1876
                *v=0;
1877
                break;
1878
        case IMGFMT_YV12:
1879
        case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
1880
                *h=1;
1881
                *v=1;
1882
                break;
1883
        case IMGFMT_YVU9:
1884
                *h=2;
1885
                *v=2;
1886
                break;
1887
        case IMGFMT_444P:
1888
                *h=0;
1889
                *v=0;
1890
                break;
1891
        case IMGFMT_422P:
1892
                *h=1;
1893
                *v=0;
1894
                break;
1895
        case IMGFMT_411P:
1896
                *h=2;
1897
                *v=0;
1898
                break;
1899
        default:
1900
                *h=0;
1901
                *v=0;
1902
                break;
1903
        }
1904
}
1905

    
1906
/**
 * Converts a 16.16 fixed point value to a rounded 16 bit integer with
 * saturation. The result is the two's complement bit pattern of the signed
 * value: everything above 0x7FFF gives 0x7FFF, everything below -0x7FFF gives
 * 0x8000.
 */
static uint16_t roundToInt16(int64_t f){
        /* keep all 64 bits until after the clamp; narrowing to int first would
           let large magnitudes wrap around instead of saturating */
        int64_t r= (f + (1<<15))>>16;
             if(r<-0x7FFF) return 0x8000;
        else if(r> 0x7FFF) return 0x7FFF;
        else               return r;
}
1912

    
1913
/**
1914
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1915
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1916
 * @return -1 if not supported
1917
 */
1918
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1919
        int64_t crv =  inv_table[0];
1920
        int64_t cbu =  inv_table[1];
1921
        int64_t cgu = -inv_table[2];
1922
        int64_t cgv = -inv_table[3];
1923
        int64_t cy  = 1<<16;
1924
        int64_t oy  = 0;
1925

    
1926
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1927
        memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1928
        memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1929

    
1930
        c->brightness= brightness;
1931
        c->contrast  = contrast;
1932
        c->saturation= saturation;
1933
        c->srcRange  = srcRange;
1934
        c->dstRange  = dstRange;
1935

    
1936
        c->uOffset=   0x0400040004000400LL;
1937
        c->vOffset=   0x0400040004000400LL;
1938

    
1939
        if(!srcRange){
1940
                cy= (cy*255) / 219;
1941
                oy= 16<<16;
1942
        }
1943

    
1944
        cy = (cy *contrast             )>>16;
1945
        crv= (crv*contrast * saturation)>>32;
1946
        cbu= (cbu*contrast * saturation)>>32;
1947
        cgu= (cgu*contrast * saturation)>>32;
1948
        cgv= (cgv*contrast * saturation)>>32;
1949

    
1950
        oy -= 256*brightness;
1951

    
1952
        c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1953
        c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1954
        c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1955
        c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1956
        c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1957
        c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1958

    
1959
        yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1960
        //FIXME factorize
1961
        
1962
        return 0;
1963
}
1964

    
1965
/**
1966
 * @return -1 if not supported
1967
 */
1968
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1969
        if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1970

    
1971
        *inv_table = c->srcColorspaceTable;
1972
        *table     = c->dstColorspaceTable;
1973
        *srcRange  = c->srcRange;
1974
        *dstRange  = c->dstRange;
1975
        *brightness= c->brightness;
1976
        *contrast  = c->contrast;
1977
        *saturation= c->saturation;
1978
        
1979
        return 0;        
1980
}
1981

    
1982
SwsContext *sws_getContext(int srcW, int srcH, int origSrcFormat, int dstW, int dstH, int origDstFormat, int flags,
1983
                         SwsFilter *srcFilter, SwsFilter *dstFilter){
1984

    
1985
        SwsContext *c;
1986
        int i;
1987
        int usesFilter;
1988
        int unscaled, needsDither;
1989
        int srcFormat, dstFormat;
1990
        SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1991
#ifdef ARCH_X86
1992
        if(gCpuCaps.hasMMX)
1993
                asm volatile("emms\n\t"::: "memory");
1994
#endif
1995
        if(swScale==NULL) globalInit();
1996

    
1997
        /* avoid dupplicate Formats, so we dont need to check to much */
1998
        srcFormat = remove_dup_fourcc(origSrcFormat);
1999
        dstFormat = remove_dup_fourcc(origDstFormat);
2000

    
2001
        unscaled = (srcW == dstW && srcH == dstH);
2002
        needsDither= (isBGR(dstFormat) || isRGB(dstFormat)) 
2003
                     && (dstFormat&0xFF)<24
2004
                     && ((dstFormat&0xFF)<(srcFormat&0xFF) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2005

    
2006
        if(!isSupportedIn(srcFormat)) 
2007
        {
2008
                MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
2009
                return NULL;
2010
        }
2011
        if(!isSupportedOut(dstFormat))
2012
        {
2013
                MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
2014
                return NULL;
2015
        }
2016

    
2017
        /* sanity check */
2018
        if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2019
        {
2020
                 MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", 
2021
                        srcW, srcH, dstW, dstH);
2022
                return NULL;
2023
        }
2024

    
2025
        if(!dstFilter) dstFilter= &dummyFilter;
2026
        if(!srcFilter) srcFilter= &dummyFilter;
2027

    
2028
        c= memalign(64, sizeof(SwsContext));
2029
        memset(c, 0, sizeof(SwsContext));
2030

    
2031
        c->srcW= srcW;
2032
        c->srcH= srcH;
2033
        c->dstW= dstW;
2034
        c->dstH= dstH;
2035
        c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2036
        c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2037
        c->flags= flags;
2038
        c->dstFormat= dstFormat;
2039
        c->srcFormat= srcFormat;
2040
        c->origDstFormat= origDstFormat;
2041
        c->origSrcFormat= origSrcFormat;
2042

    
2043
        usesFilter=0;
2044
        if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
2045
        if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
2046
        if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesFilter=1;
2047
        if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesFilter=1;
2048
        if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesFilter=1;
2049
        if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesFilter=1;
2050
        if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
2051
        if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
2052

    
2053
        getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2054
        getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2055

    
2056
        // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2057
        if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2058

    
2059
        // drop some chroma lines if the user wants it
2060
        c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2061
        c->chrSrcVSubSample+= c->vChrDrop;
2062

    
2063
        // drop every 2. pixel for chroma calculation unless user wants full chroma
2064
        if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)) 
2065
                c->chrSrcHSubSample=1;
2066

    
2067
        c->chrIntHSubSample= c->chrDstHSubSample;
2068
        c->chrIntVSubSample= c->chrSrcVSubSample;
2069

    
2070
        // note the -((-x)>>y) is so that we allways round toward +inf
2071
        c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2072
        c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2073
        c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2074
        c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2075

    
2076
        sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], 0, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, 0, 0, 1<<16, 1<<16); 
2077

    
2078
        /* unscaled special Cases */
2079
        if(unscaled && !usesFilter)
2080
        {
2081
                /* yv12_to_nv12 */
2082
                if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_NV12)
2083
                {
2084
                        c->swScale= PlanarToNV12Wrapper;
2085
                }
2086
                /* yuv2bgr */
2087
                if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2088
                {
2089
                        c->swScale= yuv2rgb_get_func_ptr(c);
2090
                }
2091
                
2092
                if( srcFormat==IMGFMT_YVU9 && dstFormat==IMGFMT_YV12 )
2093
                {
2094
                        c->swScale= yvu9toyv12Wrapper;
2095
                }
2096

    
2097
                /* bgr24toYV12 */
2098
                if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
2099
                        c->swScale= bgr24toyv12Wrapper;
2100
                
2101
                /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2102
                if(   (isBGR(srcFormat) || isRGB(srcFormat))
2103
                   && (isBGR(dstFormat) || isRGB(dstFormat)) 
2104
                   && !needsDither)
2105
                        c->swScale= rgb2rgbWrapper;
2106

    
2107
                /* LQ converters if -sws 0 or -sws 4*/
2108
                if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2109
                        /* rgb/bgr -> rgb/bgr (dither needed forms) */
2110
                        if(  (isBGR(srcFormat) || isRGB(srcFormat))
2111
                          && (isBGR(dstFormat) || isRGB(dstFormat)) 
2112
                          && needsDither)
2113
                                c->swScale= rgb2rgbWrapper;
2114

    
2115
                        /* yv12_to_yuy2 */
2116
                        if(srcFormat == IMGFMT_YV12 && dstFormat == IMGFMT_YUY2)
2117
                        {
2118
                                c->swScale= PlanarToYuy2Wrapper;
2119
                        }
2120
                }
2121

    
2122
                /* simple copy */
2123
                if(   srcFormat == dstFormat
2124
                   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2125
                   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
2126
                  )
2127
                {
2128
                        c->swScale= simpleCopy;
2129
                }
2130

    
2131
                if(c->swScale){
2132
                        if(flags&SWS_PRINT_INFO)
2133
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2134
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2135
                        return c;
2136
                }
2137
        }
2138

    
2139
        if(cpuCaps.hasMMX2)
2140
        {
2141
                c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2142
                if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2143
                {
2144
                        if(flags&SWS_PRINT_INFO)
2145
                                MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2146
                }
2147
        }
2148
        else
2149
                c->canMMX2BeUsed=0;
2150

    
2151
        c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2152
        c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2153

    
2154
        // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2155
        // but only for the FAST_BILINEAR mode otherwise do correct scaling
2156
        // n-2 is the last chrominance sample available
2157
        // this is not perfect, but no one should notice the difference; the more correct variant
2158
        // would be like the vertical one, but that would require some special code for the
2159
        // first and last pixel
2160
        if(flags&SWS_FAST_BILINEAR)
2161
        {
2162
                if(c->canMMX2BeUsed)
2163
                {
2164
                        c->lumXInc+= 20;
2165
                        c->chrXInc+= 20;
2166
                }
2167
                //we dont use the x86asm scaler if mmx is available
2168
                else if(cpuCaps.hasMMX)
2169
                {
2170
                        c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2171
                        c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2172
                }
2173
        }
2174

    
2175
        /* precalculate horizontal scaler filter coefficients */
2176
        {
2177
                const int filterAlign= cpuCaps.hasMMX ? 4 : 1;
2178

    
2179
                initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2180
                                 srcW      ,       dstW, filterAlign, 1<<14,
2181
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2182
                                 srcFilter->lumH, dstFilter->lumH);
2183
                initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2184
                                 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2185
                                 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2186
                                 srcFilter->chrH, dstFilter->chrH);
2187

    
2188
#ifdef ARCH_X86
2189
// cant downscale !!!
2190
                if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2191
                {
2192
                        c->lumMmx2Filter   = (int16_t*)memalign(8, (dstW        /8+8)*sizeof(int16_t));
2193
                        c->chrMmx2Filter   = (int16_t*)memalign(8, (c->chrDstW  /4+8)*sizeof(int16_t));
2194
                        c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW      /2/8+8)*sizeof(int32_t));
2195
                        c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));
2196

    
2197
                        initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2198
                        initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2199
                }
2200
#endif
2201
        } // Init Horizontal stuff
2202

    
2203

    
2204

    
2205
        /* precalculate vertical scaler filter coefficients */
2206
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2207
                        srcH      ,        dstH, 1, (1<<12)-4,
2208
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2209
                        srcFilter->lumV, dstFilter->lumV);
2210
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2211
                        c->chrSrcH, c->chrDstH, 1, (1<<12)-4,
2212
                        (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2213
                        srcFilter->chrV, dstFilter->chrV);
2214

    
2215
        // Calculate Buffer Sizes so that they wont run out while handling these damn slices
2216
        c->vLumBufSize= c->vLumFilterSize;
2217
        c->vChrBufSize= c->vChrFilterSize;
2218
        for(i=0; i<dstH; i++)
2219
        {
2220
                int chrI= i*c->chrDstH / dstH;
2221
                int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2222
                                 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2223
                nextSlice&= ~3; // Slices start at boundaries which are divisable through 4
2224
                if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2225
                        c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
2226
                if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2227
                        c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2228
        }
2229

    
2230
        // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2231
        c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
2232
        c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
2233
        //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2234
        for(i=0; i<c->vLumBufSize; i++)
2235
                c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
2236
        for(i=0; i<c->vChrBufSize; i++)
2237
                c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
2238

    
2239
        //try to avoid drawing green stuff between the right end and the stride end
2240
        for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
2241
        for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2242

    
2243
        ASSERT(c->chrDstH <= dstH)
2244

    
2245
        if(flags&SWS_PRINT_INFO)
2246
        {
2247
#ifdef DITHER1XBPP
2248
                char *dither= " dithered";
2249
#else
2250
                char *dither= "";
2251
#endif
2252
                if(flags&SWS_FAST_BILINEAR)
2253
                        MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
2254
                else if(flags&SWS_BILINEAR)
2255
                        MSG_INFO("\nSwScaler: BILINEAR scaler, ");
2256
                else if(flags&SWS_BICUBIC)
2257
                        MSG_INFO("\nSwScaler: BICUBIC scaler, ");
2258
                else if(flags&SWS_X)
2259
                        MSG_INFO("\nSwScaler: Experimental scaler, ");
2260
                else if(flags&SWS_POINT)
2261
                        MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
2262
                else if(flags&SWS_AREA)
2263
                        MSG_INFO("\nSwScaler: Area Averageing scaler, ");
2264
                else if(flags&SWS_BICUBLIN)
2265
                        MSG_INFO("\nSwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2266
                else if(flags&SWS_GAUSS)
2267
                        MSG_INFO("\nSwScaler: Gaussian scaler, ");
2268
                else if(flags&SWS_SINC)
2269
                        MSG_INFO("\nSwScaler: Sinc scaler, ");
2270
                else if(flags&SWS_LANCZOS)
2271
                        MSG_INFO("\nSwScaler: Lanczos scaler, ");
2272
                else if(flags&SWS_SPLINE)
2273
                        MSG_INFO("\nSwScaler: Bicubic spline scaler, ");
2274
                else
2275
                        MSG_INFO("\nSwScaler: ehh flags invalid?! ");
2276

    
2277
                if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
2278
                        MSG_INFO("from %s to%s %s ", 
2279
                                vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
2280
                else
2281
                        MSG_INFO("from %s to %s ", 
2282
                                vo_format_name(srcFormat), vo_format_name(dstFormat));
2283

    
2284
                if(cpuCaps.hasMMX2)
2285
                        MSG_INFO("using MMX2\n");
2286
                else if(cpuCaps.has3DNow)
2287
                        MSG_INFO("using 3DNOW\n");
2288
                else if(cpuCaps.hasMMX)
2289
                        MSG_INFO("using MMX\n");
2290
                else
2291
                        MSG_INFO("using C\n");
2292
        }
2293

    
2294
        if((flags & SWS_PRINT_INFO) && verbose>0)
2295
        {
2296
                if(cpuCaps.hasMMX)
2297
                {
2298
                        if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2299
                                MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2300
                        else
2301
                        {
2302
                                if(c->hLumFilterSize==4)
2303
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2304
                                else if(c->hLumFilterSize==8)
2305
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2306
                                else
2307
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2308

    
2309
                                if(c->hChrFilterSize==4)
2310
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2311
                                else if(c->hChrFilterSize==8)
2312
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2313
                                else
2314
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2315
                        }
2316
                }
2317
                else
2318
                {
2319
#ifdef ARCH_X86
2320
                        MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
2321
#else
2322
                        if(flags & SWS_FAST_BILINEAR)
2323
                                MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2324
                        else
2325
                                MSG_V("SwScaler: using C scaler for horizontal scaling\n");
2326
#endif
2327
                }
2328
                if(isPlanarYUV(dstFormat))
2329
                {
2330
                        if(c->vLumFilterSize==1)
2331
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
2332
                        else
2333
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
2334
                }
2335
                else
2336
                {
2337
                        if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
2338
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2339
                                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C");
2340
                        else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
2341
                                MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
2342
                        else
2343
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
2344
                }
2345

    
2346
                if(dstFormat==IMGFMT_BGR24)
2347
                        MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
2348
                                cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C"));
2349
                else if(dstFormat==IMGFMT_BGR32)
2350
                        MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2351
                else if(dstFormat==IMGFMT_BGR16)
2352
                        MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2353
                else if(dstFormat==IMGFMT_BGR15)
2354
                        MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2355

    
2356
                MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2357
        }
2358
        if((flags & SWS_PRINT_INFO) && verbose>1)
2359
        {
2360
                MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2361
                        c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2362
                MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2363
                        c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2364
        }
2365

    
2366
        c->swScale= swScale;
2367
        return c;
2368
}
2369

    
2370
/**
2371
 * swscale warper, so we dont need to export the SwsContext.
2372
 * assumes planar YUV to be in YUV order instead of YVU
2373
 */
2374
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2375
                           int srcSliceH, uint8_t* dst[], int dstStride[]){
2376
        return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2377
}
2378

    
2379
/**
2380
 * swscale warper, so we dont need to export the SwsContext
2381
 */
2382
int sws_scale(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
2383
                           int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
2384
        int srcStride[3];
2385
        int dstStride[3];
2386
        uint8_t *src[3];
2387
        uint8_t *dst[3];
2388
        sws_orderYUV(c->origSrcFormat, src, srcStride, srcParam, srcStrideParam);
2389
        sws_orderYUV(c->origDstFormat, dst, dstStride, dstParam, dstStrideParam);
2390
//printf("sws: slice %d %d\n", srcSliceY, srcSliceH);
2391
        return c->swScale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2392
}
2393

    
2394
/**
2395
 * returns a normalized gaussian curve used to filter stuff
2396
 * quality=3 is high quality, lowwer is lowwer quality
2397
 */
2398

    
2399
SwsVector *sws_getGaussianVec(double variance, double quality){
2400
        const int length= (int)(variance*quality + 0.5) | 1;
2401
        int i;
2402
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2403
        double middle= (length-1)*0.5;
2404
        SwsVector *vec= malloc(sizeof(SwsVector));
2405

    
2406
        vec->coeff= coeff;
2407
        vec->length= length;
2408

    
2409
        for(i=0; i<length; i++)
2410
        {
2411
                double dist= i-middle;
2412
                coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2413
        }
2414

    
2415
        sws_normalizeVec(vec, 1.0);
2416

    
2417
        return vec;
2418
}
2419

    
2420
SwsVector *sws_getConstVec(double c, int length){
2421
        int i;
2422
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2423
        SwsVector *vec= malloc(sizeof(SwsVector));
2424

    
2425
        vec->coeff= coeff;
2426
        vec->length= length;
2427

    
2428
        for(i=0; i<length; i++)
2429
                coeff[i]= c;
2430

    
2431
        return vec;
2432
}
2433

    
2434

    
2435
SwsVector *sws_getIdentityVec(void){
2436
        double *coeff= memalign(sizeof(double), sizeof(double));
2437
        SwsVector *vec= malloc(sizeof(SwsVector));
2438
        coeff[0]= 1.0;
2439

    
2440
        vec->coeff= coeff;
2441
        vec->length= 1;
2442

    
2443
        return vec;
2444
}
2445

    
2446
/**
 * Rescales the vector in place so that its coefficients add up to "height".
 * Every coefficient is multiplied by height / (sum of all coefficients).
 */
void sws_normalizeVec(SwsVector *a, double height){
        double total= 0;
        double factor;
        int k;

        for(k=0; k<a->length; k++)
                total+= a->coeff[k];

        factor= height/total;

        for(k=0; k<a->length; k++)
                a->coeff[k]*= factor;
}
2459

    
2460
/**
 * Multiplies every coefficient of the vector by "scalar", in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
        int k= 0;

        while(k < a->length)
        {
                a->coeff[k]*= scalar;
                k++;
        }
}
2466

    
2467
/**
 * Returns the discrete convolution of a and b as a new vector with
 * a->length + b->length - 1 coefficients. Neither input is modified.
 *
 * @return a newly allocated vector, or NULL if memory could not be allocated
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
        int length= a->length + b->length - 1;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i, j;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // never touch the buffers if either allocation failed
        if(!coeff || !vec)
        {
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        // accumulate every product a[i]*b[j] into output tap i+j
        for(i=0; i<a->length; i++)
        {
                for(j=0; j<b->length; j++)
                {
                        coeff[i+j]+= a->coeff[i]*b->coeff[j];
                }
        }

        return vec;
}
2488

    
2489
/**
 * Returns a + b as a new vector. The result is as long as the longer input;
 * each input is placed at offset (length-1)/2 - (input length-1)/2 so that
 * the two are aligned on their middle taps. Neither input is modified.
 *
 * @return a newly allocated vector, or NULL if memory could not be allocated
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
        int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));
        // start index of each input inside the centered result
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;

        // never touch the buffers if either allocation failed
        if(!coeff || !vec)
        {
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        for(i=0; i<a->length; i++) coeff[i + aOffset]+= a->coeff[i];
        for(i=0; i<b->length; i++) coeff[i + bOffset]+= b->coeff[i];

        return vec;
}
2505

    
2506
/**
 * Returns a - b as a new vector. The result is as long as the longer input;
 * each input is placed at offset (length-1)/2 - (input length-1)/2 so that
 * the two are aligned on their middle taps. Neither input is modified.
 *
 * @return a newly allocated vector, or NULL if memory could not be allocated
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
        int length= MAX(a->length, b->length);
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));
        // start index of each input inside the centered result
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;

        // never touch the buffers if either allocation failed
        if(!coeff || !vec)
        {
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= length;

        for(i=0; i<length; i++) coeff[i]= 0.0;

        for(i=0; i<a->length; i++) coeff[i + aOffset]+= a->coeff[i];
        for(i=0; i<b->length; i++) coeff[i + bOffset]-= b->coeff[i];

        return vec;
}
2522

    
2523
/* shift left / or right if "shift" is negative */
2524
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2525
        int length= a->length + ABS(shift)*2;
2526
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2527
        int i;
2528
        SwsVector *vec= malloc(sizeof(SwsVector));
2529

    
2530
        vec->coeff= coeff;
2531
        vec->length= length;
2532

    
2533
        for(i=0; i<length; i++) coeff[i]= 0.0;
2534

    
2535
        for(i=0; i<a->length; i++)
2536
        {
2537
                coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2538
        }
2539

    
2540
        return vec;
2541
}
2542

    
2543
/**
 * Shifts the vector in place: a takes over the coefficient array and length
 * of sws_getShiftedVec(a, shift), and its previous coefficient array is freed.
 */
void sws_shiftVec(SwsVector *a, int shift){
        SwsVector *result= sws_getShiftedVec(a, shift);
        double *oldCoeff= a->coeff;

        a->coeff= result->coeff;
        a->length= result->length;

        // only the temporary struct is released, its coefficient array now belongs to a
        free(result);
        free(oldCoeff);
}
2550

    
2551
/**
 * Adds b to a in place: a takes over the coefficient array and length of
 * sws_sumVec(a, b), and its previous coefficient array is freed.
 * b is left untouched.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_sumVec(a, b);
        double *oldCoeff= a->coeff;

        a->coeff= result->coeff;
        a->length= result->length;

        // only the temporary struct is released, its coefficient array now belongs to a
        free(result);
        free(oldCoeff);
}
2558

    
2559
/**
 * Subtracts b from a in place: a takes over the coefficient array and length
 * of sws_diffVec(a, b), and its previous coefficient array is freed.
 * b is left untouched.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_diffVec(a, b);
        double *oldCoeff= a->coeff;

        a->coeff= result->coeff;
        a->length= result->length;

        // only the temporary struct is released, its coefficient array now belongs to a
        free(result);
        free(oldCoeff);
}
2566

    
2567
/**
 * Convolves a with b in place: a takes over the coefficient array and length
 * of sws_getConvVec(a, b), and its previous coefficient array is freed.
 * b is left untouched.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
        SwsVector *result= sws_getConvVec(a, b);
        double *oldCoeff= a->coeff;

        a->coeff= result->coeff;
        a->length= result->length;

        // only the temporary struct is released, its coefficient array now belongs to a
        free(result);
        free(oldCoeff);
}
2574

    
2575
/**
 * Returns a deep copy of a: a new vector with its own coefficient array
 * holding the same a->length values.
 *
 * @return a newly allocated vector (release it with sws_freeVec()), or NULL
 *         if memory could not be allocated
 */
SwsVector *sws_cloneVec(SwsVector *a){
        double *coeff= memalign(sizeof(double), a->length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        // never touch the buffers if either allocation failed
        if(!coeff || !vec)
        {
                free(coeff);
                free(vec);
                return NULL;
        }

        vec->coeff= coeff;
        vec->length= a->length;

        for(i=0; i<a->length; i++) coeff[i]= a->coeff[i];

        return vec;
}
2587

    
2588
/**
 * Prints the vector through MSG_DBG2, one line per coefficient: the value
 * followed by a '|' marker that is indented by 0..60 spaces according to
 * where the value lies between min and max.
 * min and max both start at 0, so the scale always includes 0.
 */
void sws_printVec(SwsVector *a){
        int i;
        double max=0;
        double min=0;
        double range;

        for(i=0; i<a->length; i++)
                if(a->coeff[i]>max) max= a->coeff[i];

        for(i=0; i<a->length; i++)
                if(a->coeff[i]<min) min= a->coeff[i];

        range= max - min;

        for(i=0; i<a->length; i++)
        {
                int x= 0;

                // range is 0 when every coefficient is 0; dividing by it would give
                // NaN, and converting NaN (or an out of range value) to int is undefined
                if(range > 0.0)
                {
                        const double pos= (a->coeff[i]-min)*60.0/range +0.5;
                        if(pos >= 0.0 && pos < 61.0) x= (int)pos;
                }

                MSG_DBG2("%1.3f ", a->coeff[i]);
                for(;x>0; x--) MSG_DBG2(" ");
                MSG_DBG2("|\n");
        }
}
2610

    
2611
/**
 * Releases a vector together with its coefficient array.
 * Passing NULL is allowed and does nothing.
 */
void sws_freeVec(SwsVector *a){
        if(a)
        {
                free(a->coeff); // free(NULL) is a no-op, no separate check needed
                a->coeff= NULL;
                a->length= 0;
                free(a);
        }
}
2618

    
2619
/* free a pointer and clear it; free(NULL) is a no-op so no check is needed */
#define SWS_FREE_AND_CLEAR(p) do { free(p); (p)= NULL; } while(0)

/**
 * Releases a scaler context and every buffer it owns: the luma / chroma line
 * buffers, the horizontal and vertical filter coefficient and position
 * tables, the MMX2 filter tables and the yuv table.
 * Passing NULL is allowed and does nothing.
 *
 * Only the first vLumBufSize / vChrBufSize entries of the line pointer
 * arrays are freed; the allocation code stores each line pointer a second
 * time at index i + size, so this releases every line exactly once.
 */
void sws_freeContext(SwsContext *c){
        int line;

        if(!c) return;

        if(c->lumPixBuf)
        {
                for(line=0; line<c->vLumBufSize; line++)
                        SWS_FREE_AND_CLEAR(c->lumPixBuf[line]);
                SWS_FREE_AND_CLEAR(c->lumPixBuf);
        }

        if(c->chrPixBuf)
        {
                for(line=0; line<c->vChrBufSize; line++)
                        SWS_FREE_AND_CLEAR(c->chrPixBuf[line]);
                SWS_FREE_AND_CLEAR(c->chrPixBuf);
        }

        /* filter coefficients */
        SWS_FREE_AND_CLEAR(c->vLumFilter);
        SWS_FREE_AND_CLEAR(c->vChrFilter);
        SWS_FREE_AND_CLEAR(c->hLumFilter);
        SWS_FREE_AND_CLEAR(c->hChrFilter);

        /* filter positions */
        SWS_FREE_AND_CLEAR(c->vLumFilterPos);
        SWS_FREE_AND_CLEAR(c->vChrFilterPos);
        SWS_FREE_AND_CLEAR(c->hLumFilterPos);
        SWS_FREE_AND_CLEAR(c->hChrFilterPos);

        /* MMX2 horizontal scaler tables and the yuv table */
        SWS_FREE_AND_CLEAR(c->lumMmx2Filter);
        SWS_FREE_AND_CLEAR(c->chrMmx2Filter);
        SWS_FREE_AND_CLEAR(c->lumMmx2FilterPos);
        SWS_FREE_AND_CLEAR(c->chrMmx2FilterPos);
        SWS_FREE_AND_CLEAR(c->yuvTable);

        free(c);
}

#undef SWS_FREE_AND_CLEAR
2676