Statistics
| Branch: | Revision:

ffmpeg / postproc / swscale.c @ 5ac80202

History | View | Annotate | Download (71.5 KB)

1
/*
2
    Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at>
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
  supported Input formats: YV12, I420/IYUV, YUY2, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09
21
  supported output formats: YV12, I420/IYUV, BGR15, BGR16, BGR24, BGR32, Y8/Y800, YVU9/IF09
22
  BGR15/16 support dithering
23
  
24
  unscaled special converters
25
  YV12/I420/IYUV -> BGR15/BGR16/BGR24/BGR32
26
  YV12/I420/IYUV -> YV12/I420/IYUV
27
  YUY2/BGR15/BGR16/BGR24/BGR32/RGB24/RGB32 -> same format
28
  BGR24 -> BGR32 & RGB24 -> RGB32
29
  BGR32 -> BGR24 & RGB32 -> RGB24
30
  BGR15 -> BGR16
31
*/
32

    
33
/* 
34
tested special converters
35
 YV12/I420 -> BGR16
36
 YV12 -> YV12
37
 BGR15 -> BGR16
38
 BGR16 -> BGR16
39

40
untested special converters
41
  YV12/I420 -> BGR15/BGR24/BGR32 (it's the yuv2rgb stuff, so it should be ok)
42
  YV12/I420 -> YV12/I420
43
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
44
  BGR24 -> BGR32 & RGB24 -> RGB32
45
  BGR32 -> BGR24 & RGB32 -> RGB24
46
  BGR24 -> YV12
47
*/
48

    
49
#include <inttypes.h>
50
#include <string.h>
51
#include <math.h>
52
#include <stdio.h>
53
#include "../config.h"
54
#include "../mangle.h"
55
#include <assert.h>
56
#ifdef HAVE_MALLOC_H
57
#include <malloc.h>
58
#else
59
#include <stdlib.h>
60
#endif
61
#include "swscale.h"
62
#include "../cpudetect.h"
63
#include "../bswap.h"
64
#include "../libvo/img_format.h"
65
#include "rgb2rgb.h"
66
#include "../libvo/fastmemcpy.h"
67
#include "../mp_msg.h"
68

    
69
#define MSG_WARN(args...) mp_msg(MSGT_SWS,MSGL_WARN, ##args )
70
#define MSG_FATAL(args...) mp_msg(MSGT_SWS,MSGL_FATAL, ##args )
71
#define MSG_ERR(args...) mp_msg(MSGT_SWS,MSGL_ERR, ##args )
72
#define MSG_V(args...) mp_msg(MSGT_SWS,MSGL_V, ##args )
73
#define MSG_DBG2(args...) mp_msg(MSGT_SWS,MSGL_DBG2, ##args )
74
#define MSG_INFO(args...) mp_msg(MSGT_SWS,MSGL_INFO, ##args )
75

    
76
#undef MOVNTQ
77
#undef PAVGB
78

    
79
//#undef HAVE_MMX2
80
//#define HAVE_3DNOW
81
//#undef HAVE_MMX
82
//#undef ARCH_X86
83
//#define WORDS_BIGENDIAN
84
#define DITHER1XBPP
85

    
86
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
87

    
88
#define RET 0xC3 //near return opcode for X86
89

    
90
#ifdef MP_DEBUG
91
#define ASSERT(x) assert(x);
92
#else
93
#define ASSERT(x) ;
94
#endif
95

    
96
#ifdef M_PI
97
#define PI M_PI
98
#else
99
#define PI 3.14159265358979323846
100
#endif
101

    
102
//FIXME replace this with something faster
103
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YVU9)
104
#define isYUV(x)       ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
105
#define isGray(x)      ((x)==IMGFMT_Y800)
106
#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
107
                        || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
108
                        || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
109
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
110
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
111
                        || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
112
                        || (x)==IMGFMT_Y800 || (x)==IMGFMT_YVU9)
113
#define isRGB(x)       (((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB)
114
#define isBGR(x)       (((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR)
115
#define isPacked(x)    ((x)==IMGFMT_YUY2 || isRGB(x) || isBGR(x))
116

    
117
#define RGB2YUV_SHIFT 16
118
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
119
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
120
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
121
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
122
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
123
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
124
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
125
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
126
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
127

    
128
extern int verbose; // defined in mplayer.c
129
/*
130
NOTES
131
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
132

133
TODO
134
more intelligent misalignment avoidance for the horizontal scaler
135
write special vertical cubic upscale version
136
Optimize C code (yv12 / minmax)
137
add support for packed pixel yuv input & output
138
add support for Y8 output
139
optimize bgr24 & bgr32
140
add BGR4 output support
141
write special BGR->BGR scaler
142
deglobalize yuv2rgb*.c
143
*/
144

    
145
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
146
#define MIN(a,b) ((a) > (b) ? (b) : (a))
147
#define MAX(a,b) ((a) < (b) ? (b) : (a))
148

    
149
#ifdef ARCH_X86
150
#define CAN_COMPILE_X86_ASM
151
#endif
152

    
153
#ifdef CAN_COMPILE_X86_ASM
154
static uint64_t __attribute__((aligned(8))) yCoeff=    0x2568256825682568LL;
155
static uint64_t __attribute__((aligned(8))) vrCoeff=   0x3343334333433343LL;
156
static uint64_t __attribute__((aligned(8))) ubCoeff=   0x40cf40cf40cf40cfLL;
157
static uint64_t __attribute__((aligned(8))) vgCoeff=   0xE5E2E5E2E5E2E5E2LL;
158
static uint64_t __attribute__((aligned(8))) ugCoeff=   0xF36EF36EF36EF36ELL;
159
static uint64_t __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
160
static uint64_t __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
161
static uint64_t __attribute__((aligned(8))) w400=      0x0400040004000400LL;
162
static uint64_t __attribute__((aligned(8))) w80=       0x0080008000800080LL;
163
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
164
static uint64_t __attribute__((aligned(8))) w02=       0x0002000200020002LL;
165
static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
166
static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
167
static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
168
static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
169

    
170
static volatile uint64_t __attribute__((aligned(8))) b5Dither;
171
static volatile uint64_t __attribute__((aligned(8))) g5Dither;
172
static volatile uint64_t __attribute__((aligned(8))) g6Dither;
173
static volatile uint64_t __attribute__((aligned(8))) r5Dither;
174

    
175
static uint64_t __attribute__((aligned(8))) dither4[2]={
176
        0x0103010301030103LL,
177
        0x0200020002000200LL,};
178

    
179
static uint64_t __attribute__((aligned(8))) dither8[2]={
180
        0x0602060206020602LL,
181
        0x0004000400040004LL,};
182

    
183
static uint64_t __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
184
static uint64_t __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
185
static uint64_t __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
186
static uint64_t __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
187
static uint64_t __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
188
static uint64_t __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;
189

    
190
static uint64_t __attribute__((aligned(8))) M24A=   0x00FF0000FF0000FFLL;
191
static uint64_t __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
192
static uint64_t __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;
193

    
194
#ifdef FAST_BGR2YV12
195
static const uint64_t bgr2YCoeff  __attribute__((aligned(8))) = 0x000000210041000DULL;
196
static const uint64_t bgr2UCoeff  __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
197
static const uint64_t bgr2VCoeff  __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
198
#else
199
static const uint64_t bgr2YCoeff  __attribute__((aligned(8))) = 0x000020E540830C8BULL;
200
static const uint64_t bgr2UCoeff  __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
201
static const uint64_t bgr2VCoeff  __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
202
#endif
203
static const uint64_t bgr2YOffset __attribute__((aligned(8))) = 0x1010101010101010ULL;
204
static const uint64_t bgr2UVOffset __attribute__((aligned(8)))= 0x8080808080808080ULL;
205
static const uint64_t w1111       __attribute__((aligned(8))) = 0x0001000100010001ULL;
206
#endif
207

    
208
// clipping helper table for C implementations:
209
static unsigned char clip_table[768];
210

    
211
static unsigned short clip_table16b[768];
212
static unsigned short clip_table16g[768];
213
static unsigned short clip_table16r[768];
214
static unsigned short clip_table15b[768];
215
static unsigned short clip_table15g[768];
216
static unsigned short clip_table15r[768];
217

    
218
// yuv->rgb conversion tables:
219
static    int yuvtab_2568[256];
220
static    int yuvtab_3343[256];
221
static    int yuvtab_0c92[256];
222
static    int yuvtab_1a1e[256];
223
static    int yuvtab_40cf[256];
224
// Needed for cubic scaler to catch overflows
225
static    int clip_yuvtab_2568[768];
226
static    int clip_yuvtab_3343[768];
227
static    int clip_yuvtab_0c92[768];
228
static    int clip_yuvtab_1a1e[768];
229
static    int clip_yuvtab_40cf[768];
230

    
231
//global sws_flags from the command line
232
int sws_flags=2;
233

    
234
//global srcFilter
235
SwsFilter src_filter= {NULL, NULL, NULL, NULL};
236

    
237
float sws_lum_gblur= 0.0;
238
float sws_chr_gblur= 0.0;
239
int sws_chr_vshift= 0;
240
int sws_chr_hshift= 0;
241
float sws_chr_sharpen= 0.0;
242
float sws_lum_sharpen= 0.0;
243

    
244
/* cpuCaps combined from cpudetect and what's actually compiled in
245
   (if there is no support for something compiled in, it won't appear here) */
246
static CpuCaps cpuCaps;
247

    
248
void (*swScale)(SwsContext *context, uint8_t* src[], int srcStride[], int srcSliceY,
249
             int srcSliceH, uint8_t* dst[], int dstStride[])=NULL;
250

    
251
static SwsVector *getConvVec(SwsVector *a, SwsVector *b);
252
static inline void orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]);
253

    
254

    
255
#ifdef CAN_COMPILE_X86_ASM
256
void in_asm_used_var_warning_killer()
257
{
258
 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
259
 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
260
 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
261
 if(i) i=0;
262
}
263
#endif
264

    
265
static int testFormat[]={
266
IMGFMT_YVU9,
267
IMGFMT_YV12,
268
//IMGFMT_IYUV,
269
IMGFMT_I420,
270
IMGFMT_BGR15,
271
IMGFMT_BGR16,
272
IMGFMT_BGR24,
273
IMGFMT_BGR32,
274
//IMGFMT_Y8,
275
IMGFMT_Y800,
276
//IMGFMT_YUY2,
277
0
278
};
279

    
280
/**
 * Sum of squared differences between two 8-bit sample planes.
 *
 * @param src1    first plane
 * @param src2    second plane
 * @param stride1 distance in samples between consecutive rows of src1
 * @param stride2 distance in samples between consecutive rows of src2
 * @param w       number of samples compared per row
 * @param h       number of rows compared
 * @return sum over all w*h positions of (src1 - src2)^2; 0 if w or h is <= 0
 */
static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){
        uint64_t total= 0;
        int row, col;

        for(row=0; row<h; row++){
                /* offset of the first sample of this row in each plane */
                const int rowOff1= row*stride1;
                const int rowOff2= row*stride2;

                for(col=0; col<w; col++){
                        const int diff= src1[rowOff1 + col] - src2[rowOff2 + col];
                        total+= diff*diff;
                }
        }
        return total;
}
292

    
293
// test by ref -> src -> dst -> out & compare out against ref
294
// ref & out are YV12
295
static void doTest(uint8_t *ref[3], int refStride[3], int w, int h, int srcFormat, int dstFormat, 
296
                   int srcW, int srcH, int dstW, int dstH, int flags){
297
        uint8_t *src[3];
298
        uint8_t *dst[3];
299
        uint8_t *out[3];
300
        int srcStride[3], dstStride[3];
301
        int i;
302
        uint64_t ssdY, ssdU, ssdV;
303
        SwsContext *srcContext, *dstContext, *outContext;
304
        
305
        for(i=0; i<3; i++){
306
                srcStride[i]= srcW*4;
307
                dstStride[i]= dstW*4;
308
                src[i]= malloc(srcStride[i]*srcH);
309
                dst[i]= malloc(dstStride[i]*dstH);
310
                out[i]= malloc(refStride[i]*h);
311
        }
312

    
313
        srcContext= getSwsContext(w, h, IMGFMT_YV12, srcW, srcH, srcFormat, flags, NULL, NULL);
314
        dstContext= getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, NULL, NULL);
315
        outContext= getSwsContext(dstW, dstH, dstFormat, w, h, IMGFMT_YV12, flags, NULL, NULL);
316
        if(srcContext==NULL ||dstContext==NULL ||outContext==NULL){
317
                printf("Failed allocating swsContext\n");
318
                goto end;
319
        }
320
//        printf("test %X %X %X -> %X %X %X\n", (int)ref[0], (int)ref[1], (int)ref[2],
321
//                (int)src[0], (int)src[1], (int)src[2]);
322

    
323
        srcContext->swScale(srcContext, ref, refStride, 0, h   , src, srcStride);
324
        dstContext->swScale(dstContext, src, srcStride, 0, srcH, dst, dstStride);
325
        outContext->swScale(outContext, dst, dstStride, 0, dstH, out, refStride);
326
             
327
        ssdY= getSSD(ref[0], out[0], refStride[0], refStride[0], w, h);
328
        ssdU= getSSD(ref[1], out[1], refStride[1], refStride[1], (w+1)>>1, (h+1)>>1);
329
        ssdV= getSSD(ref[2], out[2], refStride[2], refStride[2], (w+1)>>1, (h+1)>>1);
330
        
331
        if(isGray(srcFormat) || isGray(dstFormat)) ssdU=ssdV=0; //FIXME check that output is really gray
332
        
333
        ssdY/= w*h;
334
        ssdU/= w*h/4;
335
        ssdV/= w*h/4;
336
        
337
        if(ssdY>100 || ssdU>50 || ssdV>50){
338
                printf(" %s %dx%d -> %s %4dx%4d flags=%2d SSD=%5lld,%5lld,%5lld\n", 
339
                        vo_format_name(srcFormat), srcW, srcH, 
340
                        vo_format_name(dstFormat), dstW, dstH,
341
                        flags,
342
                        ssdY, ssdU, ssdV);
343
        }
344

    
345
        end:
346
        
347
        freeSwsContext(srcContext);
348
        freeSwsContext(dstContext);
349
        freeSwsContext(outContext);
350

    
351
        for(i=0; i<3; i++){
352
                free(src[i]);
353
                free(dst[i]);
354
                free(out[i]);
355
        }
356
}
357

    
358
static void selfTest(uint8_t *src[3], int stride[3], int w, int h){
359
        int srcFormat, dstFormat, srcFormatIndex, dstFormatIndex;
360
        int srcW, srcH, dstW, dstH;
361
        int flags;
362

    
363
        for(srcFormatIndex=0; ;srcFormatIndex++){
364
                srcFormat= testFormat[srcFormatIndex];
365
                if(!srcFormat) break;
366
                for(dstFormatIndex=0; ;dstFormatIndex++){
367
                        dstFormat= testFormat[dstFormatIndex];
368
                        if(!dstFormat) break;
369
                        if(!isSupportedOut(dstFormat)) continue;
370

    
371
                        srcW= w+w/3;
372
                        srcH= h+h/3;
373
                        for(dstW=w; dstW<w*2; dstW+= dstW/3){
374
                                for(dstH=h; dstH<h*2; dstH+= dstH/3){
375
                                        for(flags=1; flags<33; flags*=2)
376
                                                doTest(src, stride, w, h, srcFormat, dstFormat,
377
                                                        srcW, srcH, dstW, dstH, flags);
378
                                }
379
                        }
380
                }
381
        }
382
}
383

    
384
/**
 * Plain C vertical filter producing 8-bit planar output.
 *
 * Each output sample is the sum over the filter taps of
 * source sample * coefficient, shifted right by 19 and clamped to 0..255.
 *
 * @param lumFilter      luma coefficients, lumFilterSize entries
 * @param lumSrc         lumFilterSize luma input lines
 * @param lumFilterSize  number of luma taps
 * @param chrFilter      chroma coefficients, chrFilterSize entries
 * @param chrSrc         chrFilterSize chroma input lines; in each line U is read
 *                       from index i and V from index i+2048
 * @param chrFilterSize  number of chroma taps
 * @param dest           luma output line, dstW samples
 * @param uDest          U output line, chrDstW samples; if NULL no chroma is written
 * @param vDest          V output line, chrDstW samples
 * @param dstW           luma output width
 * @param chrDstW        chroma output width
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
        //FIXME Optimize (just quickly written, not optimized)
        int x, tap;

        for(x=0; x<dstW; x++)
        {
                int acc= 0;

                for(tap=0; tap<lumFilterSize; tap++)
                        acc += lumSrc[tap][x] * lumFilter[tap];

                acc >>= 19;
                if(acc < 0)        acc= 0;
                else if(acc > 255) acc= 255;
                dest[x]= acc;
        }

        /* luma-only output requested */
        if(uDest == NULL)
                return;

        for(x=0; x<chrDstW; x++)
        {
                int accU= 0;
                int accV= 0;

                for(tap=0; tap<chrFilterSize; tap++)
                {
                        const int16_t *line= chrSrc[tap];

                        accU += line[x] * chrFilter[tap];
                        accV += line[x + 2048] * chrFilter[tap];
                }

                accU >>= 19;
                if(accU < 0)        accU= 0;
                else if(accU > 255) accU= 255;

                accV >>= 19;
                if(accV < 0)        accV= 0;
                else if(accV > 255) accV= 255;

                uDest[x]= accU;
                vDest[x]= accV;
        }
}
416

    
417
static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
418
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
419
                                    uint8_t *dest, int dstW, int dstFormat)
420
{
421
        if(dstFormat==IMGFMT_BGR32)
422
        {
423
                int i;
424
#ifdef WORDS_BIGENDIAN
425
        dest++;
426
#endif
427
                for(i=0; i<(dstW>>1); i++){
428
                        int j;
429
                        int Y1=0;
430
                        int Y2=0;
431
                        int U=0;
432
                        int V=0;
433
                        int Cb, Cr, Cg;
434
                        for(j=0; j<lumFilterSize; j++)
435
                        {
436
                                Y1 += lumSrc[j][2*i] * lumFilter[j];
437
                                Y2 += lumSrc[j][2*i+1] * lumFilter[j];
438
                        }
439
                        for(j=0; j<chrFilterSize; j++)
440
                        {
441
                                U += chrSrc[j][i] * chrFilter[j];
442
                                V += chrSrc[j][i+2048] * chrFilter[j];
443
                        }
444
                        Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
445
                        Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
446
                        U >>= 19;
447
                        V >>= 19;
448

    
449
                        Cb= clip_yuvtab_40cf[U+ 256];
450
                        Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
451
                        Cr= clip_yuvtab_3343[V+ 256];
452

    
453
                        dest[8*i+0]=clip_table[((Y1 + Cb) >>13)];
454
                        dest[8*i+1]=clip_table[((Y1 + Cg) >>13)];
455
                        dest[8*i+2]=clip_table[((Y1 + Cr) >>13)];
456

    
457
                        dest[8*i+4]=clip_table[((Y2 + Cb) >>13)];
458
                        dest[8*i+5]=clip_table[((Y2 + Cg) >>13)];
459
                        dest[8*i+6]=clip_table[((Y2 + Cr) >>13)];
460
                }
461
        }
462
        else if(dstFormat==IMGFMT_BGR24)
463
        {
464
                int i;
465
                for(i=0; i<(dstW>>1); i++){
466
                        int j;
467
                        int Y1=0;
468
                        int Y2=0;
469
                        int U=0;
470
                        int V=0;
471
                        int Cb, Cr, Cg;
472
                        for(j=0; j<lumFilterSize; j++)
473
                        {
474
                                Y1 += lumSrc[j][2*i] * lumFilter[j];
475
                                Y2 += lumSrc[j][2*i+1] * lumFilter[j];
476
                        }
477
                        for(j=0; j<chrFilterSize; j++)
478
                        {
479
                                U += chrSrc[j][i] * chrFilter[j];
480
                                V += chrSrc[j][i+2048] * chrFilter[j];
481
                        }
482
                        Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
483
                        Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
484
                        U >>= 19;
485
                        V >>= 19;
486

    
487
                        Cb= clip_yuvtab_40cf[U+ 256];
488
                        Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
489
                        Cr= clip_yuvtab_3343[V+ 256];
490

    
491
                        dest[0]=clip_table[((Y1 + Cb) >>13)];
492
                        dest[1]=clip_table[((Y1 + Cg) >>13)];
493
                        dest[2]=clip_table[((Y1 + Cr) >>13)];
494

    
495
                        dest[3]=clip_table[((Y2 + Cb) >>13)];
496
                        dest[4]=clip_table[((Y2 + Cg) >>13)];
497
                        dest[5]=clip_table[((Y2 + Cr) >>13)];
498
                        dest+=6;
499
                }
500
        }
501
        else if(dstFormat==IMGFMT_BGR16)
502
        {
503
                int i;
504
#ifdef DITHER1XBPP
505
                static int ditherb1=1<<14;
506
                static int ditherg1=1<<13;
507
                static int ditherr1=2<<14;
508
                static int ditherb2=3<<14;
509
                static int ditherg2=3<<13;
510
                static int ditherr2=0<<14;
511

    
512
                ditherb1 ^= (1^2)<<14;
513
                ditherg1 ^= (1^2)<<13;
514
                ditherr1 ^= (1^2)<<14;
515
                ditherb2 ^= (3^0)<<14;
516
                ditherg2 ^= (3^0)<<13;
517
                ditherr2 ^= (3^0)<<14;
518
#else
519
                const int ditherb1=0;
520
                const int ditherg1=0;
521
                const int ditherr1=0;
522
                const int ditherb2=0;
523
                const int ditherg2=0;
524
                const int ditherr2=0;
525
#endif
526
                for(i=0; i<(dstW>>1); i++){
527
                        int j;
528
                        int Y1=0;
529
                        int Y2=0;
530
                        int U=0;
531
                        int V=0;
532
                        int Cb, Cr, Cg;
533
                        for(j=0; j<lumFilterSize; j++)
534
                        {
535
                                Y1 += lumSrc[j][2*i] * lumFilter[j];
536
                                Y2 += lumSrc[j][2*i+1] * lumFilter[j];
537
                        }
538
                        for(j=0; j<chrFilterSize; j++)
539
                        {
540
                                U += chrSrc[j][i] * chrFilter[j];
541
                                V += chrSrc[j][i+2048] * chrFilter[j];
542
                        }
543
                        Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
544
                        Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
545
                        U >>= 19;
546
                        V >>= 19;
547

    
548
                        Cb= clip_yuvtab_40cf[U+ 256];
549
                        Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
550
                        Cr= clip_yuvtab_3343[V+ 256];
551

    
552
                        ((uint16_t*)dest)[2*i] =
553
                                clip_table16b[(Y1 + Cb + ditherb1) >>13] |
554
                                clip_table16g[(Y1 + Cg + ditherg1) >>13] |
555
                                clip_table16r[(Y1 + Cr + ditherr1) >>13];
556

    
557
                        ((uint16_t*)dest)[2*i+1] =
558
                                clip_table16b[(Y2 + Cb + ditherb2) >>13] |
559
                                clip_table16g[(Y2 + Cg + ditherg2) >>13] |
560
                                clip_table16r[(Y2 + Cr + ditherr2) >>13];
561
                }
562
        }
563
        else if(dstFormat==IMGFMT_BGR15)
564
        {
565
                int i;
566
#ifdef DITHER1XBPP
567
                static int ditherb1=1<<14;
568
                static int ditherg1=1<<14;
569
                static int ditherr1=2<<14;
570
                static int ditherb2=3<<14;
571
                static int ditherg2=3<<14;
572
                static int ditherr2=0<<14;
573

    
574
                ditherb1 ^= (1^2)<<14;
575
                ditherg1 ^= (1^2)<<14;
576
                ditherr1 ^= (1^2)<<14;
577
                ditherb2 ^= (3^0)<<14;
578
                ditherg2 ^= (3^0)<<14;
579
                ditherr2 ^= (3^0)<<14;
580
#else
581
                const int ditherb1=0;
582
                const int ditherg1=0;
583
                const int ditherr1=0;
584
                const int ditherb2=0;
585
                const int ditherg2=0;
586
                const int ditherr2=0;
587
#endif
588
                for(i=0; i<(dstW>>1); i++){
589
                        int j;
590
                        int Y1=0;
591
                        int Y2=0;
592
                        int U=0;
593
                        int V=0;
594
                        int Cb, Cr, Cg;
595
                        for(j=0; j<lumFilterSize; j++)
596
                        {
597
                                Y1 += lumSrc[j][2*i] * lumFilter[j];
598
                                Y2 += lumSrc[j][2*i+1] * lumFilter[j];
599
                        }
600
                        for(j=0; j<chrFilterSize; j++)
601
                        {
602
                                U += chrSrc[j][i] * chrFilter[j];
603
                                V += chrSrc[j][i+2048] * chrFilter[j];
604
                        }
605
                        Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ];
606
                        Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ];
607
                        U >>= 19;
608
                        V >>= 19;
609

    
610
                        Cb= clip_yuvtab_40cf[U+ 256];
611
                        Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256];
612
                        Cr= clip_yuvtab_3343[V+ 256];
613

    
614
                        ((uint16_t*)dest)[2*i] =
615
                                clip_table15b[(Y1 + Cb + ditherb1) >>13] |
616
                                clip_table15g[(Y1 + Cg + ditherg1) >>13] |
617
                                clip_table15r[(Y1 + Cr + ditherr1) >>13];
618

    
619
                        ((uint16_t*)dest)[2*i+1] =
620
                                clip_table15b[(Y2 + Cb + ditherb2) >>13] |
621
                                clip_table15g[(Y2 + Cg + ditherg2) >>13] |
622
                                clip_table15r[(Y2 + Cr + ditherr2) >>13];
623
                }
624
        }
625
}
626

    
627

    
628
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
629
//Plain C versions
630
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
631
#define COMPILE_C
632
#endif
633

    
634
#ifdef CAN_COMPILE_X86_ASM
635

    
636
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
637
#define COMPILE_MMX
638
#endif
639

    
640
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
641
#define COMPILE_MMX2
642
#endif
643

    
644
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
645
#define COMPILE_3DNOW
646
#endif
647
#endif //CAN_COMPILE_X86_ASM
648

    
649
#undef HAVE_MMX
650
#undef HAVE_MMX2
651
#undef HAVE_3DNOW
652

    
653
#ifdef COMPILE_C
654
#undef HAVE_MMX
655
#undef HAVE_MMX2
656
#undef HAVE_3DNOW
657
#define RENAME(a) a ## _C
658
#include "swscale_template.c"
659
#endif
660

    
661
#ifdef CAN_COMPILE_X86_ASM
662

    
663
//X86 versions
664
/*
665
#undef RENAME
666
#undef HAVE_MMX
667
#undef HAVE_MMX2
668
#undef HAVE_3DNOW
669
#define ARCH_X86
670
#define RENAME(a) a ## _X86
671
#include "swscale_template.c"
672
*/
673
//MMX versions
674
#ifdef COMPILE_MMX
675
#undef RENAME
676
#define HAVE_MMX
677
#undef HAVE_MMX2
678
#undef HAVE_3DNOW
679
#define RENAME(a) a ## _MMX
680
#include "swscale_template.c"
681
#endif
682

    
683
//MMX2 versions
684
#ifdef COMPILE_MMX2
685
#undef RENAME
686
#define HAVE_MMX
687
#define HAVE_MMX2
688
#undef HAVE_3DNOW
689
#define RENAME(a) a ## _MMX2
690
#include "swscale_template.c"
691
#endif
692

    
693
//3DNOW versions
694
#ifdef COMPILE_3DNOW
695
#undef RENAME
696
#define HAVE_MMX
697
#undef HAVE_MMX2
698
#define HAVE_3DNOW
699
#define RENAME(a) a ## _3DNow
700
#include "swscale_template.c"
701
#endif
702

    
703
#endif //CAN_COMPILE_X86_ASM
704

    
705
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
706

    
707

    
708
// old global scaler, dont use for new code
709
// will use sws_flags from the command line
710
void SwScale_YV12slice(unsigned char* src[], int srcStride[], int srcSliceY ,
711
                             int srcSliceH, uint8_t* dst[], int dstStride, int dstbpp,
712
                             int srcW, int srcH, int dstW, int dstH){
713

    
714
        static SwsContext *context=NULL;
715
        int dstFormat;
716
        int dstStride3[3]= {dstStride, dstStride>>1, dstStride>>1};
717

    
718
        switch(dstbpp)
719
        {
720
                case 8 : dstFormat= IMGFMT_Y8;                break;
721
                case 12: dstFormat= IMGFMT_YV12;        break;
722
                case 15: dstFormat= IMGFMT_BGR15;        break;
723
                case 16: dstFormat= IMGFMT_BGR16;        break;
724
                case 24: dstFormat= IMGFMT_BGR24;        break;
725
                case 32: dstFormat= IMGFMT_BGR32;        break;
726
                default: return;
727
        }
728

    
729
        if(!context) context=getSwsContextFromCmdLine(srcW, srcH, IMGFMT_YV12, dstW, dstH, dstFormat);
730

    
731
        context->swScale(context, src, srcStride, srcSliceY, srcSliceH, dst, dstStride3);
732
}
733

    
734
void swsGetFlagsAndFilterFromCmdLine(int *flags, SwsFilter **srcFilterParam, SwsFilter **dstFilterParam)
735
{
736
        static int firstTime=1;
737
        *flags=0;
738

    
739
#ifdef ARCH_X86
740
        if(gCpuCaps.hasMMX)
741
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
742
#endif
743
        if(firstTime)
744
        {
745
                firstTime=0;
746
                *flags= SWS_PRINT_INFO;
747
        }
748
        else if(verbose>1) *flags= SWS_PRINT_INFO;
749

    
750
        if(src_filter.lumH) freeVec(src_filter.lumH);
751
        if(src_filter.lumV) freeVec(src_filter.lumV);
752
        if(src_filter.chrH) freeVec(src_filter.chrH);
753
        if(src_filter.chrV) freeVec(src_filter.chrV);
754

    
755
        if(sws_lum_gblur!=0.0){
756
                src_filter.lumH= getGaussianVec(sws_lum_gblur, 3.0);
757
                src_filter.lumV= getGaussianVec(sws_lum_gblur, 3.0);
758
        }else{
759
                src_filter.lumH= getIdentityVec();
760
                src_filter.lumV= getIdentityVec();
761
        }
762

    
763
        if(sws_chr_gblur!=0.0){
764
                src_filter.chrH= getGaussianVec(sws_chr_gblur, 3.0);
765
                src_filter.chrV= getGaussianVec(sws_chr_gblur, 3.0);
766
        }else{
767
                src_filter.chrH= getIdentityVec();
768
                src_filter.chrV= getIdentityVec();
769
        }
770

    
771
        if(sws_chr_sharpen!=0.0){
772
                SwsVector *g= getConstVec(-1.0, 3);
773
                SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1);
774
                g->coeff[1]=2.0;
775
                addVec(id, g);
776
                convVec(src_filter.chrH, id);
777
                convVec(src_filter.chrV, id);
778
                freeVec(g);
779
                freeVec(id);
780
        }
781

    
782
        if(sws_lum_sharpen!=0.0){
783
                SwsVector *g= getConstVec(-1.0, 3);
784
                SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1);
785
                g->coeff[1]=2.0;
786
                addVec(id, g);
787
                convVec(src_filter.lumH, id);
788
                convVec(src_filter.lumV, id);
789
                freeVec(g);
790
                freeVec(id);
791
        }
792

    
793
        if(sws_chr_hshift)
794
                shiftVec(src_filter.chrH, sws_chr_hshift);
795

    
796
        if(sws_chr_vshift)
797
                shiftVec(src_filter.chrV, sws_chr_vshift);
798

    
799
        normalizeVec(src_filter.chrH, 1.0);
800
        normalizeVec(src_filter.chrV, 1.0);
801
        normalizeVec(src_filter.lumH, 1.0);
802
        normalizeVec(src_filter.lumV, 1.0);
803

    
804
        if(verbose > 1) printVec(src_filter.chrH);
805
        if(verbose > 1) printVec(src_filter.lumH);
806

    
807
        switch(sws_flags)
808
        {
809
                case 0: *flags|= SWS_FAST_BILINEAR; break;
810
                case 1: *flags|= SWS_BILINEAR; break;
811
                case 2: *flags|= SWS_BICUBIC; break;
812
                case 3: *flags|= SWS_X; break;
813
                case 4: *flags|= SWS_POINT; break;
814
                case 5: *flags|= SWS_AREA; break;
815
                default:*flags|= SWS_BILINEAR; break;
816
        }
817
        
818
        *srcFilterParam= &src_filter;
819
        *dstFilterParam= NULL;
820
}
821

    
822
// will use sws_flags & src_filter (from cmd line)
823
SwsContext *getSwsContextFromCmdLine(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat)
824
{
825
        int flags;
826
        SwsFilter *dstFilterParam, *srcFilterParam;
827
        swsGetFlagsAndFilterFromCmdLine(&flags, &srcFilterParam, &dstFilterParam);
828

    
829
        return getSwsContext(srcW, srcH, srcFormat, dstW, dstH, dstFormat, flags, srcFilterParam, dstFilterParam);
830
}
831

    
832

    
833
/**
 * Builds the per-output-sample filter coefficients and source positions for
 * one scaling direction.
 *
 * Steps: (1) build a double precision filter per output sample for the
 * selected algorithm, (2) convolve it with srcFilter, (3) shift away / cut off
 * near-zero coefficients and round the size up to filterAlign, (4) fold
 * coefficients that fall outside [0, srcW) back inside, (5) normalize every
 * row so it sums to `one` and store it as int16.
 *
 * @param outFilter      receives a memalign()ed table of
 *                       (*outFilterSize)*(dstW+1) coefficients; the extra row
 *                       duplicates the last one (the MMX scaler reads over the end)
 * @param filterPos      receives a memalign()ed table of dstW+1 first-source-sample
 *                       positions; the extra entry duplicates the last one
 * @param outFilterSize  receives the number of coefficients per output sample
 * @param xInc           source increment per output sample in 16.16 fixed point
 *                       (0x10000 means unscaled)
 * @param filterAlign    the filter size is rounded up to a multiple of this; the
 *                       mask arithmetic used requires a power of two
 * @param one            value each row of integer coefficients is scaled to sum to
 * @param flags          SWS_* flags; selects the algorithm and SWS_PRINT_INFO
 * @param srcFilter      optional vector convolved with every row
 * @param dstFilter      optional; currently only its length is accounted for,
 *                       it is not applied (FIXME)
 *
 * Allocation results are not checked. Both output tables are owned by the caller.
 */
static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                              int srcW, int dstW, int filterAlign, int one, int flags,
                              SwsVector *srcFilter, SwsVector *dstFilter)
{
        int i;
        int filterSize;
        int filter2Size;
        int minFilterSize;
        double *filter=NULL;
        double *filter2=NULL;
#ifdef ARCH_X86
        if(gCpuCaps.hasMMX)
                asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

        // Note the +1 is for the MMXscaler which reads over the end
        *filterPos = (int16_t*)memalign(8, (dstW+1)*sizeof(int16_t));

        if(ABS(xInc - 0x10000) <10) // unscaled
        {
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
                for(i=0; i<dstW*filterSize; i++) filter[i]=0;

                for(i=0; i<dstW; i++)
                {
                        filter[i*filterSize]=1;
                        (*filterPos)[i]=i;
                }

        }
        else if(flags&SWS_POINT) // lame looking point sampling mode
        {
                int xDstInSrc;
                filterSize= 1;
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

                        (*filterPos)[i]= xx;
                        filter[i]= 1.0;
                        xDstInSrc+= xInc;
                }
        }
        else if(xInc <= (1<<16) || (flags&SWS_FAST_BILINEAR)) // upscale
        {
                int xDstInSrc;
                if     (flags&SWS_BICUBIC) filterSize= 4;
                else if(flags&SWS_X      ) filterSize= 4;
                else                           filterSize= 2; // SWS_BILINEAR / SWS_AREA
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
                        int j;

                        (*filterPos)[i]= xx;
                        if((flags & SWS_BICUBIC) || (flags & SWS_X))
                        {
                                double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16);
                                double y1,y2,y3,y4;
                                double A= -0.6;
                                if(flags & SWS_BICUBIC){
                                                // Equation is from VirtualDub
                                        y1 = (        +     A*d -       2.0*A*d*d +       A*d*d*d);
                                        y2 = (+ 1.0             -     (A+3.0)*d*d + (A+2.0)*d*d*d);
                                        y3 = (        -     A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d);
                                        y4 = (                  +           A*d*d -       A*d*d*d);
                                }else{
                                                // cubic interpolation (derived it myself)
                                        y1 = (    -2.0*d + 3.0*d*d - 1.0*d*d*d)/6.0;
                                        y2 = (6.0 -3.0*d - 6.0*d*d + 3.0*d*d*d)/6.0;
                                        y3 = (    +6.0*d + 3.0*d*d - 3.0*d*d*d)/6.0;
                                        y4 = (    -1.0*d           + 1.0*d*d*d)/6.0;
                                }

                                filter[i*filterSize + 0]= y1;
                                filter[i*filterSize + 1]= y2;
                                filter[i*filterSize + 2]= y3;
                                filter[i*filterSize + 3]= y4;
                        }
                        else
                        {
                                //Bilinear upscale / linear interpolate / Area averaging
                                for(j=0; j<filterSize; j++)
                                {
                                        double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                                        double coeff= 1.0 - d;
                                        if(coeff<0) coeff=0;
                                        filter[i*filterSize + j]= coeff;
                                        xx++;
                                }
                        }
                        xDstInSrc+= xInc;
                }
        }
        else // downscale
        {
                int xDstInSrc;
                ASSERT(dstW <= srcW)

                if(flags&SWS_BICUBIC)        filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
                else if(flags&SWS_X)        filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
                else if(flags&SWS_AREA)        filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW);
                else /* BILINEAR */        filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW);
                filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);

                xDstInSrc= xInc/2 - 0x8000;
                for(i=0; i<dstW; i++)
                {
                        int xx= (int)((double)xDstInSrc/(double)(1<<16) - (filterSize-1)*0.5 + 0.5);
                        int j;
                        (*filterPos)[i]= xx;
                        for(j=0; j<filterSize; j++)
                        {
                                // distance from the output sample center, in units of output samples
                                double d= ABS((xx<<16) - xDstInSrc)/(double)xInc;
                                double coeff;
                                if((flags & SWS_BICUBIC) || (flags & SWS_X))
                                {
                                        double A= -0.75;
//                                        d*=2;
                                        // Equation is from VirtualDub
                                        if(d<1.0)
                                                coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d);
                                        else if(d<2.0)
                                                coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d);
                                        else
                                                coeff=0.0;
                                }
                                else if(flags & SWS_AREA)
                                {
                                        double srcPixelSize= (1<<16)/(double)xInc;
                                        if(d + srcPixelSize/2 < 0.5) coeff= 1.0;
                                        else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                                        else coeff=0.0;
                                }
                                else
                                {
                                        coeff= 1.0 - d;
                                        if(coeff<0) coeff=0;
                                }
                                filter[i*filterSize + j]= coeff;
                                xx++;
                        }
                        xDstInSrc+= xInc;
                }
        }

        /* apply src & dst Filter to filter -> filter2
           free(filter);
        */
        ASSERT(filterSize>0)
        filter2Size= filterSize;
        if(srcFilter) filter2Size+= srcFilter->length - 1;
        if(dstFilter) filter2Size+= dstFilter->length - 1;
        ASSERT(filter2Size>0)
        filter2= (double*)memalign(8, filter2Size*dstW*sizeof(double));

        for(i=0; i<dstW; i++)
        {
                int j;
                SwsVector scaleFilter;
                SwsVector *outVec;

                scaleFilter.coeff= filter + i*filterSize;
                scaleFilter.length= filterSize;

                if(srcFilter) outVec= getConvVec(srcFilter, &scaleFilter);
                else              outVec= &scaleFilter;

                ASSERT(outVec->length == filter2Size)
                //FIXME dstFilter

                for(j=0; j<outVec->length; j++)
                {
                        filter2[i*filter2Size + j]= outVec->coeff[j];
                }
                // dstFilter enlarges filter2Size but is not applied, so outVec can be
                // shorter than the row; zero the tail instead of leaving it uninitialized
                for(; j<filter2Size; j++)
                {
                        filter2[i*filter2Size + j]= 0.0;
                }

                (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

                if(outVec != &scaleFilter) freeVec(outVec);
        }
        free(filter); filter=NULL;

        /* try to reduce the filter-size (step1 find size and shift left) */
        // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
        minFilterSize= 0;
        for(i=dstW-1; i>=0; i--)
        {
                int min= filter2Size;
                int j;
                double cutOff=0.0;

                /* get rid off near zero elements on the left by shifting left */
                for(j=0; j<filter2Size; j++)
                {
                        int k;
                        // index 0 is always the current leftmost coeff because the row is shifted below
                        cutOff += ABS(filter2[i*filter2Size]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;

                        /* preserve Monotonicity because the core cant handle the filter otherwise */
                        if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

                        // Move filter coeffs left
                        for(k=1; k<filter2Size; k++)
                                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
                        filter2[i*filter2Size + k - 1]= 0.0;
                        (*filterPos)[i]++;
                }

                cutOff=0.0;
                /* count near zeros on the right */
                for(j=filter2Size-1; j>0; j--)
                {
                        cutOff += ABS(filter2[i*filter2Size + j]);

                        if(cutOff > SWS_MAX_REDUCE_CUTOFF) break;
                        min--;
                }

                if(min>minFilterSize) minFilterSize= min;
        }

        ASSERT(minFilterSize > 0)
        // round up to a multiple of filterAlign
        filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
        ASSERT(filterSize > 0)
        filter= (double*)memalign(8, filterSize*dstW*sizeof(double));
        *outFilterSize= filterSize;

        if(flags&SWS_PRINT_INFO)
                MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
        /* try to reduce the filter-size (step2 reduce it) */
        for(i=0; i<dstW; i++)
        {
                int j;

                for(j=0; j<filterSize; j++)
                {
                        if(j>=filter2Size) filter[i*filterSize + j]= 0.0;
                        else                   filter[i*filterSize + j]= filter2[i*filter2Size + j];
                }
        }
        free(filter2); filter2=NULL;


        //FIXME try to align filterpos if possible

        //fix borders
        for(i=0; i<dstW; i++)
        {
                int j;
                if((*filterPos)[i] < 0)
                {
                        // Move filter coeffs left to compensate for filterPos
                        for(j=1; j<filterSize; j++)
                        {
                                int left= MAX(j + (*filterPos)[i], 0);
                                filter[i*filterSize + left] += filter[i*filterSize + j];
                                filter[i*filterSize + j]=0;
                        }
                        (*filterPos)[i]= 0;
                }

                if((*filterPos)[i] + filterSize > srcW)
                {
                        int shift= (*filterPos)[i] + filterSize - srcW;
                        // Move filter coeffs right to compensate for filterPos
                        for(j=filterSize-2; j>=0; j--)
                        {
                                int right= MIN(j + shift, filterSize-1);
                                filter[i*filterSize +right] += filter[i*filterSize +j];
                                filter[i*filterSize +j]=0;
                        }
                        (*filterPos)[i]= srcW - filterSize;
                }
        }

        // Note the +1 is for the MMXscaler which reads over the end
        *outFilter= (int16_t*)memalign(8, *outFilterSize*(dstW+1)*sizeof(int16_t));
        memset(*outFilter, 0, *outFilterSize*(dstW+1)*sizeof(int16_t));

        /* Normalize & Store in outFilter */
        for(i=0; i<dstW; i++)
        {
                int j;
                double sum=0;
                double scale= one;
                for(j=0; j<filterSize; j++)
                {
                        sum+= filter[i*filterSize + j];
                }
                scale/= sum;
                for(j=0; j<filterSize; j++)
                {
                        (*outFilter)[i*(*outFilterSize) + j]= (int)(filter[i*filterSize + j]*scale);
                }
        }

        (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
        for(i=0; i<*outFilterSize; i++)
        {
                int j= dstW*(*outFilterSize);
                (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
        }

        free(filter);
}
1149

    
1150
#ifdef ARCH_X86
/**
 * Generates the run-time assembled horizontal scaler used by the MMX2 path.
 *
 * Two template code fragments live inside the asm() statements below (they are
 * jumped over, never executed in place). Each asm() only returns the address of
 * its fragment, the offsets of the immediate bytes of its two pshufw
 * instructions (the byte just before labels 1 and 2) and the fragment length.
 * For every group of 4 output samples one fragment is copied into funnyCode and
 * its two pshufw immediates are patched; a RET is written after the last copy.
 *
 * @param dstW       output width
 * @param xInc       source increment per output sample in 16.16 fixed point
 * @param funnyCode  receives the generated code; must be large enough, the size
 *                   is not checked here
 * @param filter     receives one 7 bit weight per output sample
 * @param filterPos  receives the source position for every group of 4 output
 *                   samples at index i/2, plus one final entry after the loop
 * @param numSplits  only dstW/numSplits output samples are generated
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
        uint8_t *fragmentA;
        int imm8OfPShufW1A;
        int imm8OfPShufW2A;
        int fragmentLengthA;
        uint8_t *fragmentB;
        int imm8OfPShufW1B;
        int imm8OfPShufW2B;
        int fragmentLengthB;
        int fragmentPos;

        int xpos, i;

        // create an optimized horizontal scaling routine

        //code fragment

        // fragment A: loads source bytes at offset 0 and at offset 1
        asm volatile(
                "jmp 9f                                \n\t"
        // Begin
                "0:                                \n\t"
                "movq (%%edx, %%eax), %%mm3        \n\t"
                "movd (%%ecx, %%esi), %%mm0        \n\t"
                "movd 1(%%ecx, %%esi), %%mm1        \n\t"
                "punpcklbw %%mm7, %%mm1                \n\t"
                "punpcklbw %%mm7, %%mm0                \n\t"
                "pshufw $0xFF, %%mm1, %%mm1        \n\t"
                "1:                                \n\t"
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
                "2:                                \n\t"
                "psubw %%mm1, %%mm0                \n\t"
                "movl 8(%%ebx, %%eax), %%esi        \n\t"
                "pmullw %%mm3, %%mm0                \n\t"
                "psllw $7, %%mm1                \n\t"
                "paddw %%mm1, %%mm0                \n\t"

                "movq %%mm0, (%%edi, %%eax)        \n\t"

                "addl $8, %%eax                        \n\t"
        // End
                "9:                                \n\t"
//                "int $3\n\t"
                // %0 = fragment start, %1/%2 = offset of the byte before label 1/2, %3 = length
                "leal 0b, %0                        \n\t"
                "leal 1b, %1                        \n\t"
                "leal 2b, %2                        \n\t"
                "decl %1                        \n\t"
                "decl %2                        \n\t"
                "subl %0, %1                        \n\t"
                "subl %0, %2                        \n\t"
                "leal 9b, %3                        \n\t"
                "subl %0, %3                        \n\t"


                :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
                "=r" (fragmentLengthA)
        );

        // fragment B: loads source bytes once and derives both operands from that load
        asm volatile(
                "jmp 9f                                \n\t"
        // Begin
                "0:                                \n\t"
                "movq (%%edx, %%eax), %%mm3        \n\t"
                "movd (%%ecx, %%esi), %%mm0        \n\t"
                "punpcklbw %%mm7, %%mm0                \n\t"
                "pshufw $0xFF, %%mm0, %%mm1        \n\t"
                "1:                                \n\t"
                "pshufw $0xFF, %%mm0, %%mm0        \n\t"
                "2:                                \n\t"
                "psubw %%mm1, %%mm0                \n\t"
                "movl 8(%%ebx, %%eax), %%esi        \n\t"
                "pmullw %%mm3, %%mm0                \n\t"
                "psllw $7, %%mm1                \n\t"
                "paddw %%mm1, %%mm0                \n\t"

                "movq %%mm0, (%%edi, %%eax)        \n\t"

                "addl $8, %%eax                        \n\t"
        // End
                "9:                                \n\t"
//                "int $3\n\t"
                "leal 0b, %0                        \n\t"
                "leal 1b, %1                        \n\t"
                "leal 2b, %2                        \n\t"
                "decl %1                        \n\t"
                "decl %2                        \n\t"
                "subl %0, %1                        \n\t"
                "subl %0, %2                        \n\t"
                "leal 9b, %3                        \n\t"
                "subl %0, %3                        \n\t"


                :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
                "=r" (fragmentLengthB)
        );

        xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
        fragmentPos=0;

        for(i=0; i<dstW/numSplits; i++)
        {
                int xx=xpos>>16;

                // one fragment handles 4 output samples
                if((i&3) == 0)
                {
                        // source offsets of the 4 samples relative to xx
                        int a=0;
                        int b=((xpos+xInc)>>16) - xx;
                        int c=((xpos+xInc*2)>>16) - xx;
                        int d=((xpos+xInc*3)>>16) - xx;

                        // inverted fractional position reduced to 7 bits
                        filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
                        filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
                        filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
                        filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
                        filterPos[i/2]= xx;

                        if(d+1<4)
                        {
                                // all 4 samples and their right neighbours fit into one 4 byte load
                                int maxShift= 3-(d+1);
                                int shift=0;

                                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);

                                funnyCode[fragmentPos + imm8OfPShufW1B]=
                                        (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
                                funnyCode[fragmentPos + imm8OfPShufW2B]=
                                        a | (b<<2) | (c<<4) | (d<<6);

                                if(i+3>=dstW) shift=maxShift; //avoid overread
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align

                                if(shift && i>=shift)
                                {
                                        // 0x55 adds 1 to each of the four 2 bit selectors
                                        funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
                                        funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
                                        filterPos[i/2]-=shift;
                                }

                                fragmentPos+= fragmentLengthB;
                        }
                        else
                        {
                                int maxShift= 3-d;
                                int shift=0;

                                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);

                                funnyCode[fragmentPos + imm8OfPShufW1A]=
                                funnyCode[fragmentPos + imm8OfPShufW2A]=
                                        a | (b<<2) | (c<<4) | (d<<6);

                                if(i+4>=dstW) shift=maxShift; //avoid overread
                                else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align

                                if(shift && i>=shift)
                                {
                                        funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
                                        funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
                                        filterPos[i/2]-=shift;
                                }

                                fragmentPos+= fragmentLengthA;
                        }

                        // terminate the code; overwritten by the next fragment if there is one
                        funnyCode[fragmentPos]= RET;
                }
                xpos+=xInc;
        }
        filterPos[i/2]= xpos>>16; // needed to jump to the next part
}
#endif // ARCH_X86
1322

    
1323
//FIXME remove
/* Intentionally empty; does nothing. */
void SwScale_Init(void){
}
1326

    
1327
static void globalInit(){
1328
    // generating tables:
1329
    int i;
1330
    for(i=0; i<768; i++){
1331
        int c= MIN(MAX(i-256, 0), 255);
1332
        clip_table[i]=c;
1333
        yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13);
1334
        yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128);
1335
        yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128);
1336
        yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128);
1337
        yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128);
1338
    }
1339

    
1340
    for(i=0; i<768; i++)
1341
    {
1342
        int v= clip_table[i];
1343
        clip_table16b[i]=  v>>3;
1344
        clip_table16g[i]= (v<<3)&0x07E0;
1345
        clip_table16r[i]= (v<<8)&0xF800;
1346
        clip_table15b[i]=  v>>3;
1347
        clip_table15g[i]= (v<<2)&0x03E0;
1348
        clip_table15r[i]= (v<<7)&0x7C00;
1349
    }
1350

    
1351
cpuCaps= gCpuCaps;
1352

    
1353
#ifdef RUNTIME_CPUDETECT
1354
#ifdef CAN_COMPILE_X86_ASM
1355
        // ordered per speed fasterst first
1356
        if(gCpuCaps.hasMMX2)
1357
                swScale= swScale_MMX2;
1358
        else if(gCpuCaps.has3DNow)
1359
                swScale= swScale_3DNow;
1360
        else if(gCpuCaps.hasMMX)
1361
                swScale= swScale_MMX;
1362
        else
1363
                swScale= swScale_C;
1364

    
1365
#else
1366
        swScale= swScale_C;
1367
        cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1368
#endif
1369
#else //RUNTIME_CPUDETECT
1370
#ifdef HAVE_MMX2
1371
        swScale= swScale_MMX2;
1372
        cpuCaps.has3DNow = 0;
1373
#elif defined (HAVE_3DNOW)
1374
        swScale= swScale_3DNow;
1375
        cpuCaps.hasMMX2 = 0;
1376
#elif defined (HAVE_MMX)
1377
        swScale= swScale_MMX;
1378
        cpuCaps.hasMMX2 = cpuCaps.has3DNow = 0;
1379
#else
1380
        swScale= swScale_C;
1381
        cpuCaps.hasMMX2 = cpuCaps.hasMMX = cpuCaps.has3DNow = 0;
1382
#endif
1383
#endif //!RUNTIME_CPUDETECT
1384
}
1385

    
1386
/**
 * Unscaled planar YUV (YV12 / I420 / IYUV) -> NV12 converter for one slice.
 * Copies the Y plane and interleaves the two source chroma planes into
 * destination plane 1. For YV12 the planes are passed as src[1],src[2], for
 * the other formats as src[2],src[1].
 *
 * The source chroma planes are treated as half width and half height.
 * NOTE(review): the interleaved rows are written with dstStride[0] while the
 * slice offset uses dstStride[1]; kept as in the original, confirm which is meant.
 */
static void PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
        /* Copy Y plane */
        // a single memcpy is only valid for identical, positive strides
        if(dstStride[0]==srcStride[0] && srcStride[0] > 0)
                memcpy(dst, src[0], srcSliceH*dstStride[0]);
        else
        {
                int i;
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst;
                for(i=0; i<srcSliceH; i++)
                {
                        // copy only the visible pixels; a full source stride could overrun a narrower dst row
                        memcpy(dstPtr, srcPtr, c->srcW);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
        // chroma: half the lines, half the samples per line
        dst = dstParam[1] + dstStride[1]*srcSliceY/2;
        if(c->srcFormat==IMGFMT_YV12)
                interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[0] );
        else /* I420 & IYUV */
                interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[0] );
}
1410

    
1411

    
1412
/* Warper functions for yuv2bgr */
1413
static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1414
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1415
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1416

    
1417
        if(c->srcFormat==IMGFMT_YV12)
1418
                yuv2rgb( dst,src[0],src[1],src[2],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] );
1419
        else /* I420 & IYUV */
1420
                yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] );
1421
}
1422

    
1423
/**
 * Unscaled planar YUV -> YUY2 converter for one slice; forwards to yv12toyuy2().
 * The output starts at line srcSliceY of dstParam[0]. For YV12 the chroma
 * planes are passed as src[1],src[2], for I420 / IYUV as src[2],src[1].
 * srcStride[1] is passed as the stride for both chroma planes.
 */
static void PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dstParam[], int dstStride[]){
        uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

        if(c->srcFormat==IMGFMT_YV12)
                yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
        else /* I420 & IYUV */
                yv12toyuy2( src[0],src[2],src[1],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
}
1432

    
1433
/**
 * Unscaled 24 bit -> 32 bit converter for one slice, via rgb24to32().
 * When the strides are in exact 3:4 ratio the whole slice is converted with a
 * single call over srcSliceH*srcStride[0] source bytes; otherwise each line is
 * converted separately over c->srcW*3 source bytes.
 */
static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){

        if(dstStride[0]*3==srcStride[0]*4)
                rgb24to32(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
        else
        {
                int i;
                uint8_t *srcPtr= src[0];
                uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

                for(i=0; i<srcSliceH; i++)
                {
                        rgb24to32(srcPtr, dstPtr, c->srcW*3);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1452

    
1453
/**
 * Unscaled packed conversion wrapper around rgb24to16().
 *
 * Reads srcSliceH lines from src[0] (c->srcW*3 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr24to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 3:2 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*3==srcStride[0]*2 && srcStride[0]>0)
                rgb24to16(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb24to16(srcPtr, dstPtr, c->srcW*3);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1472

    
1473
/**
 * Unscaled packed conversion wrapper around rgb24to15().
 *
 * Reads srcSliceH lines from src[0] (c->srcW*3 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr24to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 3:2 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*3==srcStride[0]*2 && srcStride[0]>0)
                rgb24to15(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb24to15(srcPtr, dstPtr, c->srcW*3);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1492

    
1493
/**
 * Unscaled packed conversion wrapper around rgb32to24().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<2 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 4:3 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*4==srcStride[0]*3 && srcStride[0]>0)
                rgb32to24(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb32to24(srcPtr, dstPtr, c->srcW<<2);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1512

    
1513
/**
 * Unscaled packed conversion wrapper around rgb32to16().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<2 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr32to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 4:2 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*4==srcStride[0]*2 && srcStride[0]>0)
                rgb32to16(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb32to16(srcPtr, dstPtr, c->srcW<<2);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1532

    
1533
/**
 * Unscaled packed conversion wrapper around rgb32to15().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<2 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr32to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 4:2 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*4==srcStride[0]*2 && srcStride[0]>0)
                rgb32to15(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb32to15(srcPtr, dstPtr, c->srcW<<2);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1552

    
1553
/**
 * Unscaled packed conversion wrapper around rgb15to16().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<1 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           are equal AND positive; with a negative stride the byte count
           srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]==srcStride[0] && srcStride[0]>0)
                rgb15to16(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb15to16(srcPtr, dstPtr, c->srcW<<1);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1572

    
1573
/**
 * Unscaled packed conversion wrapper around rgb15to24().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<1 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr15to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 2:3 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*2==srcStride[0]*3 && srcStride[0]>0)
                rgb15to24(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb15to24(srcPtr, dstPtr, c->srcW<<1);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1592

    
1593
/**
 * Unscaled packed conversion wrapper around rgb15to32().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<1 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr15to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 2:4 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*2==srcStride[0]*4 && srcStride[0]>0)
                rgb15to32(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb15to32(srcPtr, dstPtr, c->srcW<<1);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1612

    
1613
/**
 * Unscaled packed conversion wrapper around rgb16to24().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<1 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr16to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 2:3 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*2==srcStride[0]*3 && srcStride[0]>0)
                rgb16to24(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb16to24(srcPtr, dstPtr, c->srcW<<1);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1632

    
1633
/**
 * Unscaled packed conversion wrapper around rgb16to32().
 *
 * Reads srcSliceH lines from src[0] (c->srcW<<1 source bytes per line) and
 * writes the result into dst[0] starting at line srcSliceY.
 */
static void bgr16to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* The whole slice can only be converted in one call when the strides
           match the 2:4 byte ratio AND are positive; with a negative stride
           the byte count srcSliceH*srcStride[0] would be negative. */
        if(dstStride[0]*2==srcStride[0]*4 && srcStride[0]>0)
                rgb16to32(srcPtr, dstPtr, srcSliceH*srcStride[0]);
        else
        {
                int i;

                /* convert line by line, honouring each buffer's own stride */
                for(i=0; i<srcSliceH; i++)
                {
                        rgb16to32(srcPtr, dstPtr, c->srcW<<1);
                        srcPtr+= srcStride[0];
                        dstPtr+= dstStride[0];
                }
        }
}
1652

    
1653
/**
 * Unscaled wrapper around rgb24toyv12().
 *
 * The first destination plane is offset by srcSliceY lines, the second and
 * third planes by srcSliceY>>1 lines of their respective strides.
 */
static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
             int srcSliceH, uint8_t* dst[], int dstStride[]){
        const int chromaLine= srcSliceY>>1;
        uint8_t *plane0= dst[0] + srcSliceY *dstStride[0];
        uint8_t *plane1= dst[1] + chromaLine*dstStride[1];
        uint8_t *plane2= dst[2] + chromaLine*dstStride[2];

        rgb24toyv12(src[0], plane0, plane1, plane2,
                    c->srcW, srcSliceH,
                    dstStride[0], dstStride[1], srcStride[0]);
}
1664

    
1665
/**
1666
 * bring pointers in YUV order instead of YVU
1667
 */
1668
static inline void orderYUV(int format, uint8_t * sortedP[], int sortedStride[], uint8_t * p[], int stride[]){
1669
        if(format == IMGFMT_YV12 || format == IMGFMT_YVU9){
1670
                sortedP[0]= p[0];
1671
                sortedP[1]= p[1];
1672
                sortedP[2]= p[2];
1673
                sortedStride[0]= stride[0];
1674
                sortedStride[1]= stride[1];
1675
                sortedStride[2]= stride[2];
1676
        }
1677
        else if(isPacked(format) || isGray(format))
1678
        {
1679
                sortedP[0]= p[0];
1680
                sortedP[1]= 
1681
                sortedP[2]= NULL;
1682
                sortedStride[0]= stride[0];
1683
                sortedStride[1]= 
1684
                sortedStride[2]= 0;
1685
        }
1686
        else /* I420 */
1687
        {
1688
                sortedP[0]= p[0];
1689
                sortedP[1]= p[2];
1690
                sortedP[2]= p[1];
1691
                sortedStride[0]= stride[0];
1692
                sortedStride[1]= stride[2];
1693
                sortedStride[2]= stride[1];
1694
        }
1695
}
1696

    
1697
/* unscaled copy like stuff (assumes nearly identical formats) */
1698
static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
1699
             int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){
1700

    
1701
        int srcStride[3];
1702
        int dstStride[3];
1703
        uint8_t *src[3];
1704
        uint8_t *dst[3];
1705

    
1706
        orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam);
1707
        orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam);
1708

    
1709
        if(isPacked(c->srcFormat))
1710
        {
1711
                if(dstStride[0]==srcStride[0])
1712
                        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1713
                else
1714
                {
1715
                        int i;
1716
                        uint8_t *srcPtr= src[0];
1717
                        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1718
                        int length=0;
1719

    
1720
                        /* universal length finder */
1721
                        while(length+c->srcW <= ABS(dstStride[0]) 
1722
                           && length+c->srcW <= ABS(srcStride[0])) length+= c->srcW;
1723
                        ASSERT(length!=0);
1724

    
1725
                        for(i=0; i<srcSliceH; i++)
1726
                        {
1727
                                memcpy(dstPtr, srcPtr, length);
1728
                                srcPtr+= srcStride[0];
1729
                                dstPtr+= dstStride[0];
1730
                        }
1731
                }
1732
        }
1733
        else 
1734
        { /* Planar YUV or gray */
1735
                int plane;
1736
                for(plane=0; plane<3; plane++)
1737
                {
1738
                        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1739
                        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1740
                        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1741

    
1742
                        if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1743
                        {
1744
                                if(!isGray(c->dstFormat))
1745
                                        memset(dst[plane], 128, dstStride[plane]*height);
1746
                        }
1747
                        else
1748
                        {
1749
                                if(dstStride[plane]==srcStride[plane])
1750
                                        memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1751
                                else
1752
                                {
1753
                                        int i;
1754
                                        uint8_t *srcPtr= src[plane];
1755
                                        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1756
                                        for(i=0; i<height; i++)
1757
                                        {
1758
                                                memcpy(dstPtr, srcPtr, length);
1759
                                                srcPtr+= srcStride[plane];
1760
                                                dstPtr+= dstStride[plane];
1761
                                        }
1762
                                }
1763
                        }
1764
                }
1765
        }
1766
}
1767

    
1768
static int remove_dup_fourcc(int fourcc)
1769
{
1770
        switch(fourcc)
1771
        {
1772
            case IMGFMT_IYUV: return IMGFMT_I420;
1773
            case IMGFMT_Y8  : return IMGFMT_Y800;
1774
            case IMGFMT_IF09: return IMGFMT_YVU9;
1775
            default: return fourcc;
1776
        }
1777
}
1778

    
1779
static void getSubSampleFactors(int *h, int *v, int format){
1780
        switch(format){
1781
        case IMGFMT_YUY2:
1782
                *h=1;
1783
                *v=0;
1784
                break;
1785
        case IMGFMT_YV12:
1786
        case IMGFMT_I420:
1787
        case IMGFMT_Y800: //FIXME remove after different subsamplings are fully implemented
1788
                *h=1;
1789
                *v=1;
1790
                break;
1791
        case IMGFMT_YVU9:
1792
                *h=2;
1793
                *v=2;
1794
                break;
1795
        default:
1796
                *h=0;
1797
                *v=0;
1798
                break;
1799
        }
1800
}
1801

    
1802
SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
1803
                         SwsFilter *srcFilter, SwsFilter *dstFilter){
1804

    
1805
        SwsContext *c;
1806
        int i;
1807
        int usesFilter;
1808
        int unscaled;
1809
        SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1810
#ifdef ARCH_X86
1811
        if(gCpuCaps.hasMMX)
1812
                asm volatile("emms\n\t"::: "memory");
1813
#endif
1814
        if(swScale==NULL) globalInit();
1815
//srcFormat= IMGFMT_Y800;
1816
//srcFormat= IMGFMT_YVU9;
1817
        /* avoid dupplicate Formats, so we dont need to check to much */
1818
        srcFormat = remove_dup_fourcc(srcFormat);
1819
        dstFormat = remove_dup_fourcc(dstFormat);
1820

    
1821
        unscaled = (srcW == dstW && srcH == dstH);
1822

    
1823
        if(!isSupportedIn(srcFormat)) 
1824
        {
1825
                MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
1826
                return NULL;
1827
        }
1828
        if(!isSupportedOut(dstFormat))
1829
        {
1830
                MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
1831
                return NULL;
1832
        }
1833

    
1834
        /* sanity check */
1835
        if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
1836
        {
1837
                 MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", 
1838
                        srcW, srcH, dstW, dstH);
1839
                return NULL;
1840
        }
1841

    
1842
        if(!dstFilter) dstFilter= &dummyFilter;
1843
        if(!srcFilter) srcFilter= &dummyFilter;
1844

    
1845
        c= memalign(64, sizeof(SwsContext));
1846
        memset(c, 0, sizeof(SwsContext));
1847

    
1848
        c->srcW= srcW;
1849
        c->srcH= srcH;
1850
        c->dstW= dstW;
1851
        c->dstH= dstH;
1852
        c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
1853
        c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
1854
        c->flags= flags;
1855
        c->dstFormat= dstFormat;
1856
        c->srcFormat= srcFormat;
1857

    
1858
        usesFilter=0;
1859
        if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesFilter=1;
1860
        if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesFilter=1;
1861
        if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesFilter=1;
1862
        if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesFilter=1;
1863
        if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesFilter=1;
1864
        if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesFilter=1;
1865
        if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesFilter=1;
1866
        if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesFilter=1;
1867

    
1868
        getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
1869
        getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
1870

    
1871
        // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
1872
        if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
1873

    
1874
        // drop some chroma lines if the user wants it
1875
        c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
1876
        c->chrSrcVSubSample+= c->vChrDrop;
1877

    
1878
        // drop every 2. pixel for chroma calculation unless user wants full chroma
1879
        if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)) 
1880
                c->chrSrcHSubSample=1;
1881

    
1882
        c->chrIntHSubSample= c->chrDstHSubSample;
1883
        c->chrIntVSubSample= c->chrSrcVSubSample;
1884
        
1885
        // note the -((-x)>>y) is so that we allways round toward +inf
1886
        c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
1887
        c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
1888
        c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
1889
        c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
1890
/*        printf("%d %d %d %d / %d %d %d %d //\n", 
1891
        c->chrSrcW,
1892
c->chrSrcH,
1893
c->chrDstW,
1894
c->chrDstH,
1895
srcW,
1896
srcH,
1897
dstW,
1898
dstH);*/
1899
        
1900
        /* unscaled special Cases */
1901
        if(unscaled && !usesFilter)
1902
        {
1903
                /* yv12_to_nv12 */
1904
                if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_NV12)
1905
                {
1906
                        c->swScale= PlanarToNV12Wrapper;
1907

    
1908
                        if(flags&SWS_PRINT_INFO)
1909
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1910
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1911
                        return c;
1912
                }
1913
                /* yv12_to_yuy2 */
1914
                if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_YUY2)
1915
                {
1916
                        c->swScale= PlanarToYuy2Wrapper;
1917

    
1918
                        if(flags&SWS_PRINT_INFO)
1919
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1920
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1921
                        return c;
1922
                }
1923
                /* yuv2bgr */
1924
                if((srcFormat==IMGFMT_YV12 || srcFormat==IMGFMT_I420) && isBGR(dstFormat))
1925
                {
1926
                        // FIXME multiple yuv2rgb converters wont work that way cuz that thing is full of globals&statics
1927
#ifdef WORDS_BIGENDIAN
1928
                        if(dstFormat==IMGFMT_BGR32)
1929
                                yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_BGR);
1930
                        else
1931
                                yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_RGB);
1932
#else
1933
                        yuv2rgb_init( dstFormat&0xFF /* =bpp */, MODE_RGB);
1934
#endif
1935
                        c->swScale= planarYuvToBgr;
1936

    
1937
                        if(flags&SWS_PRINT_INFO)
1938
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1939
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1940
                        return c;
1941
                }
1942
#if 1
1943
                /* simple copy */
1944
                if(   srcFormat == dstFormat
1945
                   || (srcFormat==IMGFMT_YV12 && dstFormat==IMGFMT_I420)
1946
                   || (srcFormat==IMGFMT_I420 && dstFormat==IMGFMT_YV12)
1947
                   || (isPlanarYUV(srcFormat) && isGray(dstFormat))
1948
                   || (isPlanarYUV(dstFormat) && isGray(srcFormat))
1949
                  )
1950
                {
1951
                        c->swScale= simpleCopy;
1952

    
1953
                        if(flags&SWS_PRINT_INFO)
1954
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1955
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1956
                        return c;
1957
                }
1958
#endif
1959
                /* bgr32to24 & rgb32to24*/
1960
                if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR24)
1961
                 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB24))
1962
                {
1963
                        c->swScale= bgr32to24Wrapper;
1964

    
1965
                        if(flags&SWS_PRINT_INFO)
1966
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1967
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1968
                        return c;
1969
                }
1970

    
1971
                /* bgr32to16 & rgb32to16*/
1972
                if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR16)
1973
                 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB16))
1974
                {
1975
                        c->swScale= bgr32to16Wrapper;
1976

    
1977
                        if(flags&SWS_PRINT_INFO)
1978
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1979
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1980
                        return c;
1981
                }
1982

    
1983
                /* bgr32to15 & rgb32to15*/
1984
                if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR15)
1985
                 ||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB15))
1986
                {
1987
                        c->swScale= bgr32to15Wrapper;
1988

    
1989
                        if(flags&SWS_PRINT_INFO)
1990
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
1991
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
1992
                        return c;
1993
                }
1994

    
1995
                /* bgr24to32 & rgb24to32*/
1996
                if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR32)
1997
                 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB32))
1998
                {
1999
                        c->swScale= bgr24to32Wrapper;
2000

    
2001
                        if(flags&SWS_PRINT_INFO)
2002
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2003
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2004
                        return c;
2005
                }
2006

    
2007
                /* bgr24to16 & rgb24to16*/
2008
                if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR16)
2009
                 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB16))
2010
                {
2011
                        c->swScale= bgr24to16Wrapper;
2012

    
2013
                        if(flags&SWS_PRINT_INFO)
2014
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2015
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2016
                        return c;
2017
                }
2018

    
2019
                /* bgr24to15 & rgb24to15*/
2020
                if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR15)
2021
                 ||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB15))
2022
                {
2023
                        c->swScale= bgr24to15Wrapper;
2024

    
2025
                        if(flags&SWS_PRINT_INFO)
2026
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2027
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2028
                        return c;
2029
                }
2030

    
2031
                /* bgr15to16 */
2032
                if(srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR16)
2033
                {
2034
                        c->swScale= bgr15to16Wrapper;
2035

    
2036
                        if(flags&SWS_PRINT_INFO)
2037
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2038
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2039
                        return c;
2040
                }
2041

    
2042
                /* bgr15to24 */
2043
                if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR24)
2044
                 ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB24))
2045
                {
2046
                        c->swScale= bgr15to24Wrapper;
2047

    
2048
                        if(flags&SWS_PRINT_INFO)
2049
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2050
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2051
                        return c;
2052
                }
2053

    
2054
#if 0 //segfaults
2055
                /* bgr15to32 */
2056
                if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32)
2057
                 ||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32))
2058
                {
2059
                        c->swScale= bgr15to32Wrapper;
2060

2061
                        if(flags&SWS_PRINT_INFO)
2062
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2063
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2064
                        return c;
2065
                }
2066
#endif
2067
                /* bgr16to24 */
2068
                if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24)
2069
                 ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24))
2070
                {
2071
                        c->swScale= bgr16to24Wrapper;
2072

    
2073
                        if(flags&SWS_PRINT_INFO)
2074
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2075
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2076
                        return c;
2077
                }
2078

    
2079
#if 0 //segfaults
2080
                /* bgr16to32 */
2081
                if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32)
2082
                 ||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32))
2083
                {
2084
                        c->swScale= bgr16to32Wrapper;
2085

2086
                        if(flags&SWS_PRINT_INFO)
2087
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2088
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2089
                        return c;
2090
                }
2091
#endif
2092
                /* bgr24toYV12 */
2093
                if(srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_YV12)
2094
                {
2095
                        c->swScale= bgr24toyv12Wrapper;
2096

    
2097
                        if(flags&SWS_PRINT_INFO)
2098
                                MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n", 
2099
                                        vo_format_name(srcFormat), vo_format_name(dstFormat));
2100
                        return c;
2101
                }
2102
        }
2103

    
2104
        if(cpuCaps.hasMMX2)
2105
        {
2106
                c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2107
                if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2108
                {
2109
                        if(flags&SWS_PRINT_INFO)
2110
                                MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2111
                }
2112
        }
2113
        else
2114
                c->canMMX2BeUsed=0;
2115

    
2116
        c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2117
        c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2118

    
2119
        // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2120
        // but only for the FAST_BILINEAR mode otherwise do correct scaling
2121
        // n-2 is the last chrominance sample available
2122
        // this is not perfect, but noone shuld notice the difference, the more correct variant
2123
        // would be like the vertical one, but that would require some special code for the
2124
        // first and last pixel
2125
        if(flags&SWS_FAST_BILINEAR)
2126
        {
2127
                if(c->canMMX2BeUsed)
2128
                {
2129
                        c->lumXInc+= 20;
2130
                        c->chrXInc+= 20;
2131
                }
2132
                //we dont use the x86asm scaler if mmx is available
2133
                else if(cpuCaps.hasMMX)
2134
                {
2135
                        c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2136
                        c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2137
                }
2138
        }
2139

    
2140
        /* precalculate horizontal scaler filter coefficients */
2141
        {
2142
                const int filterAlign= cpuCaps.hasMMX ? 4 : 1;
2143

    
2144
                initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2145
                                 srcW      ,       dstW, filterAlign, 1<<14, flags,
2146
                                 srcFilter->lumH, dstFilter->lumH);
2147
                initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2148
                                 c->chrSrcW, c->chrDstW, filterAlign, 1<<14, flags,
2149
                                 srcFilter->chrH, dstFilter->chrH);
2150

    
2151
#ifdef ARCH_X86
2152
// cant downscale !!!
2153
                if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2154
                {
2155
                        c->lumMmx2Filter   = (int16_t*)memalign(8, (dstW        /8+8)*sizeof(int16_t));
2156
                        c->chrMmx2Filter   = (int16_t*)memalign(8, (c->chrDstW  /4+8)*sizeof(int16_t));
2157
                        c->lumMmx2FilterPos= (int32_t*)memalign(8, (dstW      /2/8+8)*sizeof(int32_t));
2158
                        c->chrMmx2FilterPos= (int32_t*)memalign(8, (c->chrDstW/2/4+8)*sizeof(int32_t));
2159

    
2160
                        initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2161
                        initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2162
                }
2163
#endif
2164
        } // Init Horizontal stuff
2165

    
2166

    
2167

    
2168
        /* precalculate vertical scaler filter coefficients */
2169
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2170
                        srcH      ,        dstH, 1, (1<<12)-4, flags,
2171
                        srcFilter->lumV, dstFilter->lumV);
2172
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2173
                        c->chrSrcH, c->chrDstH, 1, (1<<12)-4, flags,
2174
                         srcFilter->chrV, dstFilter->chrV);
2175

    
2176
        // Calculate Buffer Sizes so that they wont run out while handling these damn slices
2177
        c->vLumBufSize= c->vLumFilterSize;
2178
        c->vChrBufSize= c->vChrFilterSize;
2179
        for(i=0; i<dstH; i++)
2180
        {
2181
                int chrI= i*c->chrDstH / dstH;
2182
                int nextSlice= MAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2183
                                 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2184
                nextSlice&= ~3; // Slices start at boundaries which are divisable through 4
2185
                if(c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2186
                        c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
2187
                if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2188
                        c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2189
        }
2190

    
2191
        // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2192
        c->lumPixBuf= (int16_t**)memalign(4, c->vLumBufSize*2*sizeof(int16_t*));
2193
        c->chrPixBuf= (int16_t**)memalign(4, c->vChrBufSize*2*sizeof(int16_t*));
2194
        //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2195
        for(i=0; i<c->vLumBufSize; i++)
2196
                c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= (uint16_t*)memalign(8, 4000);
2197
        for(i=0; i<c->vChrBufSize; i++)
2198
                c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= (uint16_t*)memalign(8, 8000);
2199

    
2200
        //try to avoid drawing green stuff between the right end and the stride end
2201
        for(i=0; i<c->vLumBufSize; i++) memset(c->lumPixBuf[i], 0, 4000);
2202
        for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2203

    
2204
        ASSERT(c->chrDstH <= dstH)
2205

    
2206
        // pack filter data for mmx code
2207
        if(cpuCaps.hasMMX)
2208
        {
2209
                c->lumMmxFilter= (int16_t*)memalign(8, c->vLumFilterSize*      dstH*4*sizeof(int16_t));
2210
                c->chrMmxFilter= (int16_t*)memalign(8, c->vChrFilterSize*c->chrDstH*4*sizeof(int16_t));
2211
                for(i=0; i<c->vLumFilterSize*dstH; i++)
2212
                        c->lumMmxFilter[4*i]=c->lumMmxFilter[4*i+1]=c->lumMmxFilter[4*i+2]=c->lumMmxFilter[4*i+3]=
2213
                                c->vLumFilter[i];
2214
                for(i=0; i<c->vChrFilterSize*c->chrDstH; i++)
2215
                        c->chrMmxFilter[4*i]=c->chrMmxFilter[4*i+1]=c->chrMmxFilter[4*i+2]=c->chrMmxFilter[4*i+3]=
2216
                                c->vChrFilter[i];
2217
        }
2218

    
2219
        if(flags&SWS_PRINT_INFO)
2220
        {
2221
#ifdef DITHER1XBPP
2222
                char *dither= " dithered";
2223
#else
2224
                char *dither= "";
2225
#endif
2226
                if(flags&SWS_FAST_BILINEAR)
2227
                        MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
2228
                else if(flags&SWS_BILINEAR)
2229
                        MSG_INFO("\nSwScaler: BILINEAR scaler, ");
2230
                else if(flags&SWS_BICUBIC)
2231
                        MSG_INFO("\nSwScaler: BICUBIC scaler, ");
2232
                else if(flags&SWS_X)
2233
                        MSG_INFO("\nSwScaler: Experimental scaler, ");
2234
                else if(flags&SWS_POINT)
2235
                        MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
2236
                else if(flags&SWS_AREA)
2237
                        MSG_INFO("\nSwScaler: Area Averageing scaler, ");
2238
                else
2239
                        MSG_INFO("\nSwScaler: ehh flags invalid?! ");
2240

    
2241
                if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
2242
                        MSG_INFO("from %s to%s %s ", 
2243
                                vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
2244
                else
2245
                        MSG_INFO("from %s to %s ", 
2246
                                vo_format_name(srcFormat), vo_format_name(dstFormat));
2247

    
2248
                if(cpuCaps.hasMMX2)
2249
                        MSG_INFO("using MMX2\n");
2250
                else if(cpuCaps.has3DNow)
2251
                        MSG_INFO("using 3DNOW\n");
2252
                else if(cpuCaps.hasMMX)
2253
                        MSG_INFO("using MMX\n");
2254
                else
2255
                        MSG_INFO("using C\n");
2256
        }
2257

    
2258
        if((flags & SWS_PRINT_INFO) && verbose)
2259
        {
2260
                if(cpuCaps.hasMMX)
2261
                {
2262
                        if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2263
                                MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2264
                        else
2265
                        {
2266
                                if(c->hLumFilterSize==4)
2267
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2268
                                else if(c->hLumFilterSize==8)
2269
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2270
                                else
2271
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2272

    
2273
                                if(c->hChrFilterSize==4)
2274
                                        MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2275
                                else if(c->hChrFilterSize==8)
2276
                                        MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2277
                                else
2278
                                        MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2279
                        }
2280
                }
2281
                else
2282
                {
2283
#ifdef ARCH_X86
2284
                        MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
2285
#else
2286
                        if(flags & SWS_FAST_BILINEAR)
2287
                                MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2288
                        else
2289
                                MSG_V("SwScaler: using C scaler for horizontal scaling\n");
2290
#endif
2291
                }
2292
                if(isPlanarYUV(dstFormat))
2293
                {
2294
                        if(c->vLumFilterSize==1)
2295
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
2296
                        else
2297
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
2298
                }
2299
                else
2300
                {
2301
                        if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
2302
                                MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2303
                                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C");
2304
                        else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
2305
                                MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
2306
                        else
2307
                                MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
2308
                }
2309

    
2310
                if(dstFormat==IMGFMT_BGR24)
2311
                        MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
2312
                                cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C"));
2313
                else if(dstFormat==IMGFMT_BGR32)
2314
                        MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2315
                else if(dstFormat==IMGFMT_BGR16)
2316
                        MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2317
                else if(dstFormat==IMGFMT_BGR15)
2318
                        MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
2319

    
2320
                MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2321
        }
2322
        if((flags & SWS_PRINT_INFO) && verbose>1)
2323
        {
2324
                MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2325
                        c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2326
                MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2327
                        c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2328
        }
2329

    
2330
        c->swScale= swScale;
2331
        return c;
2332
}
2333

    
2334
/**
2335
 * returns a normalized gaussian curve used to filter stuff
2336
 * quality=3 is high quality, lowwer is lowwer quality
2337
 */
2338

    
2339
SwsVector *getGaussianVec(double variance, double quality){
2340
        const int length= (int)(variance*quality + 0.5) | 1;
2341
        int i;
2342
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2343
        double middle= (length-1)*0.5;
2344
        SwsVector *vec= malloc(sizeof(SwsVector));
2345

    
2346
        vec->coeff= coeff;
2347
        vec->length= length;
2348

    
2349
        for(i=0; i<length; i++)
2350
        {
2351
                double dist= i-middle;
2352
                coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2353
        }
2354

    
2355
        normalizeVec(vec, 1.0);
2356

    
2357
        return vec;
2358
}
2359

    
2360
SwsVector *getConstVec(double c, int length){
2361
        int i;
2362
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2363
        SwsVector *vec= malloc(sizeof(SwsVector));
2364

    
2365
        vec->coeff= coeff;
2366
        vec->length= length;
2367

    
2368
        for(i=0; i<length; i++)
2369
                coeff[i]= c;
2370

    
2371
        return vec;
2372
}
2373

    
2374

    
2375
SwsVector *getIdentityVec(void){
2376
        double *coeff= memalign(sizeof(double), sizeof(double));
2377
        SwsVector *vec= malloc(sizeof(SwsVector));
2378
        coeff[0]= 1.0;
2379

    
2380
        vec->coeff= coeff;
2381
        vec->length= 1;
2382

    
2383
        return vec;
2384
}
2385

    
2386
/**
 * Rescales the coefficients of a in place so that their sum equals height.
 * Every coefficient is multiplied by height/sum, where sum is the sum of
 * the coefficients before the call.
 * There is no guard against a zero sum; in that case the factor is not
 * finite and neither are the resulting coefficients.
 */
void normalizeVec(SwsVector *a, double height){
        int i;
        double sum=0;
        double inv;

        for(i=0; i<a->length; i++)
                sum+= a->coeff[i];

        inv= height/sum;

        // multiply by height/sum (not by height alone) so that the new sum is height
        for(i=0; i<a->length; i++)
                a->coeff[i]*= inv;
}
2399

    
2400
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void scaleVec(SwsVector *a, double scalar){
        int idx= 0;

        while(idx<a->length)
        {
                a->coeff[idx]= a->coeff[idx]*scalar;
                idx++;
        }
}
2406

    
2407
/**
 * Returns a newly allocated vector holding the convolution of a and b.
 * The result has a->length + b->length - 1 coefficients; a and b are left
 * untouched.
 */
static SwsVector *getConvVec(SwsVector *a, SwsVector *b){
        int length= a->length + b->length - 1;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i, j;
        SwsVector *vec= malloc(sizeof(SwsVector));

        vec->length= length;
        vec->coeff= coeff;

        // start from an all zero accumulator
        i= 0;
        while(i<length)
                coeff[i++]= 0.0;

        // every tap of a spreads a scaled copy of b starting at its own position
        for(i=0; i<a->length; i++)
        {
                double *row= coeff + i;

                for(j=0; j<b->length; j++)
                        row[j]+= a->coeff[i]*b->coeff[j];
        }

        return vec;
}
2428

    
2429
/**
 * Returns a newly allocated vector holding the element-wise sum of a and b.
 * The result is as long as the longer input; each input is placed at the
 * offset (length-1)/2 - (its length-1)/2 so that the centres line up.
 * a and b are left untouched.
 */
static SwsVector *sumVec(SwsVector *a, SwsVector *b){
        int length= a->length > b->length ? a->length : b->length;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));
        // index in the result at which the first tap of each input lands
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;

        vec->length= length;
        vec->coeff= coeff;

        i= 0;
        while(i<length)
                coeff[i++]= 0.0;

        for(i=0; i<a->length; i++)
                coeff[i + aOffset]+= a->coeff[i];
        for(i=0; i<b->length; i++)
                coeff[i + bOffset]+= b->coeff[i];

        return vec;
}
2445

    
2446
/**
 * Returns a newly allocated vector holding the element-wise difference a - b.
 * The result is as long as the longer input; each input is placed at the
 * offset (length-1)/2 - (its length-1)/2 so that the centres line up.
 * a and b are left untouched.
 */
static SwsVector *diffVec(SwsVector *a, SwsVector *b){
        int length= a->length > b->length ? a->length : b->length;
        double *coeff= memalign(sizeof(double), length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));
        // index in the result at which the first tap of each input lands
        const int aOffset= (length-1)/2 - (a->length-1)/2;
        const int bOffset= (length-1)/2 - (b->length-1)/2;

        vec->length= length;
        vec->coeff= coeff;

        i= 0;
        while(i<length)
                coeff[i++]= 0.0;

        for(i=0; i<a->length; i++)
                coeff[i + aOffset]+= a->coeff[i];
        for(i=0; i<b->length; i++)
                coeff[i + bOffset]-= b->coeff[i];

        return vec;
}
2462

    
2463
/* shift left / or right if "shift" is negative */
2464
static SwsVector *getShiftedVec(SwsVector *a, int shift){
2465
        int length= a->length + ABS(shift)*2;
2466
        double *coeff= memalign(sizeof(double), length*sizeof(double));
2467
        int i;
2468
        SwsVector *vec= malloc(sizeof(SwsVector));
2469

    
2470
        vec->coeff= coeff;
2471
        vec->length= length;
2472

    
2473
        for(i=0; i<length; i++) coeff[i]= 0.0;
2474

    
2475
        for(i=0; i<a->length; i++)
2476
        {
2477
                coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2478
        }
2479

    
2480
        return vec;
2481
}
2482

    
2483
/**
 * Shifts a in place: its coefficient array is replaced by the one computed
 * by getShiftedVec(a, shift) and the old array is freed.
 */
void shiftVec(SwsVector *a, int shift){
        SwsVector *tmp= getShiftedVec(a, shift);
        double *oldCoeff= a->coeff;

        // take over the new coefficients, then drop the old array and the empty shell
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(oldCoeff);
        free(tmp);
}
2490

    
2491
/**
 * Adds b to a in place: the coefficient array of a is replaced by the one
 * computed by sumVec(a, b) and the old array is freed. b is not modified.
 */
void addVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= sumVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the new coefficients, then drop the old array and the empty shell
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(oldCoeff);
        free(tmp);
}
2498

    
2499
/**
 * Subtracts b from a in place: the coefficient array of a is replaced by
 * the one computed by diffVec(a, b) and the old array is freed.
 * b is not modified.
 */
void subVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= diffVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the new coefficients, then drop the old array and the empty shell
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(oldCoeff);
        free(tmp);
}
2506

    
2507
/**
 * Convolves a with b in place: the coefficient array of a is replaced by
 * the one computed by getConvVec(a, b) and the old array is freed.
 * b is not modified.
 */
void convVec(SwsVector *a, SwsVector *b){
        SwsVector *tmp= getConvVec(a, b);
        double *oldCoeff= a->coeff;

        // take over the new coefficients, then drop the old array and the empty shell
        a->length= tmp->length;
        a->coeff= tmp->coeff;
        free(oldCoeff);
        free(tmp);
}
2514

    
2515
/**
 * Returns a newly allocated copy of a (same length, same coefficients).
 * Both the copy and its coefficient array are allocated here; freeVec()
 * releases them.
 */
SwsVector *cloneVec(SwsVector *a){
        double *coeff= memalign(sizeof(double), a->length*sizeof(double));
        int i;
        SwsVector *vec= malloc(sizeof(SwsVector));

        vec->length= a->length;
        vec->coeff= coeff;

        i= 0;
        while(i<a->length)
        {
                coeff[i]= a->coeff[i];
                i++;
        }

        return vec;
}
2527

    
2528
/**
 * Prints a crude text plot of a through MSG_DBG2.
 * One line is written per coefficient: its value ("%1.3f "), a run of
 * spaces and a closing '|'. The length of the run maps the coefficient
 * linearly onto 0..60 columns, between min(0, smallest coefficient) and
 * max(0, largest coefficient).
 */
void printVec(SwsVector *a){
        int i;
        double max=0;
        double min=0;
        double range;

        // both bounds start at 0, so the plotted range always contains zero
        for(i=0; i<a->length; i++)
        {
                if(a->coeff[i]>max) max= a->coeff[i];
                if(a->coeff[i]<min) min= a->coeff[i];
        }

        range= max - min;

        for(i=0; i<a->length; i++)
        {
                // an all zero vector has range 0: 0/0 would give NaN and converting
                // NaN to int is undefined, so such vectors are plotted in column 0
                int x= 0;
                if(range>0)
                        x= (int)((a->coeff[i]-min)*60.0/range +0.5);
                MSG_DBG2("%1.3f ", a->coeff[i]);
                for(;x>0; x--) MSG_DBG2(" ");
                MSG_DBG2("|\n");
        }
}
2550

    
2551
/**
 * Frees a vector together with its coefficient array.
 * Passing NULL is allowed and does nothing.
 */
void freeVec(SwsVector *a){
        if(a)
        {
                // free(NULL) is a no-op, so the coefficient array needs no check
                free(a->coeff);
                a->coeff=NULL;
                a->length=0;
                free(a);
        }
}
2558

    
2559
/**
 * Frees a scaler context and every buffer hanging off it: the luminance
 * and chrominance line buffers, the horizontal and vertical filter
 * coefficients and positions, and the MMX / MMX2 filter tables.
 * Each pointer is reset to NULL after it has been freed.
 * Passing NULL is allowed and does nothing.
 */
void freeSwsContext(SwsContext *c){
        int i;

        if(c==NULL)
                return;

        // line buffers: only the first vLumBufSize / vChrBufSize entries are
        // freed, then the pointer table itself
        if(c->lumPixBuf)
        {
                i= 0;
                while(i<c->vLumBufSize)
                {
                        free(c->lumPixBuf[i]);
                        c->lumPixBuf[i]=NULL;
                        i++;
                }
                free(c->lumPixBuf);
                c->lumPixBuf=NULL;
        }

        if(c->chrPixBuf)
        {
                i= 0;
                while(i<c->vChrBufSize)
                {
                        free(c->chrPixBuf[i]);
                        c->chrPixBuf[i]=NULL;
                        i++;
                }
                free(c->chrPixBuf);
                c->chrPixBuf=NULL;
        }

        // free(NULL) is a no-op, so none of the remaining buffers needs a check

        // filter coefficients
        free(c->vLumFilter);
        c->vLumFilter = NULL;
        free(c->vChrFilter);
        c->vChrFilter = NULL;
        free(c->hLumFilter);
        c->hLumFilter = NULL;
        free(c->hChrFilter);
        c->hChrFilter = NULL;

        // filter positions
        free(c->vLumFilterPos);
        c->vLumFilterPos = NULL;
        free(c->vChrFilterPos);
        c->vChrFilterPos = NULL;
        free(c->hLumFilterPos);
        c->hLumFilterPos = NULL;
        free(c->hChrFilterPos);
        c->hChrFilterPos = NULL;

        // filter data packed for the mmx code
        free(c->lumMmxFilter);
        c->lumMmxFilter = NULL;
        free(c->chrMmxFilter);
        c->chrMmxFilter = NULL;

        // tables of the mmx2 horizontal scaler
        free(c->lumMmx2Filter);
        c->lumMmx2Filter=NULL;
        free(c->chrMmx2Filter);
        c->chrMmx2Filter=NULL;
        free(c->lumMmx2FilterPos);
        c->lumMmx2FilterPos=NULL;
        free(c->chrMmx2FilterPos);
        c->chrMmx2FilterPos=NULL;

        free(c);
}
2619

    
2620