Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ c4ca31d0

History | View | Annotate | Download (98.2 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22
 */
23

    
24
/*
25
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09, PAL8
26
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27
  {BGR,RGB}{1,4,8,15,16} support dithering
28

29
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37
  BGR15 -> BGR16
38
*/
39

    
40
/*
41
tested special converters (most are actually tested, but I didn't write it down ...)
42
 YV12 -> BGR16
43
 YV12 -> YV12
44
 BGR15 -> BGR16
45
 BGR16 -> BGR16
46
 YVU9 -> YV12
47

48
untested special converters
49
  YV12/I420 -> BGR15/BGR24/BGR32 (its the yuv2rgb stuff, so it should be ok)
50
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54
  BGR24 -> YV12
55
*/
56

    
57
#include <inttypes.h>
58
#include <string.h>
59
#include <math.h>
60
#include <stdio.h>
61
#include <unistd.h>
62
#include "config.h"
63
#include <assert.h>
64
#ifdef HAVE_SYS_MMAN_H
65
#include <sys/mman.h>
66
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69
#endif
70
#include "swscale.h"
71
#include "swscale_internal.h"
72
#include "x86_cpu.h"
73
#include "bswap.h"
74
#include "rgb2rgb.h"
75
#ifdef USE_FASTMEMCPY
76
#include "libvo/fastmemcpy.h"
77
#endif
78

    
79
/* Make sure no earlier header leaves these instruction-selection macros set. */
#undef MOVNTQ
#undef PAVGB

/* Manual override switches for debugging; all disabled by default. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/*
 * ASSERT(x) expands to a real assert() only in MP_DEBUG builds. Both variants
 * already carry their own trailing semicolon.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* Use the libm constant when available, otherwise a literal fallback. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
104

    
105
/* True when pixel format x is accepted as scaler input. */
#define isSupportedIn(x)  ((x)==PIX_FMT_YUV420P   || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422 \
                        || (x)==PIX_FMT_RGB32     || (x)==PIX_FMT_BGR24   || (x)==PIX_FMT_BGR565 || (x)==PIX_FMT_BGR555 \
                        || (x)==PIX_FMT_BGR32     || (x)==PIX_FMT_RGB24   || (x)==PIX_FMT_RGB565 || (x)==PIX_FMT_RGB555 \
                        || (x)==PIX_FMT_GRAY8     || (x)==PIX_FMT_YUV410P  \
                        || (x)==PIX_FMT_GRAY16BE  || (x)==PIX_FMT_GRAY16LE \
                        || (x)==PIX_FMT_YUV444P   || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P \
                        || (x)==PIX_FMT_PAL8      || (x)==PIX_FMT_BGR8    || (x)==PIX_FMT_RGB8 \
                        || (x)==PIX_FMT_BGR4_BYTE || (x)==PIX_FMT_RGB4_BYTE)
/* True when pixel format x is accepted as scaler output. */
#define isSupportedOut(x) ((x)==PIX_FMT_YUV420P  || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422 \
                        || (x)==PIX_FMT_YUV444P  || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P \
                        || isRGB(x) || isBGR(x) \
                        || (x)==PIX_FMT_NV12     || (x)==PIX_FMT_NV21 \
                        || (x)==PIX_FMT_GRAY16BE || (x)==PIX_FMT_GRAY16LE \
                        || (x)==PIX_FMT_GRAY8    || (x)==PIX_FMT_YUV410P)
/* True for PAL8, YUYV422, UYVY422 and every RGB/BGR format. */
#define isPacked(x)    ((x)==PIX_FMT_PAL8    || (x)==PIX_FMT_YUYV422 || \
                        (x)==PIX_FMT_UYVY422 || isRGB(x) || isBGR(x))

/*
 * RGB -> YUV conversion coefficients in fixed point: each constant is the
 * real coefficient scaled by 1<<RGB2YUV_SHIFT, offset by 0.5 and truncated
 * to int. Naming: first letter is the source channel, second the target.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
132

    
133
extern const int32_t Inverse_Table_6_9[8][4];
134

    
135
/*
136
NOTES
137
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
138

139
TODO
140
more intelligent misalignment avoidance for the horizontal scaler
141
write special vertical cubic upscale version
142
Optimize C code (yv12 / minmax)
143
add support for packed pixel yuv input & output
144
add support for Y8 output
145
optimize bgr24 & bgr32
146
add BGR4 output support
147
write special BGR->BGR scaler
148
*/
149

    
150
/*
 * 8-byte aligned 64-bit constants and scratch variables for the x86 inline
 * assembly paths. Only built for x86 GPL configurations. Names with
 * attribute_used are referenced from asm only, so the compiler must be told
 * to keep them.
 */
#if defined(ARCH_X86) && defined (CONFIG_GPL)
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t                __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* Per-channel dither values; no initializer, so they are written at run time. */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* Channel bit masks for 16 bpp (5-6-5) and 15 bpp (5-5-5) pixels. */
static uint64_t                __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
static uint64_t                __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* Byte-selection masks with a 3-byte period. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=      0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=      0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=      0x0000FF0000FF0000LL;

#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
static const uint64_t bgr2YOffset  attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8))) = 0x8080808080808080ULL;
static const uint64_t w1111        attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */
197

    
198
// clipping helper table for C implementations:
199
static unsigned char clip_table[768];
200

    
201
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
202

    
203
extern const uint8_t dither_2x2_4[2][8];
204
extern const uint8_t dither_2x2_8[2][8];
205
extern const uint8_t dither_8x8_32[8][8];
206
extern const uint8_t dither_8x8_73[8][8];
207
extern const uint8_t dither_8x8_220[8][8];
208

    
209
/**
 * Name callback for the scaler's logging class.
 *
 * @param ptr context pointer supplied by the caller; not inspected
 * @return the constant string "swscaler"
 */
static const char *sws_context_to_name(void *ptr)
{
    (void)ptr; /* every context reports the same name */
    return "swscaler";
}
212

    
213
static AVClass sws_context_class = { "SWScaler", sws_context_to_name, NULL };
214

    
215
char *sws_format_name(enum PixelFormat format)
216
{
217
    switch (format) {
218
        case PIX_FMT_YUV420P:
219
            return "yuv420p";
220
        case PIX_FMT_YUYV422:
221
            return "yuyv422";
222
        case PIX_FMT_RGB24:
223
            return "rgb24";
224
        case PIX_FMT_BGR24:
225
            return "bgr24";
226
        case PIX_FMT_YUV422P:
227
            return "yuv422p";
228
        case PIX_FMT_YUV444P:
229
            return "yuv444p";
230
        case PIX_FMT_RGB32:
231
            return "rgb32";
232
        case PIX_FMT_YUV410P:
233
            return "yuv410p";
234
        case PIX_FMT_YUV411P:
235
            return "yuv411p";
236
        case PIX_FMT_RGB565:
237
            return "rgb565";
238
        case PIX_FMT_RGB555:
239
            return "rgb555";
240
        case PIX_FMT_GRAY16BE:
241
            return "gray16be";
242
        case PIX_FMT_GRAY16LE:
243
            return "gray16le";
244
        case PIX_FMT_GRAY8:
245
            return "gray8";
246
        case PIX_FMT_MONOWHITE:
247
            return "mono white";
248
        case PIX_FMT_MONOBLACK:
249
            return "mono black";
250
        case PIX_FMT_PAL8:
251
            return "Palette";
252
        case PIX_FMT_YUVJ420P:
253
            return "yuvj420p";
254
        case PIX_FMT_YUVJ422P:
255
            return "yuvj422p";
256
        case PIX_FMT_YUVJ444P:
257
            return "yuvj444p";
258
        case PIX_FMT_XVMC_MPEG2_MC:
259
            return "xvmc_mpeg2_mc";
260
        case PIX_FMT_XVMC_MPEG2_IDCT:
261
            return "xvmc_mpeg2_idct";
262
        case PIX_FMT_UYVY422:
263
            return "uyvy422";
264
        case PIX_FMT_UYYVYY411:
265
            return "uyyvyy411";
266
        case PIX_FMT_RGB32_1:
267
            return "rgb32x";
268
        case PIX_FMT_BGR32_1:
269
            return "bgr32x";
270
        case PIX_FMT_BGR32:
271
            return "bgr32";
272
        case PIX_FMT_BGR565:
273
            return "bgr565";
274
        case PIX_FMT_BGR555:
275
            return "bgr555";
276
        case PIX_FMT_BGR8:
277
            return "bgr8";
278
        case PIX_FMT_BGR4:
279
            return "bgr4";
280
        case PIX_FMT_BGR4_BYTE:
281
            return "bgr4 byte";
282
        case PIX_FMT_RGB8:
283
            return "rgb8";
284
        case PIX_FMT_RGB4:
285
            return "rgb4";
286
        case PIX_FMT_RGB4_BYTE:
287
            return "rgb4 byte";
288
        case PIX_FMT_NV12:
289
            return "nv12";
290
        case PIX_FMT_NV21:
291
            return "nv21";
292
        default:
293
            return "Unknown format";
294
    }
295
}
296

    
297
#if defined(ARCH_X86) && defined (CONFIG_GPL)
298
void in_asm_used_var_warning_killer()
299
{
300
    volatile int i= bF8+bFC+w10+
301
    bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
302
    M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
303
    if (i) i=0;
304
}
305
#endif
306

    
307
/**
 * Plain C vertical filter producing planar 8-bit output.
 *
 * Each output sample is the sum of lumFilterSize (or chrFilterSize) source
 * rows weighted by the matching filter coefficient, plus a rounding term of
 * 1<<18, shifted right by 19 and clipped to 0..255.
 *
 * @param lumFilter     luma coefficients, one per source row
 * @param lumSrc        lumFilterSize luma source rows
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, one per source row
 * @param chrSrc        chrFilterSize chroma source rows; V samples are read
 *                      2048 entries after the U samples of the same row
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW samples
 * @param uDest         U output, chrDstW samples; NULL skips chroma entirely
 * @param vDest         V output, chrDstW samples (only used when uDest != NULL)
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    //FIXME Optimize (just quickly written not opti..)
    int i;
    for (i=0; i<dstW; i++)
    {
        int val=1<<18;
        int j;
        for (j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];

        dest[i]= av_clip_uint8(val>>19);
    }

    if (uDest == NULL)
        return;

    for (i=0; i<chrDstW; i++)
    {
        int u=1<<18;
        int v=1<<18;
        int j;
        for (j=0; j<chrFilterSize; j++)
        {
            u += chrSrc[j][i] * chrFilter[j];
            v += chrSrc[j][i + 2048] * chrFilter[j];
        }

        uDest[i]= av_clip_uint8(u>>19);
        vDest[i]= av_clip_uint8(v>>19);
    }
}
339

    
340
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
341
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
342
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
343
{
344
    //FIXME Optimize (just quickly writen not opti..)
345
    int i;
346
    for (i=0; i<dstW; i++)
347
    {
348
        int val=1<<18;
349
        int j;
350
        for (j=0; j<lumFilterSize; j++)
351
            val += lumSrc[j][i] * lumFilter[j];
352

    
353
        dest[i]= av_clip_uint8(val>>19);
354
    }
355

    
356
    if (uDest == NULL)
357
        return;
358

    
359
    if (dstFormat == PIX_FMT_NV12)
360
        for (i=0; i<chrDstW; i++)
361
        {
362
            int u=1<<18;
363
            int v=1<<18;
364
            int j;
365
            for (j=0; j<chrFilterSize; j++)
366
            {
367
                u += chrSrc[j][i] * chrFilter[j];
368
                v += chrSrc[j][i + 2048] * chrFilter[j];
369
            }
370

    
371
            uDest[2*i]= av_clip_uint8(u>>19);
372
            uDest[2*i+1]= av_clip_uint8(v>>19);
373
        }
374
    else
375
        for (i=0; i<chrDstW; i++)
376
        {
377
            int u=1<<18;
378
            int v=1<<18;
379
            int j;
380
            for (j=0; j<chrFilterSize; j++)
381
            {
382
                u += chrSrc[j][i] * chrFilter[j];
383
                v += chrSrc[j][i + 2048] * chrFilter[j];
384
            }
385

    
386
            uDest[2*i]= av_clip_uint8(v>>19);
387
            uDest[2*i+1]= av_clip_uint8(u>>19);
388
        }
389
}
390

    
391
/*
 * Loop prologues shared by the packed-output C converters.
 *
 * Every macro below OPENS a `for` loop over pixel pairs and leaves its `{`
 * unclosed: the code using the macro supplies the per-pair stores and the
 * closing `}`. They rely on names from the expanding function: i, dstW, and
 * depending on the variant c, lumSrc/lumFilter/lumFilterSize,
 * chrSrc/chrFilter/chrFilterSize, buf0/buf1, uvbuf0/uvbuf1,
 * yalpha/yalpha1, uvalpha/uvalpha1.
 * Inside the loop they define i2 (= 2*i), Y1, Y2, U and V; the *_RGB*
 * variants additionally define the lookup pointers r, g and b.
 * V samples are read 2048 entries after the U samples of the same buffer.
 */

/*
 * Full vertical filter: weighted sum over all taps plus 1<<18, shifted right
 * by 19. The clamp to 0..255 is only entered when bit 8 of any of the four
 * values is set.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type attribute_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+2048] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }

/* As above, then selects the r/g/b lookup tables from the context. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)  \
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];

/* Two-row blend: buf0/buf1 (and uvbuf0/uvbuf1) weighted by the alpha pairs. */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Single-row variant: luma from buf0, chroma from uvbuf1 only. */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;

#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Single luma row, chroma taken as the sum of both chroma rows (>>8). */
#define YSCALE_YUV_2_PACKED1B_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
477

    
478
/*
 * Output-format dispatcher for the packed C converters.
 *
 * func(type) must expand to a loop prologue that defines i2, Y1, Y2 and the
 * r/g/b lookup pointers and leaves the loop's `{` open; func2 must do the
 * same for i2, Y1, Y2, U and V. This macro supplies the per-format stores
 * and the closing braces.
 *
 * Names expected in the expanding scope: c, dest, dstW, y, i, and (for the
 * PIX_FMT_MONOBLACK case, which does not go through func) buf0, buf1,
 * yalpha and yalpha1. The MONOBLACK case packs 8 pixels per output byte and
 * only processes complete groups of 8. Formats without a case are ignored.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
        {\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            uint8_t *g= c->table_gU[128] + c->table_gV[128];\
            for (i=0; i<dstW-7; i+=8){\
                int acc;\
                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                ((uint8_t*)dest)[0]= acc;\
                dest++;\
            }\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    }
654

    
655

    
656
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
657
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
658
                                  uint8_t *dest, int dstW, int y)
659
{
660
    int i;
661
    switch(c->dstFormat)
662
    {
663
    case PIX_FMT_BGR32:
664
    case PIX_FMT_RGB32:
665
        YSCALE_YUV_2_RGBX_C(uint32_t)
666
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
667
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
668
        }
669
        break;
670
    case PIX_FMT_RGB24:
671
        YSCALE_YUV_2_RGBX_C(uint8_t)
672
            ((uint8_t*)dest)[0]= r[Y1];
673
            ((uint8_t*)dest)[1]= g[Y1];
674
            ((uint8_t*)dest)[2]= b[Y1];
675
            ((uint8_t*)dest)[3]= r[Y2];
676
            ((uint8_t*)dest)[4]= g[Y2];
677
            ((uint8_t*)dest)[5]= b[Y2];
678
            dest+=6;
679
        }
680
        break;
681
    case PIX_FMT_BGR24:
682
        YSCALE_YUV_2_RGBX_C(uint8_t)
683
            ((uint8_t*)dest)[0]= b[Y1];
684
            ((uint8_t*)dest)[1]= g[Y1];
685
            ((uint8_t*)dest)[2]= r[Y1];
686
            ((uint8_t*)dest)[3]= b[Y2];
687
            ((uint8_t*)dest)[4]= g[Y2];
688
            ((uint8_t*)dest)[5]= r[Y2];
689
            dest+=6;
690
        }
691
        break;
692
    case PIX_FMT_RGB565:
693
    case PIX_FMT_BGR565:
694
        {
695
            const int dr1= dither_2x2_8[y&1    ][0];
696
            const int dg1= dither_2x2_4[y&1    ][0];
697
            const int db1= dither_2x2_8[(y&1)^1][0];
698
            const int dr2= dither_2x2_8[y&1    ][1];
699
            const int dg2= dither_2x2_4[y&1    ][1];
700
            const int db2= dither_2x2_8[(y&1)^1][1];
701
            YSCALE_YUV_2_RGBX_C(uint16_t)
702
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
703
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
704
            }
705
        }
706
        break;
707
    case PIX_FMT_RGB555:
708
    case PIX_FMT_BGR555:
709
        {
710
            const int dr1= dither_2x2_8[y&1    ][0];
711
            const int dg1= dither_2x2_8[y&1    ][1];
712
            const int db1= dither_2x2_8[(y&1)^1][0];
713
            const int dr2= dither_2x2_8[y&1    ][1];
714
            const int dg2= dither_2x2_8[y&1    ][0];
715
            const int db2= dither_2x2_8[(y&1)^1][1];
716
            YSCALE_YUV_2_RGBX_C(uint16_t)
717
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
718
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
719
            }
720
        }
721
        break;
722
    case PIX_FMT_RGB8:
723
    case PIX_FMT_BGR8:
724
        {
725
            const uint8_t * const d64= dither_8x8_73[y&7];
726
            const uint8_t * const d32= dither_8x8_32[y&7];
727
            YSCALE_YUV_2_RGBX_C(uint8_t)
728
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
729
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
730
            }
731
        }
732
        break;
733
    case PIX_FMT_RGB4:
734
    case PIX_FMT_BGR4:
735
        {
736
            const uint8_t * const d64= dither_8x8_73 [y&7];
737
            const uint8_t * const d128=dither_8x8_220[y&7];
738
            YSCALE_YUV_2_RGBX_C(uint8_t)
739
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
740
                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
741
            }
742
        }
743
        break;
744
    case PIX_FMT_RGB4_BYTE:
745
    case PIX_FMT_BGR4_BYTE:
746
        {
747
            const uint8_t * const d64= dither_8x8_73 [y&7];
748
            const uint8_t * const d128=dither_8x8_220[y&7];
749
            YSCALE_YUV_2_RGBX_C(uint8_t)
750
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
751
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
752
            }
753
        }
754
        break;
755
    case PIX_FMT_MONOBLACK:
756
        {
757
            const uint8_t * const d128=dither_8x8_220[y&7];
758
            uint8_t *g= c->table_gU[128] + c->table_gV[128];
759
            int acc=0;
760
            for (i=0; i<dstW-1; i+=2){
761
                int j;
762
                int Y1=1<<18;
763
                int Y2=1<<18;
764

    
765
                for (j=0; j<lumFilterSize; j++)
766
                {
767
                    Y1 += lumSrc[j][i] * lumFilter[j];
768
                    Y2 += lumSrc[j][i+1] * lumFilter[j];
769
                }
770
                Y1>>=19;
771
                Y2>>=19;
772
                if ((Y1|Y2)&256)
773
                {
774
                    if (Y1>255)   Y1=255;
775
                    else if (Y1<0)Y1=0;
776
                    if (Y2>255)   Y2=255;
777
                    else if (Y2<0)Y2=0;
778
                }
779
                acc+= acc + g[Y1+d128[(i+0)&7]];
780
                acc+= acc + g[Y2+d128[(i+1)&7]];
781
                if ((i&7)==6){
782
                    ((uint8_t*)dest)[0]= acc;
783
                    dest++;
784
                }
785
            }
786
        }
787
        break;
788
    case PIX_FMT_YUYV422:
789
        YSCALE_YUV_2_PACKEDX_C(void)
790
            ((uint8_t*)dest)[2*i2+0]= Y1;
791
            ((uint8_t*)dest)[2*i2+1]= U;
792
            ((uint8_t*)dest)[2*i2+2]= Y2;
793
            ((uint8_t*)dest)[2*i2+3]= V;
794
        }
795
        break;
796
    case PIX_FMT_UYVY422:
797
        YSCALE_YUV_2_PACKEDX_C(void)
798
            ((uint8_t*)dest)[2*i2+0]= U;
799
            ((uint8_t*)dest)[2*i2+1]= Y1;
800
            ((uint8_t*)dest)[2*i2+2]= V;
801
            ((uint8_t*)dest)[2*i2+3]= Y2;
802
        }
803
        break;
804
    }
805
}
806

    
807

    
808
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
809
//Plain C versions
810
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
811
#define COMPILE_C
812
#endif
813

    
814
#ifdef ARCH_POWERPC
815
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
816
#define COMPILE_ALTIVEC
817
#endif //HAVE_ALTIVEC
818
#endif //ARCH_POWERPC
819

    
820
#if defined(ARCH_X86)
821

    
822
#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
823
#define COMPILE_MMX
824
#endif
825

    
826
#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
827
#define COMPILE_MMX2
828
#endif
829

    
830
#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
831
#define COMPILE_3DNOW
832
#endif
833
#endif //ARCH_X86 || ARCH_X86_64
834

    
835
#undef HAVE_MMX
836
#undef HAVE_MMX2
837
#undef HAVE_3DNOW
838

    
839
#ifdef COMPILE_C
840
#undef HAVE_MMX
841
#undef HAVE_MMX2
842
#undef HAVE_3DNOW
843
#undef HAVE_ALTIVEC
844
#define RENAME(a) a ## _C
845
#include "swscale_template.c"
846
#endif
847

    
848
#ifdef ARCH_POWERPC
849
#ifdef COMPILE_ALTIVEC
850
#undef RENAME
851
#define HAVE_ALTIVEC
852
#define RENAME(a) a ## _altivec
853
#include "swscale_template.c"
854
#endif
855
#endif //ARCH_POWERPC
856

    
857
#if defined(ARCH_X86)
858

    
859
//X86 versions
860
/*
861
#undef RENAME
862
#undef HAVE_MMX
863
#undef HAVE_MMX2
864
#undef HAVE_3DNOW
865
#define ARCH_X86
866
#define RENAME(a) a ## _X86
867
#include "swscale_template.c"
868
*/
869
//MMX versions
870
#ifdef COMPILE_MMX
871
#undef RENAME
872
#define HAVE_MMX
873
#undef HAVE_MMX2
874
#undef HAVE_3DNOW
875
#define RENAME(a) a ## _MMX
876
#include "swscale_template.c"
877
#endif
878

    
879
//MMX2 versions
880
#ifdef COMPILE_MMX2
881
#undef RENAME
882
#define HAVE_MMX
883
#define HAVE_MMX2
884
#undef HAVE_3DNOW
885
#define RENAME(a) a ## _MMX2
886
#include "swscale_template.c"
887
#endif
888

    
889
//3DNOW versions
890
#ifdef COMPILE_3DNOW
891
#undef RENAME
892
#define HAVE_MMX
893
#undef HAVE_MMX2
894
#define HAVE_3DNOW
895
#define RENAME(a) a ## _3DNow
896
#include "swscale_template.c"
897
#endif
898

    
899
#endif //ARCH_X86 || ARCH_X86_64
900

    
901
// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
902

    
903
/**
 * Evaluates a piecewise cubic at distance dist.
 *
 * While dist is within the first unit interval the result is the cubic
 * a + b*dist + c*dist^2 + d*dist^3 (evaluated in Horner form). For larger
 * distances the coefficients are advanced to the next segment and dist is
 * reduced by one, as many times as needed.
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    /* step from segment to segment until dist falls inside the current one */
    while (!(dist <= 1.0)) {
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a     = 0.0;
        b     = nextB;
        c     = nextC;
        d     = nextD;
        dist -= 1.0;
    }

    return ((d*dist + c)*dist + b)*dist + a;
}
913

    
914
/**
 * Builds the integer filter coefficients and the per-output source positions
 * for one scaling direction.
 *
 * @param outFilter     receives a newly allocated table of (dstW+1) rows with
 *                      *outFilterSize int16_t coefficients each
 * @param filterPos     receives a newly allocated array of dstW+1 source
 *                      positions (position of the first tap of each row)
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per output sample, 16.16 fixed point
 * @param srcW          number of source samples
 * @param dstW          number of output samples
 * @param filterAlign   the tap count is rounded up to a multiple of this value
 *                      (the rounding uses mask arithmetic, so a power of two
 *                      is expected)
 * @param one           value the coefficients of each row are normalized to
 *                      sum to
 * @param flags         SWS_* algorithm selection and SWS_CPU_CAPS_* bits
 * @param srcFilter     optional vector that is convolved with every row
 * @param dstFilter     optional; only its length is accounted for, it is not
 *                      applied (see the FIXME below)
 * @param param         algorithm tuning values; SWS_PARAM_DEFAULT selects the
 *                      built-in default
 * @return 0 on success, -1 if an allocation fails or the resulting filter
 *         would have MAX_FILTER_SIZE or more taps. On failure the temporary
 *         buffers are released; anything already stored through filterPos /
 *         outFilter stays owned by the caller.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int ret= -1;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    double *filter=NULL;
    double *filter2=NULL;
#if defined(ARCH_X86)
    if (flags & SWS_CPU_CAPS_MMX)
        asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
#endif

    // Note the +1 is for the MMXscaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
    if (!*filterPos) goto fail;

    if (FFABS(xInc - 0x10000) <10) // unscaled
    {
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto fail;
        for (i=0; i<dstW*filterSize; i++) filter[i]=0;

        for (i=0; i<dstW; i++)
        {
            filter[i*filterSize]=1;
            (*filterPos)[i]=i;
        }

    }
    else if (flags&SWS_POINT) // lame looking point sampling mode
    {
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto fail;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= 1.0;
            xDstInSrc+= xInc;
        }
    }
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
    {
        int xDstInSrc;
        if      (flags&SWS_BICUBIC) filterSize= 4;
        else if (flags&SWS_X      ) filterSize= 4;
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto fail;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
                //Bilinear upscale / linear interpolate / Area averaging
                for (j=0; j<filterSize; j++)
                {
                    double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                    double coeff= 1.0 - d;
                    if (coeff<0) coeff=0;
                    filter[i*filterSize + j]= coeff;
                    xx++;
                }
            xDstInSrc+= xInc;
        }
    }
    else
    {
        double xDstInSrc;
        double sizeFactor, filterSizeInSrc;
        const double xInc1= (double)xInc / (double)(1<<16);

        if      (flags&SWS_BICUBIC)      sizeFactor=  4.0;
        else if (flags&SWS_X)            sizeFactor=  8.0;
        else if (flags&SWS_AREA)         sizeFactor=  1.0; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8.0;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
        else if (flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2.0;
        else {
            sizeFactor= 0.0; //GCC warning killer
            ASSERT(0)
        }

        if (xInc1 <= 1.0)       filterSizeInSrc= sizeFactor; // upscale
        else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

        filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
        if (filterSize > srcW-2) filterSize=srcW-2;
        // very narrow sources (srcW <= 2) would otherwise give a zero or negative tap count
        if (filterSize < 1)      filterSize=1;

        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto fail;

        xDstInSrc= xInc1 / 2.0 - 0.5;
        for (i=0; i<dstW; i++)
        {
            int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++)
            {
                double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                double coeff;
                if (flags & SWS_BICUBIC)
                {
                    double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                    double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                    if (d<1.0)
                        coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                    else if (d<2.0)
                        coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                    else
                        coeff=0.0;
                }
                else if (flags & SWS_X)
                {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                    if (d<1.0)
                        coeff = cos(d*PI);
                    else
                        coeff=-1.0;
                    if (coeff<0.0)      coeff= -pow(-coeff, A);
                    else                coeff=  pow( coeff, A);
                    coeff= coeff*0.5 + 0.5;
                }
                else if (flags & SWS_AREA)
                {
                    double srcPixelSize= 1.0/xInc1;
                    if      (d + srcPixelSize/2 < 0.5) coeff= 1.0;
                    else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                    else coeff=0.0;
                }
                else if (flags & SWS_GAUSS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = pow(2.0, - p*d*d);
                }
                else if (flags & SWS_SINC)
                {
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                }
                else if (flags & SWS_LANCZOS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                    if (d>p) coeff=0;
                }
                else if (flags & SWS_BILINEAR)
                {
                    coeff= 1.0 - d;
                    if (coeff<0) coeff=0;
                }
                else if (flags & SWS_SPLINE)
                {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                }
                else {
                    coeff= 0.0; //GCC warning killer
                    ASSERT(0)
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc1;
        }
    }

    /* apply src & dst Filter to filter -> filter2; filter is released afterwards */
    ASSERT(filterSize>0)
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    ASSERT(filter2Size>0)
    filter2= av_malloc(filter2Size*dstW*sizeof(double));
    if (!filter2) goto fail;

    for (i=0; i<dstW; i++)
    {
        int j;
        SwsVector scaleFilter;
        SwsVector *outVec;

        scaleFilter.coeff= filter + i*filterSize;
        scaleFilter.length= filterSize;

        if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
        else           outVec= &scaleFilter;
        if (!outVec) goto fail;

        ASSERT(outVec->length == filter2Size)
        //FIXME dstFilter

        for (j=0; j<outVec->length; j++)
        {
            filter2[i*filter2Size + j]= outVec->coeff[j];
        }

        // keep the row centred on the same source sample after it got wider
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

        if (outVec != &scaleFilter) sws_freeVec(outVec);
    }
    av_free(filter); filter=NULL;

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--)
    {
        int min= filter2Size;
        int j;
        double cutOff=0.0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++)
        {
            int k;
            // always element 0: the row is shifted left on every iteration
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;

            /* preserve Monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // Move filter coeffs left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0.0;
            (*filterPos)[i]++;
        }

        cutOff=0.0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--)
        {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // we really don't want to waste our time
        // doing useless computation, so fall-back on
        // the scalar C code for very small filter.
        // vectorizing is worth it only if you have
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    ASSERT(minFilterSize > 0)
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    ASSERT(filterSize > 0)
    // reject oversized filters before allocating anything for them
    if (filterSize >= MAX_FILTER_SIZE)
        goto fail;
    filter= av_malloc(filterSize*dstW*sizeof(double));
    if (!filter) goto fail;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++)
    {
        int j;

        for (j=0; j<filterSize; j++)
        {
            if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
        }
    }
    av_free(filter2); filter2=NULL;


    //FIXME try to align filterpos if possible

    //fix borders
    for (i=0; i<dstW; i++)
    {
        int j;
        if ((*filterPos)[i] < 0)
        {
            // Move filter coeffs left to compensate for filterPos
            for (j=1; j<filterSize; j++)
            {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW)
        {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // Move filter coeffs right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--)
            {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMXscaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
    if (!*outFilter) goto fail;

    /* Normalize & Store in outFilter */
    for (i=0; i<dstW; i++)
    {
        int j;
        double error=0;
        double sum=0;
        double scale= one;

        for (j=0; j<filterSize; j++)
        {
            sum+= filter[i*filterSize + j];
        }
        scale/= sum;
        for (j=0; j<*outFilterSize; j++)
        {
            // carry the rounding error over to the next tap
            double v= filter[i*filterSize + j]*scale + error;
            int intV= floor(v + 0.5);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error = v - intV;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++)
    {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret= 0;
fail:
    /* both temporaries are NULL when already released; av_free(NULL) is a no-op */
    av_free(filter);
    av_free(filter2);
    return ret;
}
1291

    
1292
#ifdef COMPILE_MMX2
1293
/*
 * Generates machine code for a horizontal scaler into funnyCode and fills the
 * matching filter / filterPos tables.
 *
 * Two code templates ("fragments") are embedded below as inline asm. For every
 * group of 4 output samples one of the templates is copied to funnyCode and
 * the immediate bytes of its two pshufw instructions are patched so that they
 * select the right source samples for that group.
 *
 * dstW/numSplits output samples are processed; xInc is the source step per
 * output sample in 16.16 fixed point.
 * NOTE(review): the required sizes of funnyCode, filter and filterPos are not
 * visible here - confirm against the caller.
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1294
{
1295
    uint8_t *fragmentA;
1296
    long imm8OfPShufW1A;
1297
    long imm8OfPShufW2A;
1298
    long fragmentLengthA;
1299
    uint8_t *fragmentB;
1300
    long imm8OfPShufW1B;
1301
    long imm8OfPShufW2B;
1302
    long fragmentLengthB;
1303
    int fragmentPos;
1304

    
1305
    int xpos, i;
1306

    
1307
    // create an optimized horizontal scaling routine
1308

    
1309
    //code fragment
1310

    
    /*
     * Template A: loads source bytes at offset 0 and at offset 1 (two movd).
     * The leading jmp skips the template, so it is never executed here; the
     * code after label 9 only measures it:
     *   %0 = address of the template (label 0)
     *   %1 = offset of the byte just before label 1 (last byte of the first pshufw)
     *   %2 = offset of the byte just before label 2 (last byte of the second pshufw)
     *   %3 = length of the template (label 9 - label 0)
     */
1311
    asm volatile(
1312
        "jmp                         9f                 \n\t"
1313
    // Begin
1314
        "0:                                             \n\t"
1315
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1316
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1317
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1318
        "punpcklbw                %%mm7, %%mm1          \n\t"
1319
        "punpcklbw                %%mm7, %%mm0          \n\t"
1320
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1321
        "1:                                             \n\t"
1322
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1323
        "2:                                             \n\t"
1324
        "psubw                    %%mm1, %%mm0          \n\t"
1325
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1326
        "pmullw                   %%mm3, %%mm0          \n\t"
1327
        "psllw                       $7, %%mm1          \n\t"
1328
        "paddw                    %%mm1, %%mm0          \n\t"
1329

    
1330
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1331

    
1332
        "add                         $8, %%"REG_a"      \n\t"
1333
    // End
1334
        "9:                                             \n\t"
1335
//        "int $3                                         \n\t"
1336
        "lea                         0b, %0             \n\t"
1337
        "lea                         1b, %1             \n\t"
1338
        "lea                         2b, %2             \n\t"
1339
        "dec                         %1                 \n\t"
1340
        "dec                         %2                 \n\t"
1341
        "sub                         %0, %1             \n\t"
1342
        "sub                         %0, %2             \n\t"
1343
        "lea                         9b, %3             \n\t"
1344
        "sub                         %0, %3             \n\t"
1345

    
1346

    
1347
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1348
        "=r" (fragmentLengthA)
1349
    );
1350

    
    /*
     * Template B: same as template A but with a single movd load; the first
     * pshufw takes its input from mm0 as well. Measured the same way.
     */
1351
    asm volatile(
1352
        "jmp                         9f                 \n\t"
1353
    // Begin
1354
        "0:                                             \n\t"
1355
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1356
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1357
        "punpcklbw                %%mm7, %%mm0          \n\t"
1358
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1359
        "1:                                             \n\t"
1360
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1361
        "2:                                             \n\t"
1362
        "psubw                    %%mm1, %%mm0          \n\t"
1363
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1364
        "pmullw                   %%mm3, %%mm0          \n\t"
1365
        "psllw                       $7, %%mm1          \n\t"
1366
        "paddw                    %%mm1, %%mm0          \n\t"
1367

    
1368
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1369

    
1370
        "add                         $8, %%"REG_a"      \n\t"
1371
    // End
1372
        "9:                                             \n\t"
1373
//        "int                       $3                   \n\t"
1374
        "lea                         0b, %0             \n\t"
1375
        "lea                         1b, %1             \n\t"
1376
        "lea                         2b, %2             \n\t"
1377
        "dec                         %1                 \n\t"
1378
        "dec                         %2                 \n\t"
1379
        "sub                         %0, %1             \n\t"
1380
        "sub                         %0, %2             \n\t"
1381
        "lea                         9b, %3             \n\t"
1382
        "sub                         %0, %3             \n\t"
1383

    
1384

    
1385
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1386
        "=r" (fragmentLengthB)
1387
    );
1388

    
1389
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1390
    fragmentPos=0;
1391

    
1392
    for (i=0; i<dstW/numSplits; i++)
1393
    {
        // integer part of the 16.16 source position
1394
        int xx=xpos>>16;
1395

    
        // one code fragment is emitted per group of 4 output samples
1396
        if ((i&3) == 0)
1397
        {
            // source offsets of the 4 output samples of this group, relative to xx
1398
            int a=0;
1399
            int b=((xpos+xInc)>>16) - xx;
1400
            int c=((xpos+xInc*2)>>16) - xx;
1401
            int d=((xpos+xInc*3)>>16) - xx;
1402

    
            // weights: inverted 16 bit fractional position reduced to 7 bits
1403
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1404
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1405
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1406
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1407
            filterPos[i/2]= xx;
1408

    
            // offsets up to d+1 all fit into one 4 sample load -> single-load template B
1409
            if (d+1<4)
1410
            {
1411
                int maxShift= 3-(d+1);
1412
                int shift=0;
1413

    
1414
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1415

    
                // patch the shuffle selectors: 2 bits per output sample,
                // the first pshufw selects the right neighbours (offset+1)
1416
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1417
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1418
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1419
                    a | (b<<2) | (c<<4) | (d<<6);
1420

    
1421
                if (i+3>=dstW) shift=maxShift; //avoid overread
1422
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1423

    
                // start reading 'shift' samples earlier and compensate by adding
                // shift to each 2 bit selector (0x55 = 01010101b)
1424
                if (shift && i>=shift)
1425
                {
1426
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1427
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1428
                    filterPos[i/2]-=shift;
1429
                }
1430

    
1431
                fragmentPos+= fragmentLengthB;
1432
            }
1433
            else
1434
            {
                // template A loads the right neighbours separately (from offset 1),
                // so both shuffles use the same selectors
1435
                int maxShift= 3-d;
1436
                int shift=0;
1437

    
1438
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1439

    
1440
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1441
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1442
                    a | (b<<2) | (c<<4) | (d<<6);
1443

    
1444
                if (i+4>=dstW) shift=maxShift; //avoid overread
1445
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1446

    
1447
                if (shift && i>=shift)
1448
                {
1449
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1450
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1451
                    filterPos[i/2]-=shift;
1452
                }
1453

    
1454
                fragmentPos+= fragmentLengthA;
1455
            }
1456

    
            // terminate the generated code; overwritten by the next fragment, if any
1457
            funnyCode[fragmentPos]= RET;
1458
        }
1459
        xpos+=xInc;
1460
    }
1461
    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1462
}
1463
#endif /* COMPILE_MMX2 */
1464

    
1465
static void globalInit(void){
1466
    // generating tables:
1467
    int i;
1468
    for (i=0; i<768; i++){
1469
        int c= av_clip_uint8(i-256);
1470
        clip_table[i]=c;
1471
    }
1472
}
1473

    
1474
static SwsFunc getSwsFunc(int flags){
1475

    
1476
#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
1477
#if defined(ARCH_X86)
1478
    // ordered per speed fasterst first
1479
    if (flags & SWS_CPU_CAPS_MMX2)
1480
        return swScale_MMX2;
1481
    else if (flags & SWS_CPU_CAPS_3DNOW)
1482
        return swScale_3DNow;
1483
    else if (flags & SWS_CPU_CAPS_MMX)
1484
        return swScale_MMX;
1485
    else
1486
        return swScale_C;
1487

    
1488
#else
1489
#ifdef ARCH_POWERPC
1490
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1491
        return swScale_altivec;
1492
    else
1493
        return swScale_C;
1494
#endif
1495
    return swScale_C;
1496
#endif /* defined(ARCH_X86) */
1497
#else //RUNTIME_CPUDETECT
1498
#ifdef HAVE_MMX2
1499
    return swScale_MMX2;
1500
#elif defined (HAVE_3DNOW)
1501
    return swScale_3DNow;
1502
#elif defined (HAVE_MMX)
1503
    return swScale_MMX;
1504
#elif defined (HAVE_ALTIVEC)
1505
    return swScale_altivec;
1506
#else
1507
    return swScale_C;
1508
#endif
1509
#endif //!RUNTIME_CPUDETECT
1510
}
1511

    
1512
/*
 * Unscaled planar -> NV12/NV21-style conversion of one slice: the luma plane
 * is copied and the two source chroma planes are interleaved into dstParam[1].
 * For PIX_FMT_NV12 the order is src[1],src[2]; for any other destination
 * format handled here it is src[2],src[1].
 * Returns the number of processed source rows (srcSliceH).
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        // identical positive strides: the slice is one contiguous region
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* Interleave the chroma planes; the destination is plane 1, so its own
       stride (dstStride[1]) has to be used for addressing and for the row step */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[1] );
    else
        interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[1] );

    return srcSliceH;
}
1538

    
1539
/*
 * Unscaled planar -> packed conversion of one slice via yv12toyuy2().
 * The output is written to dstParam[0] starting at row srcSliceY; the stride
 * of src[1] is passed as the chroma stride.
 * Returns the number of processed source rows (srcSliceH).
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int lumStride   = srcStride[0];
    const int chromStride = srcStride[1];
    const int outStride   = dstStride[0];
    uint8_t * const sliceDst= dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], sliceDst, c->srcW, srcSliceH,
               lumStride, chromStride, outStride);

    return srcSliceH;
}
1547

    
1548
/*
 * Unscaled planar -> packed conversion of one slice via yv12touyvy().
 * The output is written to dstParam[0] starting at row srcSliceY; the stride
 * of src[1] is passed as the chroma stride.
 * Returns the number of processed source rows (srcSliceH).
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int lumStride   = srcStride[0];
    const int chromStride = srcStride[1];
    const int outStride   = dstStride[0];
    uint8_t * const sliceDst= dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], sliceDst, c->srcW, srcSliceH,
               lumStride, chromStride, outStride);

    return srcSliceH;
}
1556

    
1557
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1558
/*
 * Unscaled conversion between packed RGB/BGR formats of different depth
 * and/or component order.
 * The converter is selected from the source and destination depth ids; if no
 * converter exists an error is logged and nothing is written.
 * Returns the number of processed source rows (srcSliceH) in all cases.
 */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int srcFormat= c->srcFormat;
    const int dstFormat= c->dstFormat;
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
    const int dstId= fmt_depth(dstFormat) >> 2;
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;

    /* BGR -> BGR */
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
       || (isRGB(srcFormat) && isRGB(dstFormat))){
        /* low nibble: source depth id, high nibble: destination depth id */
        switch(srcId | (dstId<<4)){
        case 0x34: conv= rgb16to15; break;
        case 0x36: conv= rgb24to15; break;
        case 0x38: conv= rgb32to15; break;
        case 0x43: conv= rgb15to16; break;
        case 0x46: conv= rgb24to16; break;
        case 0x48: conv= rgb32to16; break;
        case 0x63: conv= rgb15to24; break;
        case 0x64: conv= rgb16to24; break;
        case 0x68: conv= rgb32to24; break;
        case 0x83: conv= rgb15to32; break;
        case 0x84: conv= rgb16to32; break;
        case 0x86: conv= rgb24to32; break;
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
        }
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
             || (isRGB(srcFormat) && isBGR(dstFormat))){
        /* component order changes as well */
        switch(srcId | (dstId<<4)){
        case 0x33: conv= rgb15tobgr15; break;
        case 0x34: conv= rgb16tobgr15; break;
        case 0x36: conv= rgb24tobgr15; break;
        case 0x38: conv= rgb32tobgr15; break;
        case 0x43: conv= rgb15tobgr16; break;
        case 0x44: conv= rgb16tobgr16; break;
        case 0x46: conv= rgb24tobgr16; break;
        case 0x48: conv= rgb32tobgr16; break;
        case 0x63: conv= rgb15tobgr24; break;
        case 0x64: conv= rgb16tobgr24; break;
        case 0x66: conv= rgb24tobgr24; break;
        case 0x68: conv= rgb32tobgr24; break;
        case 0x83: conv= rgb15tobgr32; break;
        case 0x84: conv= rgb16tobgr32; break;
        case 0x86: conv= rgb24tobgr32; break;
        case 0x88: conv= rgb32tobgr32; break;
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
        }
    }else{
        av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));
    }

    if(conv)
    {
        /* Convert the whole slice in one call only if the strides match in
           pixels and the source stride is positive; with a negative stride
           the byte count would be negative and src[0] would not be the
           lowest address of the slice. */
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
            conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
        else
        {
            int i;
            uint8_t *srcPtr= src[0];
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

            /* row by row, converting only the visible pixels of each row */
            for (i=0; i<srcSliceH; i++)
            {
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
                srcPtr+= srcStride[0];
                dstPtr+= dstStride[0];
            }
        }
    }
    return srcSliceH;
}
1634

    
1635
/**
 * Unscaled slice converter that hands one source slice to rgb24toyv12().
 *
 * The three destination plane pointers are advanced to the rows matching
 * srcSliceY (the two chroma planes use srcSliceY>>1).
 * Only dstStride[0] and dstStride[1] are forwarded as output strides.
 *
 * @return srcSliceH, the number of source lines handled
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int chrRow= srcSliceY>>1;
    uint8_t *plane0= dst[0] + srcSliceY*dstStride[0];
    uint8_t *plane1= dst[1] + chrRow   *dstStride[1];
    uint8_t *plane2= dst[2] + chrRow   *dstStride[2];

    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1647

    
1648
/**
 * Unscaled slice converter: copies the luma plane and upsamples both chroma
 * planes with planar2x().
 *
 * The chroma planes are handled per slice: the output is written at the
 * chroma row that corresponds to srcSliceY, and only the chroma rows that
 * belong to this slice are read. For a single full-frame slice
 * (srcSliceY==0, srcSliceH==c->srcH) the chroma height equals c->chrSrcH.
 *
 * @return srcSliceH, the number of source lines handled
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[]){
    int i;
    /* number of source chroma rows in this slice; -((-x)>>s) rounds toward +inf */
    const int chrSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    /* first destination chroma row of this slice */
    const int chrDstY  = srcSliceY >> c->chrDstVSubSample;

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    else{
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        /* strides differ (or are negative): copy only the visible pixels per row */
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    if (c->dstFormat==PIX_FMT_YUV420P){
        planar2x(src[1], dst[1] + chrDstY*dstStride[1], c->chrSrcW, chrSliceH, srcStride[1], dstStride[1]);
        planar2x(src[2], dst[2] + chrDstY*dstStride[2], c->chrSrcW, chrSliceH, srcStride[2], dstStride[2]);
    }else{
        /* any other destination format gets the two chroma planes swapped */
        planar2x(src[1], dst[2] + chrDstY*dstStride[2], c->chrSrcW, chrSliceH, srcStride[1], dstStride[2]);
        planar2x(src[2], dst[1] + chrDstY*dstStride[1], c->chrSrcW, chrSliceH, srcStride[2], dstStride[1]);
    }
    return srcSliceH;
}
1676

    
1677
/* unscaled copy like stuff (assumes nearly identical formats) */
1678
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1679
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
1680

    
1681
    if (isPacked(c->srcFormat))
1682
    {
1683
        if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1684
            memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1685
        else
1686
        {
1687
            int i;
1688
            uint8_t *srcPtr= src[0];
1689
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1690
            int length=0;
1691

    
1692
            /* universal length finder */
1693
            while(length+c->srcW <= FFABS(dstStride[0])
1694
               && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1695
            ASSERT(length!=0);
1696

    
1697
            for (i=0; i<srcSliceH; i++)
1698
            {
1699
                memcpy(dstPtr, srcPtr, length);
1700
                srcPtr+= srcStride[0];
1701
                dstPtr+= dstStride[0];
1702
            }
1703
        }
1704
    }
1705
    else
1706
    { /* Planar YUV or gray */
1707
        int plane;
1708
        for (plane=0; plane<3; plane++)
1709
        {
1710
            int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1711
            int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1712
            int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1713

    
1714
            if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1715
            {
1716
                if (!isGray(c->dstFormat))
1717
                    memset(dst[plane], 128, dstStride[plane]*height);
1718
            }
1719
            else
1720
            {
1721
                if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1722
                    memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1723
                else
1724
                {
1725
                    int i;
1726
                    uint8_t *srcPtr= src[plane];
1727
                    uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1728
                    for (i=0; i<height; i++)
1729
                    {
1730
                        memcpy(dstPtr, srcPtr, length);
1731
                        srcPtr+= srcStride[plane];
1732
                        dstPtr+= dstStride[plane];
1733
                    }
1734
                }
1735
            }
1736
        }
1737
    }
1738
    return srcSliceH;
1739
}
1740

    
1741
/**
 * Unscaled 16-bit gray -> 8-bit luma conversion for one slice.
 *
 * One byte of every 16-bit source sample is kept (see the srcPtr adjustment
 * below). If the destination is not a gray format, planes 1 and 2 are filled
 * with the neutral value 128 for the chroma rows belonging to this slice.
 *
 * @return srcSliceH, the number of source lines handled
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* chroma geometry of this slice; -((-x)>>s) rounds toward +inf */
        int chrW= -((-c->srcW  )>>c->chrDstHSubSample);
        int chrY= -((-srcSliceY)>>c->chrDstVSubSample);
        int chrH= -((-srcSliceH)>>c->chrDstVSubSample);
        int plane;

        for (plane=1; plane<3; plane++)
        {
            /* fill row by row, starting at the slice's own chroma row */
            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;
            for (i=0; i<chrH; i++)
            {
                memset(chrPtr, 128, chrW);
                chrPtr+= dstStride[plane];
            }
        }
    }
    /* little-endian input: the most significant byte is the second byte of
       each sample, so start one byte in */
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
1765

    
1766
/**
 * Unscaled 8-bit luma -> 16-bit gray conversion for one slice.
 *
 * Every source byte of plane 0 is written twice, into both bytes of the
 * corresponding 16-bit destination sample.
 *
 * @return srcSliceH, the number of source lines handled
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int width= c->srcW;
    uint8_t *in = src[0];
    uint8_t *out= dst[0] + dstStride[0]*srcSliceY;
    int row, x;

    for (row=0; row<srcSliceH; row++, in+= srcStride[0], out+= dstStride[0])
    {
        for (x=0; x<width; x++)
        {
            out[2*x    ]= in[x];
            out[2*x + 1]= in[x];
        }
    }
    return srcSliceH;
}
1787

    
1788
/**
 * Unscaled 16-bit gray byte-order swap for one slice.
 *
 * Each 16-bit sample of plane 0 is passed through bswap_16() and stored in
 * the destination row matching srcSliceY.
 *
 * @return srcSliceH, the number of source lines handled
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint16_t *srcPtr= (uint16_t*)src[0];
    /* dstStride is in bytes and dst[0] is a byte pointer, so the slice offset
       is dstStride[0]*y bytes; it must not be halved before the cast */
    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        /* the pointers are uint16_t*, so advance by half the byte stride */
        srcPtr+= srcStride[0]/2;
        dstPtr+= dstStride[0]/2;
    }
    return srcSliceH;
}
1805

    
1806

    
1807
static void getSubSampleFactors(int *h, int *v, int format){
1808
    switch(format){
1809
    case PIX_FMT_UYVY422:
1810
    case PIX_FMT_YUYV422:
1811
        *h=1;
1812
        *v=0;
1813
        break;
1814
    case PIX_FMT_YUV420P:
1815
    case PIX_FMT_GRAY16BE:
1816
    case PIX_FMT_GRAY16LE:
1817
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1818
    case PIX_FMT_NV12:
1819
    case PIX_FMT_NV21:
1820
        *h=1;
1821
        *v=1;
1822
        break;
1823
    case PIX_FMT_YUV410P:
1824
        *h=2;
1825
        *v=2;
1826
        break;
1827
    case PIX_FMT_YUV444P:
1828
        *h=0;
1829
        *v=0;
1830
        break;
1831
    case PIX_FMT_YUV422P:
1832
        *h=1;
1833
        *v=0;
1834
        break;
1835
    case PIX_FMT_YUV411P:
1836
        *h=2;
1837
        *v=0;
1838
        break;
1839
    default:
1840
        *h=0;
1841
        *v=0;
1842
        break;
1843
    }
1844
}
1845

    
1846
/**
 * Convert a 16.16 fixed-point value to a rounded, saturated 16-bit integer.
 *
 * The value is rounded to nearest by adding half a unit before the shift and
 * is then clamped: results below -0x7FFF yield 0x8000, results above 0x7FFF
 * yield 0x7FFF. The intermediate stays 64 bits wide so that large inputs are
 * clamped instead of being truncated into range first.
 *
 * @param f value in 16.16 fixed point
 * @return the clamped result as a 16-bit two's complement bit pattern
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return (uint16_t)r;
}
1852

    
1853
/**
1854
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1855
 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1856
 * @return -1 if not supported
1857
 */
1858
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1859
    int64_t crv =  inv_table[0];
1860
    int64_t cbu =  inv_table[1];
1861
    int64_t cgu = -inv_table[2];
1862
    int64_t cgv = -inv_table[3];
1863
    int64_t cy  = 1<<16;
1864
    int64_t oy  = 0;
1865

    
1866
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1867
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1868
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1869

    
1870
    c->brightness= brightness;
1871
    c->contrast  = contrast;
1872
    c->saturation= saturation;
1873
    c->srcRange  = srcRange;
1874
    c->dstRange  = dstRange;
1875

    
1876
    c->uOffset=   0x0400040004000400LL;
1877
    c->vOffset=   0x0400040004000400LL;
1878

    
1879
    if (!srcRange){
1880
        cy= (cy*255) / 219;
1881
        oy= 16<<16;
1882
    }else{
1883
        crv= (crv*224) / 255;
1884
        cbu= (cbu*224) / 255;
1885
        cgu= (cgu*224) / 255;
1886
        cgv= (cgv*224) / 255;
1887
    }
1888

    
1889
    cy = (cy *contrast             )>>16;
1890
    crv= (crv*contrast * saturation)>>32;
1891
    cbu= (cbu*contrast * saturation)>>32;
1892
    cgu= (cgu*contrast * saturation)>>32;
1893
    cgv= (cgv*contrast * saturation)>>32;
1894

    
1895
    oy -= 256*brightness;
1896

    
1897
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1898
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1899
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1900
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1901
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1902
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1903

    
1904
    yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1905
    //FIXME factorize
1906

    
1907
#ifdef COMPILE_ALTIVEC
1908
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1909
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1910
#endif
1911
    return 0;
1912
}
1913

    
1914
/**
1915
 * @return -1 if not supported
1916
 */
1917
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1918
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1919

    
1920
    *inv_table = c->srcColorspaceTable;
1921
    *table     = c->dstColorspaceTable;
1922
    *srcRange  = c->srcRange;
1923
    *dstRange  = c->dstRange;
1924
    *brightness= c->brightness;
1925
    *contrast  = c->contrast;
1926
    *saturation= c->saturation;
1927

    
1928
    return 0;
1929
}
1930

    
1931
static int handle_jpeg(int *format)
1932
{
1933
    switch (*format) {
1934
        case PIX_FMT_YUVJ420P:
1935
            *format = PIX_FMT_YUV420P;
1936
            return 1;
1937
        case PIX_FMT_YUVJ422P:
1938
            *format = PIX_FMT_YUV422P;
1939
            return 1;
1940
        case PIX_FMT_YUVJ444P:
1941
            *format = PIX_FMT_YUV444P;
1942
            return 1;
1943
        default:
1944
            return 0;
1945
    }
1946
}
1947

    
1948
SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
1949
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
1950

    
1951
    SwsContext *c;
1952
    int i;
1953
    int usesVFilter, usesHFilter;
1954
    int unscaled, needsDither;
1955
    int srcRange, dstRange;
1956
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1957
#if defined(ARCH_X86)
1958
    if (flags & SWS_CPU_CAPS_MMX)
1959
        asm volatile("emms\n\t"::: "memory");
1960
#endif
1961

    
1962
#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
1963
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
1964
#ifdef HAVE_MMX2
1965
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
1966
#elif defined (HAVE_3DNOW)
1967
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
1968
#elif defined (HAVE_MMX)
1969
    flags |= SWS_CPU_CAPS_MMX;
1970
#elif defined (HAVE_ALTIVEC)
1971
    flags |= SWS_CPU_CAPS_ALTIVEC;
1972
#endif
1973
#endif /* RUNTIME_CPUDETECT */
1974
    if (clip_table[512] != 255) globalInit();
1975
    if (rgb15to16 == NULL) sws_rgb2rgb_init(flags);
1976

    
1977
    unscaled = (srcW == dstW && srcH == dstH);
1978
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
1979
        && (fmt_depth(dstFormat))<24
1980
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
1981

    
1982
    srcRange = handle_jpeg(&srcFormat);
1983
    dstRange = handle_jpeg(&dstFormat);
1984

    
1985
    if (!isSupportedIn(srcFormat))
1986
    {
1987
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
1988
        return NULL;
1989
    }
1990
    if (!isSupportedOut(dstFormat))
1991
    {
1992
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
1993
        return NULL;
1994
    }
1995

    
1996
    /* sanity check */
1997
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
1998
    {
1999
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2000
               srcW, srcH, dstW, dstH);
2001
        return NULL;
2002
    }
2003

    
2004
    if (!dstFilter) dstFilter= &dummyFilter;
2005
    if (!srcFilter) srcFilter= &dummyFilter;
2006

    
2007
    c= av_mallocz(sizeof(SwsContext));
2008

    
2009
    c->av_class = &sws_context_class;
2010
    c->srcW= srcW;
2011
    c->srcH= srcH;
2012
    c->dstW= dstW;
2013
    c->dstH= dstH;
2014
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2015
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2016
    c->flags= flags;
2017
    c->dstFormat= dstFormat;
2018
    c->srcFormat= srcFormat;
2019
    c->vRounder= 4* 0x0001000100010001ULL;
2020

    
2021
    usesHFilter= usesVFilter= 0;
2022
    if (dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
2023
    if (dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
2024
    if (dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
2025
    if (dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
2026
    if (srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
2027
    if (srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
2028
    if (srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
2029
    if (srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
2030

    
2031
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2032
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2033

    
2034
    // reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
2035
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2036

    
2037
    // drop some chroma lines if the user wants it
2038
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2039
    c->chrSrcVSubSample+= c->vChrDrop;
2040

    
2041
    // drop every 2. pixel for chroma calculation unless user wants full chroma
2042
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2043
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2044
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2045
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE)
2046
        c->chrSrcHSubSample=1;
2047

    
2048
    if (param){
2049
        c->param[0] = param[0];
2050
        c->param[1] = param[1];
2051
    }else{
2052
        c->param[0] =
2053
        c->param[1] = SWS_PARAM_DEFAULT;
2054
    }
2055

    
2056
    c->chrIntHSubSample= c->chrDstHSubSample;
2057
    c->chrIntVSubSample= c->chrSrcVSubSample;
2058

    
2059
    // Note the -((-x)>>y) is so that we always round toward +inf.
2060
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2061
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2062
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2063
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2064

    
2065
    sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2066

    
2067
    /* unscaled special Cases */
2068
    if (unscaled && !usesHFilter && !usesVFilter)
2069
    {
2070
        /* yv12_to_nv12 */
2071
        if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2072
        {
2073
            c->swScale= PlanarToNV12Wrapper;
2074
        }
2075
#ifdef CONFIG_GPL
2076
        /* yuv2bgr */
2077
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2078
        {
2079
            c->swScale= yuv2rgb_get_func_ptr(c);
2080
        }
2081
#endif
2082

    
2083
        if ( srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P )
2084
        {
2085
            c->swScale= yvu9toyv12Wrapper;
2086
        }
2087

    
2088
        /* bgr24toYV12 */
2089
        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2090
            c->swScale= bgr24toyv12Wrapper;
2091

    
2092
        /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2093
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2094
           && (isBGR(dstFormat) || isRGB(dstFormat))
2095
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2096
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2097
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2098
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2099
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2100
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2101
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2102
           && !needsDither)
2103
             c->swScale= rgb2rgbWrapper;
2104

    
2105
        /* LQ converters if -sws 0 or -sws 4*/
2106
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2107
            /* rgb/bgr -> rgb/bgr (dither needed forms) */
2108
            if ( (isBGR(srcFormat) || isRGB(srcFormat))
2109
              && (isBGR(dstFormat) || isRGB(dstFormat))
2110
              && needsDither)
2111
                c->swScale= rgb2rgbWrapper;
2112

    
2113
            /* yv12_to_yuy2 */
2114
            if (srcFormat == PIX_FMT_YUV420P &&
2115
                (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422))
2116
            {
2117
                if (dstFormat == PIX_FMT_YUYV422)
2118
                    c->swScale= PlanarToYuy2Wrapper;
2119
                else
2120
                    c->swScale= PlanarToUyvyWrapper;
2121
            }
2122
        }
2123

    
2124
#ifdef COMPILE_ALTIVEC
2125
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2126
            ((srcFormat == PIX_FMT_YUV420P &&
2127
             (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) {
2128
          // unscaled YV12 -> packed YUV, we want speed
2129
          if (dstFormat == PIX_FMT_YUYV422)
2130
              c->swScale= yv12toyuy2_unscaled_altivec;
2131
          else
2132
              c->swScale= yv12touyvy_unscaled_altivec;
2133
        }
2134
#endif
2135

    
2136
        /* simple copy */
2137
        if (  srcFormat == dstFormat
2138
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2139
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)) )
2140
        {
2141
            c->swScale= simpleCopy;
2142
        }
2143

    
2144
        /* gray16{le,be} conversions */
2145
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2146
        {
2147
            c->swScale= gray16togray;
2148
        }
2149
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2150
        {
2151
            c->swScale= graytogray16;
2152
        }
2153
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2154
        {
2155
            c->swScale= gray16swap;
2156
        }
2157

    
2158
        if (c->swScale){
2159
            if (flags&SWS_PRINT_INFO)
2160
                av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n",
2161
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2162
            return c;
2163
        }
2164
    }
2165

    
2166
    if (flags & SWS_CPU_CAPS_MMX2)
2167
    {
2168
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2169
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2170
        {
2171
            if (flags&SWS_PRINT_INFO)
2172
                av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2173
        }
2174
        if (usesHFilter) c->canMMX2BeUsed=0;
2175
    }
2176
    else
2177
        c->canMMX2BeUsed=0;
2178

    
2179
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2180
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2181

    
2182
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2183
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2184
    // n-2 is the last chrominance sample available
2185
    // this is not perfect, but no one should notice the difference; the more correct variant
2186
    // would be like the vertical one, but that would require some special code for the
2187
    // first and last pixel
2188
    if (flags&SWS_FAST_BILINEAR)
2189
    {
2190
        if (c->canMMX2BeUsed)
2191
        {
2192
            c->lumXInc+= 20;
2193
            c->chrXInc+= 20;
2194
        }
2195
        //we don't use the x86asm scaler if mmx is available
2196
        else if (flags & SWS_CPU_CAPS_MMX)
2197
        {
2198
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2199
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2200
        }
2201
    }
2202

    
2203
    /* precalculate horizontal scaler filter coefficients */
2204
    {
2205
        const int filterAlign=
2206
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2207
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2208
            1;
2209

    
2210
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2211
                   srcW      ,       dstW, filterAlign, 1<<14,
2212
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2213
                   srcFilter->lumH, dstFilter->lumH, c->param);
2214
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2215
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2216
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2217
                   srcFilter->chrH, dstFilter->chrH, c->param);
2218

    
2219
#define MAX_FUNNY_CODE_SIZE 10000
2220
#if defined(COMPILE_MMX2)
2221
// can't downscale !!!
2222
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2223
        {
2224
#ifdef MAP_ANONYMOUS
2225
            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2226
            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2227
#else
2228
            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2229
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2230
#endif
2231

    
2232
            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2233
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2234
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2235
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2236

    
2237
            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2238
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2239
        }
2240
#endif /* defined(COMPILE_MMX2) */
2241
    } // Init Horizontal stuff
2242

    
2243

    
2244

    
2245
    /* precalculate vertical scaler filter coefficients */
2246
    {
2247
        const int filterAlign=
2248
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2249
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2250
            1;
2251

    
2252
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2253
                   srcH      ,        dstH, filterAlign, (1<<12)-4,
2254
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2255
                   srcFilter->lumV, dstFilter->lumV, c->param);
2256
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2257
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
2258
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2259
                   srcFilter->chrV, dstFilter->chrV, c->param);
2260

    
2261
#ifdef HAVE_ALTIVEC
2262
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2263
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2264

    
2265
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2266
            int j;
2267
            short *p = (short *)&c->vYCoeffsBank[i];
2268
            for (j=0;j<8;j++)
2269
                p[j] = c->vLumFilter[i];
2270
        }
2271

    
2272
        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2273
            int j;
2274
            short *p = (short *)&c->vCCoeffsBank[i];
2275
            for (j=0;j<8;j++)
2276
                p[j] = c->vChrFilter[i];
2277
        }
2278
#endif
2279
    }
2280

    
2281
    // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2282
    c->vLumBufSize= c->vLumFilterSize;
2283
    c->vChrBufSize= c->vChrFilterSize;
2284
    for (i=0; i<dstH; i++)
2285
    {
2286
        int chrI= i*c->chrDstH / dstH;
2287
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2288
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2289

    
2290
        nextSlice>>= c->chrSrcVSubSample;
2291
        nextSlice<<= c->chrSrcVSubSample;
2292
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2293
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
2294
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2295
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2296
    }
2297

    
2298
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2299
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2300
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2301
    //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2302
    /* align at 16 bytes for AltiVec */
2303
    for (i=0; i<c->vLumBufSize; i++)
2304
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000);
2305
    for (i=0; i<c->vChrBufSize; i++)
2306
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);
2307

    
2308
    //try to avoid drawing green stuff between the right end and the stride end
2309
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2310

    
2311
    ASSERT(c->chrDstH <= dstH)
2312

    
2313
    if (flags&SWS_PRINT_INFO)
2314
    {
2315
#ifdef DITHER1XBPP
2316
        char *dither= " dithered";
2317
#else
2318
        char *dither= "";
2319
#endif
2320
        if (flags&SWS_FAST_BILINEAR)
2321
            av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, ");
2322
        else if (flags&SWS_BILINEAR)
2323
            av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, ");
2324
        else if (flags&SWS_BICUBIC)
2325
            av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, ");
2326
        else if (flags&SWS_X)
2327
            av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, ");
2328
        else if (flags&SWS_POINT)
2329
            av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, ");
2330
        else if (flags&SWS_AREA)
2331
            av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, ");
2332
        else if (flags&SWS_BICUBLIN)
2333
            av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2334
        else if (flags&SWS_GAUSS)
2335
            av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, ");
2336
        else if (flags&SWS_SINC)
2337
            av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, ");
2338
        else if (flags&SWS_LANCZOS)
2339
            av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, ");
2340
        else if (flags&SWS_SPLINE)
2341
            av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, ");
2342
        else
2343
            av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! ");
2344

    
2345
        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2346
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2347
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2348
        else
2349
            av_log(c, AV_LOG_INFO, "from %s to %s ",
2350
                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2351

    
2352
        if (flags & SWS_CPU_CAPS_MMX2)
2353
            av_log(c, AV_LOG_INFO, "using MMX2\n");
2354
        else if (flags & SWS_CPU_CAPS_3DNOW)
2355
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2356
        else if (flags & SWS_CPU_CAPS_MMX)
2357
            av_log(c, AV_LOG_INFO, "using MMX\n");
2358
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2359
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2360
        else
2361
            av_log(c, AV_LOG_INFO, "using C\n");
2362
    }
2363

    
2364
    if (flags & SWS_PRINT_INFO)
2365
    {
2366
        if (flags & SWS_CPU_CAPS_MMX)
2367
        {
2368
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2369
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2370
            else
2371
            {
2372
                if (c->hLumFilterSize==4)
2373
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2374
                else if (c->hLumFilterSize==8)
2375
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2376
                else
2377
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2378

    
2379
                if (c->hChrFilterSize==4)
2380
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2381
                else if (c->hChrFilterSize==8)
2382
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2383
                else
2384
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2385
            }
2386
        }
2387
        else
2388
        {
2389
#if defined(ARCH_X86)
2390
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n");
2391
#else
2392
            if (flags & SWS_FAST_BILINEAR)
2393
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2394
            else
2395
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n");
2396
#endif
2397
        }
2398
        if (isPlanarYUV(dstFormat))
2399
        {
2400
            if (c->vLumFilterSize==1)
2401
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2402
            else
2403
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2404
        }
2405
        else
2406
        {
2407
            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2408
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2409
                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2410
            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2411
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2412
            else
2413
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2414
        }
2415

    
2416
        if (dstFormat==PIX_FMT_BGR24)
2417
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n",
2418
                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2419
        else if (dstFormat==PIX_FMT_RGB32)
2420
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2421
        else if (dstFormat==PIX_FMT_BGR565)
2422
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2423
        else if (dstFormat==PIX_FMT_BGR555)
2424
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2425

    
2426
        av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2427
    }
2428
    if (flags & SWS_PRINT_INFO)
2429
    {
2430
        av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2431
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2432
        av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2433
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2434
    }
2435

    
2436
    c->swScale= getSwsFunc(flags);
2437
    return c;
2438
}
2439

    
2440
/**
2441
 * swscale warper, so we don't need to export the SwsContext.
2442
 * assumes planar YUV to be in YUV order instead of YVU
2443
 */
2444
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2445
              int srcSliceH, uint8_t* dst[], int dstStride[]){
2446
    int i;
2447
    uint8_t* src2[4]= {src[0], src[1], src[2]};
2448
    uint32_t pal[256];
2449
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2450
        av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n");
2451
        return 0;
2452
    }
2453
    if (c->sliceDir == 0) {
2454
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2455
    }
2456

    
2457
    if (c->srcFormat == PIX_FMT_PAL8){
2458
        for (i=0; i<256; i++){
2459
            int p= ((uint32_t*)(src[1]))[i];
2460
            int r= (p>>16)&0xFF;
2461
            int g= (p>> 8)&0xFF;
2462
            int b=  p     &0xFF;
2463
            int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 );
2464
            int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128);
2465
            int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128);
2466
            pal[i]= y + (u<<8) + (v<<16);
2467
        }
2468
        src2[1]= pal;
2469
    }
2470

    
2471
    // copy strides, so they can safely be modified
2472
    if (c->sliceDir == 1) {
2473
        // slices go from top to bottom
2474
        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2475
        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2476
        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2477
    } else {
2478
        // slices go from bottom to top => we flip the image internally
2479
        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2480
                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2481
                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2482
        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2483
        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2484

    
2485
        src2[0] += (srcSliceH-1)*srcStride[0];
2486
        if (c->srcFormat != PIX_FMT_PAL8)
2487
            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2488
        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2489

    
2490
        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2491
    }
2492
}
2493

    
2494
/**
2495
 * swscale warper, so we don't need to export the SwsContext
2496
 */
2497
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2498
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2499
    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2500
}
2501

    
2502
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2503
                                float lumaSharpen, float chromaSharpen,
2504
                                float chromaHShift, float chromaVShift,
2505
                                int verbose)
2506
{
2507
    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2508

    
2509
    if (lumaGBlur!=0.0){
2510
        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2511
        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2512
    }else{
2513
        filter->lumH= sws_getIdentityVec();
2514
        filter->lumV= sws_getIdentityVec();
2515
    }
2516

    
2517
    if (chromaGBlur!=0.0){
2518
        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2519
        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2520
    }else{
2521
        filter->chrH= sws_getIdentityVec();
2522
        filter->chrV= sws_getIdentityVec();
2523
    }
2524

    
2525
    if (chromaSharpen!=0.0){
2526
        SwsVector *id= sws_getIdentityVec();
2527
        sws_scaleVec(filter->chrH, -chromaSharpen);
2528
        sws_scaleVec(filter->chrV, -chromaSharpen);
2529
        sws_addVec(filter->chrH, id);
2530
        sws_addVec(filter->chrV, id);
2531
        sws_freeVec(id);
2532
    }
2533

    
2534
    if (lumaSharpen!=0.0){
2535
        SwsVector *id= sws_getIdentityVec();
2536
        sws_scaleVec(filter->lumH, -lumaSharpen);
2537
        sws_scaleVec(filter->lumV, -lumaSharpen);
2538
        sws_addVec(filter->lumH, id);
2539
        sws_addVec(filter->lumV, id);
2540
        sws_freeVec(id);
2541
    }
2542

    
2543
    if (chromaHShift != 0.0)
2544
        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2545

    
2546
    if (chromaVShift != 0.0)
2547
        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2548

    
2549
    sws_normalizeVec(filter->chrH, 1.0);
2550
    sws_normalizeVec(filter->chrV, 1.0);
2551
    sws_normalizeVec(filter->lumH, 1.0);
2552
    sws_normalizeVec(filter->lumV, 1.0);
2553

    
2554
    if (verbose) sws_printVec(filter->chrH);
2555
    if (verbose) sws_printVec(filter->lumH);
2556

    
2557
    return filter;
2558
}
2559

    
2560
/**
2561
 * returns a normalized gaussian curve used to filter stuff
2562
 * quality=3 is high quality, lowwer is lowwer quality
2563
 */
2564
SwsVector *sws_getGaussianVec(double variance, double quality){
2565
    const int length= (int)(variance*quality + 0.5) | 1;
2566
    int i;
2567
    double *coeff= av_malloc(length*sizeof(double));
2568
    double middle= (length-1)*0.5;
2569
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2570

    
2571
    vec->coeff= coeff;
2572
    vec->length= length;
2573

    
2574
    for (i=0; i<length; i++)
2575
    {
2576
        double dist= i-middle;
2577
        coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2578
    }
2579

    
2580
    sws_normalizeVec(vec, 1.0);
2581

    
2582
    return vec;
2583
}
2584

    
2585
SwsVector *sws_getConstVec(double c, int length){
2586
    int i;
2587
    double *coeff= av_malloc(length*sizeof(double));
2588
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2589

    
2590
    vec->coeff= coeff;
2591
    vec->length= length;
2592

    
2593
    for (i=0; i<length; i++)
2594
        coeff[i]= c;
2595

    
2596
    return vec;
2597
}
2598

    
2599

    
2600
/**
 * Returns a one-element vector whose single coefficient is 1.0,
 * built with sws_getConstVec().
 */
SwsVector *sws_getIdentityVec(void){
    SwsVector *identity = sws_getConstVec(1.0, 1);

    return identity;
}
2603

    
2604
/**
 * Returns the sum of all coefficients of a.
 */
double sws_dcVec(SwsVector *a){
    double total = 0;
    int k = 0;

    // accumulate in index order
    while (k < a->length) {
        total += a->coeff[k];
        k++;
    }

    return total;
}
2613

    
2614
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
    int k = 0;

    while (k < a->length) {
        a->coeff[k] = a->coeff[k] * scalar;
        k++;
    }
}
2620

    
2621
/**
 * Scales a in place so that its coefficients sum to height.
 *
 * NOTE(review): the factor is height divided by the current sum; a vector
 * whose coefficients sum to zero is not special-cased here.
 */
void sws_normalizeVec(SwsVector *a, double height){
    const double dc = sws_dcVec(a);

    sws_scaleVec(a, height / dc);
}
2624

    
2625
/**
 * Returns the discrete convolution of a and b as a new vector of length
 * a->length + b->length - 1.
 *
 * @return a newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
    int length= a->length + b->length - 1;
    double *coeff= av_malloc(length*sizeof(double));
    int i, j;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        // release whichever of the two allocations succeeded
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++)
    {
        for (j=0; j<b->length; j++)
        {
            coeff[i+j]+= a->coeff[i]*b->coeff[j];
        }
    }

    return vec;
}
2646

    
2647
/**
 * Returns a + b as a new vector whose length is the larger of the two input
 * lengths; both inputs are centre-aligned inside the result before adding.
 *
 * @return a newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        // release whichever of the two allocations succeeded
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    // offset each input so that its middle lands on the middle of the result
    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

    return vec;
}
2663

    
2664
/**
 * Returns a - b as a new vector whose length is the larger of the two input
 * lengths; both inputs are centre-aligned inside the result before
 * subtracting.
 *
 * @return a newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        // release whichever of the two allocations succeeded
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    // offset each input so that its middle lands on the middle of the result
    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

    return vec;
}
2680

    
2681
/* shift left / or right if "shift" is negative */
2682
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2683
    int length= a->length + FFABS(shift)*2;
2684
    double *coeff= av_malloc(length*sizeof(double));
2685
    int i;
2686
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2687

    
2688
    vec->coeff= coeff;
2689
    vec->length= length;
2690

    
2691
    for (i=0; i<length; i++) coeff[i]= 0.0;
2692

    
2693
    for (i=0; i<a->length; i++)
2694
    {
2695
        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2696
    }
2697

    
2698
    return vec;
2699
}
2700

    
2701
/**
 * Shifts a in place by "shift" elements, replacing its coefficient array
 * with the one produced by sws_getShiftedVec().
 * If no shifted vector could be obtained, a is left unchanged and an error
 * is logged.
 */
void sws_shiftVec(SwsVector *a, int shift){
    SwsVector *shifted= sws_getShiftedVec(a, shift);

    if (!shifted){
        av_log(NULL, AV_LOG_ERROR, "swScaler: vector allocation failed, vector left unchanged\n");
        return;
    }

    // take over the new coefficient array, then drop the temporary header
    av_free(a->coeff);
    a->coeff= shifted->coeff;
    a->length= shifted->length;
    av_free(shifted);
}
2708

    
2709
/**
 * Adds b to a in place (a = a + b), replacing a's coefficient array with the
 * one produced by sws_sumVec().
 * If no sum vector could be obtained, a is left unchanged and an error is
 * logged.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
    SwsVector *sum= sws_sumVec(a, b);

    if (!sum){
        av_log(NULL, AV_LOG_ERROR, "swScaler: vector allocation failed, vector left unchanged\n");
        return;
    }

    // take over the new coefficient array, then drop the temporary header
    av_free(a->coeff);
    a->coeff= sum->coeff;
    a->length= sum->length;
    av_free(sum);
}
2716

    
2717
/**
 * Subtracts b from a in place (a = a - b), replacing a's coefficient array
 * with the one produced by sws_diffVec().
 * If no difference vector could be obtained, a is left unchanged and an
 * error is logged.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
    SwsVector *diff= sws_diffVec(a, b);

    if (!diff){
        av_log(NULL, AV_LOG_ERROR, "swScaler: vector allocation failed, vector left unchanged\n");
        return;
    }

    // take over the new coefficient array, then drop the temporary header
    av_free(a->coeff);
    a->coeff= diff->coeff;
    a->length= diff->length;
    av_free(diff);
}
2724

    
2725
/**
 * Convolves a with b in place, replacing a's coefficient array with the one
 * produced by sws_getConvVec().
 * If no convolved vector could be obtained, a is left unchanged and an error
 * is logged.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
    SwsVector *conv= sws_getConvVec(a, b);

    if (!conv){
        av_log(NULL, AV_LOG_ERROR, "swScaler: vector allocation failed, vector left unchanged\n");
        return;
    }

    // take over the new coefficient array, then drop the temporary header
    av_free(a->coeff);
    a->coeff= conv->coeff;
    a->length= conv->length;
    av_free(conv);
}
2732

    
2733
/**
 * Returns a deep copy of a (same length, same coefficients).
 *
 * @return a newly allocated vector (release it with sws_freeVec()), or NULL
 *         if an allocation failed
 */
SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        // release whichever of the two allocations succeeded
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}
2745

    
2746
/**
 * Logs a crude plot of a at AV_LOG_DEBUG level: one line per coefficient
 * with its value followed by a '|' marker indented by 0..60 spaces.
 * The indentation maps [min, max] onto 0..60, where min and max are the
 * smallest and largest coefficient, each clamped to include 0.
 */
void sws_printVec(SwsVector *a){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        // range is 0 when every coefficient is 0; avoid computing 0/0 and
        // converting the resulting NaN to int
        int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
        av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
        av_log(NULL, AV_LOG_DEBUG, "|\n");
    }
}
2768

    
2769
/**
 * Releases a vector and its coefficient array. Passing NULL is a no-op.
 */
void sws_freeVec(SwsVector *a){
    if (a) {
        av_free(a->coeff);
        // clear the fields before the struct itself is released
        a->coeff  = NULL;
        a->length = 0;
        av_free(a);
    }
}
2776

    
2777
/**
 * Releases a filter together with its lumH, lumV, chrH and chrV vectors.
 * Passing NULL is a no-op; vectors that are NULL are skipped.
 */
void sws_freeFilter(SwsFilter *filter){
    if (filter) {
        SwsVector *members[4]= {filter->lumH, filter->lumV,
                                filter->chrH, filter->chrV};
        int k;

        for (k = 0; k < 4; k++) {
            if (members[k])
                sws_freeVec(members[k]);
        }
        av_free(filter);
    }
}
2786

    
2787

    
2788
/**
 * Releases a scaler context and every buffer it owns. Passing NULL is a
 * no-op. Each member pointer is reset to NULL after being released, and the
 * context itself is freed last.
 */
void sws_freeContext(SwsContext *c){
    int i;

    if (c == NULL)
        return;

    // lumPixBuf: free each of the vLumBufSize entries, then the array itself
    if (c->lumPixBuf != NULL) {
        i = 0;
        while (i < c->vLumBufSize) {
            av_free(c->lumPixBuf[i]);
            c->lumPixBuf[i] = NULL;
            i++;
        }
        av_free(c->lumPixBuf);
        c->lumPixBuf = NULL;
    }

    // chrPixBuf: free each of the vChrBufSize entries, then the array itself
    if (c->chrPixBuf != NULL) {
        i = 0;
        while (i < c->vChrBufSize) {
            av_free(c->chrPixBuf[i]);
            c->chrPixBuf[i] = NULL;
            i++;
        }
        av_free(c->chrPixBuf);
        c->chrPixBuf = NULL;
    }

    // filter coefficient tables
    av_free(c->vLumFilter);
    c->vLumFilter = NULL;
    av_free(c->vChrFilter);
    c->vChrFilter = NULL;
    av_free(c->hLumFilter);
    c->hLumFilter = NULL;
    av_free(c->hChrFilter);
    c->hChrFilter = NULL;
#ifdef HAVE_ALTIVEC
    av_free(c->vYCoeffsBank);
    c->vYCoeffsBank = NULL;
    av_free(c->vCCoeffsBank);
    c->vCCoeffsBank = NULL;
#endif

    // filter position tables
    av_free(c->vLumFilterPos);
    c->vLumFilterPos = NULL;
    av_free(c->vChrFilterPos);
    c->vChrFilterPos = NULL;
    av_free(c->hLumFilterPos);
    c->hLumFilterPos = NULL;
    av_free(c->hChrFilterPos);
    c->hChrFilterPos = NULL;

#if defined(ARCH_X86) && defined(CONFIG_GPL)
#ifdef MAP_ANONYMOUS
    // released with munmap() when MAP_ANONYMOUS is available, av_free() otherwise
    if (c->funnyYCode)
        munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
    if (c->funnyUVCode)
        munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
    av_free(c->funnyYCode);
    av_free(c->funnyUVCode);
#endif
    c->funnyYCode  = NULL;
    c->funnyUVCode = NULL;
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */

    av_free(c->lumMmx2Filter);
    c->lumMmx2Filter = NULL;
    av_free(c->chrMmx2Filter);
    c->chrMmx2Filter = NULL;
    av_free(c->lumMmx2FilterPos);
    c->lumMmx2FilterPos = NULL;
    av_free(c->chrMmx2FilterPos);
    c->chrMmx2FilterPos = NULL;
    av_free(c->yuvTable);
    c->yuvTable = NULL;

    av_free(c);
}
2863

    
2864
/**
2865
 * Checks if context is valid or reallocs a new one instead.
2866
 * If context is NULL, just calls sws_getContext() to get a new one.
2867
 * Otherwise, checks if the parameters are the same already saved in context.
2868
 * If that is the case, returns the current context.
2869
 * Otherwise, frees context and gets a new one.
2870
 *
2871
 * Be warned that srcFilter, dstFilter are not checked, they are
2872
 * asumed to remain valid.
2873
 */
2874
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
2875
                                        int srcW, int srcH, int srcFormat,
2876
                                        int dstW, int dstH, int dstFormat, int flags,
2877
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
2878
{
2879
    if (context != NULL) {
2880
        if ((context->srcW != srcW) || (context->srcH != srcH) ||
2881
            (context->srcFormat != srcFormat) ||
2882
            (context->dstW != dstW) || (context->dstH != dstH) ||
2883
            (context->dstFormat != dstFormat) || (context->flags != flags) ||
2884
            (context->param != param))
2885
        {
2886
            sws_freeContext(context);
2887
            context = NULL;
2888
        }
2889
    }
2890
    if (context == NULL) {
2891
        return sws_getContext(srcW, srcH, srcFormat,
2892
                              dstW, dstH, dstFormat, flags,
2893
                              srcFilter, dstFilter, param);
2894
    }
2895
    return context;
2896
}
2897