Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ 8f58a4c9

History | View | Annotate | Download (101 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22
 */
23

    
24
/*
25
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09, PAL8
26
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27
  {BGR,RGB}{1,4,8,15,16} support dithering
28

29
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37
  BGR15 -> BGR16
38
*/
39

    
40
/*
41
tested special converters (most are actually tested, but I didn't write it down ...)
42
 YV12 -> BGR16
43
 YV12 -> YV12
44
 BGR15 -> BGR16
45
 BGR16 -> BGR16
46
 YVU9 -> YV12
47

48
untested special converters
49
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
50
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54
  BGR24 -> YV12
55
*/
56

    
57
#include <inttypes.h>
58
#include <string.h>
59
#include <math.h>
60
#include <stdio.h>
61
#include <unistd.h>
62
#include "config.h"
63
#include <assert.h>
64
#ifdef HAVE_SYS_MMAN_H
65
#include <sys/mman.h>
66
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69
#endif
70
#include "swscale.h"
71
#include "swscale_internal.h"
72
#include "x86_cpu.h"
73
#include "bswap.h"
74
#include "rgb2rgb.h"
75
#include "libavcodec/opt.h"
76

    
77
/* Drop any MOVNTQ / PAVGB macro definitions that are still active here. */
#undef MOVNTQ
#undef PAVGB

/* Manual build switches; every line that is commented out is inactive. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/*
 * ASSERT(x) forwards to assert() only when MP_DEBUG is defined; otherwise it
 * expands to an empty statement. Both expansions already carry their own ';'.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: reuse M_PI when it is defined, otherwise fall back to a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
102

    
103
/* Non-zero when pixel format x is one of the formats accepted as scaler input. */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_YUV440P     \
    )
/*
 * Non-zero when pixel format x is one of the formats accepted as scaler output.
 * Relies on the isRGB()/isBGR() predicates, which are defined outside this block.
 */
#define isSupportedOut(x)   (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || isRGB(x)                 \
        || isBGR(x)                 \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
    )
/*
 * Non-zero for the formats this file treats as packed: PAL8, YUYV422,
 * UYVY422 and everything matched by isRGB()/isBGR().
 */
#define isPacked(x)         (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || isRGB(x)                 \
        || isBGR(x)                 \
    )
152

    
153
#define RGB2YUV_SHIFT 16
154
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
155
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
156
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
157
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
158
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
159
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
160
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
161
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
162
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
163

    
164
extern const int32_t Inverse_Table_6_9[8][4];
165

    
166
/*
167
NOTES
168
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
169

170
TODO
171
more intelligent misalignment avoidance for the horizontal scaler
172
write special vertical cubic upscale version
173
Optimize C code (yv12 / minmax)
174
add support for packed pixel yuv input & output
175
add support for Y8 output
176
optimize bgr24 & bgr32
177
add BGR4 output support
178
write special BGR->BGR scaler
179
*/
180

    
181
#if defined(ARCH_X86) && defined (CONFIG_GPL)
/*
 * 8-byte aligned 64-bit constants, only built for x86 GPL builds.
 * Presumably they are referenced from inline assembly elsewhere in the file
 * (in_asm_used_var_warning_killer() reads them to silence "unused" warnings);
 * confirm before removing any of them.
 * All literals use the ULL suffix; the values are unchanged.
 */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8ULL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCULL;
static uint64_t                __attribute__((aligned(8))) w10=       0x0010001000100010ULL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002ULL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFULL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFULL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000ULL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFULL;

/* Per-component dither values; zero-initialized here and written elsewhere. */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103ULL,
        0x0200020002000200ULL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602ULL,
        0x0004000400040004ULL,};

/* Bit masks selecting the blue/green/red fields of four packed 16-bit (565) or 15-bit (555) pixels. */
static uint64_t                __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FULL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0ULL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800ULL;
static uint64_t                __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FULL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0ULL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00ULL;

/* Byte-select masks with a period of three bytes. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=      0x00FF0000FF0000FFULL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=      0xFF0000FF0000FF00ULL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=      0x0000FF0000FF0000ULL;

/* BGR -> Y/U/V coefficient words: short coefficients with FAST_BGR2YV12, wider ones otherwise. */
#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
static const uint64_t bgr2YOffset  attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8))) = 0x8080808080808080ULL;
static const uint64_t w1111        attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif /* defined(ARCH_X86) && defined (CONFIG_GPL) */
228

    
229
// clipping helper table for C implementations:
230
static unsigned char clip_table[768];
231

    
232
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
233

    
234
extern const uint8_t dither_2x2_4[2][8];
235
extern const uint8_t dither_2x2_8[2][8];
236
extern const uint8_t dither_8x8_32[8][8];
237
extern const uint8_t dither_8x8_73[8][8];
238
extern const uint8_t dither_8x8_220[8][8];
239

    
240
/**
 * Name callback stored in sws_context_class.
 *
 * @param ptr unused
 * @return the constant string "swscaler"
 */
static const char * sws_context_to_name(void * ptr) {
    return "swscaler";
}
243

    
244
#define OFFSET(x) offsetof(SwsContext, x)
245
#define DEFAULT 0
246
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
247

    
248
static const AVOption options[] = {
249
    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, VE, "sws_flags" },
250
    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
251
    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
252
    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
253
    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
254
    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
255
    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
256
    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
257
    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
258
    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
259
    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
260
    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
261
    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
262
    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
263
    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
264
    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
265
    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
266
    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
267
    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
268
    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
269
    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
270
    { NULL }
271
};
272

    
273
#undef VE
274
#undef DEFAULT
275

    
276
static AVClass sws_context_class = { "SWScaler", sws_context_to_name, options };
277

    
278
char *sws_format_name(enum PixelFormat format)
279
{
280
    switch (format) {
281
        case PIX_FMT_YUV420P:
282
            return "yuv420p";
283
        case PIX_FMT_YUYV422:
284
            return "yuyv422";
285
        case PIX_FMT_RGB24:
286
            return "rgb24";
287
        case PIX_FMT_BGR24:
288
            return "bgr24";
289
        case PIX_FMT_YUV422P:
290
            return "yuv422p";
291
        case PIX_FMT_YUV444P:
292
            return "yuv444p";
293
        case PIX_FMT_RGB32:
294
            return "rgb32";
295
        case PIX_FMT_YUV410P:
296
            return "yuv410p";
297
        case PIX_FMT_YUV411P:
298
            return "yuv411p";
299
        case PIX_FMT_RGB565:
300
            return "rgb565";
301
        case PIX_FMT_RGB555:
302
            return "rgb555";
303
        case PIX_FMT_GRAY16BE:
304
            return "gray16be";
305
        case PIX_FMT_GRAY16LE:
306
            return "gray16le";
307
        case PIX_FMT_GRAY8:
308
            return "gray8";
309
        case PIX_FMT_MONOWHITE:
310
            return "mono white";
311
        case PIX_FMT_MONOBLACK:
312
            return "mono black";
313
        case PIX_FMT_PAL8:
314
            return "Palette";
315
        case PIX_FMT_YUVJ420P:
316
            return "yuvj420p";
317
        case PIX_FMT_YUVJ422P:
318
            return "yuvj422p";
319
        case PIX_FMT_YUVJ444P:
320
            return "yuvj444p";
321
        case PIX_FMT_XVMC_MPEG2_MC:
322
            return "xvmc_mpeg2_mc";
323
        case PIX_FMT_XVMC_MPEG2_IDCT:
324
            return "xvmc_mpeg2_idct";
325
        case PIX_FMT_UYVY422:
326
            return "uyvy422";
327
        case PIX_FMT_UYYVYY411:
328
            return "uyyvyy411";
329
        case PIX_FMT_RGB32_1:
330
            return "rgb32x";
331
        case PIX_FMT_BGR32_1:
332
            return "bgr32x";
333
        case PIX_FMT_BGR32:
334
            return "bgr32";
335
        case PIX_FMT_BGR565:
336
            return "bgr565";
337
        case PIX_FMT_BGR555:
338
            return "bgr555";
339
        case PIX_FMT_BGR8:
340
            return "bgr8";
341
        case PIX_FMT_BGR4:
342
            return "bgr4";
343
        case PIX_FMT_BGR4_BYTE:
344
            return "bgr4 byte";
345
        case PIX_FMT_RGB8:
346
            return "rgb8";
347
        case PIX_FMT_RGB4:
348
            return "rgb4";
349
        case PIX_FMT_RGB4_BYTE:
350
            return "rgb4 byte";
351
        case PIX_FMT_NV12:
352
            return "nv12";
353
        case PIX_FMT_NV21:
354
            return "nv21";
355
        case PIX_FMT_YUV440P:
356
            return "yuv440p";
357
        default:
358
            return "Unknown format";
359
    }
360
}
361

    
362
#if defined(ARCH_X86) && defined (CONFIG_GPL)
/**
 * Reads every file-scope x86 constant once so that the compiler sees each of
 * them as used. The sum is truncated into a volatile int and then discarded;
 * the function has no other effect.
 */
void in_asm_used_var_warning_killer(void)
{
    volatile int i= bF8+bFC+w10+
    bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
    M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
    if (i) i=0;
}
#endif
371

    
372
/**
 * Vertical filter producing planar 8-bit output (plain C version).
 *
 * Every output sample is the sum over j of src[j][i] * filter[j], started at
 * 1<<18 (half of the 1<<19 divisor, i.e. rounding), shifted right by 19 and
 * clipped with av_clip_uint8().
 *
 * @param lumFilter     luma coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize pointers to luma input rows
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize pointers to chroma input rows; U samples
 *                      are read at index i and V samples at index i + 2048 of
 *                      the same row
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes are written
 * @param uDest         U output, chrDstW bytes; NULL skips all chroma output
 * @param vDest         V output, chrDstW bytes; only written when uDest != NULL
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    //FIXME Optimize (just quickly written not opti..)
    int i;
    for (i=0; i<dstW; i++)
    {
        int val=1<<18;
        int j;
        for (j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];

        dest[i]= av_clip_uint8(val>>19);
    }

    if (uDest != NULL)
        for (i=0; i<chrDstW; i++)
        {
            int u=1<<18;
            int v=1<<18;
            int j;
            for (j=0; j<chrFilterSize; j++)
            {
                u += chrSrc[j][i] * chrFilter[j];
                v += chrSrc[j][i + 2048] * chrFilter[j];
            }

            uDest[i]= av_clip_uint8(u>>19);
            vDest[i]= av_clip_uint8(v>>19);
        }
}
404

    
405
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
406
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
407
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
408
{
409
    //FIXME Optimize (just quickly writen not opti..)
410
    int i;
411
    for (i=0; i<dstW; i++)
412
    {
413
        int val=1<<18;
414
        int j;
415
        for (j=0; j<lumFilterSize; j++)
416
            val += lumSrc[j][i] * lumFilter[j];
417

    
418
        dest[i]= av_clip_uint8(val>>19);
419
    }
420

    
421
    if (uDest == NULL)
422
        return;
423

    
424
    if (dstFormat == PIX_FMT_NV12)
425
        for (i=0; i<chrDstW; i++)
426
        {
427
            int u=1<<18;
428
            int v=1<<18;
429
            int j;
430
            for (j=0; j<chrFilterSize; j++)
431
            {
432
                u += chrSrc[j][i] * chrFilter[j];
433
                v += chrSrc[j][i + 2048] * chrFilter[j];
434
            }
435

    
436
            uDest[2*i]= av_clip_uint8(u>>19);
437
            uDest[2*i+1]= av_clip_uint8(v>>19);
438
        }
439
    else
440
        for (i=0; i<chrDstW; i++)
441
        {
442
            int u=1<<18;
443
            int v=1<<18;
444
            int j;
445
            for (j=0; j<chrFilterSize; j++)
446
            {
447
                u += chrSrc[j][i] * chrFilter[j];
448
                v += chrSrc[j][i + 2048] * chrFilter[j];
449
            }
450

    
451
            uDest[2*i]= av_clip_uint8(v>>19);
452
            uDest[2*i+1]= av_clip_uint8(u>>19);
453
        }
454
}
455

    
456
/*
 * Loop-opener macros for the C packed-pixel writers.
 *
 * Each macro expands to `for (i=0; i<(dstW>>1); i++){` followed by the
 * computation of one pixel pair (Y1, Y2, U, V and the index i2 = 2*i).
 * The opening brace is NOT closed by the macro: the code that follows the
 * macro invocation forms the loop body and must supply the closing '}'.
 * The macros read variables from the expansion site (i, dstW and the
 * source/filter variables named in each macro); the _RGB variants also read
 * the lookup tables of the context pointer `c`.
 * V samples are read at offset 2048 from the matching U sample.
 */

/*
 * Multi-tap vertical filter: accumulates lumSrc/chrSrc rows weighted by
 * lumFilter/chrFilter, starting from the rounding constant 1<<18, then shifts
 * right by 19. The clamp to 0..255 is only entered when bit 8 of any of the
 * four results is set.
 * NOTE(review): a result below -256 or above 511 whose bit 8 happens to be
 * clear would bypass the clamp; presumably the filter gain keeps results out
 * of that range - confirm against the filter generation code.
 * `type` is the element type of the r/g/b table pointers declared here.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+2048] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }

/* PACKEDX plus selection of the per-chroma r/g/b lookup tables from `c`. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)  \
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];

/*
 * Two-row blend: buf0/buf1 weighted by yalpha1/yalpha and uvbuf0/uvbuf1
 * weighted by uvalpha1/uvalpha, each shifted right by 19. No clamping.
 */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

/* PACKED2 plus declaration and selection of the r/g/b lookup tables. */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Single-row variant: luma from buf0 and chroma from uvbuf1, each shifted right by 7. */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;

/* PACKED1 plus declaration and selection of the r/g/b lookup tables. */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Like PACKED1, but chroma is the sum of uvbuf0 and uvbuf1 shifted right by 8 (their average, then >>7). */
#define YSCALE_YUV_2_PACKED1B_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

/* PACKED1B plus declaration and selection of the r/g/b lookup tables. */
#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
542

    
543
/*
 * Emits a complete `switch(c->dstFormat)` that writes one output line in the
 * destination format.
 *
 * func(type) must expand to a loop opener that leaves i2, Y1, Y2 and the
 *            r/g/b table pointers (of element type `type`) in scope and an
 *            open '{' for this macro to close; it is used by all RGB/BGR cases.
 * func2      same, but only i2, Y1, Y2, U, V are needed; it is used by the
 *            YUYV422 / UYVY422 cases.
 *
 * Variables taken from the expansion site: c, dest, i, dstW, y, plus whatever
 * func/func2 read. The MONOBLACK case does not use func at all: it reads
 * buf0, buf1, yalpha and yalpha1 directly, so those names must exist wherever
 * this macro is expanded.
 *
 * Addressing differs per case: the 24-bit cases advance `dest` by 6 bytes per
 * pixel pair, MONOBLACK advances it by one byte per 8 pixels (width rounded
 * down to a multiple of 8), RGB4/BGR4 pack two pixels into dest[i], and all
 * other cases index from the unmodified `dest`.
 * The 16/15/8/4-bit cases add ordered-dither offsets (selected by y and the
 * pixel column) to the luma index before the table lookup.
 * Formats without a case label leave `dest` untouched.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
        {\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            uint8_t *g= c->table_gU[128] + c->table_gV[128];\
            for (i=0; i<dstW-7; i+=8){\
                int acc;\
                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                ((uint8_t*)dest)[0]= acc;\
                dest++;\
            }\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    }
719

    
720

    
721
/**
 * Write one output line in the packed pixel format selected by c->dstFormat.
 *
 * Except for PIX_FMT_MONOBLACK, the per-pixel values used below (Y1, Y2, U, V,
 * the r/g/b lookup tables and the pair index i2) are provided by the
 * YSCALE_YUV_2_RGBX_C / YSCALE_YUV_2_PACKEDX_C macros, which are defined
 * outside this function. Each macro invocation opens a loop; the lone '}'
 * after the store statements closes it.
 * NOTE(review): the macros presumably consume lumFilter/lumSrc/chrFilter/chrSrc
 * and the filter sizes -- confirm against their definitions.
 *
 * In the code visible here, y is only used to select the dither row.
 * Formats without a case label are ignored (the switch has no default).
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
722
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
723
                                  uint8_t *dest, int dstW, int y)
724
{
725
    int i;
726
    switch(c->dstFormat)
727
    {
728
    /* 32 bit: one word per pixel, assembled by adding the three table entries */
    case PIX_FMT_BGR32:
729
    case PIX_FMT_RGB32:
730
        YSCALE_YUV_2_RGBX_C(uint32_t)
731
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
732
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
733
        }
734
        break;
735
    /* 24 bit: three bytes per pixel, dest advances by one pixel pair (6 bytes) per iteration */
    case PIX_FMT_RGB24:
736
        YSCALE_YUV_2_RGBX_C(uint8_t)
737
            ((uint8_t*)dest)[0]= r[Y1];
738
            ((uint8_t*)dest)[1]= g[Y1];
739
            ((uint8_t*)dest)[2]= b[Y1];
740
            ((uint8_t*)dest)[3]= r[Y2];
741
            ((uint8_t*)dest)[4]= g[Y2];
742
            ((uint8_t*)dest)[5]= b[Y2];
743
            dest+=6;
744
        }
745
        break;
746
    /* same as RGB24 with the first and third byte of each pixel swapped */
    case PIX_FMT_BGR24:
747
        YSCALE_YUV_2_RGBX_C(uint8_t)
748
            ((uint8_t*)dest)[0]= b[Y1];
749
            ((uint8_t*)dest)[1]= g[Y1];
750
            ((uint8_t*)dest)[2]= r[Y1];
751
            ((uint8_t*)dest)[3]= b[Y2];
752
            ((uint8_t*)dest)[4]= g[Y2];
753
            ((uint8_t*)dest)[5]= r[Y2];
754
            dest+=6;
755
        }
756
        break;
757
    /* 16 bit 5:6:5: a 2x2 ordered dither offset is added to the luma index
       before the table lookup; the row is picked by y&1, blue uses the other
       row, and green uses its own (dither_2x2_4) table */
    case PIX_FMT_RGB565:
758
    case PIX_FMT_BGR565:
759
        {
760
            const int dr1= dither_2x2_8[y&1    ][0];
761
            const int dg1= dither_2x2_4[y&1    ][0];
762
            const int db1= dither_2x2_8[(y&1)^1][0];
763
            const int dr2= dither_2x2_8[y&1    ][1];
764
            const int dg2= dither_2x2_4[y&1    ][1];
765
            const int db2= dither_2x2_8[(y&1)^1][1];
766
            YSCALE_YUV_2_RGBX_C(uint16_t)
767
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
768
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
769
            }
770
        }
771
        break;
772
    /* 16 bit 5:5:5: all three components use dither_2x2_8; green takes the
       column opposite to red within the same row */
    case PIX_FMT_RGB555:
773
    case PIX_FMT_BGR555:
774
        {
775
            const int dr1= dither_2x2_8[y&1    ][0];
776
            const int dg1= dither_2x2_8[y&1    ][1];
777
            const int db1= dither_2x2_8[(y&1)^1][0];
778
            const int dr2= dither_2x2_8[y&1    ][1];
779
            const int dg2= dither_2x2_8[y&1    ][0];
780
            const int db2= dither_2x2_8[(y&1)^1][1];
781
            YSCALE_YUV_2_RGBX_C(uint16_t)
782
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
783
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
784
            }
785
        }
786
        break;
787
    /* 8 bit: 8x8 ordered dither, row y&7, column = pixel index & 7;
       red/green use the d32 row, blue the d64 row */
    case PIX_FMT_RGB8:
788
    case PIX_FMT_BGR8:
789
        {
790
            const uint8_t * const d64= dither_8x8_73[y&7];
791
            const uint8_t * const d32= dither_8x8_32[y&7];
792
            YSCALE_YUV_2_RGBX_C(uint8_t)
793
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
794
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
795
            }
796
        }
797
        break;
798
    /* 4 bit packed: two pixels per output byte, the second pixel of the pair
       goes into the high nibble; the byte index is i, not i2 */
    case PIX_FMT_RGB4:
799
    case PIX_FMT_BGR4:
800
        {
801
            const uint8_t * const d64= dither_8x8_73 [y&7];
802
            const uint8_t * const d128=dither_8x8_220[y&7];
803
            YSCALE_YUV_2_RGBX_C(uint8_t)
804
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
805
                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
806
            }
807
        }
808
        break;
809
    /* 4 bit, one pixel per byte: same dithering as above without nibble packing */
    case PIX_FMT_RGB4_BYTE:
810
    case PIX_FMT_BGR4_BYTE:
811
        {
812
            const uint8_t * const d64= dither_8x8_73 [y&7];
813
            const uint8_t * const d128=dither_8x8_220[y&7];
814
            YSCALE_YUV_2_RGBX_C(uint8_t)
815
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
816
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
817
            }
818
        }
819
        break;
820
    /* 1 bit: luma only, handled without the macros. Two pixels are filtered per
       iteration, dithered, mapped through the g table and shifted into acc;
       one byte is stored after every 8 pixels. */
    case PIX_FMT_MONOBLACK:
821
        {
822
            const uint8_t * const d128=dither_8x8_220[y&7];
823
            /* lookup table taken at the neutral chroma index 128 */
            uint8_t *g= c->table_gU[128] + c->table_gV[128];
824
            int acc=0;
825
            for (i=0; i<dstW-1; i+=2){
826
                int j;
827
                /* 1<<18 is the rounding term for the >>19 below */
                int Y1=1<<18;
828
                int Y2=1<<18;
829

    
830
                for (j=0; j<lumFilterSize; j++)
831
                {
832
                    Y1 += lumSrc[j][i] * lumFilter[j];
833
                    Y2 += lumSrc[j][i+1] * lumFilter[j];
834
                }
835
                Y1>>=19;
836
                Y2>>=19;
837
                /* cheap test: bit 8 set in either value means at least one of
                   them may be outside 0..255, only then do the full clipping */
                if ((Y1|Y2)&256)
838
                {
839
                    if (Y1>255)   Y1=255;
840
                    else if (Y1<0)Y1=0;
841
                    if (Y2>255)   Y2=255;
842
                    else if (Y2<0)Y2=0;
843
                }
844
                /* acc+= acc + bit  is  acc= 2*acc + bit: shift left and append */
                acc+= acc + g[Y1+d128[(i+0)&7]];
845
                acc+= acc + g[Y2+d128[(i+1)&7]];
846
                /* i advances by 2, so i&7 == 6 marks the 7th/8th pixel of a byte */
                if ((i&7)==6){
847
                    ((uint8_t*)dest)[0]= acc;
848
                    dest++;
849
                }
850
            }
851
        }
852
        break;
853
    /* packed 4:2:2, byte order Y1 U Y2 V for each pixel pair */
    case PIX_FMT_YUYV422:
854
        YSCALE_YUV_2_PACKEDX_C(void)
855
            ((uint8_t*)dest)[2*i2+0]= Y1;
856
            ((uint8_t*)dest)[2*i2+1]= U;
857
            ((uint8_t*)dest)[2*i2+2]= Y2;
858
            ((uint8_t*)dest)[2*i2+3]= V;
859
        }
860
        break;
861
    /* packed 4:2:2, byte order U Y1 V Y2 for each pixel pair */
    case PIX_FMT_UYVY422:
862
        YSCALE_YUV_2_PACKEDX_C(void)
863
            ((uint8_t*)dest)[2*i2+0]= U;
864
            ((uint8_t*)dest)[2*i2+1]= Y1;
865
            ((uint8_t*)dest)[2*i2+2]= V;
866
            ((uint8_t*)dest)[2*i2+3]= Y2;
867
        }
868
        break;
869
    }
870
}
871

    
872

    
873
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
874
//Plain C versions
875
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
876
#define COMPILE_C
877
#endif
878

    
879
#ifdef ARCH_POWERPC
880
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
881
#define COMPILE_ALTIVEC
882
#endif //HAVE_ALTIVEC
883
#endif //ARCH_POWERPC
884

    
885
#if defined(ARCH_X86)
886

    
887
#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
888
#define COMPILE_MMX
889
#endif
890

    
891
#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
892
#define COMPILE_MMX2
893
#endif
894

    
895
#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
896
#define COMPILE_3DNOW
897
#endif
898
#endif //ARCH_X86
899

    
900
// The build-time HAVE_* values have been folded into the COMPILE_* macros
// above; clear them so that each template inclusion below can set exactly the
// combination it needs.
#undef HAVE_MMX
901
#undef HAVE_MMX2
902
#undef HAVE_3DNOW
903

    
904
// Each section below includes swscale_template.c once, with RENAME() appending
// a per-variant suffix to the names it wraps.
#ifdef COMPILE_C
905
#undef HAVE_MMX
906
#undef HAVE_MMX2
907
#undef HAVE_3DNOW
908
#undef HAVE_ALTIVEC
909
#define RENAME(a) a ## _C
910
#include "swscale_template.c"
911
#endif
912

    
913
#ifdef ARCH_POWERPC
914
#ifdef COMPILE_ALTIVEC
915
#undef RENAME
916
#define HAVE_ALTIVEC
917
#define RENAME(a) a ## _altivec
918
#include "swscale_template.c"
919
#endif
920
#endif //ARCH_POWERPC
921

    
922
#if defined(ARCH_X86)
923

    
924
//X86 versions (disabled)
925
/*
926
#undef RENAME
927
#undef HAVE_MMX
928
#undef HAVE_MMX2
929
#undef HAVE_3DNOW
930
#define ARCH_X86
931
#define RENAME(a) a ## _X86
932
#include "swscale_template.c"
933
*/
934
//MMX versions
935
#ifdef COMPILE_MMX
936
#undef RENAME
937
#define HAVE_MMX
938
#undef HAVE_MMX2
939
#undef HAVE_3DNOW
940
#define RENAME(a) a ## _MMX
941
#include "swscale_template.c"
942
#endif
943

    
944
//MMX2 versions
945
#ifdef COMPILE_MMX2
946
#undef RENAME
947
#define HAVE_MMX
948
#define HAVE_MMX2
949
#undef HAVE_3DNOW
950
#define RENAME(a) a ## _MMX2
951
#include "swscale_template.c"
952
#endif
953

    
954
//3DNOW versions
955
#ifdef COMPILE_3DNOW
956
#undef RENAME
957
#define HAVE_MMX
958
#undef HAVE_MMX2
959
#define HAVE_3DNOW
960
#define RENAME(a) a ## _3DNow
961
#include "swscale_template.c"
962
#endif
963

    
964
#endif //ARCH_X86
965

    
966
// minor note: the HAVE_xyz macros reflect whichever template was included last
// from this line on, not the build configuration, so don't rely on them
967

    
968
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1 the result is a + b*dist + c*dist^2 + d*dist^3. For larger
 * distances the coefficients are advanced to the next unit segment (a becomes
 * 0, b/c/d are recombined) and dist is reduced by 1 until it is <= 1.
 *
 * This is the iterative form of the former recursive implementation: it uses
 * constant stack regardless of dist and terminates for every input. A NaN
 * dist yields NaN; a dist so large that subtracting 1.0 no longer changes it
 * (including +infinity) stops the stepping and is evaluated as is, so the
 * result is not meaningful in that case.
 *
 * @param a,b,c,d polynomial coefficients of the first segment
 * @param dist    distance from the filter centre
 * @return the spline coefficient at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0)
    {
        const double nextDist= dist - 1.0;
        const double nextB=  b + 2.0*c + 3.0*d;
        const double nextC=      c + 3.0*d;
        const double nextD= -b - 3.0*c - 6.0*d;

        if (nextDist == dist) break; // no progress possible (inf / huge value)

        a= 0.0;
        b= nextB;
        c= nextC;
        d= nextD;
        dist= nextDist;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
978

    
979
/**
 * Build the fixed-point filter coefficients and source positions used to
 * scale one dimension from srcW to dstW samples.
 *
 * The filter is first computed in double precision according to the scaler
 * selected in flags, optionally convolved with srcFilter, trimmed of
 * near-zero taps, padded to a multiple of filterAlign, clamped at the image
 * borders and finally normalized to `one` and stored as int16_t.
 *
 * @param outFilter     receives a newly allocated table holding *outFilterSize
 *                      coefficients for each of the dstW outputs, followed by
 *                      one duplicated row (the MMX scaler reads over the end)
 * @param filterPos     receives a newly allocated table of dstW+1 source start
 *                      positions
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source increment per output sample, 16.16 fixed point
 * @param srcW          number of source samples
 * @param dstW          number of destination samples
 * @param filterAlign   the tap count is rounded up to a multiple of this value
 *                      (the rounding uses a bit mask, so a power of two is
 *                      expected)
 * @param one           fixed-point value each row of coefficients is scaled to
 * @param flags         SWS_* scaler selection and SWS_CPU_CAPS_* flags
 * @param srcFilter     optional vector convolved with every filter row, or NULL
 * @param dstFilter     only enters the size computation here (see FIXME below)
 * @param param         scaler tuning values; SWS_PARAM_DEFAULT selects the
 *                      built-in default
 * @return 0 on success, -1 if the resulting filter is too large or an
 *         allocation failed. The temporary buffers are released on every
 *         path; *filterPos is not freed here when -1 is returned.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int ret= -1;
    double *filter=NULL;
    double *filter2=NULL;
#if defined(ARCH_X86)
    if (flags & SWS_CPU_CAPS_MMX)
        asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // Note the +1 is for the MMXscaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
    if (!*filterPos)
        goto fail;

    if (FFABS(xInc - 0x10000) <10) // unscaled
    {
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter)
            goto fail;
        for (i=0; i<dstW*filterSize; i++) filter[i]=0;

        for (i=0; i<dstW; i++)
        {
            filter[i*filterSize]=1;
            (*filterPos)[i]=i;
        }

    }
    else if (flags&SWS_POINT) // lame looking point sampling mode
    {
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter)
            goto fail;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= 1.0;
            xDstInSrc+= xInc;
        }
    }
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
    {
        int xDstInSrc;
        if      (flags&SWS_BICUBIC) filterSize= 4;
        else if (flags&SWS_X      ) filterSize= 4;
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter)
            goto fail;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //Bilinear upscale / linear interpolate / Area averaging
            for (j=0; j<filterSize; j++)
            {
                double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                double coeff= 1.0 - d;
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    }
    else
    {
        double xDstInSrc;
        double sizeFactor, filterSizeInSrc;
        const double xInc1= (double)xInc / (double)(1<<16);

        if      (flags&SWS_BICUBIC)      sizeFactor=  4.0;
        else if (flags&SWS_X)            sizeFactor=  8.0;
        else if (flags&SWS_AREA)         sizeFactor=  1.0; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8.0;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
        else if (flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2.0;
        else {
            sizeFactor= 0.0; //GCC warning killer
            ASSERT(0)
        }

        if (xInc1 <= 1.0)       filterSizeInSrc= sizeFactor; // upscale
        else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

        filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
        if (filterSize > srcW-2) filterSize=srcW-2;

        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter)
            goto fail;

        xDstInSrc= xInc1 / 2.0 - 0.5;
        for (i=0; i<dstW; i++)
        {
            int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++)
            {
                double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                double coeff;
                if (flags & SWS_BICUBIC)
                {
                    double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                    double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                    if (d<1.0)
                        coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                    else if (d<2.0)
                        coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                    else
                        coeff=0.0;
                }
                else if (flags & SWS_X)
                {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                    if (d<1.0)
                        coeff = cos(d*PI);
                    else
                        coeff=-1.0;
                    if (coeff<0.0)      coeff= -pow(-coeff, A);
                    else                coeff=  pow( coeff, A);
                    coeff= coeff*0.5 + 0.5;
                }
                else if (flags & SWS_AREA)
                {
                    double srcPixelSize= 1.0/xInc1;
                    if      (d + srcPixelSize/2 < 0.5) coeff= 1.0;
                    else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                    else coeff=0.0;
                }
                else if (flags & SWS_GAUSS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = pow(2.0, - p*d*d);
                }
                else if (flags & SWS_SINC)
                {
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                }
                else if (flags & SWS_LANCZOS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                    if (d>p) coeff=0;
                }
                else if (flags & SWS_BILINEAR)
                {
                    coeff= 1.0 - d;
                    if (coeff<0) coeff=0;
                }
                else if (flags & SWS_SPLINE)
                {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                }
                else {
                    coeff= 0.0; //GCC warning killer
                    ASSERT(0)
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc1;
        }
    }

    /* apply src & dst Filter to filter -> filter2 */
    ASSERT(filterSize>0)
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    ASSERT(filter2Size>0)
    filter2= av_malloc(filter2Size*dstW*sizeof(double));
    if (!filter2)
        goto fail;

    for (i=0; i<dstW; i++)
    {
        int j;
        SwsVector scaleFilter;
        SwsVector *outVec;

        scaleFilter.coeff= filter + i*filterSize;
        scaleFilter.length= filterSize;

        if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
        else           outVec= &scaleFilter;
        if (!outVec)
            goto fail;

        ASSERT(outVec->length == filter2Size)
        //FIXME dstFilter

        for (j=0; j<outVec->length; j++)
        {
            filter2[i*filter2Size + j]= outVec->coeff[j];
        }

        // keep the filter centred after the convolution widened it
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

        if (outVec != &scaleFilter) sws_freeVec(outVec);
    }
    av_free(filter); filter=NULL;

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--)
    {
        int min= filter2Size;
        int j;
        double cutOff=0.0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++)
        {
            int k;
            // always element 0: the row is shifted left on every iteration
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // Move filter coeffs left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0.0;
            (*filterPos)[i]++;
        }

        cutOff=0.0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--)
        {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // we really don't want to waste our time
        // doing useless computation, so fall-back on
        // the scalar C code for very small filter.
        // vectorizing is worth it only if you have
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    ASSERT(minFilterSize > 0)
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    ASSERT(filterSize > 0)
    // reject oversized filters before allocating anything for them
    if (filterSize >= MAX_FILTER_SIZE)
        goto fail;
    filter= av_malloc(filterSize*dstW*sizeof(double));
    if (!filter)
        goto fail;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++)
    {
        int j;

        for (j=0; j<filterSize; j++)
        {
            if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
        }
    }
    av_free(filter2); filter2=NULL;

    //FIXME try to align filterpos if possible

    //fix borders
    for (i=0; i<dstW; i++)
    {
        int j;
        if ((*filterPos)[i] < 0)
        {
            // Move filter coeffs left to compensate for filterPos
            for (j=1; j<filterSize; j++)
            {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW)
        {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // Move filter coeffs right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--)
            {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMXscaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
    if (!*outFilter)
        goto fail;

    /* Normalize & Store in outFilter */
    for (i=0; i<dstW; i++)
    {
        int j;
        double error=0;
        double sum=0;
        double scale= one;

        for (j=0; j<filterSize; j++)
        {
            sum+= filter[i*filterSize + j];
        }
        scale/= sum;
        for (j=0; j<*outFilterSize; j++)
        {
            // carry the rounding error into the next tap
            double v= filter[i*filterSize + j]*scale + error;
            int intV= floor(v + 0.5);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error = v - intV;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++)
    {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret= 0;
fail:
    // single exit: the temporary buffers are released on success and on error
    av_free(filter);
    av_free(filter2);
    return ret;
}
1356

    
1357
#ifdef COMPILE_MMX2
1358
/**
 * Generate a horizontal scaling routine into funnyCode at run time.
 *
 * Two code fragments are assembled inline (and jumped over, so they are never
 * executed from here). For every group of 4 output pixels one of them is
 * copied into funnyCode and the immediate bytes of its two pshufw
 * instructions are patched with the source-pixel selection for that group.
 * A RET opcode is written after the last copied fragment.
 *
 * @param dstW      number of output pixels
 * @param xInc      source increment per output pixel, 16.16 fixed point
 * @param funnyCode buffer receiving the generated code
 * @param filter    receives one weight per output pixel, derived from the
 *                  fractional part of the source position
 * @param filterPos receives the source start position of every 4-pixel group
 *                  (index i/2), plus one trailing entry
 * @param numSplits only dstW/numSplits pixels are generated
 * NOTE(review): the required sizes of funnyCode, filter and filterPos are
 * decided by the caller and cannot be checked here.
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1359
{
1360
    uint8_t *fragmentA;
1361
    long imm8OfPShufW1A;
1362
    long imm8OfPShufW2A;
1363
    long fragmentLengthA;
1364
    uint8_t *fragmentB;
1365
    long imm8OfPShufW1B;
1366
    long imm8OfPShufW2B;
1367
    long fragmentLengthB;
1368
    int fragmentPos;
1369

    
1370
    int xpos, i;
1371

    
1372
    // create an optimized horizontal scaling routine
1373

    
1374
    //code fragment
1375

    
1376
    /* Fragment A: loads source bytes at offset 0 and at offset 1 separately.
       After label 9 the address of the fragment (label 0), the offsets of the
       two pshufw immediates (label address minus 1, relative to label 0) and
       the fragment length are computed into the output operands. */
    asm volatile(
1377
        "jmp                         9f                 \n\t"
1378
    // Begin
1379
        "0:                                             \n\t"
1380
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1381
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1382
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1383
        "punpcklbw                %%mm7, %%mm1          \n\t"
1384
        "punpcklbw                %%mm7, %%mm0          \n\t"
1385
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1386
        "1:                                             \n\t"
1387
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1388
        "2:                                             \n\t"
1389
        "psubw                    %%mm1, %%mm0          \n\t"
1390
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1391
        "pmullw                   %%mm3, %%mm0          \n\t"
1392
        "psllw                       $7, %%mm1          \n\t"
1393
        "paddw                    %%mm1, %%mm0          \n\t"
1394

    
1395
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1396

    
1397
        "add                         $8, %%"REG_a"      \n\t"
1398
    // End
1399
        "9:                                             \n\t"
1400
//        "int $3                                         \n\t"
1401
        "lea                         0b, %0             \n\t"
1402
        "lea                         1b, %1             \n\t"
1403
        "lea                         2b, %2             \n\t"
1404
        "dec                         %1                 \n\t"
1405
        "dec                         %2                 \n\t"
1406
        "sub                         %0, %1             \n\t"
1407
        "sub                         %0, %2             \n\t"
1408
        "lea                         9b, %3             \n\t"
1409
        "sub                         %0, %3             \n\t"
1410

    
1411

    
1412
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1413
        "=r" (fragmentLengthA)
1414
    );
1415

    
1416
    /* Fragment B: performs a single source load and derives both shuffled
       registers from it. Address, immediate offsets and length are obtained
       the same way as for fragment A. */
    asm volatile(
1417
        "jmp                         9f                 \n\t"
1418
    // Begin
1419
        "0:                                             \n\t"
1420
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1421
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1422
        "punpcklbw                %%mm7, %%mm0          \n\t"
1423
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1424
        "1:                                             \n\t"
1425
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1426
        "2:                                             \n\t"
1427
        "psubw                    %%mm1, %%mm0          \n\t"
1428
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1429
        "pmullw                   %%mm3, %%mm0          \n\t"
1430
        "psllw                       $7, %%mm1          \n\t"
1431
        "paddw                    %%mm1, %%mm0          \n\t"
1432

    
1433
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1434

    
1435
        "add                         $8, %%"REG_a"      \n\t"
1436
    // End
1437
        "9:                                             \n\t"
1438
//        "int                       $3                   \n\t"
1439
        "lea                         0b, %0             \n\t"
1440
        "lea                         1b, %1             \n\t"
1441
        "lea                         2b, %2             \n\t"
1442
        "dec                         %1                 \n\t"
1443
        "dec                         %2                 \n\t"
1444
        "sub                         %0, %1             \n\t"
1445
        "sub                         %0, %2             \n\t"
1446
        "lea                         9b, %3             \n\t"
1447
        "sub                         %0, %3             \n\t"
1448

    
1449

    
1450
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1451
        "=r" (fragmentLengthB)
1452
    );
1453

    
1454
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1455
    fragmentPos=0;
1456

    
1457
    for (i=0; i<dstW/numSplits; i++)
1458
    {
1459
        /* integer part of the 16.16 source position */
        int xx=xpos>>16;
1460

    
1461
        /* one fragment is emitted per group of 4 output pixels */
        if ((i&3) == 0)
1462
        {
1463
            /* a..d: source offsets of the 4 pixels relative to xx */
            int a=0;
1464
            int b=((xpos+xInc)>>16) - xx;
1465
            int c=((xpos+xInc*2)>>16) - xx;
1466
            int d=((xpos+xInc*3)>>16) - xx;
1467

    
1468
            /* inverted fractional part of the position, reduced to 7 bits */
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1469
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1470
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1471
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1472
            filterPos[i/2]= xx;
1473

    
1474
            /* d+1 < 4: the 4 pixels and their right neighbours all lie within
               the 4 bytes fetched by one load, so fragment B is sufficient */
            if (d+1<4)
1475
            {
1476
                int maxShift= 3-(d+1);
1477
                int shift=0;
1478

    
1479
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1480

    
1481
                /* each pshufw immediate holds four 2-bit source selectors */
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1482
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1483
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1484
                    a | (b<<2) | (c<<4) | (d<<6);
1485

    
1486
                if (i+3>=dstW) shift=maxShift; //avoid overread
1487
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1488

    
1489
                /* move the load position back by shift bytes and add shift to
                   every selector (0x55 = 01010101b) to compensate */
                if (shift && i>=shift)
1490
                {
1491
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1492
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1493
                    filterPos[i/2]-=shift;
1494
                }
1495

    
1496
                fragmentPos+= fragmentLengthB;
1497
            }
1498
            else
1499
            {
1500
                int maxShift= 3-d;
1501
                int shift=0;
1502

    
1503
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1504

    
1505
                /* fragment A loads the right neighbours separately, so both
                   immediates get the same selectors */
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1506
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1507
                    a | (b<<2) | (c<<4) | (d<<6);
1508

    
1509
                if (i+4>=dstW) shift=maxShift; //avoid overread
1510
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1511

    
1512
                if (shift && i>=shift)
1513
                {
1514
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1515
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1516
                    filterPos[i/2]-=shift;
1517
                }
1518

    
1519
                fragmentPos+= fragmentLengthA;
1520
            }
1521

    
1522
            /* terminate the code generated so far; overwritten by the next
               fragment if there is one */
            funnyCode[fragmentPos]= RET;
1523
        }
1524
        xpos+=xInc;
1525
    }
1526
    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1527
}
1528
#endif /* COMPILE_MMX2 */
1529

    
1530
static void globalInit(void){
1531
    // generating tables:
1532
    int i;
1533
    for (i=0; i<768; i++){
1534
        int c= av_clip_uint8(i-256);
1535
        clip_table[i]=c;
1536
    }
1537
}
1538

    
1539
static SwsFunc getSwsFunc(int flags){
1540

    
1541
#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
1542
#if defined(ARCH_X86)
1543
    // ordered per speed fasterst first
1544
    if (flags & SWS_CPU_CAPS_MMX2)
1545
        return swScale_MMX2;
1546
    else if (flags & SWS_CPU_CAPS_3DNOW)
1547
        return swScale_3DNow;
1548
    else if (flags & SWS_CPU_CAPS_MMX)
1549
        return swScale_MMX;
1550
    else
1551
        return swScale_C;
1552

    
1553
#else
1554
#ifdef ARCH_POWERPC
1555
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1556
        return swScale_altivec;
1557
    else
1558
        return swScale_C;
1559
#endif
1560
    return swScale_C;
1561
#endif /* defined(ARCH_X86) */
1562
#else //RUNTIME_CPUDETECT
1563
#ifdef HAVE_MMX2
1564
    return swScale_MMX2;
1565
#elif defined (HAVE_3DNOW)
1566
    return swScale_3DNow;
1567
#elif defined (HAVE_MMX)
1568
    return swScale_MMX;
1569
#elif defined (HAVE_ALTIVEC)
1570
    return swScale_altivec;
1571
#else
1572
    return swScale_C;
1573
#endif
1574
#endif //!RUNTIME_CPUDETECT
1575
}
1576

    
1577
/**
 * Unscaled planar YUV 4:2:0 -> NV12/NV21 converter.
 *
 * The luma plane is copied as is; the two source chroma planes are merged
 * into destination plane 1 through interleaveBytes(). For PIX_FMT_NV12 the
 * order is src[1],src[2]; for any other destination format (NV21) the two
 * planes are swapped.
 *
 * @param c          scaler context (srcW and dstFormat are used)
 * @param src        source plane pointers, positioned at the first slice row
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  first luma row of the slice within the picture
 * @param srcSliceH  number of luma rows in the slice
 * @param dstParam   destination plane pointers (picture origin)
 * @param dstStride  destination line sizes in bytes
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* chroma size of the slice, rounded up so that odd luma sizes keep their
       last chroma column/row */
    const int chrW= (c->srcW   + 1) >> 1;
    const int chrH= (srcSliceH + 1) >> 1;

    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    /* interleaved chroma goes to plane 1, so plane 1's stride must be used
       both to locate the slice and to step from row to row */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes( src[1],src[2],dst,chrW,chrH,srcStride[1],srcStride[2],dstStride[1] );
    else
        interleaveBytes( src[2],src[1],dst,chrW,chrH,srcStride[2],srcStride[1],dstStride[1] );

    return srcSliceH;
}
1603

    
1604
/**
 * Unscaled planar -> packed wrapper around yv12toyuy2()
 * (selected for YUV420P -> YUYV422).
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride= dstStride[0];
    /* first output row of this slice */
    uint8_t *out= dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1612

    
1613
/**
 * Unscaled planar -> packed wrapper around yv12touyvy()
 * (selected for YUV420P -> UYVY422).
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride= dstStride[0];
    /* first output row of this slice */
    uint8_t *out= dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1621

    
1622
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1623
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1624
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1625
    const int srcFormat= c->srcFormat;
1626
    const int dstFormat= c->dstFormat;
1627
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1628
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1629
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1630
    const int dstId= fmt_depth(dstFormat) >> 2;
1631
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1632

    
1633
    /* BGR -> BGR */
1634
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
1635
       || (isRGB(srcFormat) && isRGB(dstFormat))){
1636
        switch(srcId | (dstId<<4)){
1637
        case 0x34: conv= rgb16to15; break;
1638
        case 0x36: conv= rgb24to15; break;
1639
        case 0x38: conv= rgb32to15; break;
1640
        case 0x43: conv= rgb15to16; break;
1641
        case 0x46: conv= rgb24to16; break;
1642
        case 0x48: conv= rgb32to16; break;
1643
        case 0x63: conv= rgb15to24; break;
1644
        case 0x64: conv= rgb16to24; break;
1645
        case 0x68: conv= rgb32to24; break;
1646
        case 0x83: conv= rgb15to32; break;
1647
        case 0x84: conv= rgb16to32; break;
1648
        case 0x86: conv= rgb24to32; break;
1649
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1650
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1651
        }
1652
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
1653
             || (isRGB(srcFormat) && isBGR(dstFormat))){
1654
        switch(srcId | (dstId<<4)){
1655
        case 0x33: conv= rgb15tobgr15; break;
1656
        case 0x34: conv= rgb16tobgr15; break;
1657
        case 0x36: conv= rgb24tobgr15; break;
1658
        case 0x38: conv= rgb32tobgr15; break;
1659
        case 0x43: conv= rgb15tobgr16; break;
1660
        case 0x44: conv= rgb16tobgr16; break;
1661
        case 0x46: conv= rgb24tobgr16; break;
1662
        case 0x48: conv= rgb32tobgr16; break;
1663
        case 0x63: conv= rgb15tobgr24; break;
1664
        case 0x64: conv= rgb16tobgr24; break;
1665
        case 0x66: conv= rgb24tobgr24; break;
1666
        case 0x68: conv= rgb32tobgr24; break;
1667
        case 0x83: conv= rgb15tobgr32; break;
1668
        case 0x84: conv= rgb16tobgr32; break;
1669
        case 0x86: conv= rgb24tobgr32; break;
1670
        case 0x88: conv= rgb32tobgr32; break;
1671
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1672
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1673
        }
1674
    }else{
1675
        av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1676
               sws_format_name(srcFormat), sws_format_name(dstFormat));
1677
    }
1678

    
1679
    if(conv)
1680
    {
1681
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1682
            conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1683
        else
1684
        {
1685
            int i;
1686
            uint8_t *srcPtr= src[0];
1687
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1688

    
1689
            for (i=0; i<srcSliceH; i++)
1690
            {
1691
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
1692
                srcPtr+= srcStride[0];
1693
                dstPtr+= dstStride[0];
1694
            }
1695
        }
1696
    }
1697
    return srcSliceH;
1698
}
1699

    
1700
/**
 * Unscaled wrapper around rgb24toyv12() (selected for BGR24 -> YUV420P).
 *
 * The three destination pointers are advanced to the slice position first:
 * plane 0 by srcSliceY rows, planes 1 and 2 by srcSliceY/2 rows.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int chrRow= srcSliceY>>1;
    uint8_t *plane0= dst[0] + srcSliceY*dstStride[0];
    uint8_t *plane1= dst[1] + chrRow   *dstStride[1];
    uint8_t *plane2= dst[2] + chrRow   *dstStride[2];

    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);
    return srcSliceH;
}
1712

    
1713
/**
 * Unscaled YUV410P -> YUV420P converter.
 *
 * Luma is copied unchanged; each chroma plane is passed through planar2x()
 * (presumably a 2x enlargement in both directions -- confirm against the
 * rgb2rgb helpers). If the destination format is not PIX_FMT_YUV420P the two
 * chroma planes are swapped.
 *
 * Only the chroma rows that belong to the current slice are processed and
 * they are written at the slice's position in the destination, so the
 * function also works when the picture is delivered in several slices.
 *
 * @param c          scaler context (srcW, chrSrcW, chrSrcVSubSample, dstFormat)
 * @param src        source plane pointers, positioned at the first slice row
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  first luma row of the slice within the picture
 * @param srcSliceH  number of luma rows in the slice
 * @param dst        destination plane pointers (picture origin)
 * @param dstStride  destination line sizes in bytes
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[]){
    int i;
    /* first destination chroma row of the slice (4:2:0 output) */
    const int chrDstY= srcSliceY>>1;
    /* source chroma rows in the slice, rounded up; equals c->chrSrcH when the
       slice is the whole picture */
    const int chrSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    else{
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    if (c->dstFormat==PIX_FMT_YUV420P){
        planar2x(src[1], dst[1] + dstStride[1]*chrDstY, c->chrSrcW, chrSliceH, srcStride[1], dstStride[1]);
        planar2x(src[2], dst[2] + dstStride[2]*chrDstY, c->chrSrcW, chrSliceH, srcStride[2], dstStride[2]);
    }else{
        planar2x(src[1], dst[2] + dstStride[2]*chrDstY, c->chrSrcW, chrSliceH, srcStride[1], dstStride[2]);
        planar2x(src[2], dst[1] + dstStride[1]*chrDstY, c->chrSrcW, chrSliceH, srcStride[2], dstStride[1]);
    }
    return srcSliceH;
}
1741

    
1742
/* unscaled copy like stuff (assumes nearly identical formats) */
1743
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1744
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
1745

    
1746
    if (isPacked(c->srcFormat))
1747
    {
1748
        if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1749
            memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1750
        else
1751
        {
1752
            int i;
1753
            uint8_t *srcPtr= src[0];
1754
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1755
            int length=0;
1756

    
1757
            /* universal length finder */
1758
            while(length+c->srcW <= FFABS(dstStride[0])
1759
               && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1760
            ASSERT(length!=0);
1761

    
1762
            for (i=0; i<srcSliceH; i++)
1763
            {
1764
                memcpy(dstPtr, srcPtr, length);
1765
                srcPtr+= srcStride[0];
1766
                dstPtr+= dstStride[0];
1767
            }
1768
        }
1769
    }
1770
    else
1771
    { /* Planar YUV or gray */
1772
        int plane;
1773
        for (plane=0; plane<3; plane++)
1774
        {
1775
            int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1776
            int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1777
            int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1778

    
1779
            if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1780
            {
1781
                if (!isGray(c->dstFormat))
1782
                    memset(dst[plane], 128, dstStride[plane]*height);
1783
            }
1784
            else
1785
            {
1786
                if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1787
                    memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1788
                else
1789
                {
1790
                    int i;
1791
                    uint8_t *srcPtr= src[plane];
1792
                    uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1793
                    for (i=0; i<height; i++)
1794
                    {
1795
                        memcpy(dstPtr, srcPtr, length);
1796
                        srcPtr+= srcStride[plane];
1797
                        dstPtr+= dstStride[plane];
1798
                    }
1799
                }
1800
            }
1801
        }
1802
    }
1803
    return srcSliceH;
1804
}
1805

    
1806
/**
 * Unscaled 16 bit gray -> 8 bit gray / planar YUV converter.
 *
 * For every pixel one byte of the 16 bit source sample is kept: the first
 * byte, or the second one when the source format is PIX_FMT_GRAY16LE.
 * If the destination is not a gray format, the slice's rows of planes 1 and
 * 2 are filled with 128.
 *
 * @param c          scaler context (formats, srcW, chrDst{H,V}SubSample)
 * @param src        source plane pointers, positioned at the first slice row
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  first row of the slice within the picture
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination plane pointers (picture origin)
 * @param dstStride  destination line sizes in bytes
 * @return srcSliceH
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* chroma geometry of this slice, rounded up like in simpleCopy() */
        const int chrLength= -((-length   )>>c->chrDstHSubSample);
        const int chrY=      -((-srcSliceY)>>c->chrDstVSubSample);
        const int chrHeight= -((-srcSliceH)>>c->chrDstVSubSample);
        int plane;

        /* fill row by row starting at the slice position, so that slices
           other than the first one and negative line sizes are handled */
        for (plane=1; plane<3; plane++)
        {
            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;
            for (i=0; i<chrHeight; i++)
            {
                memset(chrPtr, 128, chrLength);
                chrPtr+= dstStride[plane];
            }
        }
    }
    /* little endian: the most significant byte is the second one */
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
1830

    
1831
/**
 * Unscaled 8 bit gray (or the luma plane of planar YUV) -> 16 bit gray
 * converter: every source byte is written twice, to both bytes of the
 * corresponding 16 bit destination sample.
 *
 * @return srcSliceH
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int width= c->srcW;
    uint8_t *in = src[0];
    uint8_t *out= dst[0] + dstStride[0]*srcSliceY;
    int row, col;

    for (row=0; row<srcSliceH; row++, in+= srcStride[0], out+= dstStride[0])
    {
        for (col=0; col<width; col++)
        {
            uint8_t *sample= out + 2*col;
            sample[0]= in[col];
            sample[1]= in[col];
        }
    }
    return srcSliceH;
}
1852

    
1853
/**
 * Unscaled 16 bit gray endianness converter: every 16 bit sample of the
 * slice is byte swapped with bswap_16().
 *
 * @param c          scaler context (srcW is used)
 * @param src        source plane pointers, positioned at the first slice row
 * @param srcStride  source line sizes in bytes
 * @param srcSliceY  first row of the slice within the picture
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination plane pointers (picture origin)
 * @param dstStride  destination line sizes in bytes
 * @return srcSliceH
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcRow= src[0];
    /* Line sizes are in bytes, so the slice offset has to be applied to the
       byte pointer in full; halving it here (as if it were counted in 16 bit
       units) would place every slice but the first one at the wrong row. */
    uint8_t *dstRow= dst[0] + dstStride[0]*srcSliceY;

    for (i=0; i<height; i++)
    {
        const uint16_t *srcPtr= (const uint16_t*)srcRow;
        uint16_t *dstPtr= (uint16_t*)dstRow;

        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        srcRow+= srcStride[0];
        dstRow+= dstStride[0];
    }
    return srcSliceH;
}
1870

    
1871

    
1872
static void getSubSampleFactors(int *h, int *v, int format){
1873
    switch(format){
1874
    case PIX_FMT_UYVY422:
1875
    case PIX_FMT_YUYV422:
1876
        *h=1;
1877
        *v=0;
1878
        break;
1879
    case PIX_FMT_YUV420P:
1880
    case PIX_FMT_GRAY16BE:
1881
    case PIX_FMT_GRAY16LE:
1882
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1883
    case PIX_FMT_NV12:
1884
    case PIX_FMT_NV21:
1885
        *h=1;
1886
        *v=1;
1887
        break;
1888
    case PIX_FMT_YUV440P:
1889
        *h=0;
1890
        *v=1;
1891
        break;
1892
    case PIX_FMT_YUV410P:
1893
        *h=2;
1894
        *v=2;
1895
        break;
1896
    case PIX_FMT_YUV444P:
1897
        *h=0;
1898
        *v=0;
1899
        break;
1900
    case PIX_FMT_YUV422P:
1901
        *h=1;
1902
        *v=0;
1903
        break;
1904
    case PIX_FMT_YUV411P:
1905
        *h=2;
1906
        *v=0;
1907
        break;
1908
    default:
1909
        *h=0;
1910
        *v=0;
1911
        break;
1912
    }
1913
}
1914

    
1915
/**
 * Convert a 16.16 fixed point value to a rounded, saturated 16 bit integer.
 *
 * The value is rounded to nearest by adding 1<<15 before the shift. Results
 * above 0x7FFF saturate to 0x7FFF, results below -0x7FFF saturate to 0x8000
 * (the bit pattern of -32768).
 *
 * The intermediate is kept in 64 bits: narrowing it to int before the range
 * check would let large inputs wrap around instead of saturating.
 *
 * @param f value in 16.16 fixed point
 * @return the rounded value as a 16 bit two's complement bit pattern
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r= (f + (1<<15))>>16;

    if (r < -0x7FFF) return 0x8000;
    if (r >  0x7FFF) return 0x7FFF;
    return (uint16_t)r;
}
1921

    
1922
/**
1923
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1924
 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
1925
 * @return -1 if not supported
1926
 */
1927
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1928
    int64_t crv =  inv_table[0];
1929
    int64_t cbu =  inv_table[1];
1930
    int64_t cgu = -inv_table[2];
1931
    int64_t cgv = -inv_table[3];
1932
    int64_t cy  = 1<<16;
1933
    int64_t oy  = 0;
1934

    
1935
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1936
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1937
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1938

    
1939
    c->brightness= brightness;
1940
    c->contrast  = contrast;
1941
    c->saturation= saturation;
1942
    c->srcRange  = srcRange;
1943
    c->dstRange  = dstRange;
1944

    
1945
    c->uOffset=   0x0400040004000400LL;
1946
    c->vOffset=   0x0400040004000400LL;
1947

    
1948
    if (!srcRange){
1949
        cy= (cy*255) / 219;
1950
        oy= 16<<16;
1951
    }else{
1952
        crv= (crv*224) / 255;
1953
        cbu= (cbu*224) / 255;
1954
        cgu= (cgu*224) / 255;
1955
        cgv= (cgv*224) / 255;
1956
    }
1957

    
1958
    cy = (cy *contrast             )>>16;
1959
    crv= (crv*contrast * saturation)>>32;
1960
    cbu= (cbu*contrast * saturation)>>32;
1961
    cgu= (cgu*contrast * saturation)>>32;
1962
    cgv= (cgv*contrast * saturation)>>32;
1963

    
1964
    oy -= 256*brightness;
1965

    
1966
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1967
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1968
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1969
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1970
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1971
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1972

    
1973
    yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1974
    //FIXME factorize
1975

    
1976
#ifdef COMPILE_ALTIVEC
1977
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1978
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1979
#endif
1980
    return 0;
1981
}
1982

    
1983
/**
1984
 * @return -1 if not supported
1985
 */
1986
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1987
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1988

    
1989
    *inv_table = c->srcColorspaceTable;
1990
    *table     = c->dstColorspaceTable;
1991
    *srcRange  = c->srcRange;
1992
    *dstRange  = c->dstRange;
1993
    *brightness= c->brightness;
1994
    *contrast  = c->contrast;
1995
    *saturation= c->saturation;
1996

    
1997
    return 0;
1998
}
1999

    
2000
static int handle_jpeg(int *format)
2001
{
2002
    switch (*format) {
2003
        case PIX_FMT_YUVJ420P:
2004
            *format = PIX_FMT_YUV420P;
2005
            return 1;
2006
        case PIX_FMT_YUVJ422P:
2007
            *format = PIX_FMT_YUV422P;
2008
            return 1;
2009
        case PIX_FMT_YUVJ444P:
2010
            *format = PIX_FMT_YUV444P;
2011
            return 1;
2012
        case PIX_FMT_YUVJ440P:
2013
            *format = PIX_FMT_YUV440P;
2014
            return 1;
2015
        default:
2016
            return 0;
2017
    }
2018
}
2019

    
2020
SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
2021
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2022

    
2023
    SwsContext *c;
2024
    int i;
2025
    int usesVFilter, usesHFilter;
2026
    int unscaled, needsDither;
2027
    int srcRange, dstRange;
2028
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2029
#if defined(ARCH_X86)
2030
    if (flags & SWS_CPU_CAPS_MMX)
2031
        asm volatile("emms\n\t"::: "memory");
2032
#endif
2033

    
2034
#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
2035
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2036
#ifdef HAVE_MMX2
2037
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2038
#elif defined (HAVE_3DNOW)
2039
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2040
#elif defined (HAVE_MMX)
2041
    flags |= SWS_CPU_CAPS_MMX;
2042
#elif defined (HAVE_ALTIVEC)
2043
    flags |= SWS_CPU_CAPS_ALTIVEC;
2044
#elif defined (ARCH_BFIN)
2045
    flags |= SWS_CPU_CAPS_BFIN;
2046
#endif
2047
#endif /* RUNTIME_CPUDETECT */
2048
    if (clip_table[512] != 255) globalInit();
2049
    if (rgb15to16 == NULL) sws_rgb2rgb_init(flags);
2050

    
2051
    unscaled = (srcW == dstW && srcH == dstH);
2052
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2053
        && (fmt_depth(dstFormat))<24
2054
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2055

    
2056
    srcRange = handle_jpeg(&srcFormat);
2057
    dstRange = handle_jpeg(&dstFormat);
2058

    
2059
    if (!isSupportedIn(srcFormat))
2060
    {
2061
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
2062
        return NULL;
2063
    }
2064
    if (!isSupportedOut(dstFormat))
2065
    {
2066
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
2067
        return NULL;
2068
    }
2069

    
2070
    /* sanity check */
2071
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2072
    {
2073
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2074
               srcW, srcH, dstW, dstH);
2075
        return NULL;
2076
    }
2077

    
2078
    if (!dstFilter) dstFilter= &dummyFilter;
2079
    if (!srcFilter) srcFilter= &dummyFilter;
2080

    
2081
    c= av_mallocz(sizeof(SwsContext));
2082

    
2083
    c->av_class = &sws_context_class;
2084
    c->srcW= srcW;
2085
    c->srcH= srcH;
2086
    c->dstW= dstW;
2087
    c->dstH= dstH;
2088
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2089
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2090
    c->flags= flags;
2091
    c->dstFormat= dstFormat;
2092
    c->srcFormat= srcFormat;
2093
    c->vRounder= 4* 0x0001000100010001ULL;
2094

    
2095
    usesHFilter= usesVFilter= 0;
2096
    if (dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
2097
    if (dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
2098
    if (dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
2099
    if (dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
2100
    if (srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
2101
    if (srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
2102
    if (srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
2103
    if (srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
2104

    
2105
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2106
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2107

    
2108
    // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2109
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2110

    
2111
    // drop some chroma lines if the user wants it
2112
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2113
    c->chrSrcVSubSample+= c->vChrDrop;
2114

    
2115
    // drop every 2. pixel for chroma calculation unless user wants full chroma
2116
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2117
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2118
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2119
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE)
2120
        c->chrSrcHSubSample=1;
2121

    
2122
    if (param){
2123
        c->param[0] = param[0];
2124
        c->param[1] = param[1];
2125
    }else{
2126
        c->param[0] =
2127
        c->param[1] = SWS_PARAM_DEFAULT;
2128
    }
2129

    
2130
    c->chrIntHSubSample= c->chrDstHSubSample;
2131
    c->chrIntVSubSample= c->chrSrcVSubSample;
2132

    
2133
    // Note the -((-x)>>y) is so that we always round toward +inf.
2134
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2135
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2136
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2137
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2138

    
2139
    sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2140

    
2141
    /* unscaled special Cases */
2142
    if (unscaled && !usesHFilter && !usesVFilter)
2143
    {
2144
        /* yv12_to_nv12 */
2145
        if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2146
        {
2147
            c->swScale= PlanarToNV12Wrapper;
2148
        }
2149
#ifdef CONFIG_GPL
2150
        /* yuv2bgr */
2151
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2152
        {
2153
            c->swScale= yuv2rgb_get_func_ptr(c);
2154
        }
2155
#endif
2156

    
2157
        if ( srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P )
2158
        {
2159
            c->swScale= yvu9toyv12Wrapper;
2160
        }
2161

    
2162
        /* bgr24toYV12 */
2163
        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2164
            c->swScale= bgr24toyv12Wrapper;
2165

    
2166
        /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2167
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2168
           && (isBGR(dstFormat) || isRGB(dstFormat))
2169
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2170
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2171
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2172
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2173
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2174
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2175
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2176
           && !needsDither)
2177
             c->swScale= rgb2rgbWrapper;
2178

    
2179
        /* LQ converters if -sws 0 or -sws 4*/
2180
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2181
            /* rgb/bgr -> rgb/bgr (dither needed forms) */
2182
            if ( (isBGR(srcFormat) || isRGB(srcFormat))
2183
              && (isBGR(dstFormat) || isRGB(dstFormat))
2184
              && needsDither)
2185
                c->swScale= rgb2rgbWrapper;
2186

    
2187
            /* yv12_to_yuy2 */
2188
            if (srcFormat == PIX_FMT_YUV420P &&
2189
                (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422))
2190
            {
2191
                if (dstFormat == PIX_FMT_YUYV422)
2192
                    c->swScale= PlanarToYuy2Wrapper;
2193
                else
2194
                    c->swScale= PlanarToUyvyWrapper;
2195
            }
2196
        }
2197

    
2198
#ifdef COMPILE_ALTIVEC
2199
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2200
            ((srcFormat == PIX_FMT_YUV420P &&
2201
             (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) {
2202
          // unscaled YV12 -> packed YUV, we want speed
2203
          if (dstFormat == PIX_FMT_YUYV422)
2204
              c->swScale= yv12toyuy2_unscaled_altivec;
2205
          else
2206
              c->swScale= yv12touyvy_unscaled_altivec;
2207
        }
2208
#endif
2209

    
2210
        /* simple copy */
2211
        if (  srcFormat == dstFormat
2212
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2213
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)) )
2214
        {
2215
            c->swScale= simpleCopy;
2216
        }
2217

    
2218
        /* gray16{le,be} conversions */
2219
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2220
        {
2221
            c->swScale= gray16togray;
2222
        }
2223
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2224
        {
2225
            c->swScale= graytogray16;
2226
        }
2227
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2228
        {
2229
            c->swScale= gray16swap;
2230
        }
2231

    
2232
#ifdef ARCH_BFIN
2233
        if (flags & SWS_CPU_CAPS_BFIN)
2234
            ff_bfin_get_unscaled_swscale (c);
2235
#endif
2236

    
2237
        if (c->swScale){
2238
            if (flags&SWS_PRINT_INFO)
2239
                av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n",
2240
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2241
            return c;
2242
        }
2243
    }
2244

    
2245
    if (flags & SWS_CPU_CAPS_MMX2)
2246
    {
2247
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2248
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2249
        {
2250
            if (flags&SWS_PRINT_INFO)
2251
                av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2252
        }
2253
        if (usesHFilter) c->canMMX2BeUsed=0;
2254
    }
2255
    else
2256
        c->canMMX2BeUsed=0;
2257

    
2258
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2259
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2260

    
2261
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2262
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2263
    // n-2 is the last chrominance sample available
2264
    // this is not perfect, but no one should notice the difference, the more correct variant
2265
    // would be like the vertical one, but that would require some special code for the
2266
    // first and last pixel
2267
    if (flags&SWS_FAST_BILINEAR)
2268
    {
2269
        if (c->canMMX2BeUsed)
2270
        {
2271
            c->lumXInc+= 20;
2272
            c->chrXInc+= 20;
2273
        }
2274
        //we don't use the x86asm scaler if mmx is available
2275
        else if (flags & SWS_CPU_CAPS_MMX)
2276
        {
2277
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2278
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2279
        }
2280
    }
2281

    
2282
    /* precalculate horizontal scaler filter coefficients */
2283
    {
2284
        const int filterAlign=
2285
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2286
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2287
            1;
2288

    
2289
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2290
                   srcW      ,       dstW, filterAlign, 1<<14,
2291
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2292
                   srcFilter->lumH, dstFilter->lumH, c->param);
2293
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2294
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2295
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2296
                   srcFilter->chrH, dstFilter->chrH, c->param);
2297

    
2298
#define MAX_FUNNY_CODE_SIZE 10000
2299
#if defined(COMPILE_MMX2)
2300
// can't downscale !!!
2301
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2302
        {
2303
#ifdef MAP_ANONYMOUS
2304
            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2305
            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2306
#else
2307
            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2308
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2309
#endif
2310

    
2311
            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2312
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2313
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2314
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2315

    
2316
            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2317
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2318
        }
2319
#endif /* defined(COMPILE_MMX2) */
2320
    } // Init Horizontal stuff
2321

    
2322

    
2323

    
2324
    /* precalculate vertical scaler filter coefficients */
2325
    {
2326
        const int filterAlign=
2327
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2328
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2329
            1;
2330

    
2331
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2332
                   srcH      ,        dstH, filterAlign, (1<<12)-4,
2333
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2334
                   srcFilter->lumV, dstFilter->lumV, c->param);
2335
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2336
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
2337
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2338
                   srcFilter->chrV, dstFilter->chrV, c->param);
2339

    
2340
#ifdef HAVE_ALTIVEC
2341
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2342
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2343

    
2344
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2345
            int j;
2346
            short *p = (short *)&c->vYCoeffsBank[i];
2347
            for (j=0;j<8;j++)
2348
                p[j] = c->vLumFilter[i];
2349
        }
2350

    
2351
        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2352
            int j;
2353
            short *p = (short *)&c->vCCoeffsBank[i];
2354
            for (j=0;j<8;j++)
2355
                p[j] = c->vChrFilter[i];
2356
        }
2357
#endif
2358
    }
2359

    
2360
    // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2361
    c->vLumBufSize= c->vLumFilterSize;
2362
    c->vChrBufSize= c->vChrFilterSize;
2363
    for (i=0; i<dstH; i++)
2364
    {
2365
        int chrI= i*c->chrDstH / dstH;
2366
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2367
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2368

    
2369
        nextSlice>>= c->chrSrcVSubSample;
2370
        nextSlice<<= c->chrSrcVSubSample;
2371
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2372
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
2373
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2374
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2375
    }
2376

    
2377
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2378
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2379
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2380
    //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2381
    /* align at 16 bytes for AltiVec */
2382
    for (i=0; i<c->vLumBufSize; i++)
2383
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000);
2384
    for (i=0; i<c->vChrBufSize; i++)
2385
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);
2386

    
2387
    //try to avoid drawing green stuff between the right end and the stride end
2388
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2389

    
2390
    ASSERT(c->chrDstH <= dstH)
2391

    
2392
    if (flags&SWS_PRINT_INFO)
2393
    {
2394
#ifdef DITHER1XBPP
2395
        char *dither= " dithered";
2396
#else
2397
        char *dither= "";
2398
#endif
2399
        if (flags&SWS_FAST_BILINEAR)
2400
            av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, ");
2401
        else if (flags&SWS_BILINEAR)
2402
            av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, ");
2403
        else if (flags&SWS_BICUBIC)
2404
            av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, ");
2405
        else if (flags&SWS_X)
2406
            av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, ");
2407
        else if (flags&SWS_POINT)
2408
            av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, ");
2409
        else if (flags&SWS_AREA)
2410
            av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, ");
2411
        else if (flags&SWS_BICUBLIN)
2412
            av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2413
        else if (flags&SWS_GAUSS)
2414
            av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, ");
2415
        else if (flags&SWS_SINC)
2416
            av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, ");
2417
        else if (flags&SWS_LANCZOS)
2418
            av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, ");
2419
        else if (flags&SWS_SPLINE)
2420
            av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, ");
2421
        else
2422
            av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! ");
2423

    
2424
        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2425
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2426
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2427
        else
2428
            av_log(c, AV_LOG_INFO, "from %s to %s ",
2429
                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2430

    
2431
        if (flags & SWS_CPU_CAPS_MMX2)
2432
            av_log(c, AV_LOG_INFO, "using MMX2\n");
2433
        else if (flags & SWS_CPU_CAPS_3DNOW)
2434
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2435
        else if (flags & SWS_CPU_CAPS_MMX)
2436
            av_log(c, AV_LOG_INFO, "using MMX\n");
2437
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2438
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2439
        else
2440
            av_log(c, AV_LOG_INFO, "using C\n");
2441
    }
2442

    
2443
    if (flags & SWS_PRINT_INFO)
2444
    {
2445
        if (flags & SWS_CPU_CAPS_MMX)
2446
        {
2447
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2448
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2449
            else
2450
            {
2451
                if (c->hLumFilterSize==4)
2452
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2453
                else if (c->hLumFilterSize==8)
2454
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2455
                else
2456
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2457

    
2458
                if (c->hChrFilterSize==4)
2459
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2460
                else if (c->hChrFilterSize==8)
2461
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2462
                else
2463
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2464
            }
2465
        }
2466
        else
2467
        {
2468
#if defined(ARCH_X86)
2469
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n");
2470
#else
2471
            if (flags & SWS_FAST_BILINEAR)
2472
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2473
            else
2474
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n");
2475
#endif
2476
        }
2477
        if (isPlanarYUV(dstFormat))
2478
        {
2479
            if (c->vLumFilterSize==1)
2480
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2481
            else
2482
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2483
        }
2484
        else
2485
        {
2486
            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2487
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2488
                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2489
            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2490
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2491
            else
2492
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2493
        }
2494

    
2495
        if (dstFormat==PIX_FMT_BGR24)
2496
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n",
2497
                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2498
        else if (dstFormat==PIX_FMT_RGB32)
2499
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2500
        else if (dstFormat==PIX_FMT_BGR565)
2501
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2502
        else if (dstFormat==PIX_FMT_BGR555)
2503
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2504

    
2505
        av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2506
    }
2507
    if (flags & SWS_PRINT_INFO)
2508
    {
2509
        av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2510
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2511
        av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2512
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2513
    }
2514

    
2515
    c->swScale= getSwsFunc(flags);
2516
    return c;
2517
}
2518

    
2519
/**
2520
 * swscale warper, so we don't need to export the SwsContext.
2521
 * assumes planar YUV to be in YUV order instead of YVU
2522
 */
2523
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2524
              int srcSliceH, uint8_t* dst[], int dstStride[]){
2525
    int i;
2526
    uint8_t* src2[4]= {src[0], src[1], src[2]};
2527
    uint32_t pal[256];
2528
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2529
        av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n");
2530
        return 0;
2531
    }
2532
    if (c->sliceDir == 0) {
2533
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2534
    }
2535

    
2536
    if (c->srcFormat == PIX_FMT_PAL8){
2537
        for (i=0; i<256; i++){
2538
            int p= ((uint32_t*)(src[1]))[i];
2539
            int r= (p>>16)&0xFF;
2540
            int g= (p>> 8)&0xFF;
2541
            int b=  p     &0xFF;
2542
            int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 );
2543
            int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128);
2544
            int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128);
2545
            pal[i]= y + (u<<8) + (v<<16);
2546
        }
2547
        src2[1]= pal;
2548
    }
2549

    
2550
    // copy strides, so they can safely be modified
2551
    if (c->sliceDir == 1) {
2552
        // slices go from top to bottom
2553
        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2554
        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2555
        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2556
    } else {
2557
        // slices go from bottom to top => we flip the image internally
2558
        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2559
                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2560
                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2561
        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2562
        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2563

    
2564
        src2[0] += (srcSliceH-1)*srcStride[0];
2565
        if (c->srcFormat != PIX_FMT_PAL8)
2566
            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2567
        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2568

    
2569
        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2570
    }
2571
}
2572

    
2573
/**
2574
 * swscale warper, so we don't need to export the SwsContext
2575
 */
2576
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2577
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2578
    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2579
}
2580

    
2581
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2582
                                float lumaSharpen, float chromaSharpen,
2583
                                float chromaHShift, float chromaVShift,
2584
                                int verbose)
2585
{
2586
    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2587

    
2588
    if (lumaGBlur!=0.0){
2589
        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2590
        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2591
    }else{
2592
        filter->lumH= sws_getIdentityVec();
2593
        filter->lumV= sws_getIdentityVec();
2594
    }
2595

    
2596
    if (chromaGBlur!=0.0){
2597
        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2598
        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2599
    }else{
2600
        filter->chrH= sws_getIdentityVec();
2601
        filter->chrV= sws_getIdentityVec();
2602
    }
2603

    
2604
    if (chromaSharpen!=0.0){
2605
        SwsVector *id= sws_getIdentityVec();
2606
        sws_scaleVec(filter->chrH, -chromaSharpen);
2607
        sws_scaleVec(filter->chrV, -chromaSharpen);
2608
        sws_addVec(filter->chrH, id);
2609
        sws_addVec(filter->chrV, id);
2610
        sws_freeVec(id);
2611
    }
2612

    
2613
    if (lumaSharpen!=0.0){
2614
        SwsVector *id= sws_getIdentityVec();
2615
        sws_scaleVec(filter->lumH, -lumaSharpen);
2616
        sws_scaleVec(filter->lumV, -lumaSharpen);
2617
        sws_addVec(filter->lumH, id);
2618
        sws_addVec(filter->lumV, id);
2619
        sws_freeVec(id);
2620
    }
2621

    
2622
    if (chromaHShift != 0.0)
2623
        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2624

    
2625
    if (chromaVShift != 0.0)
2626
        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2627

    
2628
    sws_normalizeVec(filter->chrH, 1.0);
2629
    sws_normalizeVec(filter->chrV, 1.0);
2630
    sws_normalizeVec(filter->lumH, 1.0);
2631
    sws_normalizeVec(filter->lumV, 1.0);
2632

    
2633
    if (verbose) sws_printVec(filter->chrH);
2634
    if (verbose) sws_printVec(filter->lumH);
2635

    
2636
    return filter;
2637
}
2638

    
2639
/**
2640
 * returns a normalized gaussian curve used to filter stuff
2641
 * quality=3 is high quality, lowwer is lowwer quality
2642
 */
2643
SwsVector *sws_getGaussianVec(double variance, double quality){
2644
    const int length= (int)(variance*quality + 0.5) | 1;
2645
    int i;
2646
    double *coeff= av_malloc(length*sizeof(double));
2647
    double middle= (length-1)*0.5;
2648
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2649

    
2650
    vec->coeff= coeff;
2651
    vec->length= length;
2652

    
2653
    for (i=0; i<length; i++)
2654
    {
2655
        double dist= i-middle;
2656
        coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2657
    }
2658

    
2659
    sws_normalizeVec(vec, 1.0);
2660

    
2661
    return vec;
2662
}
2663

    
2664
SwsVector *sws_getConstVec(double c, int length){
2665
    int i;
2666
    double *coeff= av_malloc(length*sizeof(double));
2667
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2668

    
2669
    vec->coeff= coeff;
2670
    vec->length= length;
2671

    
2672
    for (i=0; i<length; i++)
2673
        coeff[i]= c;
2674

    
2675
    return vec;
2676
}
2677

    
2678

    
2679
/**
 * Allocates the identity vector: a single coefficient of value 1.0.
 */
SwsVector *sws_getIdentityVec(void){
    SwsVector *identity= sws_getConstVec(1.0, 1);

    return identity;
}
2682

    
2683
/**
 * Returns the sum of all coefficients of a.
 */
double sws_dcVec(SwsVector *a){
    double total= 0;
    int k= 0;

    // accumulate front to back
    while (k < a->length){
        total+= a->coeff[k];
        k++;
    }

    return total;
}
2692

    
2693
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
    const int count= a->length;
    int k= 0;

    while (k < count){
        a->coeff[k]= a->coeff[k] * scalar;
        k++;
    }
}
2699

    
2700
/**
 * Scales a in place so that its coefficients sum up to height.
 */
void sws_normalizeVec(SwsVector *a, double height){
    const double factor= height/sws_dcVec(a);

    sws_scaleVec(a, factor);
}
2703

    
2704
/**
 * Allocates a new vector holding the convolution of a and b.
 * The result has a->length + b->length - 1 coefficients.
 *
 * @return the newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
    int length= a->length + b->length - 1;
    double *coeff= av_malloc(length*sizeof(double));
    int i, j;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++)
    {
        for (j=0; j<b->length; j++)
        {
            coeff[i+j]+= a->coeff[i]*b->coeff[j];
        }
    }

    return vec;
}
2725

    
2726
/**
 * Allocates a new vector holding a + b.
 * The result is as long as the longer input; both inputs are aligned on
 * their middle elements before being added.
 *
 * @return the newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

    return vec;
}
2742

    
2743
/**
 * Allocates a new vector holding a - b.
 * The result is as long as the longer input; both inputs are aligned on
 * their middle elements before being subtracted.
 *
 * @return the newly allocated vector, or NULL if an allocation failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

    return vec;
}
2759

    
2760
/* shift left / or right if "shift" is negative */
2761
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2762
    int length= a->length + FFABS(shift)*2;
2763
    double *coeff= av_malloc(length*sizeof(double));
2764
    int i;
2765
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2766

    
2767
    vec->coeff= coeff;
2768
    vec->length= length;
2769

    
2770
    for (i=0; i<length; i++) coeff[i]= 0.0;
2771

    
2772
    for (i=0; i<a->length; i++)
2773
    {
2774
        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2775
    }
2776

    
2777
    return vec;
2778
}
2779

    
2780
/**
 * Shifts a in place by "shift" coefficients (see sws_getShiftedVec()).
 * The coefficient array of a is replaced by a newly allocated one.
 * If the shifted vector cannot be created, a is left unchanged.
 */
void sws_shiftVec(SwsVector *a, int shift){
    SwsVector *shifted= sws_getShiftedVec(a, shift);

    // keep the old coefficients rather than dereferencing a failed result
    if (!shifted)
        return;

    av_free(a->coeff);
    a->coeff= shifted->coeff;
    a->length= shifted->length;
    av_free(shifted); // only the container; its coefficients now belong to a
}
2787

    
2788
/**
 * Adds b to a in place (see sws_sumVec()).
 * The coefficient array of a is replaced by a newly allocated one.
 * If the sum cannot be created, a is left unchanged.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
    SwsVector *sum= sws_sumVec(a, b);

    // keep the old coefficients rather than dereferencing a failed result
    if (!sum)
        return;

    av_free(a->coeff);
    a->coeff= sum->coeff;
    a->length= sum->length;
    av_free(sum); // only the container; its coefficients now belong to a
}
2795

    
2796
/**
 * Subtracts b from a in place (see sws_diffVec()).
 * The coefficient array of a is replaced by a newly allocated one.
 * If the difference cannot be created, a is left unchanged.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
    SwsVector *diff= sws_diffVec(a, b);

    // keep the old coefficients rather than dereferencing a failed result
    if (!diff)
        return;

    av_free(a->coeff);
    a->coeff= diff->coeff;
    a->length= diff->length;
    av_free(diff); // only the container; its coefficients now belong to a
}
2803

    
2804
/**
 * Convolves a with b in place (see sws_getConvVec()).
 * The coefficient array of a is replaced by a newly allocated one.
 * If the convolution cannot be created, a is left unchanged.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
    SwsVector *conv= sws_getConvVec(a, b);

    // keep the old coefficients rather than dereferencing a failed result
    if (!conv)
        return;

    av_free(a->coeff);
    a->coeff= conv->coeff;
    a->length= conv->length;
    av_free(conv); // only the container; its coefficients now belong to a
}
2811

    
2812
/**
 * Allocates a copy of a with its own coefficient array.
 *
 * @return the newly allocated vector, or NULL if an allocation failed
 */
SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}
2824

    
2825
/**
 * Logs a at AV_LOG_DEBUG level as a crude ASCII plot: one line per
 * coefficient, showing its value followed by a '|' marker whose column
 * (0..60) is proportional to the value within the [min, max] range.
 * Both min and max start at 0, so the range always includes 0.
 */
void sws_printVec(SwsVector *a){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        // an all-zero vector has range 0; avoid 0/0 and the NaN -> int conversion
        int x= range != 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
        av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
        av_log(NULL, AV_LOG_DEBUG, "|\n");
    }
}
2847

    
2848
/**
 * Releases a vector and its coefficient array. Passing NULL is a no-op.
 */
void sws_freeVec(SwsVector *a){
    if (a != NULL){
        av_free(a->coeff);
        // scrub the fields before the container itself goes away
        a->coeff= NULL;
        a->length= 0;
        av_free(a);
    }
}
2855

    
2856
/**
 * Releases a filter together with its four vectors. Passing NULL is a no-op.
 */
void sws_freeFilter(SwsFilter *filter){
    if (filter == NULL)
        return;

    // sws_freeVec() ignores NULL members itself
    sws_freeVec(filter->lumH);
    sws_freeVec(filter->lumV);
    sws_freeVec(filter->chrH);
    sws_freeVec(filter->chrV);

    av_free(filter);
}
2865

    
2866

    
2867
/**
 * Releases a scaler context and every buffer it owns.
 * Passing NULL is a no-op.
 */
void sws_freeContext(SwsContext *c){
    int i;

    if (c == NULL)
        return;

    /* Luma line buffers: only the first vLumBufSize slots are released. */
    if (c->lumPixBuf != NULL)
    {
        i= 0;
        while (i < c->vLumBufSize)
        {
            av_free(c->lumPixBuf[i]);
            c->lumPixBuf[i]= NULL;
            i++;
        }
        av_free(c->lumPixBuf);
        c->lumPixBuf= NULL;
    }

    /* Chroma line buffers: only the first vChrBufSize slots are released. */
    if (c->chrPixBuf != NULL)
    {
        i= 0;
        while (i < c->vChrBufSize)
        {
            av_free(c->chrPixBuf[i]);
            c->chrPixBuf[i]= NULL;
            i++;
        }
        av_free(c->chrPixBuf);
        c->chrPixBuf= NULL;
    }

    /* filter coefficient tables */
    av_free(c->vLumFilter);
    c->vLumFilter= NULL;
    av_free(c->vChrFilter);
    c->vChrFilter= NULL;
    av_free(c->hLumFilter);
    c->hLumFilter= NULL;
    av_free(c->hChrFilter);
    c->hChrFilter= NULL;
#ifdef HAVE_ALTIVEC
    av_free(c->vYCoeffsBank);
    c->vYCoeffsBank= NULL;
    av_free(c->vCCoeffsBank);
    c->vCCoeffsBank= NULL;
#endif

    /* filter position tables */
    av_free(c->vLumFilterPos);
    c->vLumFilterPos= NULL;
    av_free(c->vChrFilterPos);
    c->vChrFilterPos= NULL;
    av_free(c->hLumFilterPos);
    c->hLumFilterPos= NULL;
    av_free(c->hChrFilterPos);
    c->hChrFilterPos= NULL;

    /* run-time generated scaler code: unmapped when MAP_ANONYMOUS is available, freed otherwise */
#if defined(ARCH_X86) && defined(CONFIG_GPL)
#ifdef MAP_ANONYMOUS
    if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
    av_free(c->funnyYCode);
    av_free(c->funnyUVCode);
#endif
    c->funnyYCode= NULL;
    c->funnyUVCode= NULL;
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */

    /* MMX2 horizontal scaler tables and the YUV lookup table */
    av_free(c->lumMmx2Filter);
    c->lumMmx2Filter= NULL;
    av_free(c->chrMmx2Filter);
    c->chrMmx2Filter= NULL;
    av_free(c->lumMmx2FilterPos);
    c->lumMmx2FilterPos= NULL;
    av_free(c->chrMmx2FilterPos);
    c->chrMmx2FilterPos= NULL;
    av_free(c->yuvTable);
    c->yuvTable= NULL;

    av_free(c);
}
2942

    
2943
/**
2944
 * Checks if context is valid or reallocs a new one instead.
2945
 * If context is NULL, just calls sws_getContext() to get a new one.
2946
 * Otherwise, checks if the parameters are the same already saved in context.
2947
 * If that is the case, returns the current context.
2948
 * Otherwise, frees context and gets a new one.
2949
 *
2950
 * Be warned that srcFilter, dstFilter are not checked, they are
2951
 * asumed to remain valid.
2952
 */
2953
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
2954
                                        int srcW, int srcH, int srcFormat,
2955
                                        int dstW, int dstH, int dstFormat, int flags,
2956
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
2957
{
2958
    if (context != NULL) {
2959
        if ((context->srcW != srcW) || (context->srcH != srcH) ||
2960
            (context->srcFormat != srcFormat) ||
2961
            (context->dstW != dstW) || (context->dstH != dstH) ||
2962
            (context->dstFormat != dstFormat) || (context->flags != flags) ||
2963
            (context->param != param))
2964
        {
2965
            sws_freeContext(context);
2966
            context = NULL;
2967
        }
2968
    }
2969
    if (context == NULL) {
2970
        return sws_getContext(srcW, srcH, srcFormat,
2971
                              dstW, dstH, dstFormat, flags,
2972
                              srcFilter, dstFilter, param);
2973
    }
2974
    return context;
2975
}
2976