Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ d334c7c2

History | View | Annotate | Download (101 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22
 */
23

    
24
/*
25
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09, PAL8
26
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27
  {BGR,RGB}{1,4,8,15,16} support dithering
28

29
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37
  BGR15 -> BGR16
38
*/
39

    
40
/*
41
tested special converters (most are actually tested, but I didn't write it down ...)
42
 YV12 -> BGR16
43
 YV12 -> YV12
44
 BGR15 -> BGR16
45
 BGR16 -> BGR16
46
 YVU9 -> YV12
47

48
untested special converters
49
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
50
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54
  BGR24 -> YV12
55
*/
56

    
57
#include <inttypes.h>
58
#include <string.h>
59
#include <math.h>
60
#include <stdio.h>
61
#include <unistd.h>
62
#include "config.h"
63
#include <assert.h>
64
#ifdef HAVE_SYS_MMAN_H
65
#include <sys/mman.h>
66
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69
#endif
70
#include "swscale.h"
71
#include "swscale_internal.h"
72
#include "x86_cpu.h"
73
#include "bswap.h"
74
#include "rgb2rgb.h"
75
#include "libavcodec/opt.h"
76

    
77
/* Make sure MOVNTQ and PAVGB are undefined from this point on. */
#undef MOVNTQ
#undef PAVGB

/* Developer switches: uncomment a line to force that configuration. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit

#define RET 0xC3 //near return opcode for X86

/*
 * ASSERT(x): forwards to assert(x) when MP_DEBUG is defined, otherwise
 * expands to an empty statement. The expansion already carries its own ';'.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: use the libm constant when available, else a literal fallback. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
102

    
103
/*
 * isSupportedIn(x): non-zero when pixel format x is one of the input
 * formats listed below. x is evaluated several times.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_YUV440P     \
    )
/*
 * isSupportedOut(x): non-zero when pixel format x is one of the output
 * formats listed below, or satisfies isRGB()/isBGR() (defined elsewhere).
 */
#define isSupportedOut(x)   (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || isRGB(x)                 \
        || isBGR(x)                 \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
    )
/*
 * isPacked(x): non-zero for PAL8, YUYV422, UYVY422 and every format
 * accepted by isRGB()/isBGR().
 */
#define isPacked(x)         (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || isRGB(x)                 \
        || isBGR(x)                 \
    )
153

    
154
/*
 * Fixed-point RGB->YUV coefficients: each floating-point factor is scaled by
 * 2^RGB2YUV_SHIFT, 0.5 is added and the result is truncated to int.
 * The first letter names the source component (B/G/R), the second the
 * destination component (Y/U/V).
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))

/* Defined in another translation unit. */
extern const int32_t Inverse_Table_6_9[8][4];
166

    
167
/*
168
NOTES
169
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
170

171
TODO
172
more intelligent misalignment avoidance for the horizontal scaler
173
write special vertical cubic upscale version
174
Optimize C code (yv12 / minmax)
175
add support for packed pixel yuv input & output
176
add support for Y8 output
177
optimize bgr24 & bgr32
178
add BGR4 output support
179
write special BGR->BGR scaler
180
*/
181

    
182
#if defined(ARCH_X86) && defined (CONFIG_GPL)
183
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
184
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
185
DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
186
DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
187
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
188
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
189
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
190
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
191

    
192
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
193
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
194
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
195
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
196

    
197
static uint64_t __attribute__((aligned(8))) dither4[2]={
198
        0x0103010301030103LL,
199
        0x0200020002000200LL,};
200

    
201
static uint64_t __attribute__((aligned(8))) dither8[2]={
202
        0x0602060206020602LL,
203
        0x0004000400040004LL,};
204

    
205
DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
206
DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
207
DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
208
DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
209
DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
210
DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
211

    
212
DECLARE_ASM_CONST(8, uint64_t, M24A)=      0x00FF0000FF0000FFLL;
213
DECLARE_ASM_CONST(8, uint64_t, M24B)=      0xFF0000FF0000FF00LL;
214
DECLARE_ASM_CONST(8, uint64_t, M24C)=      0x0000FF0000FF0000LL;
215

    
216
#ifdef FAST_BGR2YV12
217
DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff)   = 0x000000210041000DULL;
218
DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
219
DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
220
#else
221
DECLARE_ASM_CONST(8, uint64_t, bgr2YCoeff)   = 0x000020E540830C8BULL;
222
DECLARE_ASM_CONST(8, uint64_t, bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
223
DECLARE_ASM_CONST(8, uint64_t, bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
224
#endif /* FAST_BGR2YV12 */
225
DECLARE_ASM_CONST(8, uint64_t, bgr2YOffset)  = 0x1010101010101010ULL;
226
DECLARE_ASM_CONST(8, uint64_t, bgr2UVOffset) = 0x8080808080808080ULL;
227
DECLARE_ASM_CONST(8, uint64_t, w1111)        = 0x0001000100010001ULL;
228
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */
229

    
230
// clipping helper table for C implementations:
231
static unsigned char clip_table[768];
232

    
233
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
234

    
235
extern const uint8_t dither_2x2_4[2][8];
236
extern const uint8_t dither_2x2_8[2][8];
237
extern const uint8_t dither_8x8_32[8][8];
238
extern const uint8_t dither_8x8_73[8][8];
239
extern const uint8_t dither_8x8_220[8][8];
240

    
241
/**
 * Name callback used to initialise sws_context_class.
 *
 * @param ptr ignored
 * @return the constant string "swscaler"
 */
static const char * sws_context_to_name(void * ptr) {
    (void)ptr; /* the name does not depend on the context */
    return "swscaler";
}
244

    
245
#define OFFSET(x) offsetof(SwsContext, x)
246
#define DEFAULT 0
247
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
248

    
249
static const AVOption options[] = {
250
    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, VE, "sws_flags" },
251
    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
252
    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
253
    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
254
    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
255
    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
256
    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
257
    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
258
    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
259
    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
260
    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
261
    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
262
    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
263
    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
264
    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
265
    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
266
    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
267
    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
268
    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
269
    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
270
    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
271
    { NULL }
272
};
273

    
274
#undef VE
275
#undef DEFAULT
276

    
277
static AVClass sws_context_class = { "SWScaler", sws_context_to_name, options };
278

    
279
char *sws_format_name(enum PixelFormat format)
280
{
281
    switch (format) {
282
        case PIX_FMT_YUV420P:
283
            return "yuv420p";
284
        case PIX_FMT_YUVA420P:
285
            return "yuva420p";
286
        case PIX_FMT_YUYV422:
287
            return "yuyv422";
288
        case PIX_FMT_RGB24:
289
            return "rgb24";
290
        case PIX_FMT_BGR24:
291
            return "bgr24";
292
        case PIX_FMT_YUV422P:
293
            return "yuv422p";
294
        case PIX_FMT_YUV444P:
295
            return "yuv444p";
296
        case PIX_FMT_RGB32:
297
            return "rgb32";
298
        case PIX_FMT_YUV410P:
299
            return "yuv410p";
300
        case PIX_FMT_YUV411P:
301
            return "yuv411p";
302
        case PIX_FMT_RGB565:
303
            return "rgb565";
304
        case PIX_FMT_RGB555:
305
            return "rgb555";
306
        case PIX_FMT_GRAY16BE:
307
            return "gray16be";
308
        case PIX_FMT_GRAY16LE:
309
            return "gray16le";
310
        case PIX_FMT_GRAY8:
311
            return "gray8";
312
        case PIX_FMT_MONOWHITE:
313
            return "mono white";
314
        case PIX_FMT_MONOBLACK:
315
            return "mono black";
316
        case PIX_FMT_PAL8:
317
            return "Palette";
318
        case PIX_FMT_YUVJ420P:
319
            return "yuvj420p";
320
        case PIX_FMT_YUVJ422P:
321
            return "yuvj422p";
322
        case PIX_FMT_YUVJ444P:
323
            return "yuvj444p";
324
        case PIX_FMT_XVMC_MPEG2_MC:
325
            return "xvmc_mpeg2_mc";
326
        case PIX_FMT_XVMC_MPEG2_IDCT:
327
            return "xvmc_mpeg2_idct";
328
        case PIX_FMT_UYVY422:
329
            return "uyvy422";
330
        case PIX_FMT_UYYVYY411:
331
            return "uyyvyy411";
332
        case PIX_FMT_RGB32_1:
333
            return "rgb32x";
334
        case PIX_FMT_BGR32_1:
335
            return "bgr32x";
336
        case PIX_FMT_BGR32:
337
            return "bgr32";
338
        case PIX_FMT_BGR565:
339
            return "bgr565";
340
        case PIX_FMT_BGR555:
341
            return "bgr555";
342
        case PIX_FMT_BGR8:
343
            return "bgr8";
344
        case PIX_FMT_BGR4:
345
            return "bgr4";
346
        case PIX_FMT_BGR4_BYTE:
347
            return "bgr4 byte";
348
        case PIX_FMT_RGB8:
349
            return "rgb8";
350
        case PIX_FMT_RGB4:
351
            return "rgb4";
352
        case PIX_FMT_RGB4_BYTE:
353
            return "rgb4 byte";
354
        case PIX_FMT_NV12:
355
            return "nv12";
356
        case PIX_FMT_NV21:
357
            return "nv21";
358
        case PIX_FMT_YUV440P:
359
            return "yuv440p";
360
        default:
361
            return "Unknown format";
362
    }
363
}
364

    
365
#if defined(ARCH_X86) && defined (CONFIG_GPL)
/*
 * Reads each constant / dither variable named below once and folds the
 * values into a volatile local.
 * NOTE(review): judging by the name, this exists only to silence
 * "unused variable" warnings for symbols referenced solely from inline
 * asm - confirm before removing.
 */
void in_asm_used_var_warning_killer(void)
{
    volatile int i= bF8+bFC+w10+
    bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
    M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
    if (i) i=0;
}
#endif
374

    
375
/**
 * Plain C multi-tap filter producing planar 8-bit output.
 *
 * Every output sample is the sum of filterSize source samples (one from each
 * source buffer, all at the same horizontal index) times the matching
 * coefficient, plus a rounding term of 1<<18, shifted right by 19 and
 * clipped with av_clip_uint8().
 *
 * @param lumFilter     lumFilterSize luma coefficients
 * @param lumSrc        lumFilterSize luma source buffers
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chrFilterSize chroma coefficients
 * @param chrSrc        chrFilterSize chroma source buffers; the value used
 *                      for V is read 2048 entries after the one used for U
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes are written
 * @param uDest         U output, chrDstW bytes; if NULL, chroma is skipped
 * @param vDest         V output, chrDstW bytes; only written when uDest is
 *                      non-NULL
 * @param dstW          number of luma samples
 * @param chrDstW       number of chroma samples per plane
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    int x, tap;

    /* luma plane */
    for (x = 0; x < dstW; x++) {
        int sum = 1 << 18; /* half of 1<<19: rounds the shift below */

        for (tap = 0; tap < lumFilterSize; tap++)
            sum += lumSrc[tap][x] * lumFilter[tap];

        dest[x] = av_clip_uint8(sum >> 19);
    }

    if (!uDest)
        return;

    /* chroma planes */
    for (x = 0; x < chrDstW; x++) {
        int sumU = 1 << 18;
        int sumV = 1 << 18;

        for (tap = 0; tap < chrFilterSize; tap++) {
            sumU += chrSrc[tap][x] * chrFilter[tap];
            sumV += chrSrc[tap][x + 2048] * chrFilter[tap];
        }

        uDest[x] = av_clip_uint8(sumU >> 19);
        vDest[x] = av_clip_uint8(sumV >> 19);
    }
}
407

    
408
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
409
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
410
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
411
{
412
    //FIXME Optimize (just quickly writen not opti..)
413
    int i;
414
    for (i=0; i<dstW; i++)
415
    {
416
        int val=1<<18;
417
        int j;
418
        for (j=0; j<lumFilterSize; j++)
419
            val += lumSrc[j][i] * lumFilter[j];
420

    
421
        dest[i]= av_clip_uint8(val>>19);
422
    }
423

    
424
    if (!uDest)
425
        return;
426

    
427
    if (dstFormat == PIX_FMT_NV12)
428
        for (i=0; i<chrDstW; i++)
429
        {
430
            int u=1<<18;
431
            int v=1<<18;
432
            int j;
433
            for (j=0; j<chrFilterSize; j++)
434
            {
435
                u += chrSrc[j][i] * chrFilter[j];
436
                v += chrSrc[j][i + 2048] * chrFilter[j];
437
            }
438

    
439
            uDest[2*i]= av_clip_uint8(u>>19);
440
            uDest[2*i+1]= av_clip_uint8(v>>19);
441
        }
442
    else
443
        for (i=0; i<chrDstW; i++)
444
        {
445
            int u=1<<18;
446
            int v=1<<18;
447
            int j;
448
            for (j=0; j<chrFilterSize; j++)
449
            {
450
                u += chrSrc[j][i] * chrFilter[j];
451
                v += chrSrc[j][i + 2048] * chrFilter[j];
452
            }
453

    
454
            uDest[2*i]= av_clip_uint8(v>>19);
455
            uDest[2*i+1]= av_clip_uint8(u>>19);
456
        }
457
}
458

    
459
/*
 * The YSCALE_YUV_2_*_C macros below each OPEN a "for (i...) {" loop that
 * handles two horizontally adjacent pixels per iteration and defines
 * Y1, Y2, U, V (and i2 = 2*i). The code that follows the macro is the loop
 * body and must supply the closing '}'. All other identifiers used
 * (i, dstW, c, the source buffers and weights) are taken from the
 * expansion site.
 */

/*
 * Multi-tap variant: Y1/Y2/U/V are weighted sums over lumSrc/chrSrc with a
 * 1<<18 rounding term, shifted right by 19. If bit 8 of any result is set,
 * all four are clamped to 0..255. V is read 2048 entries after U.
 * Also declares (possibly unused) table pointers r, g, b of type "type *".
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+2048] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }

/* Multi-tap variant that additionally points r, g, b at the context lookup tables selected by U and V. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)  \
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];

/*
 * Two-buffer variant: blends buf0/buf1 with weights yalpha1/yalpha and
 * uvbuf0/uvbuf1 with uvalpha1/uvalpha, then shifts right by 19.
 * No clamping is performed here.
 */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;

/* Two-buffer variant plus r, g, b table pointers. */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Single-buffer variant: luma from buf0 and chroma from uvbuf1, each shifted right by 7. */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;

/* Single-buffer variant plus r, g, b table pointers. */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];

/* Like YSCALE_YUV_2_PACKED1_C, but chroma is the sum of uvbuf0 and uvbuf1 shifted right by 8 (their average, then >>7). */
#define YSCALE_YUV_2_PACKED1B_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;

/* YSCALE_YUV_2_PACKED1B_C plus r, g, b table pointers. */
#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
545

    
546
/*
 * YSCALE_YUV_2_ANYRGB_C(func, func2): complete "switch (c->dstFormat)" that
 * writes one packed output line.
 *
 *   func(type) - loop-opening macro that provides i2, Y1, Y2 and the table
 *                pointers r, g, b (one of the YSCALE_YUV_2_RGB*_C macros)
 *   func2      - loop-opening macro that provides i2, Y1, Y2, U, V
 *                (used for YUYV422 / UYVY422)
 *
 * Expects c, dest, dstW, y and i at the expansion site. The MONOBLACK case
 * does not use func; it reads buf0, buf1, yalpha and yalpha1 directly and
 * emits one byte per 8 pixels (a trailing group of fewer than 8 pixels is
 * not written). Formats not listed fall through the switch untouched.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
        {\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            uint8_t *g= c->table_gU[128] + c->table_gV[128];\
            for (i=0; i<dstW-7; i+=8){\
                int acc;\
                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                ((uint8_t*)dest)[0]= acc;\
                dest++;\
            }\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    }
722

    
723

    
724
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
725
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
726
                                  uint8_t *dest, int dstW, int y)
727
{
728
    int i;
729
    switch(c->dstFormat)
730
    {
731
    case PIX_FMT_BGR32:
732
    case PIX_FMT_RGB32:
733
        YSCALE_YUV_2_RGBX_C(uint32_t)
734
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
735
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
736
        }
737
        break;
738
    case PIX_FMT_RGB24:
739
        YSCALE_YUV_2_RGBX_C(uint8_t)
740
            ((uint8_t*)dest)[0]= r[Y1];
741
            ((uint8_t*)dest)[1]= g[Y1];
742
            ((uint8_t*)dest)[2]= b[Y1];
743
            ((uint8_t*)dest)[3]= r[Y2];
744
            ((uint8_t*)dest)[4]= g[Y2];
745
            ((uint8_t*)dest)[5]= b[Y2];
746
            dest+=6;
747
        }
748
        break;
749
    case PIX_FMT_BGR24:
750
        YSCALE_YUV_2_RGBX_C(uint8_t)
751
            ((uint8_t*)dest)[0]= b[Y1];
752
            ((uint8_t*)dest)[1]= g[Y1];
753
            ((uint8_t*)dest)[2]= r[Y1];
754
            ((uint8_t*)dest)[3]= b[Y2];
755
            ((uint8_t*)dest)[4]= g[Y2];
756
            ((uint8_t*)dest)[5]= r[Y2];
757
            dest+=6;
758
        }
759
        break;
760
    case PIX_FMT_RGB565:
761
    case PIX_FMT_BGR565:
762
        {
763
            const int dr1= dither_2x2_8[y&1    ][0];
764
            const int dg1= dither_2x2_4[y&1    ][0];
765
            const int db1= dither_2x2_8[(y&1)^1][0];
766
            const int dr2= dither_2x2_8[y&1    ][1];
767
            const int dg2= dither_2x2_4[y&1    ][1];
768
            const int db2= dither_2x2_8[(y&1)^1][1];
769
            YSCALE_YUV_2_RGBX_C(uint16_t)
770
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
771
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
772
            }
773
        }
774
        break;
775
    case PIX_FMT_RGB555:
776
    case PIX_FMT_BGR555:
777
        {
778
            const int dr1= dither_2x2_8[y&1    ][0];
779
            const int dg1= dither_2x2_8[y&1    ][1];
780
            const int db1= dither_2x2_8[(y&1)^1][0];
781
            const int dr2= dither_2x2_8[y&1    ][1];
782
            const int dg2= dither_2x2_8[y&1    ][0];
783
            const int db2= dither_2x2_8[(y&1)^1][1];
784
            YSCALE_YUV_2_RGBX_C(uint16_t)
785
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
786
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
787
            }
788
        }
789
        break;
790
    case PIX_FMT_RGB8:
791
    case PIX_FMT_BGR8:
792
        {
793
            const uint8_t * const d64= dither_8x8_73[y&7];
794
            const uint8_t * const d32= dither_8x8_32[y&7];
795
            YSCALE_YUV_2_RGBX_C(uint8_t)
796
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
797
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
798
            }
799
        }
800
        break;
801
    case PIX_FMT_RGB4:
802
    case PIX_FMT_BGR4:
803
        {
804
            const uint8_t * const d64= dither_8x8_73 [y&7];
805
            const uint8_t * const d128=dither_8x8_220[y&7];
806
            YSCALE_YUV_2_RGBX_C(uint8_t)
807
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
808
                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
809
            }
810
        }
811
        break;
812
    case PIX_FMT_RGB4_BYTE:
813
    case PIX_FMT_BGR4_BYTE:
814
        {
815
            const uint8_t * const d64= dither_8x8_73 [y&7];
816
            const uint8_t * const d128=dither_8x8_220[y&7];
817
            YSCALE_YUV_2_RGBX_C(uint8_t)
818
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
819
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
820
            }
821
        }
822
        break;
823
    case PIX_FMT_MONOBLACK:
824
        {
825
            const uint8_t * const d128=dither_8x8_220[y&7];
826
            uint8_t *g= c->table_gU[128] + c->table_gV[128];
827
            int acc=0;
828
            for (i=0; i<dstW-1; i+=2){
829
                int j;
830
                int Y1=1<<18;
831
                int Y2=1<<18;
832

    
833
                for (j=0; j<lumFilterSize; j++)
834
                {
835
                    Y1 += lumSrc[j][i] * lumFilter[j];
836
                    Y2 += lumSrc[j][i+1] * lumFilter[j];
837
                }
838
                Y1>>=19;
839
                Y2>>=19;
840
                if ((Y1|Y2)&256)
841
                {
842
                    if (Y1>255)   Y1=255;
843
                    else if (Y1<0)Y1=0;
844
                    if (Y2>255)   Y2=255;
845
                    else if (Y2<0)Y2=0;
846
                }
847
                acc+= acc + g[Y1+d128[(i+0)&7]];
848
                acc+= acc + g[Y2+d128[(i+1)&7]];
849
                if ((i&7)==6){
850
                    ((uint8_t*)dest)[0]= acc;
851
                    dest++;
852
                }
853
            }
854
        }
855
        break;
856
    case PIX_FMT_YUYV422:
857
        YSCALE_YUV_2_PACKEDX_C(void)
858
            ((uint8_t*)dest)[2*i2+0]= Y1;
859
            ((uint8_t*)dest)[2*i2+1]= U;
860
            ((uint8_t*)dest)[2*i2+2]= Y2;
861
            ((uint8_t*)dest)[2*i2+3]= V;
862
        }
863
        break;
864
    case PIX_FMT_UYVY422:
865
        YSCALE_YUV_2_PACKEDX_C(void)
866
            ((uint8_t*)dest)[2*i2+0]= U;
867
            ((uint8_t*)dest)[2*i2+1]= Y1;
868
            ((uint8_t*)dest)[2*i2+2]= V;
869
            ((uint8_t*)dest)[2*i2+3]= Y2;
870
        }
871
        break;
872
    }
873
}
874

    
875

    
876
//Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
877
//Plain C versions
878
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
879
#define COMPILE_C
880
#endif
881

    
882
#ifdef ARCH_POWERPC
883
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
884
#define COMPILE_ALTIVEC
885
#endif //HAVE_ALTIVEC
886
#endif //ARCH_POWERPC
887

    
888
#if defined(ARCH_X86)
889

    
890
#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
891
#define COMPILE_MMX
892
#endif
893

    
894
#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
895
#define COMPILE_MMX2
896
#endif
897

    
898
#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
899
#define COMPILE_3DNOW
900
#endif
901
#endif //ARCH_X86 || ARCH_X86_64
902

    
903
#undef HAVE_MMX
904
#undef HAVE_MMX2
905
#undef HAVE_3DNOW
906

    
907
#ifdef COMPILE_C
908
#undef HAVE_MMX
909
#undef HAVE_MMX2
910
#undef HAVE_3DNOW
911
#undef HAVE_ALTIVEC
912
#define RENAME(a) a ## _C
913
#include "swscale_template.c"
914
#endif
915

    
916
#ifdef ARCH_POWERPC
917
#ifdef COMPILE_ALTIVEC
918
#undef RENAME
919
#define HAVE_ALTIVEC
920
#define RENAME(a) a ## _altivec
921
#include "swscale_template.c"
922
#endif
923
#endif //ARCH_POWERPC
924

    
925
#if defined(ARCH_X86)
926

    
927
//X86 versions
928
/*
929
#undef RENAME
930
#undef HAVE_MMX
931
#undef HAVE_MMX2
932
#undef HAVE_3DNOW
933
#define ARCH_X86
934
#define RENAME(a) a ## _X86
935
#include "swscale_template.c"
936
*/
937
//MMX versions
938
#ifdef COMPILE_MMX
939
#undef RENAME
940
#define HAVE_MMX
941
#undef HAVE_MMX2
942
#undef HAVE_3DNOW
943
#define RENAME(a) a ## _MMX
944
#include "swscale_template.c"
945
#endif
946

    
947
//MMX2 versions
948
#ifdef COMPILE_MMX2
949
#undef RENAME
950
#define HAVE_MMX
951
#define HAVE_MMX2
952
#undef HAVE_3DNOW
953
#define RENAME(a) a ## _MMX2
954
#include "swscale_template.c"
955
#endif
956

    
957
//3DNOW versions
958
#ifdef COMPILE_3DNOW
959
#undef RENAME
960
#define HAVE_MMX
961
#undef HAVE_MMX2
962
#define HAVE_3DNOW
963
#define RENAME(a) a ## _3DNow
964
#include "swscale_template.c"
965
#endif
966

    
967
#endif //ARCH_X86 || ARCH_X86_64
968

    
969
// minor note: the HAVE_xyz macros are messed up after this point (they reflect the last template instantiated above), so don't rely on them
970

    
971
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * On the first unit interval (dist <= 1.0) the value is the cubic
 *     a + b*dist + c*dist^2 + d*dist^3.
 * For every further unit interval the coefficients are replaced by
 *     a' = 0
 *     b' =  b + 2c + 3d
 *     c' =      c + 3d
 *     d' = -b - 3c - 6d
 * and dist is reduced by 1.0.
 *
 * This is done iteratively, so a large dist costs loop iterations rather
 * than stack frames, and a NaN dist falls straight through (yielding NaN)
 * instead of recursing without bound.
 *
 * @param a,b,c,d coefficients of the cubic on the first interval
 * @param dist    distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0)
    {
        /* all three new coefficients are derived from the OLD b, c, d */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a    = 0.0;
        b    = nextB;
        c    = nextC;
        d    = nextD;
        dist = dist - 1.0;
    }

    return ((d*dist + c)*dist + b)*dist + a;
}
981

    
982
/**
 * Build the table of scaling filter coefficients for one direction.
 *
 * Steps, in order:
 *   1. compute a double-precision filter of filterSize taps per output
 *      sample, according to the scaler selected in flags;
 *   2. convolve each row with srcFilter (if given) into filter2;
 *   3. shift away near-zero taps on the left, count near-zero taps on the
 *      right, and derive the smallest usable filter size;
 *   4. round that size up to filterAlign and copy the rows over;
 *   5. fold taps that would read outside [0, srcW) back into the row;
 *   6. normalize every row so it sums to `one` and store it as int16_t,
 *      carrying the rounding error from tap to tap.
 *
 * @param outFilter     receives an av_mallocz()ed array of
 *                      (dstW+1) * *outFilterSize coefficients
 * @param filterPos     receives an av_malloc()ed array of dstW+1 source
 *                      start positions (one extra entry because the MMX
 *                      scaler reads over the end)
 * @param outFilterSize receives the final number of taps per output sample
 * @param xInc          source increment per output sample, 16.16 fixed point
 * @param srcW          source length
 * @param dstW          destination length
 * @param filterAlign   the filter size is rounded up to a multiple of this;
 *                      the rounding uses a bit mask, so a power of two is
 *                      assumed
 * @param one           integer value each normalized row sums to
 * @param flags         SWS_* scaler selection and CPU capability flags
 * @param srcFilter     optional vector convolved with every row, may be NULL
 * @param dstFilter     only contributes to the size of the intermediate
 *                      filter; it is not applied (see FIXME below)
 * @param param         scaler specific tuning values; SWS_PARAM_DEFAULT
 *                      selects the built-in default
 * @return 0 on success, -1 if the resulting filter size is
 *         >= MAX_FILTER_SIZE. In the error case *filterPos has already been
 *         allocated and stays owned by the caller; *outFilter and
 *         *outFilterSize are not written.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    double *filter=NULL;
    double *filter2=NULL;
#if defined(ARCH_X86)
    if (flags & SWS_CPU_CAPS_MMX)
        asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // Note the +1 is for the MMXscaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));

    if (FFABS(xInc - 0x10000) <10) // unscaled
    {
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        for (i=0; i<dstW*filterSize; i++) filter[i]=0;

        for (i=0; i<dstW; i++)
        {
            filter[i*filterSize]=1;
            (*filterPos)[i]=i;
        }

    }
    else if (flags&SWS_POINT) // lame looking point sampling mode
    {
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= 1.0;
            xDstInSrc+= xInc;
        }
    }
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
    {
        int xDstInSrc;
        if      (flags&SWS_BICUBIC) filterSize= 4;
        else if (flags&SWS_X      ) filterSize= 4;
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
        filter= av_malloc(dstW*sizeof(double)*filterSize);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //Bilinear upscale / linear interpolate / Area averaging
            for (j=0; j<filterSize; j++)
            {
                double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
                double coeff= 1.0 - d;
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    }
    else
    {
        double xDstInSrc;
        double sizeFactor, filterSizeInSrc;
        const double xInc1= (double)xInc / (double)(1<<16);

        if      (flags&SWS_BICUBIC)      sizeFactor=  4.0;
        else if (flags&SWS_X)            sizeFactor=  8.0;
        else if (flags&SWS_AREA)         sizeFactor=  1.0; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8.0;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
        else if (flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2.0;
        else {
            sizeFactor= 0.0; //GCC warning killer
            ASSERT(0)
        }

        if (xInc1 <= 1.0)       filterSizeInSrc= sizeFactor; // upscale
        else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

        filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
        if (filterSize > srcW-2) filterSize=srcW-2;

        filter= av_malloc(dstW*sizeof(double)*filterSize);

        xDstInSrc= xInc1 / 2.0 - 0.5;
        for (i=0; i<dstW; i++)
        {
            int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++)
            {
                // distance from the filter center, rescaled to the kernel's own support
                double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                double coeff;
                if (flags & SWS_BICUBIC)
                {
                    double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                    double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                    if (d<1.0)
                        coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                    else if (d<2.0)
                        coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                    else
                        coeff=0.0;
                }
                else if (flags & SWS_X)
                {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                    if (d<1.0)
                        coeff = cos(d*PI);
                    else
                        coeff=-1.0;
                    if (coeff<0.0)      coeff= -pow(-coeff, A);
                    else                coeff=  pow( coeff, A);
                    coeff= coeff*0.5 + 0.5;
                }
                else if (flags & SWS_AREA)
                {
                    double srcPixelSize= 1.0/xInc1;
                    if      (d + srcPixelSize/2 < 0.5) coeff= 1.0;
                    else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                    else coeff=0.0;
                }
                else if (flags & SWS_GAUSS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = pow(2.0, - p*d*d);
                }
                else if (flags & SWS_SINC)
                {
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                }
                else if (flags & SWS_LANCZOS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                    if (d>p) coeff=0;
                }
                else if (flags & SWS_BILINEAR)
                {
                    coeff= 1.0 - d;
                    if (coeff<0) coeff=0;
                }
                else if (flags & SWS_SPLINE)
                {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                }
                else {
                    coeff= 0.0; //GCC warning killer
                    ASSERT(0)
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc1;
        }
    }

    /* apply src & dst Filter to filter -> filter2
       av_free(filter);
    */
    ASSERT(filterSize>0)
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    ASSERT(filter2Size>0)
    filter2= av_malloc(filter2Size*dstW*sizeof(double));

    for (i=0; i<dstW; i++)
    {
        int j;
        SwsVector scaleFilter;
        SwsVector *outVec;

        scaleFilter.coeff= filter + i*filterSize;
        scaleFilter.length= filterSize;

        if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
        else           outVec= &scaleFilter;

        ASSERT(outVec->length == filter2Size)
        //FIXME dstFilter

        for (j=0; j<outVec->length; j++)
        {
            filter2[i*filter2Size + j]= outVec->coeff[j];
        }

        // keep the filter centered after it has been widened by the convolution
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

        if (outVec != &scaleFilter) sws_freeVec(outVec);
    }
    av_free(filter); filter=NULL;

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--)
    {
        int min= filter2Size;
        int j;
        double cutOff=0.0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++)
        {
            int k;
            // always looks at tap 0: the row is shifted left on every pass
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // Move filter coeffs left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0.0;
            (*filterPos)[i]++;
        }

        cutOff=0.0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--)
        {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // we really don't want to waste our time
        // doing useless computation, so fall-back on
        // the scalar C code for very small filter.
        // vectorizing is worth it only if you have
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    ASSERT(minFilterSize > 0)
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    ASSERT(filterSize > 0)
    if (filterSize >= MAX_FILTER_SIZE)
    {
        // reject before allocating the final filter, and release the
        // intermediate one, so that the error path does not leak
        av_free(filter2);
        return -1;
    }
    filter= av_malloc(filterSize*dstW*sizeof(double));
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++)
    {
        int j;

        for (j=0; j<filterSize; j++)
        {
            if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
        }
    }
    av_free(filter2); filter2=NULL;


    //FIXME try to align filterpos if possible

    //fix borders
    for (i=0; i<dstW; i++)
    {
        int j;
        if ((*filterPos)[i] < 0)
        {
            // Move filter coeffs left to compensate for filterPos
            for (j=1; j<filterSize; j++)
            {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW)
        {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // Move filter coeffs right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--)
            {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMXscaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));

    /* Normalize & Store in outFilter */
    for (i=0; i<dstW; i++)
    {
        int j;
        double error=0;
        double sum=0;
        double scale= one;

        for (j=0; j<filterSize; j++)
        {
            sum+= filter[i*filterSize + j];
        }
        scale/= sum;
        for (j=0; j<*outFilterSize; j++)
        {
            // the rounding error of each tap is carried into the next one
            double v= filter[i*filterSize + j]*scale + error;
            int intV= floor(v + 0.5);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error = v - intV;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++)
    {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    av_free(filter);
    return 0;
}
1359

    
1360
#ifdef COMPILE_MMX2
1361
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1362
{
1363
    uint8_t *fragmentA;
1364
    long imm8OfPShufW1A;
1365
    long imm8OfPShufW2A;
1366
    long fragmentLengthA;
1367
    uint8_t *fragmentB;
1368
    long imm8OfPShufW1B;
1369
    long imm8OfPShufW2B;
1370
    long fragmentLengthB;
1371
    int fragmentPos;
1372

    
1373
    int xpos, i;
1374

    
1375
    // create an optimized horizontal scaling routine
1376

    
1377
    //code fragment
1378

    
1379
    asm volatile(
1380
        "jmp                         9f                 \n\t"
1381
    // Begin
1382
        "0:                                             \n\t"
1383
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1384
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1385
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1386
        "punpcklbw                %%mm7, %%mm1          \n\t"
1387
        "punpcklbw                %%mm7, %%mm0          \n\t"
1388
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1389
        "1:                                             \n\t"
1390
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1391
        "2:                                             \n\t"
1392
        "psubw                    %%mm1, %%mm0          \n\t"
1393
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1394
        "pmullw                   %%mm3, %%mm0          \n\t"
1395
        "psllw                       $7, %%mm1          \n\t"
1396
        "paddw                    %%mm1, %%mm0          \n\t"
1397

    
1398
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1399

    
1400
        "add                         $8, %%"REG_a"      \n\t"
1401
    // End
1402
        "9:                                             \n\t"
1403
//        "int $3                                         \n\t"
1404
        "lea                         0b, %0             \n\t"
1405
        "lea                         1b, %1             \n\t"
1406
        "lea                         2b, %2             \n\t"
1407
        "dec                         %1                 \n\t"
1408
        "dec                         %2                 \n\t"
1409
        "sub                         %0, %1             \n\t"
1410
        "sub                         %0, %2             \n\t"
1411
        "lea                         9b, %3             \n\t"
1412
        "sub                         %0, %3             \n\t"
1413

    
1414

    
1415
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1416
        "=r" (fragmentLengthA)
1417
    );
1418

    
1419
    asm volatile(
1420
        "jmp                         9f                 \n\t"
1421
    // Begin
1422
        "0:                                             \n\t"
1423
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1424
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1425
        "punpcklbw                %%mm7, %%mm0          \n\t"
1426
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1427
        "1:                                             \n\t"
1428
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1429
        "2:                                             \n\t"
1430
        "psubw                    %%mm1, %%mm0          \n\t"
1431
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1432
        "pmullw                   %%mm3, %%mm0          \n\t"
1433
        "psllw                       $7, %%mm1          \n\t"
1434
        "paddw                    %%mm1, %%mm0          \n\t"
1435

    
1436
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1437

    
1438
        "add                         $8, %%"REG_a"      \n\t"
1439
    // End
1440
        "9:                                             \n\t"
1441
//        "int                       $3                   \n\t"
1442
        "lea                         0b, %0             \n\t"
1443
        "lea                         1b, %1             \n\t"
1444
        "lea                         2b, %2             \n\t"
1445
        "dec                         %1                 \n\t"
1446
        "dec                         %2                 \n\t"
1447
        "sub                         %0, %1             \n\t"
1448
        "sub                         %0, %2             \n\t"
1449
        "lea                         9b, %3             \n\t"
1450
        "sub                         %0, %3             \n\t"
1451

    
1452

    
1453
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1454
        "=r" (fragmentLengthB)
1455
    );
1456

    
1457
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1458
    fragmentPos=0;
1459

    
1460
    for (i=0; i<dstW/numSplits; i++)
1461
    {
1462
        int xx=xpos>>16;
1463

    
1464
        if ((i&3) == 0)
1465
        {
1466
            int a=0;
1467
            int b=((xpos+xInc)>>16) - xx;
1468
            int c=((xpos+xInc*2)>>16) - xx;
1469
            int d=((xpos+xInc*3)>>16) - xx;
1470

    
1471
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1472
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1473
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1474
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1475
            filterPos[i/2]= xx;
1476

    
1477
            if (d+1<4)
1478
            {
1479
                int maxShift= 3-(d+1);
1480
                int shift=0;
1481

    
1482
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1483

    
1484
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1485
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1486
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1487
                    a | (b<<2) | (c<<4) | (d<<6);
1488

    
1489
                if (i+3>=dstW) shift=maxShift; //avoid overread
1490
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1491

    
1492
                if (shift && i>=shift)
1493
                {
1494
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1495
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1496
                    filterPos[i/2]-=shift;
1497
                }
1498

    
1499
                fragmentPos+= fragmentLengthB;
1500
            }
1501
            else
1502
            {
1503
                int maxShift= 3-d;
1504
                int shift=0;
1505

    
1506
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1507

    
1508
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1509
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1510
                    a | (b<<2) | (c<<4) | (d<<6);
1511

    
1512
                if (i+4>=dstW) shift=maxShift; //avoid overread
1513
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1514

    
1515
                if (shift && i>=shift)
1516
                {
1517
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1518
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1519
                    filterPos[i/2]-=shift;
1520
                }
1521

    
1522
                fragmentPos+= fragmentLengthA;
1523
            }
1524

    
1525
            funnyCode[fragmentPos]= RET;
1526
        }
1527
        xpos+=xInc;
1528
    }
1529
    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1530
}
1531
#endif /* COMPILE_MMX2 */
1532

    
1533
static void globalInit(void){
1534
    // generating tables:
1535
    int i;
1536
    for (i=0; i<768; i++){
1537
        int c= av_clip_uint8(i-256);
1538
        clip_table[i]=c;
1539
    }
1540
}
1541

    
1542
static SwsFunc getSwsFunc(int flags){
1543

    
1544
#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
1545
#if defined(ARCH_X86)
1546
    // ordered per speed fastest first
1547
    if (flags & SWS_CPU_CAPS_MMX2)
1548
        return swScale_MMX2;
1549
    else if (flags & SWS_CPU_CAPS_3DNOW)
1550
        return swScale_3DNow;
1551
    else if (flags & SWS_CPU_CAPS_MMX)
1552
        return swScale_MMX;
1553
    else
1554
        return swScale_C;
1555

    
1556
#else
1557
#ifdef ARCH_POWERPC
1558
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1559
        return swScale_altivec;
1560
    else
1561
        return swScale_C;
1562
#endif
1563
    return swScale_C;
1564
#endif /* defined(ARCH_X86) */
1565
#else //RUNTIME_CPUDETECT
1566
#ifdef HAVE_MMX2
1567
    return swScale_MMX2;
1568
#elif defined (HAVE_3DNOW)
1569
    return swScale_3DNow;
1570
#elif defined (HAVE_MMX)
1571
    return swScale_MMX;
1572
#elif defined (HAVE_ALTIVEC)
1573
    return swScale_altivec;
1574
#else
1575
    return swScale_C;
1576
#endif
1577
#endif //!RUNTIME_CPUDETECT
1578
}
1579

    
1580
/**
 * Unscaled conversion of a planar YUV slice to a two-plane format with
 * interleaved chroma.
 *
 * The luma plane is copied unchanged. The two source chroma planes are
 * interleaved into destination plane 1: src[1] first when the destination
 * format is PIX_FMT_NV12, src[2] first otherwise.
 *
 * @param c         scaler context; srcW and dstFormat are read
 * @param src       source planes
 * @param srcStride source strides in bytes
 * @param srcSliceY first line of the slice
 * @param srcSliceH number of lines in the slice
 * @param dstParam  destination planes
 * @param dstStride destination strides in bytes
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* Interleave the chroma planes into plane 1. The rows written here
       belong to plane 1, so they must be advanced with that plane's own
       stride, not the luma stride. */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);

    return srcSliceH;
}
1606

    
1607
/**
 * Unscaled conversion of a planar slice to packed output via yv12toyuy2().
 *
 * The output is written to dstParam[0], starting at line srcSliceY.
 * Only srcStride[0], srcStride[1] and dstStride[0] are passed on.
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int lumStride   = srcStride[0];
    const int chromStride = srcStride[1];
    const int outStride   = dstStride[0];
    uint8_t *out= dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH, lumStride, chromStride, outStride);

    return srcSliceH;
}
1615

    
1616
/**
 * Unscaled conversion of a planar slice to packed output via yv12touyvy().
 *
 * The output is written to dstParam[0], starting at line srcSliceY.
 * Only srcStride[0], srcStride[1] and dstStride[0] are passed on.
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int lumStride   = srcStride[0];
    const int chromStride = srcStride[1];
    const int outStride   = dstStride[0];
    uint8_t *out= dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH, lumStride, chromStride, outStride);

    return srcSliceH;
}
1624

    
1625
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1626
/**
 * Unscaled conversion between packed RGB/BGR formats.
 *
 * A converter is selected from the depth ids of source and destination
 * (depth >> 2), using one table when both formats have the same component
 * order and another when the order is swapped. If no converter matches,
 * an error is logged and nothing is written.
 *
 * When the strides of both buffers describe the same number of pixels per
 * line (and the source stride is positive) the whole slice is converted in
 * one call, otherwise line by line.
 *
 * @return srcSliceH
 */
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int srcFormat= c->srcFormat;
    const int dstFormat= c->dstFormat;
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
    const int dstId= fmt_depth(dstFormat) >> 2;
    const int key= srcId | (dstId<<4);
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
    int unsupported= 0;
    uint8_t *in, *out;
    int line;

    if (  (isBGR(srcFormat) && isBGR(dstFormat))
       || (isRGB(srcFormat) && isRGB(dstFormat))){
        /* same component order: only the depth changes */
        switch(key){
        case 0x34: conv= rgb16to15; break;
        case 0x36: conv= rgb24to15; break;
        case 0x38: conv= rgb32to15; break;
        case 0x43: conv= rgb15to16; break;
        case 0x46: conv= rgb24to16; break;
        case 0x48: conv= rgb32to16; break;
        case 0x63: conv= rgb15to24; break;
        case 0x64: conv= rgb16to24; break;
        case 0x68: conv= rgb32to24; break;
        case 0x83: conv= rgb15to32; break;
        case 0x84: conv= rgb16to32; break;
        case 0x86: conv= rgb24to32; break;
        default:   unsupported= 1;  break;
        }
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
             || (isRGB(srcFormat) && isBGR(dstFormat))){
        /* swapped component order */
        switch(key){
        case 0x33: conv= rgb15tobgr15; break;
        case 0x34: conv= rgb16tobgr15; break;
        case 0x36: conv= rgb24tobgr15; break;
        case 0x38: conv= rgb32tobgr15; break;
        case 0x43: conv= rgb15tobgr16; break;
        case 0x44: conv= rgb16tobgr16; break;
        case 0x46: conv= rgb24tobgr16; break;
        case 0x48: conv= rgb32tobgr16; break;
        case 0x63: conv= rgb15tobgr24; break;
        case 0x64: conv= rgb16tobgr24; break;
        case 0x66: conv= rgb24tobgr24; break;
        case 0x68: conv= rgb32tobgr24; break;
        case 0x83: conv= rgb15tobgr32; break;
        case 0x84: conv= rgb16tobgr32; break;
        case 0x86: conv= rgb24tobgr32; break;
        case 0x88: conv= rgb32tobgr32; break;
        default:   unsupported= 1;     break;
        }
    }else{
        unsupported= 1;
    }

    if (unsupported)
        av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    if (!conv)
        return srcSliceH;

    in = src[0];
    out= dst[0] + dstStride[0]*srcSliceY;

    if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
    {
        /* both buffers are laid out alike: convert the slice in one go */
        conv(in, out, srcSliceH*srcStride[0]);
        return srcSliceH;
    }

    for (line=0; line<srcSliceH; line++)
    {
        conv(in, out, c->srcW*srcBpp);
        in += srcStride[0];
        out+= dstStride[0];
    }
    return srcSliceH;
}
1702

    
1703
/**
 * Unscaled converter for one slice of a packed 24-bit source into three
 * destination planes, delegating the pixel work to rgb24toyv12().
 *
 * The luma output starts at row srcSliceY of dst[0]; both chroma outputs
 * start at row (srcSliceY>>1) of their planes. Only dstStride[0] and
 * dstStride[1] are handed to the converter as strides (dstStride[2] is used
 * solely to compute the start of the third plane).
 *
 * @return srcSliceH, the number of source rows consumed
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int chromaRow = srcSliceY>>1;
    uint8_t *lumaOut    = dst[0] + srcSliceY*dstStride[0];
    uint8_t *chroma1Out = dst[1] + chromaRow*dstStride[1];
    uint8_t *chroma2Out = dst[2] + chromaRow*dstStride[2];

    rgb24toyv12(src[0], lumaOut, chroma1Out, chroma2Out,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1715

    
1716
/**
 * Unscaled conversion of one slice from a source with smaller chroma planes
 * to a destination whose chroma planes are upscaled with planar2x().
 *
 * Luma is copied unchanged into row srcSliceY of dst[0]. Chroma is upscaled
 * plane by plane; when the destination format is not PIX_FMT_YUV420P the two
 * chroma planes are swapped on output.
 *
 * Slice handling: the chroma output now starts at the chroma row that
 * corresponds to srcSliceY and only the chroma rows belonging to this slice
 * are processed. Previously every call wrote to the top of the chroma planes
 * and processed c->chrSrcH rows starting at the slice pointer, which is only
 * correct when the whole picture is passed as one slice. For such a
 * full-frame call the arguments are unchanged (the slice chroma height is
 * rounded up exactly like c->chrSrcH).
 *
 * NOTE(review): assumes planar2x() takes the source plane height in rows as
 * its 4th argument, as the original call with c->chrSrcH suggests - confirm.
 *
 * @return srcSliceH, the number of source rows consumed
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[]){
    int i;
    /* source chroma rows in this slice, rounded toward +inf like c->chrSrcH */
    const int chrSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    /* first destination chroma row of this slice */
    const int chrDstY  = srcSliceY >> c->chrDstVSubSample;

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    else{
        /* strides differ (or are not positive): copy only the visible width per row */
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    if (c->dstFormat==PIX_FMT_YUV420P){
        planar2x(src[1], dst[1] + dstStride[1]*chrDstY, c->chrSrcW, chrSliceH, srcStride[1], dstStride[1]);
        planar2x(src[2], dst[2] + dstStride[2]*chrDstY, c->chrSrcW, chrSliceH, srcStride[2], dstStride[2]);
    }else{
        /* other destination: chroma planes are stored in swapped order */
        planar2x(src[1], dst[2] + dstStride[2]*chrDstY, c->chrSrcW, chrSliceH, srcStride[1], dstStride[2]);
        planar2x(src[2], dst[1] + dstStride[1]*chrDstY, c->chrSrcW, chrSliceH, srcStride[2], dstStride[1]);
    }
    return srcSliceH;
}
1744

    
1745
/* unscaled copy like stuff (assumes nearly identical formats) */
1746
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1747
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
1748

    
1749
    if (isPacked(c->srcFormat))
1750
    {
1751
        if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1752
            memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1753
        else
1754
        {
1755
            int i;
1756
            uint8_t *srcPtr= src[0];
1757
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1758
            int length=0;
1759

    
1760
            /* universal length finder */
1761
            while(length+c->srcW <= FFABS(dstStride[0])
1762
               && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1763
            ASSERT(length!=0);
1764

    
1765
            for (i=0; i<srcSliceH; i++)
1766
            {
1767
                memcpy(dstPtr, srcPtr, length);
1768
                srcPtr+= srcStride[0];
1769
                dstPtr+= dstStride[0];
1770
            }
1771
        }
1772
    }
1773
    else
1774
    { /* Planar YUV or gray */
1775
        int plane;
1776
        for (plane=0; plane<3; plane++)
1777
        {
1778
            int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1779
            int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1780
            int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1781

    
1782
            if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1783
            {
1784
                if (!isGray(c->dstFormat))
1785
                    memset(dst[plane], 128, dstStride[plane]*height);
1786
            }
1787
            else
1788
            {
1789
                if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1790
                    memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1791
                else
1792
                {
1793
                    int i;
1794
                    uint8_t *srcPtr= src[plane];
1795
                    uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1796
                    for (i=0; i<height; i++)
1797
                    {
1798
                        memcpy(dstPtr, srcPtr, length);
1799
                        srcPtr+= srcStride[plane];
1800
                        dstPtr+= dstStride[plane];
1801
                    }
1802
                }
1803
            }
1804
        }
1805
    }
1806
    return srcSliceH;
1807
}
1808

    
1809
/**
 * Unscaled 16-bit gray -> 8-bit luma conversion of one slice.
 *
 * One byte of every 2-byte source sample is kept: the second byte for
 * PIX_FMT_GRAY16LE, the first byte otherwise (presumably the most
 * significant byte in both cases).
 *
 * When the destination is not a gray format, planes 1 and 2 are filled with
 * the neutral value 128. The fill covers the chroma rows of this slice;
 * previously it always started at the top of the planes regardless of
 * srcSliceY, which is only correct for a single full-frame slice. A negative
 * chroma stride is filled row by row instead of passing a negative size to
 * memset.
 *
 * @return srcSliceH, the number of source rows consumed
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* chroma geometry of this slice, rounded toward +inf */
        int chrLength= -((-c->srcW  )>>c->chrDstHSubSample);
        int chrY=      -((-srcSliceY)>>c->chrDstVSubSample);
        int chrHeight= -((-srcSliceH)>>c->chrDstVSubSample);
        int plane;

        for (plane=1; plane<3; plane++)
        {
            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;

            if (dstStride[plane] >= 0)
                memset(chrPtr, 128, dstStride[plane]*chrHeight);
            else
            {
                for (i=0; i<chrHeight; i++)
                {
                    memset(chrPtr, 128, chrLength);
                    chrPtr+= dstStride[plane];
                }
            }
        }
    }
    /* little endian: skip the first byte of each sample */
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
1833

    
1834
/**
 * Unscaled 8-bit luma -> 16-bit gray conversion of one slice.
 *
 * Every source byte of plane 0 is written twice, to both bytes of the
 * corresponding 2-byte destination sample, so the result does not depend on
 * the byte order of the destination format. Output starts at row srcSliceY
 * of dst[0]; only plane 0 of the source is read.
 *
 * @return srcSliceH, the number of source rows consumed
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int width = c->srcW;
    const uint8_t *in  = src[0];
    uint8_t       *out = dst[0] + dstStride[0]*srcSliceY;
    int row;

    for (row = 0; row < srcSliceH; row++) {
        int x;

        for (x = 0; x < width; x++) {
            out[2*x    ] = in[x];
            out[2*x + 1] = in[x];
        }
        in  += srcStride[0];
        out += dstStride[0];
    }
    return srcSliceH;
}
1855

    
1856
/**
 * Unscaled 16-bit gray byte-order swap of one slice (plane 0 only).
 *
 * Each 16-bit sample is byte-swapped with bswap_16() and written to the row
 * of dst[0] that corresponds to srcSliceY.
 *
 * The destination start is computed in bytes (dstStride[0]*srcSliceY) before
 * the pointer is converted to uint16_t*. The previous code added
 * dstStride[0]*y/2 to the byte pointer, i.e. only half of the required
 * offset, so every slice with srcSliceY > 0 was written to the wrong rows.
 * The pointer conversions are now explicit casts.
 *
 * Rows are advanced by stride/2 16-bit elements, as before.
 *
 * @return srcSliceH, the number of source rows consumed
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint16_t *srcPtr= (uint16_t*)src[0];
    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        srcPtr+= srcStride[0]/2;
        dstPtr+= dstStride[0]/2;
    }
    return srcSliceH;
}
1873

    
1874

    
1875
static void getSubSampleFactors(int *h, int *v, int format){
1876
    switch(format){
1877
    case PIX_FMT_UYVY422:
1878
    case PIX_FMT_YUYV422:
1879
        *h=1;
1880
        *v=0;
1881
        break;
1882
    case PIX_FMT_YUV420P:
1883
    case PIX_FMT_YUVA420P:
1884
    case PIX_FMT_GRAY16BE:
1885
    case PIX_FMT_GRAY16LE:
1886
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1887
    case PIX_FMT_NV12:
1888
    case PIX_FMT_NV21:
1889
        *h=1;
1890
        *v=1;
1891
        break;
1892
    case PIX_FMT_YUV440P:
1893
        *h=0;
1894
        *v=1;
1895
        break;
1896
    case PIX_FMT_YUV410P:
1897
        *h=2;
1898
        *v=2;
1899
        break;
1900
    case PIX_FMT_YUV444P:
1901
        *h=0;
1902
        *v=0;
1903
        break;
1904
    case PIX_FMT_YUV422P:
1905
        *h=1;
1906
        *v=0;
1907
        break;
1908
    case PIX_FMT_YUV411P:
1909
        *h=2;
1910
        *v=0;
1911
        break;
1912
    default:
1913
        *h=0;
1914
        *v=0;
1915
        break;
1916
    }
1917
}
1918

    
1919
/**
 * Convert a 16.16 fixed-point value to a saturated 16-bit integer, rounding
 * to nearest (halves round toward +inf).
 *
 * The result is returned as the uint16_t bit pattern of a signed 16-bit
 * value: results above 0x7FFF saturate to 0x7FFF, results below -0x7FFF
 * yield 0x8000.
 *
 * The intermediate is kept in 64 bits until after clamping. Previously it was
 * narrowed to int first, so large inputs wrapped around instead of
 * saturating (e.g. 1<<48 produced 0). The rounding is also computed as
 * floor(f/65536) plus the top fractional bit, which equals (f + (1<<15))>>16
 * but cannot overflow for inputs near INT64_MAX.
 *
 * Like the rest of the file this relies on >> of a negative value being an
 * arithmetic shift.
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r= (f>>16) + ((f & 0xFFFF)>>15);
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return (uint16_t)r;
}
1925

    
1926
/**
1927
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1928
 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
1929
 * @return -1 if not supported
1930
 */
1931
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1932
    int64_t crv =  inv_table[0];
1933
    int64_t cbu =  inv_table[1];
1934
    int64_t cgu = -inv_table[2];
1935
    int64_t cgv = -inv_table[3];
1936
    int64_t cy  = 1<<16;
1937
    int64_t oy  = 0;
1938

    
1939
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1940
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1941
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1942

    
1943
    c->brightness= brightness;
1944
    c->contrast  = contrast;
1945
    c->saturation= saturation;
1946
    c->srcRange  = srcRange;
1947
    c->dstRange  = dstRange;
1948

    
1949
    c->uOffset=   0x0400040004000400LL;
1950
    c->vOffset=   0x0400040004000400LL;
1951

    
1952
    if (!srcRange){
1953
        cy= (cy*255) / 219;
1954
        oy= 16<<16;
1955
    }else{
1956
        crv= (crv*224) / 255;
1957
        cbu= (cbu*224) / 255;
1958
        cgu= (cgu*224) / 255;
1959
        cgv= (cgv*224) / 255;
1960
    }
1961

    
1962
    cy = (cy *contrast             )>>16;
1963
    crv= (crv*contrast * saturation)>>32;
1964
    cbu= (cbu*contrast * saturation)>>32;
1965
    cgu= (cgu*contrast * saturation)>>32;
1966
    cgv= (cgv*contrast * saturation)>>32;
1967

    
1968
    oy -= 256*brightness;
1969

    
1970
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1971
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1972
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1973
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1974
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1975
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1976

    
1977
    yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1978
    //FIXME factorize
1979

    
1980
#ifdef COMPILE_ALTIVEC
1981
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1982
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1983
#endif
1984
    return 0;
1985
}
1986

    
1987
/**
1988
 * @return -1 if not supported
1989
 */
1990
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1991
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1992

    
1993
    *inv_table = c->srcColorspaceTable;
1994
    *table     = c->dstColorspaceTable;
1995
    *srcRange  = c->srcRange;
1996
    *dstRange  = c->dstRange;
1997
    *brightness= c->brightness;
1998
    *contrast  = c->contrast;
1999
    *saturation= c->saturation;
2000

    
2001
    return 0;
2002
}
2003

    
2004
static int handle_jpeg(int *format)
2005
{
2006
    switch (*format) {
2007
        case PIX_FMT_YUVJ420P:
2008
            *format = PIX_FMT_YUV420P;
2009
            return 1;
2010
        case PIX_FMT_YUVJ422P:
2011
            *format = PIX_FMT_YUV422P;
2012
            return 1;
2013
        case PIX_FMT_YUVJ444P:
2014
            *format = PIX_FMT_YUV444P;
2015
            return 1;
2016
        case PIX_FMT_YUVJ440P:
2017
            *format = PIX_FMT_YUV440P;
2018
            return 1;
2019
        default:
2020
            return 0;
2021
    }
2022
}
2023

    
2024
SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
2025
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2026

    
2027
    SwsContext *c;
2028
    int i;
2029
    int usesVFilter, usesHFilter;
2030
    int unscaled, needsDither;
2031
    int srcRange, dstRange;
2032
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2033
#if defined(ARCH_X86)
2034
    if (flags & SWS_CPU_CAPS_MMX)
2035
        asm volatile("emms\n\t"::: "memory");
2036
#endif
2037

    
2038
#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
2039
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2040
#ifdef HAVE_MMX2
2041
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2042
#elif defined (HAVE_3DNOW)
2043
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2044
#elif defined (HAVE_MMX)
2045
    flags |= SWS_CPU_CAPS_MMX;
2046
#elif defined (HAVE_ALTIVEC)
2047
    flags |= SWS_CPU_CAPS_ALTIVEC;
2048
#elif defined (ARCH_BFIN)
2049
    flags |= SWS_CPU_CAPS_BFIN;
2050
#endif
2051
#endif /* RUNTIME_CPUDETECT */
2052
    if (clip_table[512] != 255) globalInit();
2053
    if (!rgb15to16) sws_rgb2rgb_init(flags);
2054

    
2055
    unscaled = (srcW == dstW && srcH == dstH);
2056
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2057
        && (fmt_depth(dstFormat))<24
2058
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2059

    
2060
    srcRange = handle_jpeg(&srcFormat);
2061
    dstRange = handle_jpeg(&dstFormat);
2062

    
2063
    if (!isSupportedIn(srcFormat))
2064
    {
2065
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
2066
        return NULL;
2067
    }
2068
    if (!isSupportedOut(dstFormat))
2069
    {
2070
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
2071
        return NULL;
2072
    }
2073

    
2074
    /* sanity check */
2075
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2076
    {
2077
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2078
               srcW, srcH, dstW, dstH);
2079
        return NULL;
2080
    }
2081

    
2082
    if (!dstFilter) dstFilter= &dummyFilter;
2083
    if (!srcFilter) srcFilter= &dummyFilter;
2084

    
2085
    c= av_mallocz(sizeof(SwsContext));
2086

    
2087
    c->av_class = &sws_context_class;
2088
    c->srcW= srcW;
2089
    c->srcH= srcH;
2090
    c->dstW= dstW;
2091
    c->dstH= dstH;
2092
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2093
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2094
    c->flags= flags;
2095
    c->dstFormat= dstFormat;
2096
    c->srcFormat= srcFormat;
2097
    c->vRounder= 4* 0x0001000100010001ULL;
2098

    
2099
    usesHFilter= usesVFilter= 0;
2100
    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2101
    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2102
    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2103
    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2104
    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2105
    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2106
    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2107
    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2108

    
2109
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2110
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2111

    
2112
    // reuse chroma for 2 pixels rgb/bgr unless user wants full chroma interpolation
2113
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2114

    
2115
    // drop some chroma lines if the user wants it
2116
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2117
    c->chrSrcVSubSample+= c->vChrDrop;
2118

    
2119
    // drop every 2. pixel for chroma calculation unless user wants full chroma
2120
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2121
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2122
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2123
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE)
2124
        c->chrSrcHSubSample=1;
2125

    
2126
    if (param){
2127
        c->param[0] = param[0];
2128
        c->param[1] = param[1];
2129
    }else{
2130
        c->param[0] =
2131
        c->param[1] = SWS_PARAM_DEFAULT;
2132
    }
2133

    
2134
    c->chrIntHSubSample= c->chrDstHSubSample;
2135
    c->chrIntVSubSample= c->chrSrcVSubSample;
2136

    
2137
    // Note the -((-x)>>y) is so that we always round toward +inf.
2138
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2139
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2140
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2141
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2142

    
2143
    sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2144

    
2145
    /* unscaled special Cases */
2146
    if (unscaled && !usesHFilter && !usesVFilter)
2147
    {
2148
        /* yv12_to_nv12 */
2149
        if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2150
        {
2151
            c->swScale= PlanarToNV12Wrapper;
2152
        }
2153
#ifdef CONFIG_GPL
2154
        /* yuv2bgr */
2155
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2156
        {
2157
            c->swScale= yuv2rgb_get_func_ptr(c);
2158
        }
2159
#endif
2160

    
2161
        if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P)
2162
        {
2163
            c->swScale= yvu9toyv12Wrapper;
2164
        }
2165

    
2166
        /* bgr24toYV12 */
2167
        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2168
            c->swScale= bgr24toyv12Wrapper;
2169

    
2170
        /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2171
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2172
           && (isBGR(dstFormat) || isRGB(dstFormat))
2173
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2174
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2175
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2176
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2177
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2178
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2179
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2180
           && !needsDither)
2181
             c->swScale= rgb2rgbWrapper;
2182

    
2183
        /* LQ converters if -sws 0 or -sws 4*/
2184
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2185
            /* rgb/bgr -> rgb/bgr (dither needed forms) */
2186
            if ( (isBGR(srcFormat) || isRGB(srcFormat))
2187
              && (isBGR(dstFormat) || isRGB(dstFormat))
2188
              && needsDither)
2189
                c->swScale= rgb2rgbWrapper;
2190

    
2191
            /* yv12_to_yuy2 */
2192
            if (srcFormat == PIX_FMT_YUV420P &&
2193
                (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422))
2194
            {
2195
                if (dstFormat == PIX_FMT_YUYV422)
2196
                    c->swScale= PlanarToYuy2Wrapper;
2197
                else
2198
                    c->swScale= PlanarToUyvyWrapper;
2199
            }
2200
        }
2201

    
2202
#ifdef COMPILE_ALTIVEC
2203
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2204
            ((srcFormat == PIX_FMT_YUV420P &&
2205
             (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) {
2206
          // unscaled YV12 -> packed YUV, we want speed
2207
          if (dstFormat == PIX_FMT_YUYV422)
2208
              c->swScale= yv12toyuy2_unscaled_altivec;
2209
          else
2210
              c->swScale= yv12touyvy_unscaled_altivec;
2211
        }
2212
#endif
2213

    
2214
        /* simple copy */
2215
        if (  srcFormat == dstFormat
2216
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2217
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
2218
        {
2219
            c->swScale= simpleCopy;
2220
        }
2221

    
2222
        /* gray16{le,be} conversions */
2223
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2224
        {
2225
            c->swScale= gray16togray;
2226
        }
2227
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2228
        {
2229
            c->swScale= graytogray16;
2230
        }
2231
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2232
        {
2233
            c->swScale= gray16swap;
2234
        }
2235

    
2236
#ifdef ARCH_BFIN
2237
        if (flags & SWS_CPU_CAPS_BFIN)
2238
            ff_bfin_get_unscaled_swscale (c);
2239
#endif
2240

    
2241
        if (c->swScale){
2242
            if (flags&SWS_PRINT_INFO)
2243
                av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n",
2244
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2245
            return c;
2246
        }
2247
    }
2248

    
2249
    if (flags & SWS_CPU_CAPS_MMX2)
2250
    {
2251
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2252
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2253
        {
2254
            if (flags&SWS_PRINT_INFO)
2255
                av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2256
        }
2257
        if (usesHFilter) c->canMMX2BeUsed=0;
2258
    }
2259
    else
2260
        c->canMMX2BeUsed=0;
2261

    
2262
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2263
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2264

    
2265
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2266
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2267
    // n-2 is the last chrominance sample available
2268
    // this is not perfect, but no one should notice the difference, the more correct variant
2269
    // would be like the vertical one, but that would require some special code for the
2270
    // first and last pixel
2271
    if (flags&SWS_FAST_BILINEAR)
2272
    {
2273
        if (c->canMMX2BeUsed)
2274
        {
2275
            c->lumXInc+= 20;
2276
            c->chrXInc+= 20;
2277
        }
2278
        //we don't use the x86asm scaler if mmx is available
2279
        else if (flags & SWS_CPU_CAPS_MMX)
2280
        {
2281
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2282
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2283
        }
2284
    }
2285

    
2286
    /* precalculate horizontal scaler filter coefficients */
2287
    {
2288
        const int filterAlign=
2289
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2290
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2291
            1;
2292

    
2293
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2294
                   srcW      ,       dstW, filterAlign, 1<<14,
2295
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2296
                   srcFilter->lumH, dstFilter->lumH, c->param);
2297
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2298
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2299
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2300
                   srcFilter->chrH, dstFilter->chrH, c->param);
2301

    
2302
#define MAX_FUNNY_CODE_SIZE 10000
2303
#if defined(COMPILE_MMX2)
2304
// can't downscale !!!
2305
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2306
        {
2307
#ifdef MAP_ANONYMOUS
2308
            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2309
            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2310
#else
2311
            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2312
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2313
#endif
2314

    
2315
            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2316
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2317
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2318
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2319

    
2320
            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2321
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2322
        }
2323
#endif /* defined(COMPILE_MMX2) */
2324
    } // Init Horizontal stuff
2325

    
2326

    
2327

    
2328
    /* precalculate vertical scaler filter coefficients */
2329
    {
2330
        const int filterAlign=
2331
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2332
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2333
            1;
2334

    
2335
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2336
                   srcH      ,        dstH, filterAlign, (1<<12)-4,
2337
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2338
                   srcFilter->lumV, dstFilter->lumV, c->param);
2339
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2340
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
2341
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2342
                   srcFilter->chrV, dstFilter->chrV, c->param);
2343

    
2344
#ifdef HAVE_ALTIVEC
2345
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2346
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2347

    
2348
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2349
            int j;
2350
            short *p = (short *)&c->vYCoeffsBank[i];
2351
            for (j=0;j<8;j++)
2352
                p[j] = c->vLumFilter[i];
2353
        }
2354

    
2355
        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2356
            int j;
2357
            short *p = (short *)&c->vCCoeffsBank[i];
2358
            for (j=0;j<8;j++)
2359
                p[j] = c->vChrFilter[i];
2360
        }
2361
#endif
2362
    }
2363

    
2364
    // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2365
    c->vLumBufSize= c->vLumFilterSize;
2366
    c->vChrBufSize= c->vChrFilterSize;
2367
    for (i=0; i<dstH; i++)
2368
    {
2369
        int chrI= i*c->chrDstH / dstH;
2370
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2371
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2372

    
2373
        nextSlice>>= c->chrSrcVSubSample;
2374
        nextSlice<<= c->chrSrcVSubSample;
2375
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2376
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
2377
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2378
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2379
    }
2380

    
2381
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2382
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2383
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2384
    //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2385
    /* align at 16 bytes for AltiVec */
2386
    for (i=0; i<c->vLumBufSize; i++)
2387
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000);
2388
    for (i=0; i<c->vChrBufSize; i++)
2389
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);
2390

    
2391
    //try to avoid drawing green stuff between the right end and the stride end
2392
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2393

    
2394
    ASSERT(c->chrDstH <= dstH)
2395

    
2396
    if (flags&SWS_PRINT_INFO)
2397
    {
2398
#ifdef DITHER1XBPP
2399
        char *dither= " dithered";
2400
#else
2401
        char *dither= "";
2402
#endif
2403
        if (flags&SWS_FAST_BILINEAR)
2404
            av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, ");
2405
        else if (flags&SWS_BILINEAR)
2406
            av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, ");
2407
        else if (flags&SWS_BICUBIC)
2408
            av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, ");
2409
        else if (flags&SWS_X)
2410
            av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, ");
2411
        else if (flags&SWS_POINT)
2412
            av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, ");
2413
        else if (flags&SWS_AREA)
2414
            av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, ");
2415
        else if (flags&SWS_BICUBLIN)
2416
            av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2417
        else if (flags&SWS_GAUSS)
2418
            av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, ");
2419
        else if (flags&SWS_SINC)
2420
            av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, ");
2421
        else if (flags&SWS_LANCZOS)
2422
            av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, ");
2423
        else if (flags&SWS_SPLINE)
2424
            av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, ");
2425
        else
2426
            av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! ");
2427

    
2428
        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2429
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2430
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2431
        else
2432
            av_log(c, AV_LOG_INFO, "from %s to %s ",
2433
                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2434

    
2435
        if (flags & SWS_CPU_CAPS_MMX2)
2436
            av_log(c, AV_LOG_INFO, "using MMX2\n");
2437
        else if (flags & SWS_CPU_CAPS_3DNOW)
2438
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2439
        else if (flags & SWS_CPU_CAPS_MMX)
2440
            av_log(c, AV_LOG_INFO, "using MMX\n");
2441
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2442
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2443
        else
2444
            av_log(c, AV_LOG_INFO, "using C\n");
2445
    }
2446

    
2447
    if (flags & SWS_PRINT_INFO)
2448
    {
2449
        if (flags & SWS_CPU_CAPS_MMX)
2450
        {
2451
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2452
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2453
            else
2454
            {
2455
                if (c->hLumFilterSize==4)
2456
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2457
                else if (c->hLumFilterSize==8)
2458
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2459
                else
2460
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2461

    
2462
                if (c->hChrFilterSize==4)
2463
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2464
                else if (c->hChrFilterSize==8)
2465
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2466
                else
2467
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2468
            }
2469
        }
2470
        else
2471
        {
2472
#if defined(ARCH_X86)
2473
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n");
2474
#else
2475
            if (flags & SWS_FAST_BILINEAR)
2476
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2477
            else
2478
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n");
2479
#endif
2480
        }
2481
        if (isPlanarYUV(dstFormat))
2482
        {
2483
            if (c->vLumFilterSize==1)
2484
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2485
            else
2486
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2487
        }
2488
        else
2489
        {
2490
            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2491
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2492
                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2493
            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2494
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2495
            else
2496
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2497
        }
2498

    
2499
        if (dstFormat==PIX_FMT_BGR24)
2500
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n",
2501
                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2502
        else if (dstFormat==PIX_FMT_RGB32)
2503
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2504
        else if (dstFormat==PIX_FMT_BGR565)
2505
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2506
        else if (dstFormat==PIX_FMT_BGR555)
2507
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2508

    
2509
        av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2510
    }
2511
    if (flags & SWS_PRINT_INFO)
2512
    {
2513
        av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2514
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2515
        av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2516
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2517
    }
2518

    
2519
    c->swScale= getSwsFunc(flags);
2520
    return c;
2521
}
2522

    
2523
/**
2524
 * swscale wrapper, so we don't need to export the SwsContext.
2525
 * assumes planar YUV to be in YUV order instead of YVU
2526
 */
2527
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2528
              int srcSliceH, uint8_t* dst[], int dstStride[]){
2529
    int i;
2530
    uint8_t* src2[4]= {src[0], src[1], src[2]};
2531
    uint32_t pal[256];
2532
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2533
        av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n");
2534
        return 0;
2535
    }
2536
    if (c->sliceDir == 0) {
2537
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2538
    }
2539

    
2540
    if (c->srcFormat == PIX_FMT_PAL8){
2541
        for (i=0; i<256; i++){
2542
            int p= ((uint32_t*)(src[1]))[i];
2543
            int r= (p>>16)&0xFF;
2544
            int g= (p>> 8)&0xFF;
2545
            int b=  p     &0xFF;
2546
            int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 );
2547
            int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128);
2548
            int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128);
2549
            pal[i]= y + (u<<8) + (v<<16);
2550
        }
2551
        src2[1]= pal;
2552
    }
2553

    
2554
    // copy strides, so they can safely be modified
2555
    if (c->sliceDir == 1) {
2556
        // slices go from top to bottom
2557
        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2558
        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2559
        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2560
    } else {
2561
        // slices go from bottom to top => we flip the image internally
2562
        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2563
                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2564
                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2565
        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2566
        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2567

    
2568
        src2[0] += (srcSliceH-1)*srcStride[0];
2569
        if (c->srcFormat != PIX_FMT_PAL8)
2570
            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2571
        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2572

    
2573
        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2574
    }
2575
}
2576

    
2577
/**
2578
 * swscale wrapper, so we don't need to export the SwsContext
2579
 */
2580
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2581
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2582
    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2583
}
2584

    
2585
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2586
                                float lumaSharpen, float chromaSharpen,
2587
                                float chromaHShift, float chromaVShift,
2588
                                int verbose)
2589
{
2590
    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2591

    
2592
    if (lumaGBlur!=0.0){
2593
        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2594
        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2595
    }else{
2596
        filter->lumH= sws_getIdentityVec();
2597
        filter->lumV= sws_getIdentityVec();
2598
    }
2599

    
2600
    if (chromaGBlur!=0.0){
2601
        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2602
        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2603
    }else{
2604
        filter->chrH= sws_getIdentityVec();
2605
        filter->chrV= sws_getIdentityVec();
2606
    }
2607

    
2608
    if (chromaSharpen!=0.0){
2609
        SwsVector *id= sws_getIdentityVec();
2610
        sws_scaleVec(filter->chrH, -chromaSharpen);
2611
        sws_scaleVec(filter->chrV, -chromaSharpen);
2612
        sws_addVec(filter->chrH, id);
2613
        sws_addVec(filter->chrV, id);
2614
        sws_freeVec(id);
2615
    }
2616

    
2617
    if (lumaSharpen!=0.0){
2618
        SwsVector *id= sws_getIdentityVec();
2619
        sws_scaleVec(filter->lumH, -lumaSharpen);
2620
        sws_scaleVec(filter->lumV, -lumaSharpen);
2621
        sws_addVec(filter->lumH, id);
2622
        sws_addVec(filter->lumV, id);
2623
        sws_freeVec(id);
2624
    }
2625

    
2626
    if (chromaHShift != 0.0)
2627
        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2628

    
2629
    if (chromaVShift != 0.0)
2630
        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2631

    
2632
    sws_normalizeVec(filter->chrH, 1.0);
2633
    sws_normalizeVec(filter->chrV, 1.0);
2634
    sws_normalizeVec(filter->lumH, 1.0);
2635
    sws_normalizeVec(filter->lumV, 1.0);
2636

    
2637
    if (verbose) sws_printVec(filter->chrH);
2638
    if (verbose) sws_printVec(filter->lumH);
2639

    
2640
    return filter;
2641
}
2642

    
2643
/**
2644
 * returns a normalized gaussian curve used to filter stuff
2645
 * quality=3 is high quality, lowwer is lowwer quality
2646
 */
2647
SwsVector *sws_getGaussianVec(double variance, double quality){
2648
    const int length= (int)(variance*quality + 0.5) | 1;
2649
    int i;
2650
    double *coeff= av_malloc(length*sizeof(double));
2651
    double middle= (length-1)*0.5;
2652
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2653

    
2654
    vec->coeff= coeff;
2655
    vec->length= length;
2656

    
2657
    for (i=0; i<length; i++)
2658
    {
2659
        double dist= i-middle;
2660
        coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
2661
    }
2662

    
2663
    sws_normalizeVec(vec, 1.0);
2664

    
2665
    return vec;
2666
}
2667

    
2668
SwsVector *sws_getConstVec(double c, int length){
2669
    int i;
2670
    double *coeff= av_malloc(length*sizeof(double));
2671
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2672

    
2673
    vec->coeff= coeff;
2674
    vec->length= length;
2675

    
2676
    for (i=0; i<length; i++)
2677
        coeff[i]= c;
2678

    
2679
    return vec;
2680
}
2681

    
2682

    
2683
/**
 * Returns a one-element vector whose single coefficient is 1.0,
 * as built by sws_getConstVec(1.0, 1).
 */
SwsVector *sws_getIdentityVec(void)
{
    SwsVector *identity = sws_getConstVec(1.0, 1);

    return identity;
}
2686

    
2687
/**
 * Returns the sum of all coefficients of a, accumulated from index 0 upwards.
 */
double sws_dcVec(SwsVector *a)
{
    double total = 0;
    int idx = 0;

    while (idx < a->length) {
        total += a->coeff[idx];
        idx++;
    }

    return total;
}
2696

    
2697
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar)
{
    int idx = 0;

    while (idx < a->length) {
        a->coeff[idx] *= scalar;
        idx++;
    }
}
2703

    
2704
/**
 * Rescales a in place so that its coefficients sum to height.
 * The scale factor is height divided by the current coefficient sum.
 */
void sws_normalizeVec(SwsVector *a, double height)
{
    double factor = height / sws_dcVec(a);

    sws_scaleVec(a, factor);
}
2707

    
2708
/**
 * Returns a new vector holding the discrete convolution of a and b.
 * The result has a->length + b->length - 1 coefficients.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
    int length= a->length + b->length - 1;
    double *coeff= av_malloc(length*sizeof(double));
    int i, j;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    // a NULL coefficient buffer is only an error when elements were requested
    if (!vec || (!coeff && length > 0)){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++)
    {
        for (j=0; j<b->length; j++)
        {
            coeff[i+j]+= a->coeff[i]*b->coeff[j];
        }
    }

    return vec;
}
2729

    
2730
/**
 * Returns a new vector holding a + b.
 * The result is as long as the longer input; each input is added at the
 * offset (length-1)/2 - (own length-1)/2, which lines up the middle elements.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    // a NULL coefficient buffer is only an error when elements were requested
    if (!vec || (!coeff && length > 0)){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

    return vec;
}
2746

    
2747
/**
 * Returns a new vector holding a - b.
 * The result is as long as the longer input; each input is applied at the
 * offset (length-1)/2 - (own length-1)/2, which lines up the middle elements.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    // a NULL coefficient buffer is only an error when elements were requested
    if (!vec || (!coeff && length > 0)){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

    return vec;
}
2763

    
2764
/* shift left / or right if "shift" is negative */
2765
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2766
    int length= a->length + FFABS(shift)*2;
2767
    double *coeff= av_malloc(length*sizeof(double));
2768
    int i;
2769
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2770

    
2771
    vec->coeff= coeff;
2772
    vec->length= length;
2773

    
2774
    for (i=0; i<length; i++) coeff[i]= 0.0;
2775

    
2776
    for (i=0; i<a->length; i++)
2777
    {
2778
        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2779
    }
2780

    
2781
    return vec;
2782
}
2783

    
2784
/**
 * Shifts a in place by "shift" positions using sws_getShiftedVec().
 * The old coefficient buffer is freed and replaced by the shifted one.
 * If no shifted vector could be obtained, a is left unchanged.
 */
void sws_shiftVec(SwsVector *a, int shift){
    SwsVector *shifted= sws_getShiftedVec(a, shift);

    if (!shifted)
        return;

    av_free(a->coeff);
    a->coeff= shifted->coeff;
    a->length= shifted->length;
    // only the container is released; its buffer now belongs to a
    av_free(shifted);
}
2791

    
2792
/**
 * Replaces a by a + b, using sws_sumVec().
 * The old coefficient buffer of a is freed; b is not modified.
 * If no sum vector could be obtained, a is left unchanged.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
    SwsVector *sum= sws_sumVec(a, b);

    if (!sum)
        return;

    av_free(a->coeff);
    a->coeff= sum->coeff;
    a->length= sum->length;
    // only the container is released; its buffer now belongs to a
    av_free(sum);
}
2799

    
2800
/**
 * Replaces a by a - b, using sws_diffVec().
 * The old coefficient buffer of a is freed; b is not modified.
 * If no difference vector could be obtained, a is left unchanged.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
    SwsVector *diff= sws_diffVec(a, b);

    if (!diff)
        return;

    av_free(a->coeff);
    a->coeff= diff->coeff;
    a->length= diff->length;
    // only the container is released; its buffer now belongs to a
    av_free(diff);
}
2807

    
2808
/**
 * Replaces a by the convolution of a and b, using sws_getConvVec().
 * The old coefficient buffer of a is freed; b is not modified.
 * If no convolved vector could be obtained, a is left unchanged.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
    SwsVector *conv= sws_getConvVec(a, b);

    if (!conv)
        return;

    av_free(a->coeff);
    a->coeff= conv->coeff;
    a->length= conv->length;
    // only the container is released; its buffer now belongs to a
    av_free(conv);
}
2815

    
2816
/**
 * Returns a newly allocated copy of a, with its own coefficient buffer.
 *
 * @return the copy (release with sws_freeVec()), or NULL if an allocation
 *         failed
 */
SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    // a NULL coefficient buffer is only an error when elements were requested
    if (!vec || (!coeff && a->length > 0)){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}
2828

    
2829
/**
 * Logs a at AV_LOG_DEBUG level, one coefficient per line.
 * Each line shows the value followed by a '|' marker, indented by 0..60
 * spaces according to where the value lies between min and max. Both min
 * and max start at 0, so the plotted range always includes zero.
 */
void sws_printVec(SwsVector *a){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        // range is 0 when no coefficient moved min or max away from 0;
        // dividing by it would give NaN and an undefined conversion to int
        int x= range != 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
        av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
        av_log(NULL, AV_LOG_DEBUG, "|\n");
    }
}
2851

    
2852
/**
 * Releases a vector and its coefficient buffer. A NULL vector is ignored.
 */
void sws_freeVec(SwsVector *a)
{
    if (a) {
        av_free(a->coeff);
        a->coeff  = NULL;
        a->length = 0;
        av_free(a);
    }
}
2859

    
2860
/**
 * Releases a filter together with its four vectors.
 * A NULL filter is ignored, as are NULL vector slots.
 */
void sws_freeFilter(SwsFilter *filter)
{
    if (filter) {
        /* sws_freeVec() returns immediately for NULL, so no guards needed */
        sws_freeVec(filter->lumH);
        sws_freeVec(filter->lumV);
        sws_freeVec(filter->chrH);
        sws_freeVec(filter->chrV);
        av_free(filter);
    }
}
2869

    
2870

    
2871
/**
 * Releases a scaling context and every buffer it owns.
 * Each pointer is reset to NULL after it is freed; a NULL context is ignored.
 */
void sws_freeContext(SwsContext *c)
{
    int line;

    if (c == NULL)
        return;

    /* per-line luma buffers, then the table that holds them */
    if (c->lumPixBuf != NULL) {
        for (line = 0; line < c->vLumBufSize; line++) {
            av_free(c->lumPixBuf[line]);
            c->lumPixBuf[line] = NULL;
        }
        av_free(c->lumPixBuf);
        c->lumPixBuf = NULL;
    }

    /* per-line chroma buffers, then the table that holds them */
    if (c->chrPixBuf != NULL) {
        for (line = 0; line < c->vChrBufSize; line++) {
            av_free(c->chrPixBuf[line]);
            c->chrPixBuf[line] = NULL;
        }
        av_free(c->chrPixBuf);
        c->chrPixBuf = NULL;
    }

    /* filter coefficient tables */
    av_free(c->vLumFilter);
    c->vLumFilter = NULL;
    av_free(c->vChrFilter);
    c->vChrFilter = NULL;
    av_free(c->hLumFilter);
    c->hLumFilter = NULL;
    av_free(c->hChrFilter);
    c->hChrFilter = NULL;
#ifdef HAVE_ALTIVEC
    av_free(c->vYCoeffsBank);
    c->vYCoeffsBank = NULL;
    av_free(c->vCCoeffsBank);
    c->vCCoeffsBank = NULL;
#endif

    /* filter position tables */
    av_free(c->vLumFilterPos);
    c->vLumFilterPos = NULL;
    av_free(c->vChrFilterPos);
    c->vChrFilterPos = NULL;
    av_free(c->hLumFilterPos);
    c->hLumFilterPos = NULL;
    av_free(c->hChrFilterPos);
    c->hChrFilterPos = NULL;

#if defined(ARCH_X86) && defined(CONFIG_GPL)
#ifdef MAP_ANONYMOUS
    /* released with munmap() in this configuration, av_free() otherwise */
    if (c->funnyYCode)
        munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
    if (c->funnyUVCode)
        munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#else
    av_free(c->funnyYCode);
    av_free(c->funnyUVCode);
#endif
    c->funnyYCode  = NULL;
    c->funnyUVCode = NULL;
#endif /* defined(ARCH_X86) */

    av_free(c->lumMmx2Filter);
    c->lumMmx2Filter = NULL;
    av_free(c->chrMmx2Filter);
    c->chrMmx2Filter = NULL;
    av_free(c->lumMmx2FilterPos);
    c->lumMmx2FilterPos = NULL;
    av_free(c->chrMmx2FilterPos);
    c->chrMmx2FilterPos = NULL;
    av_free(c->yuvTable);
    c->yuvTable = NULL;

    av_free(c);
}
2946

    
2947
/**
2948
 * Checks if context is valid or reallocs a new one instead.
2949
 * If context is NULL, just calls sws_getContext() to get a new one.
2950
 * Otherwise, checks if the parameters are the same already saved in context.
2951
 * If that is the case, returns the current context.
2952
 * Otherwise, frees context and gets a new one.
2953
 *
2954
 * Be warned that srcFilter, dstFilter are not checked, they are
2955
 * asumed to remain valid.
2956
 */
2957
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
2958
                                        int srcW, int srcH, int srcFormat,
2959
                                        int dstW, int dstH, int dstFormat, int flags,
2960
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
2961
{
2962
    static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
2963

    
2964
    if (!param)
2965
        param = default_param;
2966

    
2967
    if (context) {
2968
        if (context->srcW != srcW || context->srcH != srcH ||
2969
            context->srcFormat != srcFormat ||
2970
            context->dstW != dstW || context->dstH != dstH ||
2971
            context->dstFormat != dstFormat || context->flags != flags ||
2972
            context->param[0] != param[0] || context->param[1] != param[1])
2973
        {
2974
            sws_freeContext(context);
2975
            context = NULL;
2976
        }
2977
    }
2978
    if (!context) {
2979
        return sws_getContext(srcW, srcH, srcFormat,
2980
                              dstW, dstH, dstFormat, flags,
2981
                              srcFilter, dstFilter, param);
2982
    }
2983
    return context;
2984
}
2985