Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ 92db6235

History | View | Annotate | Download (123 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22
 */
23

    
24
/*
25
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27
  {BGR,RGB}{1,4,8,15,16} support dithering
28

29
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37
  BGR15 -> BGR16
38
*/
39

    
40
/*
41
tested special converters (most are tested actually, but I did not write it down ...)
42
 YV12 -> BGR16
43
 YV12 -> YV12
44
 BGR15 -> BGR16
45
 BGR16 -> BGR16
46
 YVU9 -> YV12
47

48
untested special converters
49
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54
  BGR24 -> YV12
55
*/
56

    
57
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58
#include <inttypes.h>
59
#include <string.h>
60
#include <math.h>
61
#include <stdio.h>
62
#include <unistd.h>
63
#include "config.h"
64
#include <assert.h>
65
#if HAVE_SYS_MMAN_H
66
#include <sys/mman.h>
67
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68
#define MAP_ANONYMOUS MAP_ANON
69
#endif
70
#endif
71
#if HAVE_VIRTUALALLOC
72
#define WIN32_LEAN_AND_MEAN
73
#include <windows.h>
74
#endif
75
#include "swscale.h"
76
#include "swscale_internal.h"
77
#include "rgb2rgb.h"
78
#include "libavutil/x86_cpu.h"
79
#include "libavutil/bswap.h"
80

    
81
unsigned swscale_version(void)
82
{
83
    return LIBSWSCALE_VERSION_INT;
84
}
85

    
86
#undef MOVNTQ
87
#undef PAVGB
88

    
89
//#undef HAVE_MMX2
90
//#define HAVE_AMD3DNOW
91
//#undef HAVE_MMX
92
//#undef ARCH_X86
93
//#define WORDS_BIGENDIAN
94
#define DITHER1XBPP
95

    
96
#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
97

    
98
#define RET 0xC3 //near return opcode for x86
99

    
100
#ifdef M_PI
101
#define PI M_PI
102
#else
103
#define PI 3.14159265358979323846
104
#endif
105

    
106
/*
 * Non-zero when pixel format x is one of the formats listed below, i.e. the
 * formats this file accepts as scaler input.
 * x is evaluated once per listed format, so it must not have side effects.
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420PLE   \
        || (x)==PIX_FMT_YUV422PLE   \
        || (x)==PIX_FMT_YUV444PLE   \
        || (x)==PIX_FMT_YUV420PBE   \
        || (x)==PIX_FMT_YUV422PBE   \
        || (x)==PIX_FMT_YUV444PBE   \
    )
146
/*
 * Non-zero when pixel format x is one of the formats listed below, or when
 * isRGB(x) / isBGR(x) is true; these are the formats accepted as scaler output.
 * x is evaluated repeatedly, so it must not have side effects.
 */
#define isSupportedOut(x)   (                                   \
           (x)==PIX_FMT_YUV420P   || (x)==PIX_FMT_YUVA420P      \
        || (x)==PIX_FMT_YUYV422   || (x)==PIX_FMT_UYVY422       \
        || (x)==PIX_FMT_YUV444P   || (x)==PIX_FMT_YUV422P       \
        || (x)==PIX_FMT_YUV411P                                 \
        || isRGB(x)               || isBGR(x)                   \
        || (x)==PIX_FMT_NV12      || (x)==PIX_FMT_NV21          \
        || (x)==PIX_FMT_GRAY16BE  || (x)==PIX_FMT_GRAY16LE      \
        || (x)==PIX_FMT_GRAY8                                   \
        || (x)==PIX_FMT_YUV410P   || (x)==PIX_FMT_YUV440P       \
        || (x)==PIX_FMT_YUV420PLE || (x)==PIX_FMT_YUV422PLE     \
        || (x)==PIX_FMT_YUV444PLE                               \
        || (x)==PIX_FMT_YUV420PBE || (x)==PIX_FMT_YUV422PBE     \
        || (x)==PIX_FMT_YUV444PBE                               \
    )
170
/*
 * Non-zero for PAL8, YUYV422, UYVY422 and every format for which isRGB(x)
 * or isBGR(x) is true. x is evaluated repeatedly.
 */
#define isPacked(x)         (                                   \
           (x)==PIX_FMT_PAL8                                    \
        || (x)==PIX_FMT_YUYV422   || (x)==PIX_FMT_UYVY422       \
        || isRGB(x)               || isBGR(x)                   \
    )
177
/*
 * Non-zero for PAL8 and the 4-bit-per-byte / 8-bit RGB and BGR formats
 * listed below. x is evaluated repeatedly.
 */
#define usePal(x)           (                                   \
           (x)==PIX_FMT_PAL8                                    \
        || (x)==PIX_FMT_BGR4_BYTE || (x)==PIX_FMT_RGB4_BYTE     \
        || (x)==PIX_FMT_BGR8      || (x)==PIX_FMT_RGB8          \
    )
184

    
185
#define RGB2YUV_SHIFT 15
186
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
187
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
188
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
189
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
190
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
191
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
192
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
193
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
194
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
195

    
196
extern const int32_t ff_yuv2rgb_coeffs[8][4];
197

    
198
/*
 * RGB -> YUV conversion coefficients: eight rows of nine values each.
 * NOTE(review): the row count matches ff_yuv2rgb_coeffs[8][4], so rows are
 * presumably indexed by the same colorspace id -- confirm against the users
 * of this table.
 *
 * The SMPTE 240M row previously read "0.5  -0.116" (missing comma), which
 * collapsed two coefficients into 0.384, shifted the remaining values one
 * slot to the left and left the last element zero-initialized.
 */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
208

    
209
/*
210
NOTES
211
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
212

213
TODO
214
more intelligent misalignment avoidance for the horizontal scaler
215
write special vertical cubic upscale version
216
optimize C code (YV12 / minmax)
217
add support for packed pixel YUV input & output
218
add support for Y8 output
219
optimize BGR24 & BGR32
220
add BGR4 output support
221
write special BGR->BGR scaler
222
*/
223

    
224
#if ARCH_X86 && CONFIG_GPL
225
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
226
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
227
DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
228
DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
229
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
230
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
231
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
232
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
233

    
234
const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
235
        0x0103010301030103LL,
236
        0x0200020002000200LL,};
237

    
238
const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
239
        0x0602060206020602LL,
240
        0x0004000400040004LL,};
241

    
242
DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
243
DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
244
DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
245
DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
246
DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
247
DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
248

    
249
DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
250
DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
251
DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
252

    
253
#ifdef FAST_BGR2YV12
254
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
255
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
256
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
257
#else
258
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
259
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
260
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
261
#endif /* FAST_BGR2YV12 */
262
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
263
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
264
DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
265

    
266
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
267
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
268
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
269
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
270
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
271

    
272
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
273
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
274
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
275
};
276

    
277
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
278

    
279
#endif /* ARCH_X86 && CONFIG_GPL */
280

    
281
// clipping helper table for C implementations:
282
static unsigned char clip_table[768];
283

    
284
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
285

    
286
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4[2][8])={
287
{  1,   3,   1,   3,   1,   3,   1,   3, },
288
{  2,   0,   2,   0,   2,   0,   2,   0, },
289
};
290

    
291
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8[2][8])={
292
{  6,   2,   6,   2,   6,   2,   6,   2, },
293
{  0,   4,   0,   4,   0,   4,   0,   4, },
294
};
295

    
296
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32[8][8])={
297
{ 17,   9,  23,  15,  16,   8,  22,  14, },
298
{  5,  29,   3,  27,   4,  28,   2,  26, },
299
{ 21,  13,  19,  11,  20,  12,  18,  10, },
300
{  0,  24,   6,  30,   1,  25,   7,  31, },
301
{ 16,   8,  22,  14,  17,   9,  23,  15, },
302
{  4,  28,   2,  26,   5,  29,   3,  27, },
303
{ 20,  12,  18,  10,  21,  13,  19,  11, },
304
{  1,  25,   7,  31,   0,  24,   6,  30, },
305
};
306

    
307
#if 0
308
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_64[8][8])={
309
{  0,  48,  12,  60,   3,  51,  15,  63, },
310
{ 32,  16,  44,  28,  35,  19,  47,  31, },
311
{  8,  56,   4,  52,  11,  59,   7,  55, },
312
{ 40,  24,  36,  20,  43,  27,  39,  23, },
313
{  2,  50,  14,  62,   1,  49,  13,  61, },
314
{ 34,  18,  46,  30,  33,  17,  45,  29, },
315
{ 10,  58,   6,  54,   9,  57,   5,  53, },
316
{ 42,  26,  38,  22,  41,  25,  37,  21, },
317
};
318
#endif
319

    
320
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73[8][8])={
321
{  0,  55,  14,  68,   3,  58,  17,  72, },
322
{ 37,  18,  50,  32,  40,  22,  54,  35, },
323
{  9,  64,   5,  59,  13,  67,   8,  63, },
324
{ 46,  27,  41,  23,  49,  31,  44,  26, },
325
{  2,  57,  16,  71,   1,  56,  15,  70, },
326
{ 39,  21,  52,  34,  38,  19,  51,  33, },
327
{ 11,  66,   7,  62,  10,  65,   6,  60, },
328
{ 48,  30,  43,  25,  47,  29,  42,  24, },
329
};
330

    
331
#if 0
332
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128[8][8])={
333
{ 68,  36,  92,  60,  66,  34,  90,  58, },
334
{ 20, 116,  12, 108,  18, 114,  10, 106, },
335
{ 84,  52,  76,  44,  82,  50,  74,  42, },
336
{  0,  96,  24, 120,   6, 102,  30, 126, },
337
{ 64,  32,  88,  56,  70,  38,  94,  62, },
338
{ 16, 112,   8, 104,  22, 118,  14, 110, },
339
{ 80,  48,  72,  40,  86,  54,  78,  46, },
340
{  4, 100,  28, 124,   2,  98,  26, 122, },
341
};
342
#endif
343

    
344
#if 1
345
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
346
{117,  62, 158, 103, 113,  58, 155, 100, },
347
{ 34, 199,  21, 186,  31, 196,  17, 182, },
348
{144,  89, 131,  76, 141,  86, 127,  72, },
349
{  0, 165,  41, 206,  10, 175,  52, 217, },
350
{110,  55, 151,  96, 120,  65, 162, 107, },
351
{ 28, 193,  14, 179,  38, 203,  24, 189, },
352
{138,  83, 124,  69, 148,  93, 134,  79, },
353
{  7, 172,  48, 213,   3, 168,  45, 210, },
354
};
355
#elif 1
356
// tries to correct a gamma of 1.5
357
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
358
{  0, 143,  18, 200,   2, 156,  25, 215, },
359
{ 78,  28, 125,  64,  89,  36, 138,  74, },
360
{ 10, 180,   3, 161,  16, 195,   8, 175, },
361
{109,  51,  93,  38, 121,  60, 105,  47, },
362
{  1, 152,  23, 210,   0, 147,  20, 205, },
363
{ 85,  33, 134,  71,  81,  30, 130,  67, },
364
{ 14, 190,   6, 171,  12, 185,   5, 166, },
365
{117,  57, 101,  44, 113,  54,  97,  41, },
366
};
367
#elif 1
368
// tries to correct a gamma of 2.0
369
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
370
{  0, 124,   8, 193,   0, 140,  12, 213, },
371
{ 55,  14, 104,  42,  66,  19, 119,  52, },
372
{  3, 168,   1, 145,   6, 187,   3, 162, },
373
{ 86,  31,  70,  21,  99,  39,  82,  28, },
374
{  0, 134,  11, 206,   0, 129,   9, 200, },
375
{ 62,  17, 114,  48,  58,  16, 109,  45, },
376
{  5, 181,   2, 157,   4, 175,   1, 151, },
377
{ 95,  36,  78,  26,  90,  34,  74,  24, },
378
};
379
#else
380
// tries to correct a gamma of 2.5
381
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
382
{  0, 107,   3, 187,   0, 125,   6, 212, },
383
{ 39,   7,  86,  28,  49,  11, 102,  36, },
384
{  1, 158,   0, 131,   3, 180,   1, 151, },
385
{ 68,  19,  52,  12,  81,  25,  64,  17, },
386
{  0, 119,   5, 203,   0, 113,   4, 195, },
387
{ 45,   9,  96,  33,  42,   8,  91,  30, },
388
{  2, 172,   1, 144,   2, 165,   0, 137, },
389
{ 77,  23,  60,  15,  72,  21,  56,  14, },
390
};
391
#endif
392

    
393
const char *sws_format_name(enum PixelFormat format)
394
{
395
    switch (format) {
396
        case PIX_FMT_YUV420P:
397
            return "yuv420p";
398
        case PIX_FMT_YUVA420P:
399
            return "yuva420p";
400
        case PIX_FMT_YUYV422:
401
            return "yuyv422";
402
        case PIX_FMT_RGB24:
403
            return "rgb24";
404
        case PIX_FMT_BGR24:
405
            return "bgr24";
406
        case PIX_FMT_YUV422P:
407
            return "yuv422p";
408
        case PIX_FMT_YUV444P:
409
            return "yuv444p";
410
        case PIX_FMT_RGB32:
411
            return "rgb32";
412
        case PIX_FMT_YUV410P:
413
            return "yuv410p";
414
        case PIX_FMT_YUV411P:
415
            return "yuv411p";
416
        case PIX_FMT_RGB565:
417
            return "rgb565";
418
        case PIX_FMT_RGB555:
419
            return "rgb555";
420
        case PIX_FMT_GRAY16BE:
421
            return "gray16be";
422
        case PIX_FMT_GRAY16LE:
423
            return "gray16le";
424
        case PIX_FMT_GRAY8:
425
            return "gray8";
426
        case PIX_FMT_MONOWHITE:
427
            return "mono white";
428
        case PIX_FMT_MONOBLACK:
429
            return "mono black";
430
        case PIX_FMT_PAL8:
431
            return "Palette";
432
        case PIX_FMT_YUVJ420P:
433
            return "yuvj420p";
434
        case PIX_FMT_YUVJ422P:
435
            return "yuvj422p";
436
        case PIX_FMT_YUVJ444P:
437
            return "yuvj444p";
438
        case PIX_FMT_XVMC_MPEG2_MC:
439
            return "xvmc_mpeg2_mc";
440
        case PIX_FMT_XVMC_MPEG2_IDCT:
441
            return "xvmc_mpeg2_idct";
442
        case PIX_FMT_UYVY422:
443
            return "uyvy422";
444
        case PIX_FMT_UYYVYY411:
445
            return "uyyvyy411";
446
        case PIX_FMT_RGB32_1:
447
            return "rgb32x";
448
        case PIX_FMT_BGR32_1:
449
            return "bgr32x";
450
        case PIX_FMT_BGR32:
451
            return "bgr32";
452
        case PIX_FMT_BGR565:
453
            return "bgr565";
454
        case PIX_FMT_BGR555:
455
            return "bgr555";
456
        case PIX_FMT_BGR8:
457
            return "bgr8";
458
        case PIX_FMT_BGR4:
459
            return "bgr4";
460
        case PIX_FMT_BGR4_BYTE:
461
            return "bgr4 byte";
462
        case PIX_FMT_RGB8:
463
            return "rgb8";
464
        case PIX_FMT_RGB4:
465
            return "rgb4";
466
        case PIX_FMT_RGB4_BYTE:
467
            return "rgb4 byte";
468
        case PIX_FMT_RGB48BE:
469
            return "rgb48be";
470
        case PIX_FMT_RGB48LE:
471
            return "rgb48le";
472
        case PIX_FMT_NV12:
473
            return "nv12";
474
        case PIX_FMT_NV21:
475
            return "nv21";
476
        case PIX_FMT_YUV440P:
477
            return "yuv440p";
478
        case PIX_FMT_VDPAU_H264:
479
            return "vdpau_h264";
480
        case PIX_FMT_VDPAU_MPEG1:
481
            return "vdpau_mpeg1";
482
        case PIX_FMT_VDPAU_MPEG2:
483
            return "vdpau_mpeg2";
484
        case PIX_FMT_VDPAU_WMV3:
485
            return "vdpau_wmv3";
486
        case PIX_FMT_VDPAU_VC1:
487
            return "vdpau_vc1";
488
        case PIX_FMT_YUV420PLE:
489
            return "yuv420ple";
490
        case PIX_FMT_YUV422PLE:
491
            return "yuv422ple";
492
        case PIX_FMT_YUV444PLE:
493
            return "yuv444ple";
494
        case PIX_FMT_YUV420PBE:
495
            return "yuv420pbe";
496
        case PIX_FMT_YUV422PBE:
497
            return "yuv422pbe";
498
        case PIX_FMT_YUV444PBE:
499
            return "yuv444pbe";
500
        default:
501
            return "Unknown format";
502
    }
503
}
504

    
505
static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
506
                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
507
                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
508
{
509
    //FIXME Optimize (just quickly written not optimized..)
510
    int i;
511
    for (i=0; i<dstW; i++)
512
    {
513
        int val=1<<18;
514
        int j;
515
        for (j=0; j<lumFilterSize; j++)
516
            val += lumSrc[j][i] * lumFilter[j];
517

    
518
        dest[i]= av_clip_uint8(val>>19);
519
    }
520

    
521
    if (uDest)
522
        for (i=0; i<chrDstW; i++)
523
        {
524
            int u=1<<18;
525
            int v=1<<18;
526
            int j;
527
            for (j=0; j<chrFilterSize; j++)
528
            {
529
                u += chrSrc[j][i] * chrFilter[j];
530
                v += chrSrc[j][i + VOFW] * chrFilter[j];
531
            }
532

    
533
            uDest[i]= av_clip_uint8(u>>19);
534
            vDest[i]= av_clip_uint8(v>>19);
535
        }
536

    
537
    if (CONFIG_SWSCALE_ALPHA && aDest)
538
        for (i=0; i<dstW; i++){
539
            int val=1<<18;
540
            int j;
541
            for (j=0; j<lumFilterSize; j++)
542
                val += alpSrc[j][i] * lumFilter[j];
543

    
544
            aDest[i]= av_clip_uint8(val>>19);
545
        }
546

    
547
}
548

    
549
static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
550
                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
551
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
552
{
553
    //FIXME Optimize (just quickly written not optimized..)
554
    int i;
555
    for (i=0; i<dstW; i++)
556
    {
557
        int val=1<<18;
558
        int j;
559
        for (j=0; j<lumFilterSize; j++)
560
            val += lumSrc[j][i] * lumFilter[j];
561

    
562
        dest[i]= av_clip_uint8(val>>19);
563
    }
564

    
565
    if (!uDest)
566
        return;
567

    
568
    if (dstFormat == PIX_FMT_NV12)
569
        for (i=0; i<chrDstW; i++)
570
        {
571
            int u=1<<18;
572
            int v=1<<18;
573
            int j;
574
            for (j=0; j<chrFilterSize; j++)
575
            {
576
                u += chrSrc[j][i] * chrFilter[j];
577
                v += chrSrc[j][i + VOFW] * chrFilter[j];
578
            }
579

    
580
            uDest[2*i]= av_clip_uint8(u>>19);
581
            uDest[2*i+1]= av_clip_uint8(v>>19);
582
        }
583
    else
584
        for (i=0; i<chrDstW; i++)
585
        {
586
            int u=1<<18;
587
            int v=1<<18;
588
            int j;
589
            for (j=0; j<chrFilterSize; j++)
590
            {
591
                u += chrSrc[j][i] * chrFilter[j];
592
                v += chrSrc[j][i + VOFW] * chrFilter[j];
593
            }
594

    
595
            uDest[2*i]= av_clip_uint8(v>>19);
596
            uDest[2*i+1]= av_clip_uint8(u>>19);
597
        }
598
}
599

    
600
/*
 * Loop head for the multi-tap vertical scaler with packed output.
 * Each iteration handles two output pixels (columns i2 and i2+1) sharing one
 * chroma sample (column i; V is read at i+VOFW of the same row). Y1, Y2, U
 * and V are accumulated from 1<<18 and shifted right by 19; no clipping is
 * done. When alpha is non-zero, A1/A2 are computed the same way from alpSrc
 * using the luma filter. r, g, b are only declared here.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
    for (i = 0; i < (dstW >> 1); i++) {\
        int j;\
        int Y1 = 1 << 18;\
        int Y2 = 1 << 18;\
        int U  = 1 << 18;\
        int V  = 1 << 18;\
        int av_unused A1, A2;\
        type av_unused *r, *b, *g;\
        const int i2 = 2 * i;\
        \
        for (j = 0; j < lumFilterSize; j++) {\
            Y1 += lumSrc[j][i2    ] * lumFilter[j];\
            Y2 += lumSrc[j][i2 + 1] * lumFilter[j];\
        }\
        for (j = 0; j < chrFilterSize; j++) {\
            U += chrSrc[j][i       ] * chrFilter[j];\
            V += chrSrc[j][i + VOFW] * chrFilter[j];\
        }\
        Y1 >>= 19;\
        Y2 >>= 19;\
        U  >>= 19;\
        V  >>= 19;\
        if (alpha) {\
            A1 = 1 << 18;\
            A2 = 1 << 18;\
            for (j = 0; j < lumFilterSize; j++) {\
                A1 += alpSrc[j][i2    ] * lumFilter[j];\
                A2 += alpSrc[j][i2 + 1] * lumFilter[j];\
            }\
            A1 >>= 19;\
            A2 >>= 19;\
        }
635

    
636
/*
 * YSCALE_YUV_2_PACKEDX_NOCLIP_C followed by clamping of Y1, Y2, U and V to
 * 0..255. Bit 8 of the OR of the four values is used as a cheap "something
 * may be out of range" test before the per-value comparisons. A1/A2 are
 * clamped with av_clip_uint8() when alpha is non-zero and bit 8 is set in
 * either of them. The for-loop body is left open.
 */
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
        if ((Y1 | Y2 | U | V) & 256) {\
            if      (Y1 > 255) Y1 = 255;\
            else if (Y1 <   0) Y1 = 0;\
            if      (Y2 > 255) Y2 = 255;\
            else if (Y2 <   0) Y2 = 0;\
            if      (U  > 255) U  = 255;\
            else if (U  <   0) U  = 0;\
            if      (V  > 255) V  = 255;\
            else if (V  <   0) V  = 0;\
        }\
        if (alpha && ((A1 | A2) & 256)) {\
            A1 = av_clip_uint8(A1);\
            A2 = av_clip_uint8(A2);\
        }
653

    
654
/*
 * Loop head for the multi-tap vertical scaler at full chroma resolution:
 * one Y, U and V per output pixel i (V is read at i+VOFW of the chroma row).
 * Y starts at 0, U and V start at -(128<<19); all three are shifted right by
 * 10 after accumulation. When alpha is non-zero, A starts at rnd, is
 * accumulated from alpSrc with the luma filter, shifted right by 19 and
 * clamped with av_clip_uint8() if bit 8 is set. R, G, B are only declared.
 * The macro opens the for-loop body; the user has to close it.
 *
 * The chroma bias is written as -(128<<19) rather than -128<<19 because
 * left-shifting a negative value is undefined behaviour in C.
 */
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
    for (i=0; i<dstW; i++){\
        int j;\
        int Y = 0;\
        int U = -(128<<19);\
        int V = -(128<<19);\
        int av_unused A;\
        int R,G,B;\
        \
        for (j=0; j<lumFilterSize; j++){\
            Y += lumSrc[j][i     ] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++){\
            U += chrSrc[j][i     ] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y >>=10;\
        U >>=10;\
        V >>=10;\
        if (alpha){\
            A = (rnd);\
            for (j=0; j<lumFilterSize; j++)\
                A += alpSrc[j][i     ] * lumFilter[j];\
            A >>=19;\
            if (A&256)\
                A = av_clip_uint8(A);\
        }
681

    
682
/*
 * YSCALE_YUV_2_PACKEDX_FULL_C followed by the YUV -> RGB matrix taken from
 * the context c. Y has yuv2rgb_y_offset subtracted, is multiplied by
 * yuv2rgb_y_coeff and gets rnd added; R, G and B are then formed from Y plus
 * the U/V terms. If either of the two top bits is set in R|G|B, each
 * component is clamped to 0 .. (256<<22)-1. The for-loop body is left open.
 */
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
        Y -= c->yuv2rgb_y_offset;\
        Y *= c->yuv2rgb_y_coeff;\
        Y += rnd;\
        R = Y + V*c->yuv2rgb_v2r_coeff;\
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
        B = Y +                          U*c->yuv2rgb_u2b_coeff;\
        if ((R | G | B) & (0xC0000000)) {\
            if      (R >= (256<<22)) R = (256<<22)-1;\
            else if (R < 0)          R = 0;\
            if      (G >= (256<<22)) G = (256<<22)-1;\
            else if (G < 0)          G = 0;\
            if      (B >= (256<<22)) B = (256<<22)-1;\
            else if (B < 0)          B = 0;\
        }
698

    
699

    
700
/*
 * Loop head for the multi-tap vertical scaler producing 16-bit gray.
 * Two luma values per iteration (columns i2 and i2+1) are accumulated from
 * 1<<18, shifted right by 11 and clamped to 0..65535 when bit 16 of the
 * combined value is set. U and V are declared and initialised but never
 * modified; 1<<18 has bit 16 clear, so they do not influence the clip test.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_GRAY16_C \
    for (i = 0; i < (dstW >> 1); i++) {\
        int j;\
        int Y1 = 1 << 18;\
        int Y2 = 1 << 18;\
        int U  = 1 << 18;\
        int V  = 1 << 18;\
        \
        const int i2 = 2 * i;\
        \
        for (j = 0; j < lumFilterSize; j++) {\
            Y1 += lumSrc[j][i2    ] * lumFilter[j];\
            Y2 += lumSrc[j][i2 + 1] * lumFilter[j];\
        }\
        Y1 >>= 11;\
        Y2 >>= 11;\
        if ((Y1 | Y2 | U | V) & 65536) {\
            if      (Y1 > 65535) Y1 = 65535;\
            else if (Y1 <     0) Y1 = 0;\
            if      (Y2 > 65535) Y2 = 65535;\
            else if (Y2 <     0) Y2 = 0;\
        }
724

    
725
/*
 * YSCALE_YUV_2_PACKEDX_C followed by the lookup of the per-pixel r/g/b
 * table pointers from the context tables, indexed by the clipped U and V.
 * The for-loop body is left open.
 */
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
    r = (type *) c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *) c->table_bU[U];
730

    
731
/*
 * Loop head for the two-row vertical blend with packed output.
 * Y1/Y2 blend buf0 and buf1 with weights yalpha1 and yalpha; U/V blend
 * uvbuf0 and uvbuf1 with uvalpha1 and uvalpha (V at offset VOFW); every
 * result is shifted right by 19. When alpha is non-zero, A1/A2 are blended
 * from abuf0/abuf1 with the luma weights. r, g, b are only declared.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = 2 * i;\
        int Y1 = (buf0[i2    ] * yalpha1 + buf1[i2    ] * yalpha) >> 19;\
        int Y2 = (buf0[i2 + 1] * yalpha1 + buf1[i2 + 1] * yalpha) >> 19;\
        int U  = (uvbuf0[i       ] * uvalpha1 + uvbuf1[i       ] * uvalpha) >> 19;\
        int V  = (uvbuf0[i + VOFW] * uvalpha1 + uvbuf1[i + VOFW] * uvalpha) >> 19;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = (abuf0[i2    ] * yalpha1 + abuf1[i2    ] * yalpha) >> 19;\
            A2 = (abuf0[i2 + 1] * yalpha1 + abuf1[i2 + 1] * yalpha) >> 19;\
        }
744

    
745
/*
 * Loop head for the two-row vertical blend producing 16-bit gray: Y1/Y2
 * blend buf0 and buf1 with weights yalpha1 and yalpha, shifted right by 11.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_GRAY16_2_C   \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = 2 * i;\
        int Y1 = (buf0[i2    ] * yalpha1 + buf1[i2    ] * yalpha) >> 11;\
        int Y2 = (buf0[i2 + 1] * yalpha1 + buf1[i2 + 1] * yalpha) >> 11;
750

    
751
/*
 * YSCALE_YUV_2_PACKED2_C followed by the lookup of the r/g/b table pointers
 * from the context tables, indexed by U and V. The for-loop body is left open.
 */
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
    r = (type *) c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *) c->table_bU[U];
756

    
757
/*
 * Loop head for the single-row (unblended) packed output path.
 * Y1/Y2 come from buf0 and U/V from uvbuf1 (V at offset VOFW), each shifted
 * right by 7. When alpha is non-zero, A1/A2 come from abuf0 the same way.
 * r, g, b are only declared.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = 2 * i;\
        int Y1 = buf0[i2    ] >> 7;\
        int Y2 = buf0[i2 + 1] >> 7;\
        int U  = (uvbuf1[i       ]) >> 7;\
        int V  = (uvbuf1[i + VOFW]) >> 7;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = abuf0[i2    ] >> 7;\
            A2 = abuf0[i2 + 1] >> 7;\
        }
770

    
771
/*
 * Loop head for the single-row 16-bit gray path: Y1/Y2 are the buf0 samples
 * at columns i2 and i2+1, shifted left by 1.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_GRAY16_1_C \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = 2 * i;\
        int Y1 = buf0[i2    ] << 1;\
        int Y2 = buf0[i2 + 1] << 1;
776

    
777
/*
 * YSCALE_YUV_2_PACKED1_C followed by the lookup of the r/g/b table pointers
 * from the context tables, indexed by U and V. The for-loop body is left open.
 */
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
    r = (type *) c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *) c->table_bU[U];
782

    
783
/*
 * Variant of YSCALE_YUV_2_PACKED1_C in which U and V are the sum of the
 * uvbuf0 and uvbuf1 samples shifted right by 8 (i.e. their average at the
 * >>7 scale used for luma). Y1/Y2 and A1/A2 are taken from buf0/abuf0
 * shifted right by 7. r, g, b are only declared.
 * The macro opens the for-loop body; the user has to close it.
 */
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = 2 * i;\
        int Y1 = buf0[i2    ] >> 7;\
        int Y2 = buf0[i2 + 1] >> 7;\
        int U  = (uvbuf0[i       ] + uvbuf1[i       ]) >> 8;\
        int V  = (uvbuf0[i + VOFW] + uvbuf1[i + VOFW]) >> 8;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = abuf0[i2    ] >> 7;\
            A2 = abuf0[i2 + 1] >> 7;\
        }
796

    
797
/*
 * YSCALE_YUV_2_PACKED1B_C followed by the lookup of the r/g/b table pointers
 * from the context tables, indexed by U and V. The for-loop body is left open.
 */
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
    r = (type *) c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *) c->table_bU[U];
802

    
803
/*
 * Two-row vertical blend producing 1 bit per pixel output.
 * For every group of 8 pixels the blended luma (buf0/buf1 weighted by
 * yalpha1/yalpha, >>19) plus the dither value d128[k] from row y&7 of
 * dither_8x8_220 indexes the table g; acc is doubled before each lookup is
 * added, so the first pixel ends up in the most significant position.
 * One byte is stored per group, inverted unless dstFormat is
 * PIX_FMT_MONOBLACK. Pixels beyond the last full group of 8 are not handled.
 * NOTE(review): g[] entries are presumably 0 or 1 -- confirm where the
 * table is built.
 */
#define YSCALE_YUV_2_MONO2_C \
    const uint8_t * const d128 = dither_8x8_220[y & 7];\
    uint8_t *g = c->table_gU[128] + c->table_gV[128];\
    for (i = 0; i < dstW - 7; i += 8) {\
        int acc;\
        acc  =       g[((buf0[i    ] * yalpha1 + buf1[i    ] * yalpha) >> 19) + d128[0]];\
        acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];\
        acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];\
        acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];\
        acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];\
        acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];\
        acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];\
        acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];\
        ((uint8_t*)dest)[0] = c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
        dest++;\
    }
819

    
820

    
821
/*
 * Multi-tap vertical scaler producing 1 bit per pixel output.
 * Two luma values per iteration are accumulated from 1<<18, shifted right by
 * 19 and clamped to 0..255 (bit 8 of Y1|Y2 triggers the clamp). Each value
 * plus its dither entry from row y&7 of dither_8x8_220 indexes the table g,
 * and acc is doubled before every lookup is added. When (i&7)==6, i.e. after
 * the 8th pixel of a group, one byte is stored, inverted unless dstFormat is
 * PIX_FMT_MONOBLACK. acc is not reset between groups; only its low 8 bits
 * reach the stored byte.
 */
#define YSCALE_YUV_2_MONOX_C \
    const uint8_t * const d128 = dither_8x8_220[y & 7];\
    uint8_t *g = c->table_gU[128] + c->table_gV[128];\
    int acc = 0;\
    for (i = 0; i < dstW - 1; i += 2) {\
        int j;\
        int Y1 = 1 << 18;\
        int Y2 = 1 << 18;\
\
        for (j = 0; j < lumFilterSize; j++) {\
            Y1 += lumSrc[j][i    ] * lumFilter[j];\
            Y2 += lumSrc[j][i + 1] * lumFilter[j];\
        }\
        Y1 >>= 19;\
        Y2 >>= 19;\
        if ((Y1 | Y2) & 256) {\
            if      (Y1 > 255) Y1 = 255;\
            else if (Y1 <   0) Y1 = 0;\
            if      (Y2 > 255) Y2 = 255;\
            else if (Y2 <   0) Y2 = 0;\
        }\
        acc += acc + g[Y1 + d128[(i + 0) & 7]];\
        acc += acc + g[Y2 + d128[(i + 1) & 7]];\
        if ((i & 7) == 6) {\
            ((uint8_t*)dest)[0] = c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
            dest++;\
        }\
    }
851

    
852

    
853
/*
 * YSCALE_YUV_2_ANYRGB_C: per-destination-format store dispatcher.
 *
 * Expands to a switch on c->dstFormat. Each case pairs one of the macro
 * arguments with the stores for that pixel format:
 *   func(type, alpha)  - used by the RGB/BGR cases
 *   func2              - used by the packed YUYV422 / UYVY422 cases
 *   func_g16           - used by the 16 bit gray cases
 *   func_monoblack     - complete body for the 1 bpp cases
 *
 * func, func2 and func_g16 must open a block that the "}" at the end of each
 * case closes. NOTE(review): they are presumably the per-pixel-pair loops
 * that define i2, Y1, Y2, U, V, A1, A2 and the r/g/b lookup tables used
 * below; their definitions are outside this chunk, confirm there.
 *
 * The expansion site must provide c, dest, i and y. Formats not listed fall
 * out of the switch without writing anything (there is no default case).
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB48BE:\
    case PIX_FMT_RGB48LE:\
        /* each table value is stored into both bytes of the 16 bit component */\
        func(uint8_t,0)\
            ((uint8_t*)dest)[ 0]= r[Y1];\
            ((uint8_t*)dest)[ 1]= r[Y1];\
            ((uint8_t*)dest)[ 2]= g[Y1];\
            ((uint8_t*)dest)[ 3]= g[Y1];\
            ((uint8_t*)dest)[ 4]= b[Y1];\
            ((uint8_t*)dest)[ 5]= b[Y1];\
            ((uint8_t*)dest)[ 6]= r[Y2];\
            ((uint8_t*)dest)[ 7]= r[Y2];\
            ((uint8_t*)dest)[ 8]= g[Y2];\
            ((uint8_t*)dest)[ 9]= g[Y2];\
            ((uint8_t*)dest)[10]= b[Y2];\
            ((uint8_t*)dest)[11]= b[Y2];\
            dest+=12;\
        }\
        break;\
    case PIX_FMT_RGBA:\
    case PIX_FMT_BGRA:\
        /* alpha, when present, is added shifted into bits 24..31 */\
        if (CONFIG_SMALL){\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
            }\
        }else{\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
                }\
            }else{\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }\
        break;\
    case PIX_FMT_ARGB:\
    case PIX_FMT_ABGR:\
        /* alpha, when present, is added unshifted (bits 0..7) */\
        if (CONFIG_SMALL){\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
            }\
        }else{\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
                }\
            }else{\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }\
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            /* 2x2 ordered dither; offsets depend on line parity and pixel parity */\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            /* green uses the same table as red but with the two columns swapped */\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            /* two pixels per byte: first in the low nibble, second in the high nibble */\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
    case PIX_FMT_MONOWHITE:\
        {\
            func_monoblack\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }\
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }\
        break;\
    case PIX_FMT_GRAY16BE:\
        /* high byte first */\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }\
        break;\
    case PIX_FMT_GRAY16LE:\
        /* low byte first */\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
        }\
        break;\
    }
1042

    
1043

    
1044
/**
 * Plain C vertical scaler writing one line of packed output.
 *
 * The whole body is the expansion of YSCALE_YUV_2_ANYRGB_C, which switches on
 * c->dstFormat. The N-tap variants are passed as its building blocks:
 * YSCALE_YUV_2_RGBX_C for RGB formats, YSCALE_YUV_2_PACKEDX_C(void,0) for
 * packed YUV, YSCALE_YUV_2_GRAY16_C for 16 bit gray and YSCALE_YUV_2_MONOX_C
 * for 1 bpp. The parameters are not referenced by name here; they are picked
 * up by those macros.
 *
 * @param c             scaler context (dstFormat selects the output layout)
 * @param lumFilter     vertical luma filter coefficients
 * @param lumSrc        lumFilterSize luma source lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     vertical chroma filter coefficients
 * @param chrSrc        chrFilterSize chroma source lines
 * @param chrFilterSize number of chroma taps
 * @param alpSrc        alpha source lines
 * @param dest          output line
 * @param dstW          output width in pixels
 * @param y             output line number (selects the dither row)
 */
static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
    int i; /* loop counter used by the expanded macros */
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
}
1051

    
1052
static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1053
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1054
                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1055
{
1056
    int i;
1057
    int step= fmt_depth(c->dstFormat)/8;
1058
    int aidx= 3;
1059

    
1060
    switch(c->dstFormat){
1061
    case PIX_FMT_ARGB:
1062
        dest++;
1063
        aidx= 0;
1064
    case PIX_FMT_RGB24:
1065
        aidx--;
1066
    case PIX_FMT_RGBA:
1067
        if (CONFIG_SMALL){
1068
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1069
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1070
                dest[aidx]= needAlpha ? A : 255;
1071
                dest[0]= R>>22;
1072
                dest[1]= G>>22;
1073
                dest[2]= B>>22;
1074
                dest+= step;
1075
            }
1076
        }else{
1077
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1078
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1079
                    dest[aidx]= A;
1080
                    dest[0]= R>>22;
1081
                    dest[1]= G>>22;
1082
                    dest[2]= B>>22;
1083
                    dest+= step;
1084
                }
1085
            }else{
1086
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1087
                    dest[aidx]= 255;
1088
                    dest[0]= R>>22;
1089
                    dest[1]= G>>22;
1090
                    dest[2]= B>>22;
1091
                    dest+= step;
1092
                }
1093
            }
1094
        }
1095
        break;
1096
    case PIX_FMT_ABGR:
1097
        dest++;
1098
        aidx= 0;
1099
    case PIX_FMT_BGR24:
1100
        aidx--;
1101
    case PIX_FMT_BGRA:
1102
        if (CONFIG_SMALL){
1103
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1104
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1105
                dest[aidx]= needAlpha ? A : 255;
1106
                dest[0]= B>>22;
1107
                dest[1]= G>>22;
1108
                dest[2]= R>>22;
1109
                dest+= step;
1110
            }
1111
        }else{
1112
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1113
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1114
                    dest[aidx]= A;
1115
                    dest[0]= B>>22;
1116
                    dest[1]= G>>22;
1117
                    dest[2]= R>>22;
1118
                    dest+= step;
1119
                }
1120
            }else{
1121
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1122
                    dest[aidx]= 255;
1123
                    dest[0]= B>>22;
1124
                    dest[1]= G>>22;
1125
                    dest[2]= R>>22;
1126
                    dest+= step;
1127
                }
1128
            }
1129
        }
1130
        break;
1131
    default:
1132
        assert(0);
1133
    }
1134
}
1135

    
1136
/**
 * Fill a rectangle of a plane with a constant byte.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of consecutive lines
 * @param width  number of bytes set on each line
 * @param height number of lines to fill
 * @param y      index of the first line to fill
 * @param val    byte value written
 */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val){
    int i;
    uint8_t *ptr = plane + stride*y;
    for (i=0; i<height; i++){
        memset(ptr, val, width);
        ptr += stride;
    }
}
1144

    
1145
static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width)
1146
{
1147
    int i;
1148
    for (i = 0; i < width; i++) {
1149
        int r = src[i*6+0];
1150
        int g = src[i*6+2];
1151
        int b = src[i*6+4];
1152

    
1153
        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1154
    }
1155
}
1156

    
1157
static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
1158
                             uint8_t *src1, uint8_t *src2, int width)
1159
{
1160
    int i;
1161
    assert(src1==src2);
1162
    for (i = 0; i < width; i++) {
1163
        int r = src1[6*i + 0];
1164
        int g = src1[6*i + 2];
1165
        int b = src1[6*i + 4];
1166

    
1167
        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1168
        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1169
    }
1170
}
1171

    
1172
static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
1173
                                  uint8_t *src1, uint8_t *src2, int width)
1174
{
1175
    int i;
1176
    assert(src1==src2);
1177
    for (i = 0; i < width; i++) {
1178
        int r= src1[12*i + 0] + src1[12*i + 6];
1179
        int g= src1[12*i + 2] + src1[12*i + 8];
1180
        int b= src1[12*i + 4] + src1[12*i + 10];
1181

    
1182
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1183
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1184
    }
1185
}
1186

    
1187
/*
 * BGR2Y: generate a packed-RGB -> 8 bit luma line converter called `name`.
 *
 *   type              integer type of one packed pixel (uint16_t / uint32_t)
 *   shr, shg, shb     right shift applied to the pixel before masking
 *   maskr/maskg/maskb mask applied after the shift
 *   RY, GY, BY        weights for the masked (possibly still shifted-up)
 *                     components; the caller pre-scales them so that all
 *                     three products share the same fixed-point scale S
 *   S                 final right shift; 33<<(S-1) is added first
 *                     (16.5 in units of 1<<S: luma offset plus rounding)
 *
 * The generated function ignores its last parameter.
 */
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++)\
    {\
        int b= (((const type*)src)[i]>>shb)&maskb;\
        int g= (((const type*)src)[i]>>shg)&maskg;\
        int r= (((const type*)src)[i]>>shr)&maskr;\
\
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
    }\
}
1200

    
1201
/*
 * Luma converters for the packed 32/16/15 bit formats.
 * Components that are masked in place (not shifted down to bit 0) are
 * balanced by shifting the weights of the other components up and by a
 * larger final shift.
 */
BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1207

    
1208
/**
 * Extract the first byte of every 4 byte pixel into a separate plane.
 *
 * @param dst    output, width bytes
 * @param src    input, 4*width bytes; byte 0 of each pixel is copied
 * @param width  number of pixels
 * @param unused ignored
 */
static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused){
    int i;
    for (i=0; i<width; i++){
        dst[i]= src[4*i];
    }
}
1214

    
1215
/*
 * BGR2UV: generate two packed-RGB -> 8 bit chroma line converters,
 * `name` (one U/V pair per pixel) and `name_half` (one U/V pair per two
 * adjacent pixels).
 *
 *   type              integer type of one packed pixel
 *   shr, shg, shb     right shift applied AFTER masking (note: BGR2Y shifts
 *                     before masking)
 *   maska             accepted but not referenced by the generated code
 *   maskr/maskg/maskb component masks in pixel position
 *   RU..BV            weights, pre-scaled by the caller to a common scale
 *   S                 final right shift; the bias 257<<(S-1) is 128.5 in
 *                     units of 1<<S (chroma offset plus rounding)
 *
 * In the _half variant the two pixels are added as whole words. g first
 * collects everything that is neither red nor blue from both pixels, so
 * pix0+pix1-g contains only the red and blue sums; each sum can be one bit
 * wider than its mask, hence the mask|(2*mask) terms. Because the components
 * are sums of two pixels, bias and shift are one bit larger.
 *
 * Both generated functions ignore `dummy` and `unused`.
 */
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++)\
    {\
        int b= (((const type*)src)[i]&maskb)>>shb;\
        int g= (((const type*)src)[i]&maskg)>>shg;\
        int r= (((const type*)src)[i]&maskr)>>shr;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
    }\
}\
static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++)\
    {\
        int pix0= ((const type*)src)[2*i+0];\
        int pix1= ((const type*)src)[2*i+1];\
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
        g&= maskg|(2*maskg);\
\
        g>>=shg;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
    }\
}
1247

    
1248
/*
 * Chroma converters (full and _half resolution) for the packed 32/16/15 bit
 * formats. As with the luma converters, components left in their pixel
 * position are balanced by pre-shifted weights and a larger final shift.
 */
BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
1254

    
1255
/**
 * Map a line of 8 bit palette indices to luma.
 *
 * The luma value is taken from the low 8 bits of the palette entry.
 *
 * @param dst   output luma, width bytes
 * @param src   input palette indices, width bytes
 * @param width number of pixels
 * @param pal   palette, indexed by the source byte
 */
static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++)
    {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}
1265

    
1266
/**
 * Map a line of 8 bit palette indices to chroma.
 *
 * U is taken from bits 8..15 and V from bits 16..23 of the palette entry.
 * The entry is kept unsigned so that the shifts are well defined even when
 * bit 31 of the entry is set.
 *
 * @param dstU  output U, width bytes
 * @param dstV  output V, width bytes
 * @param src1  input palette indices, width bytes
 * @param src2  must be equal to src1 (asserted); not otherwise used
 * @param width number of pixels
 * @param pal   palette, indexed by the source byte
 */
static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
                           const uint8_t *src1, const uint8_t *src2,
                           long width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++)
    {
        const uint32_t p= pal[src1[i]];

        dstU[i]= (uint8_t)(p>>8);
        dstV[i]= (uint8_t)(p>>16);
    }
}
1280

    
1281
/**
 * Expand a line of 1 bit per pixel data, where a set bit means black, to
 * 8 bit luma (0 or 255).
 *
 * Each source byte holds 8 pixels, most significant bit first. The byte is
 * inverted before expansion so that a cleared bit yields 255.
 *
 * When width is not a multiple of 8, the leading bits of one further source
 * byte are expanded so that all width output samples are written.
 *
 * @param dst    output luma, width bytes
 * @param src    input bits, (width+7)/8 bytes
 * @param width  number of pixels
 * @param unused ignored
 */
static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    long i;
    int j;

    /* complete bytes */
    for (i=0; i<width/8; i++){
        int d= ~src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }

    /* remaining 1..7 pixels of a partial last byte */
    if (width > 0 && (width&7)){
        int d= ~src[i];
        for(j=0; j<(int)(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1290

    
1291
/**
 * Expand a line of 1 bit per pixel data, where a set bit means white, to
 * 8 bit luma (0 or 255).
 *
 * Each source byte holds 8 pixels, most significant bit first.
 *
 * When width is not a multiple of 8, the leading bits of one further source
 * byte are expanded so that all width output samples are written.
 *
 * @param dst    output luma, width bytes
 * @param src    input bits, (width+7)/8 bytes
 * @param width  number of pixels
 * @param unused ignored
 */
static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    long i;
    int j;

    /* complete bytes */
    for (i=0; i<width/8; i++){
        int d= src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }

    /* remaining 1..7 pixels of a partial last byte */
    if (width > 0 && (width&7)){
        int d= src[i];
        for(j=0; j<(int)(width&7); j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1300

    
1301

    
1302
/*
 * Select which variants of swscale_template.c get compiled.
 *
 * Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one.
 *
 * With CONFIG_RUNTIME_CPUDETECT every variant available for the architecture
 * is built; otherwise only the one matching the HAVE_* configuration. All
 * non-C variants additionally require CONFIG_GPL.
 */

//Plain C versions
#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT || !CONFIG_GPL
#define COMPILE_C
#endif

#if ARCH_PPC
// on PPC the AltiVec build replaces the plain C build
#if (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#undef COMPILE_C
#define COMPILE_ALTIVEC
#endif
#endif //ARCH_PPC

#if ARCH_X86

#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX
#endif

#if (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX2
#endif

#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// From here on HAVE_* no longer describe the build configuration: they are
// reset to 0 and then redefined per template instantiation.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_ALTIVEC
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define HAVE_ALTIVEC 0
1338

    
1339
/*
 * Instantiate swscale_template.c once per selected variant. Before each
 * inclusion RENAME() is set to append a variant suffix to a name and the
 * HAVE_* macros are set to describe that variant.
 */

// Plain C version (all HAVE_* are 0 at this point)
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

// AltiVec version
#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif

#if ARCH_X86

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86

// minor note: the HAVE_xyz are messed up after this line so don't use them
1396

    
1397
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is the cubic a + b*dist + c*dist^2 + d*dist^3
 * (evaluated in Horner form). For larger distances the coefficients are
 * advanced to the next unit interval, once per whole unit of dist:
 *     a' = 0
 *     b' =  b + 2c + 3d
 *     c' =      c + 3d
 *     d' = -b - 3c - 6d
 * and dist is reduced by 1.0 each time.
 *
 * The stepping is done in a loop rather than by self-recursion, so the stack
 * depth does not grow with dist and a NaN dist yields NaN instead of
 * recursing without bound.
 *
 * @param a,b,c,d cubic coefficients of the first interval
 * @param dist    distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0) {
        /* all three new coefficients are computed from the old b, c, d */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a = 0.0;
        b = nextB;
        c = nextC;
        d = nextD;
        dist = dist - 1.0;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
1407

    
1408
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
1409
                             int srcW, int dstW, int filterAlign, int one, int flags,
1410
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
1411
{
1412
    int i;
1413
    int filterSize;
1414
    int filter2Size;
1415
    int minFilterSize;
1416
    int64_t *filter=NULL;
1417
    int64_t *filter2=NULL;
1418
    const int64_t fone= 1LL<<54;
1419
    int ret= -1;
1420
#if ARCH_X86
1421
    if (flags & SWS_CPU_CAPS_MMX)
1422
        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
1423
#endif
1424

    
1425
    // NOTE: the +1 is for the MMX scaler which reads over the end
1426
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
1427

    
1428
    if (FFABS(xInc - 0x10000) <10) // unscaled
1429
    {
1430
        int i;
1431
        filterSize= 1;
1432
        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
1433

    
1434
        for (i=0; i<dstW; i++)
1435
        {
1436
            filter[i*filterSize]= fone;
1437
            (*filterPos)[i]=i;
1438
        }
1439

    
1440
    }
1441
    else if (flags&SWS_POINT) // lame looking point sampling mode
1442
    {
1443
        int i;
1444
        int xDstInSrc;
1445
        filterSize= 1;
1446
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1447

    
1448
        xDstInSrc= xInc/2 - 0x8000;
1449
        for (i=0; i<dstW; i++)
1450
        {
1451
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1452

    
1453
            (*filterPos)[i]= xx;
1454
            filter[i]= fone;
1455
            xDstInSrc+= xInc;
1456
        }
1457
    }
1458
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
1459
    {
1460
        int i;
1461
        int xDstInSrc;
1462
        if      (flags&SWS_BICUBIC) filterSize= 4;
1463
        else if (flags&SWS_X      ) filterSize= 4;
1464
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
1465
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1466

    
1467
        xDstInSrc= xInc/2 - 0x8000;
1468
        for (i=0; i<dstW; i++)
1469
        {
1470
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1471
            int j;
1472

    
1473
            (*filterPos)[i]= xx;
1474
                //bilinear upscale / linear interpolate / area averaging
1475
                for (j=0; j<filterSize; j++)
1476
                {
1477
                    int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
1478
                    if (coeff<0) coeff=0;
1479
                    filter[i*filterSize + j]= coeff;
1480
                    xx++;
1481
                }
1482
            xDstInSrc+= xInc;
1483
        }
1484
    }
1485
    else
1486
    {
1487
        int xDstInSrc;
1488
        int sizeFactor;
1489

    
1490
        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
1491
        else if (flags&SWS_X)            sizeFactor=  8;
1492
        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
1493
        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
1494
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
1495
        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
1496
        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
1497
        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
1498
        else {
1499
            sizeFactor= 0; //GCC warning killer
1500
            assert(0);
1501
        }
1502

    
1503
        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
1504
        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
1505

    
1506
        if (filterSize > srcW-2) filterSize=srcW-2;
1507

    
1508
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1509

    
1510
        xDstInSrc= xInc - 0x10000;
1511
        for (i=0; i<dstW; i++)
1512
        {
1513
            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
1514
            int j;
1515
            (*filterPos)[i]= xx;
1516
            for (j=0; j<filterSize; j++)
1517
            {
1518
                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
1519
                double floatd;
1520
                int64_t coeff;
1521

    
1522
                if (xInc > 1<<16)
1523
                    d= d*dstW/srcW;
1524
                floatd= d * (1.0/(1<<30));
1525

    
1526
                if (flags & SWS_BICUBIC)
1527
                {
1528
                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
1529
                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
1530
                    int64_t dd = ( d*d)>>30;
1531
                    int64_t ddd= (dd*d)>>30;
1532

    
1533
                    if      (d < 1LL<<30)
1534
                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
1535
                    else if (d < 1LL<<31)
1536
                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
1537
                    else
1538
                        coeff=0.0;
1539
                    coeff *= fone>>(30+24);
1540
                }
1541
/*                else if (flags & SWS_X)
1542
                {
1543
                    double p= param ? param*0.01 : 0.3;
1544
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1545
                    coeff*= pow(2.0, - p*d*d);
1546
                }*/
1547
                else if (flags & SWS_X)
1548
                {
1549
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
1550
                    double c;
1551

    
1552
                    if (floatd<1.0)
1553
                        c = cos(floatd*PI);
1554
                    else
1555
                        c=-1.0;
1556
                    if (c<0.0)      c= -pow(-c, A);
1557
                    else            c=  pow( c, A);
1558
                    coeff= (c*0.5 + 0.5)*fone;
1559
                }
1560
                else if (flags & SWS_AREA)
1561
                {
1562
                    int64_t d2= d - (1<<29);
1563
                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
1564
                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
1565
                    else coeff=0.0;
1566
                    coeff *= fone>>(30+16);
1567
                }
1568
                else if (flags & SWS_GAUSS)
1569
                {
1570
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1571
                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
1572
                }
1573
                else if (flags & SWS_SINC)
1574
                {
1575
                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
1576
                }
1577
                else if (flags & SWS_LANCZOS)
1578
                {
1579
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1580
                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
1581
                    if (floatd>p) coeff=0;
1582
                }
1583
                else if (flags & SWS_BILINEAR)
1584
                {
1585
                    coeff= (1<<30) - d;
1586
                    if (coeff<0) coeff=0;
1587
                    coeff *= fone >> 30;
1588
                }
1589
                else if (flags & SWS_SPLINE)
1590
                {
1591
                    double p=-2.196152422706632;
1592
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
1593
                }
1594
                else {
1595
                    coeff= 0.0; //GCC warning killer
1596
                    assert(0);
1597
                }
1598

    
1599
                filter[i*filterSize + j]= coeff;
1600
                xx++;
1601
            }
1602
            xDstInSrc+= 2*xInc;
1603
        }
1604
    }
1605

    
1606
    /* apply src & dst Filter to filter -> filter2
1607
       av_free(filter);
1608
    */
1609
    assert(filterSize>0);
1610
    filter2Size= filterSize;
1611
    if (srcFilter) filter2Size+= srcFilter->length - 1;
1612
    if (dstFilter) filter2Size+= dstFilter->length - 1;
1613
    assert(filter2Size>0);
1614
    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
1615

    
1616
    for (i=0; i<dstW; i++)
1617
    {
1618
        int j, k;
1619

    
1620
        if(srcFilter){
1621
            for (k=0; k<srcFilter->length; k++){
1622
                for (j=0; j<filterSize; j++)
1623
                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
1624
            }
1625
        }else{
1626
            for (j=0; j<filterSize; j++)
1627
                filter2[i*filter2Size + j]= filter[i*filterSize + j];
1628
        }
1629
        //FIXME dstFilter
1630

    
1631
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1632
    }
1633
    av_freep(&filter);
1634

    
1635
    /* try to reduce the filter-size (step1 find size and shift left) */
1636
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
1637
    minFilterSize= 0;
1638
    for (i=dstW-1; i>=0; i--)
1639
    {
1640
        int min= filter2Size;
1641
        int j;
1642
        int64_t cutOff=0.0;
1643

    
1644
        /* get rid off near zero elements on the left by shifting left */
1645
        for (j=0; j<filter2Size; j++)
1646
        {
1647
            int k;
1648
            cutOff += FFABS(filter2[i*filter2Size]);
1649

    
1650
            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1651

    
1652
            /* preserve monotonicity because the core can't handle the filter otherwise */
1653
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1654

    
1655
            // move filter coefficients left
1656
            for (k=1; k<filter2Size; k++)
1657
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1658
            filter2[i*filter2Size + k - 1]= 0;
1659
            (*filterPos)[i]++;
1660
        }
1661

    
1662
        cutOff=0;
1663
        /* count near zeros on the right */
1664
        for (j=filter2Size-1; j>0; j--)
1665
        {
1666
            cutOff += FFABS(filter2[i*filter2Size + j]);
1667

    
1668
            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1669
            min--;
1670
        }
1671

    
1672
        if (min>minFilterSize) minFilterSize= min;
1673
    }
1674

    
1675
    if (flags & SWS_CPU_CAPS_ALTIVEC) {
1676
        // we can handle the special case 4,
1677
        // so we don't want to go to the full 8
1678
        if (minFilterSize < 5)
1679
            filterAlign = 4;
1680

    
1681
        // We really don't want to waste our time
1682
        // doing useless computation, so fall back on
1683
        // the scalar C code for very small filters.
1684
        // Vectorizing is worth it only if you have a
1685
        // decent-sized vector.
1686
        if (minFilterSize < 3)
1687
            filterAlign = 1;
1688
    }
1689

    
1690
    if (flags & SWS_CPU_CAPS_MMX) {
1691
        // special case for unscaled vertical filtering
1692
        if (minFilterSize == 1 && filterAlign == 2)
1693
            filterAlign= 1;
1694
    }
1695

    
1696
    assert(minFilterSize > 0);
1697
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
1698
    assert(filterSize > 0);
1699
    filter= av_malloc(filterSize*dstW*sizeof(*filter));
1700
    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
1701
        goto error;
1702
    *outFilterSize= filterSize;
1703

    
1704
    if (flags&SWS_PRINT_INFO)
1705
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1706
    /* try to reduce the filter-size (step2 reduce it) */
1707
    for (i=0; i<dstW; i++)
1708
    {
1709
        int j;
1710

    
1711
        for (j=0; j<filterSize; j++)
1712
        {
1713
            if (j>=filter2Size) filter[i*filterSize + j]= 0;
1714
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
1715
            if((flags & SWS_BITEXACT) && j>=minFilterSize)
1716
                filter[i*filterSize + j]= 0;
1717
        }
1718
    }
1719

    
1720

    
1721
    //FIXME try to align filterPos if possible
1722

    
1723
    //fix borders
1724
    for (i=0; i<dstW; i++)
1725
    {
1726
        int j;
1727
        if ((*filterPos)[i] < 0)
1728
        {
1729
            // move filter coefficients left to compensate for filterPos
1730
            for (j=1; j<filterSize; j++)
1731
            {
1732
                int left= FFMAX(j + (*filterPos)[i], 0);
1733
                filter[i*filterSize + left] += filter[i*filterSize + j];
1734
                filter[i*filterSize + j]=0;
1735
            }
1736
            (*filterPos)[i]= 0;
1737
        }
1738

    
1739
        if ((*filterPos)[i] + filterSize > srcW)
1740
        {
1741
            int shift= (*filterPos)[i] + filterSize - srcW;
1742
            // move filter coefficients right to compensate for filterPos
1743
            for (j=filterSize-2; j>=0; j--)
1744
            {
1745
                int right= FFMIN(j + shift, filterSize-1);
1746
                filter[i*filterSize +right] += filter[i*filterSize +j];
1747
                filter[i*filterSize +j]=0;
1748
            }
1749
            (*filterPos)[i]= srcW - filterSize;
1750
        }
1751
    }
1752

    
1753
    // Note the +1 is for the MMX scaler which reads over the end
1754
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1755
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1756

    
1757
    /* normalize & store in outFilter */
1758
    for (i=0; i<dstW; i++)
1759
    {
1760
        int j;
1761
        int64_t error=0;
1762
        int64_t sum=0;
1763

    
1764
        for (j=0; j<filterSize; j++)
1765
        {
1766
            sum+= filter[i*filterSize + j];
1767
        }
1768
        sum= (sum + one/2)/ one;
1769
        for (j=0; j<*outFilterSize; j++)
1770
        {
1771
            int64_t v= filter[i*filterSize + j] + error;
1772
            int intV= ROUNDED_DIV(v, sum);
1773
            (*outFilter)[i*(*outFilterSize) + j]= intV;
1774
            error= v - intV*sum;
1775
        }
1776
    }
1777

    
1778
    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1779
    for (i=0; i<*outFilterSize; i++)
1780
    {
1781
        int j= dstW*(*outFilterSize);
1782
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1783
    }
1784

    
1785
    ret=0;
1786
error:
1787
    av_free(filter);
1788
    av_free(filter2);
1789
    return ret;
1790
}
1791

    
1792
#ifdef COMPILE_MMX2
1793
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1794
{
1795
    uint8_t *fragmentA;
1796
    x86_reg imm8OfPShufW1A;
1797
    x86_reg imm8OfPShufW2A;
1798
    x86_reg fragmentLengthA;
1799
    uint8_t *fragmentB;
1800
    x86_reg imm8OfPShufW1B;
1801
    x86_reg imm8OfPShufW2B;
1802
    x86_reg fragmentLengthB;
1803
    int fragmentPos;
1804

    
1805
    int xpos, i;
1806

    
1807
    // create an optimized horizontal scaling routine
1808

    
1809
    //code fragment
1810

    
1811
    __asm__ volatile(
1812
        "jmp                         9f                 \n\t"
1813
    // Begin
1814
        "0:                                             \n\t"
1815
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1816
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1817
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1818
        "punpcklbw                %%mm7, %%mm1          \n\t"
1819
        "punpcklbw                %%mm7, %%mm0          \n\t"
1820
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1821
        "1:                                             \n\t"
1822
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1823
        "2:                                             \n\t"
1824
        "psubw                    %%mm1, %%mm0          \n\t"
1825
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1826
        "pmullw                   %%mm3, %%mm0          \n\t"
1827
        "psllw                       $7, %%mm1          \n\t"
1828
        "paddw                    %%mm1, %%mm0          \n\t"
1829

    
1830
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1831

    
1832
        "add                         $8, %%"REG_a"      \n\t"
1833
    // End
1834
        "9:                                             \n\t"
1835
//        "int $3                                         \n\t"
1836
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1837
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1838
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1839
        "dec                         %1                 \n\t"
1840
        "dec                         %2                 \n\t"
1841
        "sub                         %0, %1             \n\t"
1842
        "sub                         %0, %2             \n\t"
1843
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1844
        "sub                         %0, %3             \n\t"
1845

    
1846

    
1847
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1848
        "=r" (fragmentLengthA)
1849
    );
1850

    
1851
    __asm__ volatile(
1852
        "jmp                         9f                 \n\t"
1853
    // Begin
1854
        "0:                                             \n\t"
1855
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1856
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1857
        "punpcklbw                %%mm7, %%mm0          \n\t"
1858
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1859
        "1:                                             \n\t"
1860
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1861
        "2:                                             \n\t"
1862
        "psubw                    %%mm1, %%mm0          \n\t"
1863
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1864
        "pmullw                   %%mm3, %%mm0          \n\t"
1865
        "psllw                       $7, %%mm1          \n\t"
1866
        "paddw                    %%mm1, %%mm0          \n\t"
1867

    
1868
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1869

    
1870
        "add                         $8, %%"REG_a"      \n\t"
1871
    // End
1872
        "9:                                             \n\t"
1873
//        "int                       $3                   \n\t"
1874
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1875
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1876
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1877
        "dec                         %1                 \n\t"
1878
        "dec                         %2                 \n\t"
1879
        "sub                         %0, %1             \n\t"
1880
        "sub                         %0, %2             \n\t"
1881
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1882
        "sub                         %0, %3             \n\t"
1883

    
1884

    
1885
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1886
        "=r" (fragmentLengthB)
1887
    );
1888

    
1889
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1890
    fragmentPos=0;
1891

    
1892
    for (i=0; i<dstW/numSplits; i++)
1893
    {
1894
        int xx=xpos>>16;
1895

    
1896
        if ((i&3) == 0)
1897
        {
1898
            int a=0;
1899
            int b=((xpos+xInc)>>16) - xx;
1900
            int c=((xpos+xInc*2)>>16) - xx;
1901
            int d=((xpos+xInc*3)>>16) - xx;
1902

    
1903
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1904
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1905
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1906
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1907
            filterPos[i/2]= xx;
1908

    
1909
            if (d+1<4)
1910
            {
1911
                int maxShift= 3-(d+1);
1912
                int shift=0;
1913

    
1914
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1915

    
1916
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1917
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1918
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1919
                    a | (b<<2) | (c<<4) | (d<<6);
1920

    
1921
                if (i+3>=dstW) shift=maxShift; //avoid overread
1922
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1923

    
1924
                if (shift && i>=shift)
1925
                {
1926
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1927
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1928
                    filterPos[i/2]-=shift;
1929
                }
1930

    
1931
                fragmentPos+= fragmentLengthB;
1932
            }
1933
            else
1934
            {
1935
                int maxShift= 3-d;
1936
                int shift=0;
1937

    
1938
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1939

    
1940
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1941
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1942
                    a | (b<<2) | (c<<4) | (d<<6);
1943

    
1944
                if (i+4>=dstW) shift=maxShift; //avoid overread
1945
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1946

    
1947
                if (shift && i>=shift)
1948
                {
1949
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1950
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1951
                    filterPos[i/2]-=shift;
1952
                }
1953

    
1954
                fragmentPos+= fragmentLengthA;
1955
            }
1956

    
1957
            funnyCode[fragmentPos]= RET;
1958
        }
1959
        xpos+=xInc;
1960
    }
1961
    filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part
1962
}
1963
#endif /* COMPILE_MMX2 */
1964

    
1965
static void globalInit(void){
1966
    // generating tables:
1967
    int i;
1968
    for (i=0; i<768; i++){
1969
        int c= av_clip_uint8(i-256);
1970
        clip_table[i]=c;
1971
    }
1972
}
1973

    
1974
/**
 * Pick the scaler implementation for a context.
 *
 * With run-time CPU detection (and GPL) enabled the choice follows the
 * SWS_CPU_CAPS_* bits in c->flags, otherwise it is fixed at compile time by
 * the HAVE_* macros. In both cases the matching sws_init_swScale_*() is
 * called on the context before the function pointer is returned.
 */
static SwsFunc getSwsFunc(SwsContext *c)
{
#if CONFIG_RUNTIME_CPUDETECT && CONFIG_GPL
    int flags = c->flags;

#if ARCH_X86
    // ordered per speed fastest first
    if (flags & SWS_CPU_CAPS_MMX2) {
        sws_init_swScale_MMX2(c);
        return swScale_MMX2;
    }
    if (flags & SWS_CPU_CAPS_3DNOW) {
        sws_init_swScale_3DNow(c);
        return swScale_3DNow;
    }
    if (flags & SWS_CPU_CAPS_MMX) {
        sws_init_swScale_MMX(c);
        return swScale_MMX;
    }
#else
#if ARCH_PPC
    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        sws_init_swScale_altivec(c);
        return swScale_altivec;
    }
#endif
#endif /* ARCH_X86 */

    /* no usable SIMD capability flagged: plain C scaler */
    sws_init_swScale_C(c);
    return swScale_C;
#else //CONFIG_RUNTIME_CPUDETECT
#if   HAVE_MMX2
    sws_init_swScale_MMX2(c);
    return swScale_MMX2;
#elif HAVE_AMD3DNOW
    sws_init_swScale_3DNow(c);
    return swScale_3DNow;
#elif HAVE_MMX
    sws_init_swScale_MMX(c);
    return swScale_MMX;
#elif HAVE_ALTIVEC
    sws_init_swScale_altivec(c);
    return swScale_altivec;
#else
    sws_init_swScale_C(c);
    return swScale_C;
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
2027

    
2028
/**
 * Unscaled planar YUV -> NV12/NV21 conversion of one slice.
 *
 * The luma plane is copied as is; the two source chroma planes are
 * interleaved into destination plane 1 (src[1] first for NV12, src[2] first
 * otherwise).
 *
 * @return the number of lines processed (srcSliceH)
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        const uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    /*
     * Interleave chroma into plane 1. The rows are addressed with
     * dstStride[1] here, so the stride handed to interleaveBytes() must be
     * dstStride[1] as well; the luma stride is only correct when both
     * planes happen to share the same pitch.
     */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);

    return srcSliceH;
}
2054

    
2055
/**
 * Unscaled wrapper: passes one slice of the three source planes to
 * yv12toyuy2(), writing into destination plane 0 starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* first output row of this slice */
    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
2063

    
2064
/**
 * Unscaled wrapper: passes one slice of the three source planes to
 * yv12touyvy(), writing into destination plane 0 starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* first output row of this slice */
    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
2072

    
2073
/**
 * Unscaled wrapper: passes one slice of the three source planes to
 * yuv422ptoyuy2(), writing into destination plane 0 starting at row
 * srcSliceY.
 *
 * @return srcSliceH
 */
static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* first output row of this slice */
    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptoyuy2(src[0], src[1], src[2], out,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
2081

    
2082
/**
 * Unscaled wrapper: passes one slice of the three source planes to
 * yuv422ptouyvy(), writing into destination plane 0 starting at row
 * srcSliceY.
 *
 * @return srcSliceH
 */
static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* first output row of this slice */
    uint8_t *const out = dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptouyvy(src[0], src[1], src[2], out,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
2090

    
2091
/**
 * Unscaled wrapper: passes one packed source slice to yuyvtoyuv420(), which
 * writes destination planes 0..2 (chroma rows start at half the slice
 * offset). If a fourth destination plane is present it is filled with 255.
 *
 * @return srcSliceH
 */
static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY/2;

    yuyvtoyuv420(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    /* optional fourth plane gets a constant 255 */
    if (dstParam[3])
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);

    return srcSliceH;
}
2104

    
2105
/**
 * Unscaled wrapper: passes one packed source slice to yuyvtoyuv422(), which
 * writes destination planes 0..2, all starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY;

    yuyvtoyuv422(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
2115

    
2116
/**
 * Unscaled wrapper: passes one packed source slice to uyvytoyuv420(), which
 * writes destination planes 0..2 (chroma rows start at half the slice
 * offset). If a fourth destination plane is present it is filled with 255.
 *
 * @return srcSliceH
 */
static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY/2;

    uyvytoyuv420(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    /* optional fourth plane gets a constant 255 */
    if (dstParam[3])
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);

    return srcSliceH;
}
2129

    
2130
/**
 * Unscaled wrapper: passes one packed source slice to uyvytoyuv422(), which
 * writes destination planes 0..2, all starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY;

    uyvytoyuv422(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
2140

    
2141
/**
 * Unscaled paletted -> packed RGB/BGR (24 or 32 bit) conversion of one slice.
 *
 * Each source row is expanded through c->pal_rgb by palette8topacked32 or
 * palette8topacked24, depending on the destination format. If the
 * destination format has no converter an error is logged and nothing is
 * written (a NULL converter is never called).
 *
 * @return srcSliceH
 */
static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
    uint8_t *srcPtr= src[0];

    if (!usePal(srcFormat))
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    switch(dstFormat){
    case PIX_FMT_RGB32  :
    case PIX_FMT_BGR32  :
    case PIX_FMT_BGR32_1:
    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB24  :
    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
    default: break;
    }

    if (!conv) {
        /* unsupported destination: report it instead of calling NULL */
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));
        return srcSliceH;
    }

    for (i=0; i<srcSliceH; i++) {
        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }

    return srcSliceH;
}
2175

    
2176
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
2177
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2178
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
2179
    const enum PixelFormat srcFormat= c->srcFormat;
2180
    const enum PixelFormat dstFormat= c->dstFormat;
2181
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
2182
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
2183
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
2184
    const int dstId= fmt_depth(dstFormat) >> 2;
2185
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
2186

    
2187
    /* BGR -> BGR */
2188
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
2189
       || (isRGB(srcFormat) && isRGB(dstFormat))){
2190
        switch(srcId | (dstId<<4)){
2191
        case 0x34: conv= rgb16to15; break;
2192
        case 0x36: conv= rgb24to15; break;
2193
        case 0x38: conv= rgb32to15; break;
2194
        case 0x43: conv= rgb15to16; break;
2195
        case 0x46: conv= rgb24to16; break;
2196
        case 0x48: conv= rgb32to16; break;
2197
        case 0x63: conv= rgb15to24; break;
2198
        case 0x64: conv= rgb16to24; break;
2199
        case 0x68: conv= rgb32to24; break;
2200
        case 0x83: conv= rgb15to32; break;
2201
        case 0x84: conv= rgb16to32; break;
2202
        case 0x86: conv= rgb24to32; break;
2203
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2204
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
2205
        }
2206
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
2207
             || (isRGB(srcFormat) && isBGR(dstFormat))){
2208
        switch(srcId | (dstId<<4)){
2209
        case 0x33: conv= rgb15tobgr15; break;
2210
        case 0x34: conv= rgb16tobgr15; break;
2211
        case 0x36: conv= rgb24tobgr15; break;
2212
        case 0x38: conv= rgb32tobgr15; break;
2213
        case 0x43: conv= rgb15tobgr16; break;
2214
        case 0x44: conv= rgb16tobgr16; break;
2215
        case 0x46: conv= rgb24tobgr16; break;
2216
        case 0x48: conv= rgb32tobgr16; break;
2217
        case 0x63: conv= rgb15tobgr24; break;
2218
        case 0x64: conv= rgb16tobgr24; break;
2219
        case 0x66: conv= rgb24tobgr24; break;
2220
        case 0x68: conv= rgb32tobgr24; break;
2221
        case 0x83: conv= rgb15tobgr32; break;
2222
        case 0x84: conv= rgb16tobgr32; break;
2223
        case 0x86: conv= rgb24tobgr32; break;
2224
        case 0x88: conv= rgb32tobgr32; break;
2225
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2226
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
2227
        }
2228
    }else{
2229
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2230
               sws_format_name(srcFormat), sws_format_name(dstFormat));
2231
    }
2232

    
2233
    if(conv)
2234
    {
2235
        uint8_t *srcPtr= src[0];
2236
        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
2237
            srcPtr += ALT32_CORR;
2238

    
2239
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
2240
            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
2241
        else
2242
        {
2243
            int i;
2244
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2245

    
2246
            for (i=0; i<srcSliceH; i++)
2247
            {
2248
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
2249
                srcPtr+= srcStride[0];
2250
                dstPtr+= dstStride[0];
2251
            }
2252
        }
2253
    }
2254
    return srcSliceH;
2255
}
2256

    
2257
/**
 * Unscaled wrapper: passes one packed source slice to rgb24toyv12(), which
 * writes destination planes 0..2 (chroma rows start at srcSliceY>>1). If a
 * fourth destination plane is present it is filled with 255.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int chromaRow    = srcSliceY >> 1;
    uint8_t *const lumaOut = dst[0] + srcSliceY*dstStride[0];
    uint8_t *const cbOut   = dst[1] + chromaRow*dstStride[1];
    uint8_t *const crOut   = dst[2] + chromaRow*dstStride[2];

    rgb24toyv12(src[0], lumaOut, cbOut, crOut,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    /* optional fourth plane gets a constant 255 */
    if (dst[3])
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);

    return srcSliceH;
}
2271

    
2272
/**
 * Unscaled conversion of one slice with a verbatim luma copy and chroma
 * passed through planar2x().
 *
 * src[1]/src[2] go to dst[1]/dst[2] when the destination is YUV420P or
 * YUVA420P, and to dst[2]/dst[1] (swapped) otherwise. If a fourth
 * destination plane is present it is filled with 255.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int straight = c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P;
    const int first    = straight ? 1 : 2;   /* destination plane fed from src[1] */
    const int second   = straight ? 2 : 1;   /* destination plane fed from src[2] */
    const int chromaY  = srcSliceY >> 1;
    const int chromaH  = srcSliceH >> 2;
    uint8_t *lumaDst   = dst[0] + srcSliceY*dstStride[0];

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0) {
        memcpy(lumaDst, src[0], srcStride[0]*srcSliceH);
    } else {
        uint8_t *lumaSrc = src[0];
        int row;

        for (row = 0; row < srcSliceH; row++) {
            memcpy(lumaDst, lumaSrc, c->srcW);
            lumaSrc += srcStride[0];
            lumaDst += dstStride[0];
        }
    }

    planar2x(src[1], dst[first]  + dstStride[first] *chromaY, c->chrSrcW,
             chromaH, srcStride[1], dstStride[first]);
    planar2x(src[2], dst[second] + dstStride[second]*chromaY, c->chrSrcW,
             chromaH, srcStride[2], dstStride[second]);

    /* optional fourth plane gets a constant 255 */
    if (dst[3])
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    return srcSliceH;
}
2306

    
2307
/* unscaled copy like stuff (assumes nearly identical formats) */
2308
static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2309
                      int srcSliceH, uint8_t* dst[], int dstStride[])
2310
{
2311
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
2312
        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
2313
    else
2314
    {
2315
        int i;
2316
        uint8_t *srcPtr= src[0];
2317
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2318
        int length=0;
2319

    
2320
        /* universal length finder */
2321
        while(length+c->srcW <= FFABS(dstStride[0])
2322
           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
2323
        assert(length!=0);
2324

    
2325
        for (i=0; i<srcSliceH; i++)
2326
        {
2327
            memcpy(dstPtr, srcPtr, length);
2328
            srcPtr+= srcStride[0];
2329
            dstPtr+= dstStride[0];
2330
        }
2331
    }
2332
    return srcSliceH;
2333
}
2334

    
2335
/**
 * Unscaled plane-by-plane copy of one slice (up to four planes).
 *
 * Planes 0 and 3 use the full slice geometry; planes 1 and 2 use width and
 * rows shifted down (rounding up) by the destination chroma subsampling.
 * Per plane:
 *  - a missing destination plane is skipped;
 *  - a missing source plane is filled (255 for plane 3, 128 otherwise);
 *  - 16 -> 8 bit keeps one byte of each sample, 8 -> 16 bit duplicates the
 *    byte, 16 -> 16 bit with differing endianness byte-swaps;
 *  - everything else is a plain memcpy (whole slice or row by row).
 *
 * @return srcSliceH
 */
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int plane;

    for (plane = 0; plane < 4; plane++)
    {
        const int fullRes = (plane==0 || plane==3);
        const int src16   = !!is16BPS(c->srcFormat);
        const int dst16   = !!is16BPS(c->dstFormat);
        int length        = fullRes ? c->srcW   : -((-c->srcW  )>>c->chrDstHSubSample);
        const int y       = fullRes ? srcSliceY : -((-srcSliceY)>>c->chrDstVSubSample);
        const int height  = fullRes ? srcSliceH : -((-srcSliceH)>>c->chrDstVSubSample);
        uint8_t *in, *out;
        int row, col;

        /* nothing to write to; plane 1 is also skipped when plane 2 is absent
           (ignore palette for GRAY8) */
        if (!dst[plane] || (plane == 1 && !dst[2]))
            continue;

        /* no usable source data: fill with a constant */
        if (!src[plane] || (plane == 1 && !src[2])) {
            if (dst16)
                length *= 2;
            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
            continue;
        }

        in  = src[plane];
        out = dst[plane] + dstStride[plane]*y;

        if (src16 != dst16) {
            if (src16) {
                /* 16 -> 8 bit: start one byte later for non-BE input and
                   take every second byte */
                if (!isBE(c->srcFormat))
                    in++;
                for (row = 0; row < height; row++) {
                    for (col = 0; col < length; col++)
                        out[col] = in[col<<1];
                    in  += srcStride[plane];
                    out += dstStride[plane];
                }
            } else {
                /* 8 -> 16 bit: replicate the byte into both halves */
                for (row = 0; row < height; row++) {
                    for (col = 0; col < length; col++) {
                        out[ col<<1   ] = in[col];
                        out[(col<<1)+1] = in[col];
                    }
                    in  += srcStride[plane];
                    out += dstStride[plane];
                }
            }
        } else if (src16 && isBE(c->srcFormat) != isBE(c->dstFormat)) {
            /* 16 -> 16 bit with opposite endianness */
            for (row = 0; row < height; row++) {
                for (col = 0; col < length; col++)
                    ((uint16_t*)out)[col] = bswap_16(((uint16_t*)in)[col]);
                in  += srcStride[plane];
                out += dstStride[plane];
            }
        } else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0) {
            /* identical positive strides: one bulk copy */
            memcpy(out, in, height*dstStride[plane]);
        } else {
            if (src16)
                length *= 2;
            for (row = 0; row < height; row++) {
                memcpy(out, in, length);
                in  += srcStride[plane];
                out += dstStride[plane];
            }
        }
    }
    return srcSliceH;
}
2398

    
2399

    
2400
static void getSubSampleFactors(int *h, int *v, int format){
2401
    switch(format){
2402
    case PIX_FMT_UYVY422:
2403
    case PIX_FMT_YUYV422:
2404
        *h=1;
2405
        *v=0;
2406
        break;
2407
    case PIX_FMT_YUV420P:
2408
    case PIX_FMT_YUV420PLE:
2409
    case PIX_FMT_YUV420PBE:
2410
    case PIX_FMT_YUVA420P:
2411
    case PIX_FMT_GRAY16BE:
2412
    case PIX_FMT_GRAY16LE:
2413
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
2414
    case PIX_FMT_NV12:
2415
    case PIX_FMT_NV21:
2416
        *h=1;
2417
        *v=1;
2418
        break;
2419
    case PIX_FMT_YUV440P:
2420
        *h=0;
2421
        *v=1;
2422
        break;
2423
    case PIX_FMT_YUV410P:
2424
        *h=2;
2425
        *v=2;
2426
        break;
2427
    case PIX_FMT_YUV444P:
2428
    case PIX_FMT_YUV444PLE:
2429
    case PIX_FMT_YUV444PBE:
2430
        *h=0;
2431
        *v=0;
2432
        break;
2433
    case PIX_FMT_YUV422P:
2434
    case PIX_FMT_YUV422PLE:
2435
    case PIX_FMT_YUV422PBE:
2436
        *h=1;
2437
        *v=0;
2438
        break;
2439
    case PIX_FMT_YUV411P:
2440
        *h=2;
2441
        *v=0;
2442
        break;
2443
    default:
2444
        *h=0;
2445
        *v=0;
2446
        break;
2447
    }
2448
}
2449

    
2450
/**
 * Divide a value by 2^16 with rounding to nearest and saturate the result
 * to the signed 16-bit range.
 *
 * @param f value scaled by 2^16
 * @return the rounded result as a 16-bit two's complement bit pattern:
 *         0x7FFF when the result exceeds 0x7FFF, 0x8000 when it is below
 *         -0x7FFF, otherwise the low 16 bits of the rounded value
 */
static uint16_t roundToInt16(int64_t f){
    /* Keep the intermediate 64 bits wide: narrowing to int before the range
     * checks would let very large magnitudes wrap around and escape the
     * saturation below. */
    int64_t r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return (uint16_t)r;
}
2456

    
2457
/**
 * Store the colorspace parameters in the context and derive the YUV->RGB
 * conversion coefficients from them.
 *
 * The tables, ranges, brightness, contrast and saturation are always copied
 * into the context. The coefficients are only computed when the destination
 * format is neither YUV nor gray.
 *
 * @param c          scaler context to update
 * @param inv_table  four source colorspace coefficients (used for the
 *                   V->R, U->B, U->G and V->G terms, in that order)
 * @param srcRange   nonzero if the source uses the full value range
 * @param table      four destination colorspace coefficients (only stored)
 * @param dstRange   nonzero if the destination uses the full value range
 * @param brightness brightness offset
 * @param contrast   contrast factor, 1<<16 being neutral
 * @param saturation saturation factor, 1<<16 being neutral
 * @return 0 on success, -1 if the destination format is YUV or gray
 */
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
    /* multiplying by this replicates a 16-bit value into four 16-bit lanes */
    const uint64_t lanes4 = 0x0001000100010001ULL;
    int64_t lumaGain   = 1<<16;
    int64_t lumaOffset = 0;
    int64_t v2r =  inv_table[0];
    int64_t u2b =  inv_table[1];
    int64_t u2g = -inv_table[2];
    int64_t v2g = -inv_table[3];

    /* remember everything so sws_getColorspaceDetails() can report it back */
    memcpy(c->srcColorspaceTable, inv_table, 4*sizeof(int));
    memcpy(c->dstColorspaceTable,     table, 4*sizeof(int));
    c->brightness= brightness;
    c->contrast  = contrast;
    c->saturation= saturation;
    c->srcRange  = srcRange;
    c->dstRange  = dstRange;

    if (isYUV(c->dstFormat) || isGray(c->dstFormat))
        return -1;

    c->uOffset= 0x0400040004000400LL;
    c->vOffset= 0x0400040004000400LL;

    if (srcRange){
        /* full-range input: rescale the chroma terms by 224/255 */
        v2r= (v2r*224) / 255;
        u2b= (u2b*224) / 255;
        u2g= (u2g*224) / 255;
        v2g= (v2g*224) / 255;
    }else{
        /* limited-range input: stretch luma by 255/219 and remove the offset of 16 */
        lumaGain  = (lumaGain*255) / 219;
        lumaOffset= 16<<16;
    }

    /* fold contrast into luma, contrast and saturation into chroma */
    lumaGain= (lumaGain*contrast        )>>16;
    v2r= (v2r*contrast * saturation)>>32;
    u2b= (u2b*contrast * saturation)>>32;
    u2g= (u2g*contrast * saturation)>>32;
    v2g= (v2g*contrast * saturation)>>32;

    lumaOffset -= 256*brightness;

    /* packed coefficients, one copy per 16-bit lane */
    c->yCoeff = roundToInt16(lumaGain  *8192) * lanes4;
    c->vrCoeff= roundToInt16(v2r       *8192) * lanes4;
    c->ubCoeff= roundToInt16(u2b       *8192) * lanes4;
    c->vgCoeff= roundToInt16(v2g       *8192) * lanes4;
    c->ugCoeff= roundToInt16(u2g       *8192) * lanes4;
    c->yOffset= roundToInt16(lumaOffset*   8) * lanes4;

    /* scalar copies of the same coefficients */
    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(lumaGain  <<13);
    c->yuv2rgb_y_offset = (int16_t)roundToInt16(lumaOffset<< 9);
    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(v2r<<13);
    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(v2g<<13);
    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(u2g<<13);
    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(u2b<<13);

    ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
    //FIXME factorize

#ifdef COMPILE_ALTIVEC
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
        ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
#endif
    return 0;
}
2519

    
2520
/**
 * Read back the colorspace parameters stored in the context.
 *
 * The table pointers returned refer to the arrays inside the context; they
 * are not copies.
 *
 * @return 0 on success, -1 (with no output written) if the destination
 *         format is YUV or gray
 */
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
    if (!isYUV(c->dstFormat) && !isGray(c->dstFormat)) {
        /* colorspace tables */
        *inv_table = c->srcColorspaceTable;
        *table     = c->dstColorspaceTable;

        /* value ranges */
        *srcRange  = c->srcRange;
        *dstRange  = c->dstRange;

        /* picture adjustments */
        *brightness= c->brightness;
        *contrast  = c->contrast;
        *saturation= c->saturation;

        return 0;
    }

    return -1;
}
2533

    
2534
static int handle_jpeg(enum PixelFormat *format)
2535
{
2536
    switch (*format) {
2537
        case PIX_FMT_YUVJ420P:
2538
            *format = PIX_FMT_YUV420P;
2539
            return 1;
2540
        case PIX_FMT_YUVJ422P:
2541
            *format = PIX_FMT_YUV422P;
2542
            return 1;
2543
        case PIX_FMT_YUVJ444P:
2544
            *format = PIX_FMT_YUV444P;
2545
            return 1;
2546
        case PIX_FMT_YUVJ440P:
2547
            *format = PIX_FMT_YUV440P;
2548
            return 1;
2549
        default:
2550
            return 0;
2551
    }
2552
}
2553

    
2554
/**
 * Allocate and set up a scaler context for converting srcW x srcH pictures
 * in srcFormat to dstW x dstH pictures in dstFormat.
 *
 * @param srcW,srcH  source dimensions (srcW >= 4, srcH >= 1, srcW <= VOFW)
 * @param srcFormat  source pixel format; YUVJ formats are mapped to the
 *                   matching YUV format with the source range flag set
 * @param dstW,dstH  destination dimensions (dstW >= 8, dstH >= 1, dstW <= VOFW)
 * @param dstFormat  destination pixel format, handled like srcFormat
 * @param flags      SWS_* flags; exactly one scaler algorithm must be set
 * @param srcFilter  optional filter applied on the source side, may be NULL
 * @param dstFilter  optional filter applied on the destination side, may be NULL
 * @param param      optional array of two scaler tuning parameters, may be
 *                   NULL (SWS_PARAM_DEFAULT is used then)
 * @return the new context, or NULL if a format is unsupported, the scaler
 *         selection or the dimensions are invalid, or the context cannot be
 *         allocated
 */
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                           SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{

    SwsContext *c;
    int i;
    int usesVFilter, usesHFilter;
    int unscaled, needsDither;
    int srcRange, dstRange;
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if ARCH_X86
    if (flags & SWS_CPU_CAPS_MMX)
        __asm__ volatile("emms\n\t"::: "memory");
#endif

#if !CONFIG_RUNTIME_CPUDETECT || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
#if   HAVE_MMX2
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif HAVE_AMD3DNOW
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif HAVE_MMX
    flags |= SWS_CPU_CAPS_MMX;
#elif HAVE_ALTIVEC
    flags |= SWS_CPU_CAPS_ALTIVEC;
#elif ARCH_BFIN
    flags |= SWS_CPU_CAPS_BFIN;
#endif
#endif /* CONFIG_RUNTIME_CPUDETECT */
    /* one-time global table setup, done lazily on first use */
    if (clip_table[512] != 255) globalInit();
    if (!rgb15to16) sws_rgb2rgb_init(flags);

    unscaled = (srcW == dstW && srcH == dstH);
    /* dithering is wanted for RGB/BGR output below 24 bits when depth is lost
     * or the input is not RGB/BGR at all */
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
        && (fmt_depth(dstFormat))<24
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

    /* from here on srcFormat/dstFormat never hold a YUVJ format */
    srcRange = handle_jpeg(&srcFormat);
    dstRange = handle_jpeg(&dstFormat);

    if (!isSupportedIn(srcFormat))
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
        return NULL;
    }
    if (!isSupportedOut(dstFormat))
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
        return NULL;
    }

    i= flags & ( SWS_POINT
                |SWS_AREA
                |SWS_BILINEAR
                |SWS_FAST_BILINEAR
                |SWS_BICUBIC
                |SWS_X
                |SWS_GAUSS
                |SWS_LANCZOS
                |SWS_SINC
                |SWS_SPLINE
                |SWS_BICUBLIN);
    /* i & (i-1) is nonzero when more than one bit is set */
    if(!i || (i & (i-1)))
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
        return NULL;
    }

    /* sanity check */
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
               srcW, srcH, dstW, dstH);
        return NULL;
    }
    if(srcW > VOFW || dstW > VOFW){
        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
        return NULL;
    }

    if (!dstFilter) dstFilter= &dummyFilter;
    if (!srcFilter) srcFilter= &dummyFilter;

    c= av_mallocz(sizeof(SwsContext));
    /* the context is dereferenced right away, so fail cleanly when out of memory */
    if (!c)
    {
        av_log(NULL, AV_LOG_ERROR, "swScaler: cannot allocate the scaler context\n");
        return NULL;
    }

    c->av_class = &sws_context_class;
    c->srcW= srcW;
    c->srcH= srcH;
    c->dstW= dstW;
    c->dstH= dstH;
    /* 16.16 fixed-point source step per destination pixel, rounded */
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
    c->flags= flags;
    c->dstFormat= dstFormat;
    c->srcFormat= srcFormat;
    c->vRounder= 4* 0x0001000100010001ULL;

    /* a filter with a single coefficient does not count as filtering */
    usesHFilter= usesVFilter= 0;
    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;

    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);

    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;

    // drop some chroma lines if the user wants it
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
    c->chrSrcVSubSample+= c->vChrDrop;

    // drop every other pixel for chroma calculation unless user wants full chroma
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
        c->chrSrcHSubSample=1;

    if (param){
        c->param[0] = param[0];
        c->param[1] = param[1];
    }else{
        c->param[0] =
        c->param[1] = SWS_PARAM_DEFAULT;
    }

    c->chrIntHSubSample= c->chrDstHSubSample;
    c->chrIntVSubSample= c->chrSrcVSubSample;

    // Note the -((-x)>>y) is so that we always round toward +inf.
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);

    /* the return value is deliberately ignored: -1 only means the destination is YUV/gray */
    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);

    /* unscaled special cases */
    /* later matches override earlier ones, so the order of the tests below matters */
    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
    {
        /* yv12_to_nv12 */
        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
        {
            c->swScale= PlanarToNV12Wrapper;
        }
        /* yuv2bgr */
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
        {
            c->swScale= ff_yuv2rgb_get_func_ptr(c);
        }

        if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT))
        {
            c->swScale= yvu9toyv12Wrapper;
        }

        /* bgr24toYV12 */
        if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
            c->swScale= bgr24toyv12Wrapper;

        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
           && (isBGR(dstFormat) || isRGB(dstFormat))
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
                                             && dstFormat != PIX_FMT_RGB32_1
                                             && dstFormat != PIX_FMT_BGR32_1
           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
             c->swScale= rgb2rgbWrapper;

        if ((usePal(srcFormat) && (
                 dstFormat == PIX_FMT_RGB32   ||
                 dstFormat == PIX_FMT_RGB32_1 ||
                 dstFormat == PIX_FMT_RGB24   ||
                 dstFormat == PIX_FMT_BGR32   ||
                 dstFormat == PIX_FMT_BGR32_1 ||
                 dstFormat == PIX_FMT_BGR24)))
             c->swScale= pal2rgbWrapper;

        if (srcFormat == PIX_FMT_YUV422P)
        {
            if (dstFormat == PIX_FMT_YUYV422)
                c->swScale= YUV422PToYuy2Wrapper;
            else if (dstFormat == PIX_FMT_UYVY422)
                c->swScale= YUV422PToUyvyWrapper;
        }

        /* LQ converters if -sws 0 or -sws 4*/
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
            /* yv12_to_yuy2 */
            if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
            {
                if (dstFormat == PIX_FMT_YUYV422)
                    c->swScale= PlanarToYuy2Wrapper;
                else if (dstFormat == PIX_FMT_UYVY422)
                    c->swScale= PlanarToUyvyWrapper;
            }
        }
        if(srcFormat == PIX_FMT_YUYV422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
            c->swScale= YUYV2YUV420Wrapper;
        if(srcFormat == PIX_FMT_UYVY422 && (dstFormat == PIX_FMT_YUV420P || dstFormat == PIX_FMT_YUVA420P))
            c->swScale= UYVY2YUV420Wrapper;
        if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
            c->swScale= YUYV2YUV422Wrapper;
        if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
            c->swScale= UYVY2YUV422Wrapper;

#ifdef COMPILE_ALTIVEC
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
            !(c->flags & SWS_BITEXACT) &&
            srcFormat == PIX_FMT_YUV420P) {
          // unscaled YV12 -> packed YUV, we want speed
          if (dstFormat == PIX_FMT_YUYV422)
              c->swScale= yv12toyuy2_unscaled_altivec;
          else if (dstFormat == PIX_FMT_UYVY422)
              c->swScale= yv12touyvy_unscaled_altivec;
        }
#endif

        /* simple copy */
        if (  srcFormat == dstFormat
            || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
            || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
            || (isPlanarYUV(dstFormat) && isGray(srcFormat))
            || (isGray(dstFormat) && isGray(srcFormat))
            || (isPlanarYUV(srcFormat) && isPlanarYUV(dstFormat)
                && c->chrDstHSubSample == c->chrSrcHSubSample
                && c->chrDstVSubSample == c->chrSrcVSubSample))
        {
            if (isPacked(c->srcFormat))
                c->swScale= packedCopy;
            else /* Planar YUV or gray */
                c->swScale= planarCopy;
        }
#if ARCH_BFIN
        if (flags & SWS_CPU_CAPS_BFIN)
            ff_bfin_get_unscaled_swscale (c);
#endif

        /* a special converter was found: no scaler filters or buffers are needed */
        if (c->swScale){
            if (flags&SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
            return c;
        }
    }

    if (flags & SWS_CPU_CAPS_MMX2)
    {
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
        {
            if (flags&SWS_PRINT_INFO)
                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
        }
        if (usesHFilter) c->canMMX2BeUsed=0;
    }
    else
        c->canMMX2BeUsed=0;

    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;

    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
    // n-2 is the last chrominance sample available
    // this is not perfect, but no one should notice the difference, the more correct variant
    // would be like the vertical one, but that would require some special code for the
    // first and last pixel
    if (flags&SWS_FAST_BILINEAR)
    {
        if (c->canMMX2BeUsed)
        {
            c->lumXInc+= 20;
            c->chrXInc+= 20;
        }
        //we don't use the x86 asm scaler if MMX is available
        else if (flags & SWS_CPU_CAPS_MMX)
        {
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
        }
    }

    /* precalculate horizontal scaler filter coefficients */
    {
        const int filterAlign=
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
            1;

        /* SWS_BICUBLIN: bicubic for luma, bilinear for chroma */
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
                   srcW      ,       dstW, filterAlign, 1<<14,
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                   srcFilter->lumH, dstFilter->lumH, c->param);
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                   srcFilter->chrH, dstFilter->chrH, c->param);

#define MAX_FUNNY_CODE_SIZE 10000
#if defined(COMPILE_MMX2)
// can't downscale !!!
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
        {
            /* the generated scaler code needs memory that is both writable and executable */
#ifdef MAP_ANONYMOUS
            c->funnyYCode  = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
            c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
#elif HAVE_VIRTUALALLOC
            c->funnyYCode  = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
            c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
            c->funnyYCode  = av_malloc(MAX_FUNNY_CODE_SIZE);
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
#endif

            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));

            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
        }
#endif /* defined(COMPILE_MMX2) */
    } // initialize horizontal stuff



    /* precalculate vertical scaler filter coefficients */
    {
        const int filterAlign=
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
            1;

        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
                   srcH      ,        dstH, filterAlign, (1<<12),
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
                   srcFilter->lumV, dstFilter->lumV, c->param);
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
                   srcFilter->chrV, dstFilter->chrV, c->param);

#if HAVE_ALTIVEC
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);

        /* replicate every vertical coefficient into all 8 elements of a vector */
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
            int j;
            short *p = (short *)&c->vYCoeffsBank[i];
            for (j=0;j<8;j++)
                p[j] = c->vLumFilter[i];
        }

        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
            int j;
            short *p = (short *)&c->vCCoeffsBank[i];
            for (j=0;j<8;j++)
                p[j] = c->vChrFilter[i];
        }
#endif
    }

    // calculate buffer sizes so that they won't run out while handling these damn slices
    c->vLumBufSize= c->vLumFilterSize;
    c->vChrBufSize= c->vChrFilterSize;
    for (i=0; i<dstH; i++)
    {
        int chrI= i*c->chrDstH / dstH;
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));

        /* round down to a multiple of the vertical chroma subsampling */
        nextSlice>>= c->chrSrcVSubSample;
        nextSlice<<= c->chrSrcVSubSample;
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
    }

    // allocate pixbufs dynamically; how many lines are needed depends on the
    // buffer sizes computed above. Each pointer array has 2*size entries and
    // the second half aliases the first.
    // NOTE(review): the results of the allocations below are not checked.
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
    if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
        c->alpPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
    /* align at 16 bytes for AltiVec */
    for (i=0; i<c->vLumBufSize; i++)
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
    for (i=0; i<c->vChrBufSize; i++)
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
        for (i=0; i<c->vLumBufSize; i++)
            c->alpPixBuf[i]= c->alpPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);

    //try to avoid drawing green stuff between the right end and the stride end
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);

    assert(2*VOFW == VOF);

    assert(c->chrDstH <= dstH);

    if (flags&SWS_PRINT_INFO)
    {
#ifdef DITHER1XBPP
        const char *dither= " dithered";
#else
        const char *dither= "";
#endif
        if (flags&SWS_FAST_BILINEAR)
            av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
        else if (flags&SWS_BILINEAR)
            av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
        else if (flags&SWS_BICUBIC)
            av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
        else if (flags&SWS_X)
            av_log(c, AV_LOG_INFO, "Experimental scaler, ");
        else if (flags&SWS_POINT)
            av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
        else if (flags&SWS_AREA)
            av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
        else if (flags&SWS_BICUBLIN)
            av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
        else if (flags&SWS_GAUSS)
            av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
        else if (flags&SWS_SINC)
            av_log(c, AV_LOG_INFO, "Sinc scaler, ");
        else if (flags&SWS_LANCZOS)
            av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
        else if (flags&SWS_SPLINE)
            av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
        else
            av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");

        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
        else
            av_log(c, AV_LOG_INFO, "from %s to %s ",
                   sws_format_name(srcFormat), sws_format_name(dstFormat));

        if (flags & SWS_CPU_CAPS_MMX2)
            av_log(c, AV_LOG_INFO, "using MMX2\n");
        else if (flags & SWS_CPU_CAPS_3DNOW)
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
        else if (flags & SWS_CPU_CAPS_MMX)
            av_log(c, AV_LOG_INFO, "using MMX\n");
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
        else
            av_log(c, AV_LOG_INFO, "using C\n");
    }

    if (flags & SWS_PRINT_INFO)
    {
        if (flags & SWS_CPU_CAPS_MMX)
        {
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
            else
            {
                if (c->hLumFilterSize==4)
                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
                else if (c->hLumFilterSize==8)
                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
                else
                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");

                if (c->hChrFilterSize==4)
                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
                else if (c->hChrFilterSize==8)
                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
                else
                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
            }
        }
        else
        {
#if ARCH_X86
            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
#else
            if (flags & SWS_FAST_BILINEAR)
                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
            else
                av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
#endif
        }
        if (isPlanarYUV(dstFormat))
        {
            if (c->vLumFilterSize==1)
                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
            else
                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
        }
        else
        {
            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
                       "      2-tap scaler for vertical chrominance scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
                av_log(c, AV_LOG_VERBOSE, "using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
            else
                av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
        }

        if (dstFormat==PIX_FMT_BGR24)
            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
        else if (dstFormat==PIX_FMT_RGB32)
            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
        else if (dstFormat==PIX_FMT_BGR565)
            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
        else if (dstFormat==PIX_FMT_BGR555)
            av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");

        av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
    }
    if (flags & SWS_PRINT_INFO)
    {
        av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
        av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
    }

    c->swScale= getSwsFunc(c);
    return c;
}
3099

    
3100
static void reset_ptr(uint8_t* src[], int format){
3101
    if(!isALPHA(format))
3102
        src[3]=NULL;
3103
    if(!isPlanarYUV(format)){
3104
        src[3]=src[2]=NULL;
3105
        if(   format != PIX_FMT_PAL8
3106
           && format != PIX_FMT_RGB8
3107
           && format != PIX_FMT_BGR8
3108
           && format != PIX_FMT_RGB4_BYTE
3109
           && format != PIX_FMT_BGR4_BYTE
3110
          )
3111
            src[1]= NULL;
3112
    }
3113
}
3114

    
3115
/**
3116
 * swscale wrapper, so we don't need to export the SwsContext.
3117
 * Assumes planar YUV to be in YUV order instead of YVU.
3118
 */
3119
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
3120
              int srcSliceH, uint8_t* dst[], int dstStride[]){
3121
    int i;
3122
    uint8_t* src2[4]= {src[0], src[1], src[2], src[3]};
3123
    uint8_t* dst2[4]= {dst[0], dst[1], dst[2], dst[3]};
3124

    
3125
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
3126
        av_log(c, AV_LOG_ERROR, "Slices start in the middle!\n");
3127
        return 0;
3128
    }
3129
    if (c->sliceDir == 0) {
3130
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
3131
    }
3132

    
3133
    if (usePal(c->srcFormat)){
3134
        for (i=0; i<256; i++){
3135
            int p, r, g, b,y,u,v;
3136
            if(c->srcFormat == PIX_FMT_PAL8){
3137
                p=((uint32_t*)(src[1]))[i];
3138
                r= (p>>16)&0xFF;
3139
                g= (p>> 8)&0xFF;
3140
                b=  p     &0xFF;
3141
            }else if(c->srcFormat == PIX_FMT_RGB8){
3142
                r= (i>>5    )*36;
3143
                g= ((i>>2)&7)*36;
3144
                b= (i&3     )*85;
3145
            }else if(c->srcFormat == PIX_FMT_BGR8){
3146
                b= (i>>6    )*85;
3147
                g= ((i>>3)&7)*36;
3148
                r= (i&7     )*36;
3149
            }else if(c->srcFormat == PIX_FMT_RGB4_BYTE){
3150
                r= (i>>3    )*255;
3151
                g= ((i>>1)&3)*85;
3152
                b= (i&1     )*255;
3153
            }else {
3154
                assert(c->srcFormat == PIX_FMT_BGR4_BYTE);
3155
                b= (i>>3    )*255;
3156
                g= ((i>>1)&3)*85;
3157
                r= (i&1     )*255;
3158
            }
3159
            y= av_clip_uint8((RY*r + GY*g + BY*b + ( 33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
3160
            u= av_clip_uint8((RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
3161
            v= av_clip_uint8((RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
3162
            c->pal_yuv[i]= y + (u<<8) + (v<<16);
3163

    
3164

    
3165
            switch(c->dstFormat) {
3166
            case PIX_FMT_BGR32:
3167
#ifndef WORDS_BIGENDIAN
3168
            case PIX_FMT_RGB24:
3169
#endif
3170
                c->pal_rgb[i]=  r + (g<<8) + (b<<16);
3171
                break;
3172
            case PIX_FMT_BGR32_1:
3173
#ifdef  WORDS_BIGENDIAN
3174
            case PIX_FMT_BGR24:
3175
#endif
3176
                c->pal_rgb[i]= (r + (g<<8) + (b<<16)) << 8;
3177
                break;
3178
            case PIX_FMT_RGB32_1:
3179
#ifdef  WORDS_BIGENDIAN
3180
            case PIX_FMT_RGB24:
3181
#endif
3182
                c->pal_rgb[i]= (b + (g<<8) + (r<<16)) << 8;
3183
                break;
3184
            case PIX_FMT_RGB32:
3185
#ifndef WORDS_BIGENDIAN
3186
            case PIX_FMT_BGR24:
3187
#endif
3188
            default:
3189
                c->pal_rgb[i]=  b + (g<<8) + (r<<16);
3190
            }
3191
        }
3192
    }
3193

    
3194
    // copy strides, so they can safely be modified
3195
    if (c->sliceDir == 1) {
3196
        // slices go from top to bottom
3197
        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2], srcStride[3]};
3198
        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2], dstStride[3]};
3199

    
3200
        reset_ptr(src2, c->srcFormat);
3201
        reset_ptr(dst2, c->dstFormat);
3202

    
3203
        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst2, dstStride2);
3204
    } else {
3205
        // slices go from bottom to top => we flip the image internally
3206
        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2], -srcStride[3]};
3207
        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2], -dstStride[3]};
3208

    
3209
        src2[0] += (srcSliceH-1)*srcStride[0];
3210
        if (!usePal(c->srcFormat))
3211
            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
3212
        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
3213
        src2[3] += (srcSliceH-1)*srcStride[3];
3214
        dst2[0] += ( c->dstH                      -1)*dstStride[0];
3215
        dst2[1] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1];
3216
        dst2[2] += ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2];
3217
        dst2[3] += ( c->dstH                      -1)*dstStride[3];
3218

    
3219
        reset_ptr(src2, c->srcFormat);
3220
        reset_ptr(dst2, c->dstFormat);
3221

    
3222
        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
3223
    }
3224
}
3225

    
3226
#if LIBSWSCALE_VERSION_MAJOR < 1
3227
/**
 * Compatibility entry point, only built while LIBSWSCALE_VERSION_MAJOR < 1.
 * Forwards all arguments unchanged to sws_scale() and returns its result.
 */
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int ret= sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);

    return ret;
}
3231
#endif
3232

    
3233
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
3234
                                float lumaSharpen, float chromaSharpen,
3235
                                float chromaHShift, float chromaVShift,
3236
                                int verbose)
3237
{
3238
    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
3239

    
3240
    if (lumaGBlur!=0.0){
3241
        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
3242
        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
3243
    }else{
3244
        filter->lumH= sws_getIdentityVec();
3245
        filter->lumV= sws_getIdentityVec();
3246
    }
3247

    
3248
    if (chromaGBlur!=0.0){
3249
        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
3250
        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
3251
    }else{
3252
        filter->chrH= sws_getIdentityVec();
3253
        filter->chrV= sws_getIdentityVec();
3254
    }
3255

    
3256
    if (chromaSharpen!=0.0){
3257
        SwsVector *id= sws_getIdentityVec();
3258
        sws_scaleVec(filter->chrH, -chromaSharpen);
3259
        sws_scaleVec(filter->chrV, -chromaSharpen);
3260
        sws_addVec(filter->chrH, id);
3261
        sws_addVec(filter->chrV, id);
3262
        sws_freeVec(id);
3263
    }
3264

    
3265
    if (lumaSharpen!=0.0){
3266
        SwsVector *id= sws_getIdentityVec();
3267
        sws_scaleVec(filter->lumH, -lumaSharpen);
3268
        sws_scaleVec(filter->lumV, -lumaSharpen);
3269
        sws_addVec(filter->lumH, id);
3270
        sws_addVec(filter->lumV, id);
3271
        sws_freeVec(id);
3272
    }
3273

    
3274
    if (chromaHShift != 0.0)
3275
        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
3276

    
3277
    if (chromaVShift != 0.0)
3278
        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
3279

    
3280
    sws_normalizeVec(filter->chrH, 1.0);
3281
    sws_normalizeVec(filter->chrV, 1.0);
3282
    sws_normalizeVec(filter->lumH, 1.0);
3283
    sws_normalizeVec(filter->lumV, 1.0);
3284

    
3285
    if (verbose) sws_printVec2(filter->chrH, NULL, AV_LOG_DEBUG);
3286
    if (verbose) sws_printVec2(filter->lumH, NULL, AV_LOG_DEBUG);
3287

    
3288
    return filter;
3289
}
3290

    
3291
SwsVector *sws_getGaussianVec(double variance, double quality){
3292
    const int length= (int)(variance*quality + 0.5) | 1;
3293
    int i;
3294
    double *coeff= av_malloc(length*sizeof(double));
3295
    double middle= (length-1)*0.5;
3296
    SwsVector *vec= av_malloc(sizeof(SwsVector));
3297

    
3298
    vec->coeff= coeff;
3299
    vec->length= length;
3300

    
3301
    for (i=0; i<length; i++)
3302
    {
3303
        double dist= i-middle;
3304
        coeff[i]= exp(-dist*dist/(2*variance*variance)) / sqrt(2*variance*PI);
3305
    }
3306

    
3307
    sws_normalizeVec(vec, 1.0);
3308

    
3309
    return vec;
3310
}
3311

    
3312
SwsVector *sws_getConstVec(double c, int length){
3313
    int i;
3314
    double *coeff= av_malloc(length*sizeof(double));
3315
    SwsVector *vec= av_malloc(sizeof(SwsVector));
3316

    
3317
    vec->coeff= coeff;
3318
    vec->length= length;
3319

    
3320
    for (i=0; i<length; i++)
3321
        coeff[i]= c;
3322

    
3323
    return vec;
3324
}
3325

    
3326

    
3327
/**
 * Allocates the identity vector: a single coefficient with the value 1.0.
 *
 * @return whatever sws_getConstVec(1.0, 1) returns
 */
SwsVector *sws_getIdentityVec(void){
    SwsVector *identity= sws_getConstVec(1.0, 1);

    return identity;
}
3330

    
3331
/**
 * Returns the sum of all coefficients of a vector.
 * Coefficients are accumulated from index 0 upwards.
 */
double sws_dcVec(SwsVector *a){
    double total= 0;
    int k= 0;

    while (k < a->length){
        total+= a->coeff[k];
        k++;
    }

    return total;
}
3340

    
3341
/**
 * Multiplies every coefficient of a vector by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
    int k= 0;

    while (k < a->length){
        a->coeff[k]= a->coeff[k]*scalar;
        k++;
    }
}
3347

    
3348
/**
 * Rescales a vector in place so that its coefficients sum to height.
 * The scale factor is height divided by the current coefficient sum.
 */
void sws_normalizeVec(SwsVector *a, double height){
    const double dc= sws_dcVec(a);

    sws_scaleVec(a, height/dc);
}
3351

    
3352
/**
 * Allocates the convolution of two vectors.
 * The result has a->length + b->length - 1 coefficients.
 *
 * @return the new vector, or NULL if an allocation fails
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
    int length= a->length + b->length - 1;
    double *coeff= av_malloc(length*sizeof(double));
    int i, j;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++)
    {
        for (j=0; j<b->length; j++)
        {
            coeff[i+j]+= a->coeff[i]*b->coeff[j];
        }
    }

    return vec;
}
3373

    
3374
/**
 * Allocates the element-wise sum of two vectors.
 * The result is as long as the longer input; each input is placed so that
 * its middle lines up with the middle of the result.
 *
 * @return the new vector, or NULL if an allocation fails
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

    return vec;
}
3390

    
3391
/**
 * Allocates the element-wise difference a - b of two vectors.
 * The result is as long as the longer input; each input is placed so that
 * its middle lines up with the middle of the result.
 *
 * @return the new vector, or NULL if an allocation fails
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

    return vec;
}
3407

    
3408
/* shift left / or right if "shift" is negative */
3409
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
3410
    int length= a->length + FFABS(shift)*2;
3411
    double *coeff= av_malloc(length*sizeof(double));
3412
    int i;
3413
    SwsVector *vec= av_malloc(sizeof(SwsVector));
3414

    
3415
    vec->coeff= coeff;
3416
    vec->length= length;
3417

    
3418
    for (i=0; i<length; i++) coeff[i]= 0.0;
3419

    
3420
    for (i=0; i<a->length; i++)
3421
    {
3422
        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
3423
    }
3424

    
3425
    return vec;
3426
}
3427

    
3428
/**
 * Shifts a vector in place by replacing its coefficients with those of
 * sws_getShiftedVec(a, shift).
 * If the shifted vector cannot be created, a is left unchanged.
 */
void sws_shiftVec(SwsVector *a, int shift){
    SwsVector *shifted= sws_getShiftedVec(a, shift);

    if (!shifted)
        return;

    av_free(a->coeff);
    // take over the coefficient array; only the temporary header is released
    a->coeff= shifted->coeff;
    a->length= shifted->length;
    av_free(shifted);
}
3435

    
3436
/**
 * Adds b to a in place by replacing a's coefficients with those of
 * sws_sumVec(a, b).
 * If the sum cannot be created, a is left unchanged.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
    SwsVector *sum= sws_sumVec(a, b);

    if (!sum)
        return;

    av_free(a->coeff);
    // take over the coefficient array; only the temporary header is released
    a->coeff= sum->coeff;
    a->length= sum->length;
    av_free(sum);
}
3443

    
3444
/**
 * Subtracts b from a in place by replacing a's coefficients with those of
 * sws_diffVec(a, b).
 * If the difference cannot be created, a is left unchanged.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
    SwsVector *diff= sws_diffVec(a, b);

    if (!diff)
        return;

    av_free(a->coeff);
    // take over the coefficient array; only the temporary header is released
    a->coeff= diff->coeff;
    a->length= diff->length;
    av_free(diff);
}
3451

    
3452
/**
 * Convolves a with b in place by replacing a's coefficients with those of
 * sws_getConvVec(a, b).
 * If the convolution cannot be created, a is left unchanged.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
    SwsVector *conv= sws_getConvVec(a, b);

    if (!conv)
        return;

    av_free(a->coeff);
    // take over the coefficient array; only the temporary header is released
    a->coeff= conv->coeff;
    a->length= conv->length;
    av_free(conv);
}
3459

    
3460
/**
 * Allocates a copy of a vector with its own coefficient array.
 *
 * @return the new vector, or NULL if an allocation fails
 */
SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}
3472

    
3473
/**
 * Logs a vector as a crude text plot, one line per coefficient.
 *
 * Each line holds the coefficient ("%1.3f "), then a run of spaces whose
 * length is the coefficient's position between min and max scaled to 60
 * columns, then "|". min and max both start at 0, so the plotted range always
 * includes 0.
 *
 * @param a         vector to print
 * @param log_ctx   context passed to av_log()
 * @param log_level level passed to av_log()
 */
void sws_printVec2(SwsVector *a, AVClass *log_ctx, int log_level){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        // with an all-zero vector range is 0; skip the division instead of
        // converting the resulting NaN to int
        int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
        av_log(log_ctx, log_level, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(log_ctx, log_level, " ");
        av_log(log_ctx, log_level, "|\n");
    }
}
3495

    
3496
#if LIBSWSCALE_VERSION_MAJOR < 1
3497
/**
 * Compatibility entry point, only built while LIBSWSCALE_VERSION_MAJOR < 1.
 * Prints the vector through sws_printVec2() with no log context at
 * AV_LOG_DEBUG level.
 */
void sws_printVec(SwsVector *a){
    AVClass *const noContext= NULL;

    sws_printVec2(a, noContext, AV_LOG_DEBUG);
}
3500
#endif
3501

    
3502
/**
 * Releases a vector together with its coefficient array.
 * A NULL vector is accepted and ignored.
 */
void sws_freeVec(SwsVector *a){
    if (a){
        av_freep(&a->coeff);
        a->length= 0;
        av_free(a);
    }
}
3508

    
3509
/**
 * Releases a filter and the four vectors it holds (lumH, lumV, chrH, chrV).
 * A NULL filter is accepted and ignored, as are NULL vector members.
 */
void sws_freeFilter(SwsFilter *filter){
    if (filter){
        SwsVector *const members[4]= {
            filter->lumH, filter->lumV, filter->chrH, filter->chrV
        };
        int k;

        for (k=0; k<4; k++){
            if (members[k])
                sws_freeVec(members[k]);
        }

        av_free(filter);
    }
}
3518

    
3519

    
3520
/**
 * Releases a scaling context and every buffer it owns.
 * A NULL context is accepted and ignored.
 */
void sws_freeContext(SwsContext *c){
    int line;

    if (!c)
        return;

    // line buffers: free each row, then the array of row pointers
    if (c->lumPixBuf){
        for (line=0; line<c->vLumBufSize; line++){
            av_freep(&c->lumPixBuf[line]);
        }
        av_freep(&c->lumPixBuf);
    }

    if (c->chrPixBuf){
        for (line=0; line<c->vChrBufSize; line++){
            av_freep(&c->chrPixBuf[line]);
        }
        av_freep(&c->chrPixBuf);
    }

    // the alpha buffer has vLumBufSize rows, like the luma buffer
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
        for (line=0; line<c->vLumBufSize; line++){
            av_freep(&c->alpPixBuf[line]);
        }
        av_freep(&c->alpPixBuf);
    }

    // filter coefficients
    av_freep(&c->vLumFilter);
    av_freep(&c->vChrFilter);
    av_freep(&c->hLumFilter);
    av_freep(&c->hChrFilter);
#if HAVE_ALTIVEC
    av_freep(&c->vYCoeffsBank);
    av_freep(&c->vCCoeffsBank);
#endif

    // filter positions
    av_freep(&c->vLumFilterPos);
    av_freep(&c->vChrFilterPos);
    av_freep(&c->hLumFilterPos);
    av_freep(&c->hChrFilterPos);

    // code buffers are released with the call selected by the build
    // configuration below
#if ARCH_X86 && CONFIG_GPL
#ifdef MAP_ANONYMOUS
    if (c->funnyYCode ) munmap(c->funnyYCode , MAX_FUNNY_CODE_SIZE);
    if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
#elif HAVE_VIRTUALALLOC
    if (c->funnyYCode ) VirtualFree(c->funnyYCode , MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
    if (c->funnyUVCode) VirtualFree(c->funnyUVCode, MAX_FUNNY_CODE_SIZE, MEM_RELEASE);
#else
    av_free(c->funnyYCode );
    av_free(c->funnyUVCode);
#endif
    c->funnyYCode=NULL;
    c->funnyUVCode=NULL;
#endif /* ARCH_X86 && CONFIG_GPL */

    av_freep(&c->lumMmx2Filter);
    av_freep(&c->chrMmx2Filter);
    av_freep(&c->lumMmx2FilterPos);
    av_freep(&c->chrMmx2FilterPos);
    av_freep(&c->yuvTable);

    av_free(c);
}
3581

    
3582
/**
 * Returns a context matching the requested conversion, reusing the given one
 * when possible.
 *
 * The existing context is returned as-is when its source and destination
 * size and format, its flags and its two parameters all equal the request.
 * Otherwise it is freed and a new context is created with sws_getContext().
 * srcFilter and dstFilter are not part of the comparison; they are only used
 * when a new context is created.
 *
 * @param context existing context to reuse, may be NULL
 * @param param   two tuning parameters; NULL selects SWS_PARAM_DEFAULT for both
 * @return the reused context, or the result of sws_getContext()
 */
struct SwsContext *sws_getCachedContext(struct SwsContext *context,
                                        int srcW, int srcH, enum PixelFormat srcFormat,
                                        int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                                        SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{
    static const double default_param[2] = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT};
    const double *wanted = param ? param : default_param;

    if (context) {
        const int reusable =
            context->srcW      == srcW      && context->srcH  == srcH  &&
            context->srcFormat == srcFormat &&
            context->dstW      == dstW      && context->dstH  == dstH  &&
            context->dstFormat == dstFormat && context->flags == flags &&
            context->param[0]  == wanted[0] && context->param[1] == wanted[1];

        if (reusable)
            return context;

        // the old context no longer matches the request, so drop it
        sws_freeContext(context);
    }

    return sws_getContext(srcW, srcH, srcFormat,
                          dstW, dstH, dstFormat, flags,
                          srcFilter, dstFilter, wanted);
}
3610