Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ e76709d8

History | View | Annotate | Download (107 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 28bf81c9 Michael Niedermayer
/*
25 9990e426 Michael Niedermayer
  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
28 6a4970ab Diego Biurrun

29 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
38 b935781b Michael Niedermayer
*/
39
40 6a4970ab Diego Biurrun
/*
41 a6f6b237 Diego Biurrun
tested special converters (most are tested actually, but I did not write it down ...)
42 e09d12f4 Michael Niedermayer
 YV12 -> BGR16
43 b935781b Michael Niedermayer
 YV12 -> YV12
44 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
45 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
46 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
47 b935781b Michael Niedermayer

48
untested special converters
49 f40c7dbb Diego Biurrun
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54 ec22603f Michael Niedermayer
  BGR24 -> YV12
55 28bf81c9 Michael Niedermayer
*/
56
57 d63a2cb1 Michael Niedermayer
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58 d3f41512 Michael Niedermayer
#include <inttypes.h>
59 dda87e9f Pierre Lombard
#include <string.h>
60 077ea8a7 Michael Niedermayer
#include <math.h>
61 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
62 171d7d78 Bohdan Horst
#include <unistd.h>
63 b2d374c9 Diego Biurrun
#include "config.h"
64 81b7c056 Michael Niedermayer
#include <assert.h>
65 b63f641e Aurelien Jacobs
#if HAVE_SYS_MMAN_H
66 38d5c282 Aurelien Jacobs
#include <sys/mman.h>
67 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68
#define MAP_ANONYMOUS MAP_ANON
69
#endif
70 38d5c282 Aurelien Jacobs
#endif
71 d604bab9 Michael Niedermayer
#include "swscale.h"
72 5427e242 Michael Niedermayer
#include "swscale_internal.h"
73 37079906 Michael Niedermayer
#include "rgb2rgb.h"
74 83da2c6f Diego Biurrun
#include "libavutil/x86_cpu.h"
75
#include "libavutil/bswap.h"
76 0d9f3d85 Arpi
77 b3e03fa7 Stefano Sabatini
unsigned swscale_version(void)
78
{
79
    return LIBSWSCALE_VERSION_INT;
80
}
81
82 541c4eb9 Michael Niedermayer
#undef MOVNTQ
83 7d7f78b5 Michael Niedermayer
#undef PAVGB
84 d3f41512 Michael Niedermayer
85 783e9cc9 Michael Niedermayer
//#undef HAVE_MMX2
86 f4406ec1 Diego Biurrun
//#define HAVE_AMD3DNOW
87 d3f41512 Michael Niedermayer
//#undef HAVE_MMX
88 783e9cc9 Michael Niedermayer
//#undef ARCH_X86
89 2ba1bff0 Michael Niedermayer
//#define WORDS_BIGENDIAN
90 d604bab9 Michael Niedermayer
#define DITHER1XBPP
91 d3f41512 Michael Niedermayer
92 f40c7dbb Diego Biurrun
#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
93 ac6a2e45 Michael Niedermayer
94 f40c7dbb Diego Biurrun
#define RET 0xC3 //near return opcode for x86
95 c1b0bfb4 Michael Niedermayer
96 28bf81c9 Michael Niedermayer
/* PI: reuse M_PI when the included headers define it, otherwise fall back
 * to a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
101 c1b0bfb4 Michael Niedermayer
102 9d9de37d Ivo van Poorten
/* Non-zero when pixel format x is one of the formats listed below, i.e. a
 * format this scaler accepts as input. x is evaluated multiple times, so
 * it must not have side effects. */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
    )
134
/* Non-zero when pixel format x is one of the formats listed below or is
 * classified as RGB/BGR by isRGB()/isBGR(), i.e. a format this scaler can
 * produce. x is evaluated multiple times. */
#define isSupportedOut(x)   (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || isRGB(x)                 \
        || isBGR(x)                 \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
    )
151
/* Non-zero when x is PAL8, YUYV422, UYVY422, or anything isRGB()/isBGR()
 * accepts. x is evaluated multiple times. */
#define isPacked(x)         (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || isRGB(x)                 \
        || isBGR(x)                 \
    )
158 49004617 Vitor Sessak
/* Non-zero when x is PAL8 or one of the 8-bit / 4-bit-per-byte RGB/BGR
 * formats listed below. x is evaluated multiple times. */
#define usePal(x)           (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
    )
165 6ff0ad6b Michael Niedermayer
166 6b79dbce Michael Niedermayer
/* Fixed-point RGB -> YUV coefficients. Each one is a weight scaled by
 * 219/255 (Y) or 224/255 (U, V), multiplied by 2^RGB2YUV_SHIFT and rounded
 * to the nearest integer. The first letter names the input channel
 * (R, G, B), the second the output component (Y, U, V). */
#define RGB2YUV_SHIFT 15
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
176 6c7506de Michael Niedermayer
177 fa58ba15 Kostya Shishkov
extern const int32_t ff_yuv2rgb_coeffs[8][4];
178 0481412a Michael Niedermayer
179 0f5d4aa8 Michael Niedermayer
/* RGB -> YUV weights: eight rows of nine coefficients each.
 * NOTE(review): the row index presumably matches the first dimension of
 * ff_yuv2rgb_coeffs (a colorspace id) -- confirm against the users of this
 * table.
 * The last row used to read "0.5  -0.116" without a comma, which merged two
 * entries into 0.384, shifted the remaining ones left and left the final
 * entry zero-initialized. */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
189
190 783e9cc9 Michael Niedermayer
/*
191
NOTES
192 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
193 31190492 Arpi

194 783e9cc9 Michael Niedermayer
TODO
195 bd7c6fd5 Diego Biurrun
more intelligent misalignment avoidance for the horizontal scaler
196 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
197 f40c7dbb Diego Biurrun
optimize C code (YV12 / minmax)
198
add support for packed pixel YUV input & output
199 6ff0ad6b Michael Niedermayer
add support for Y8 output
200 f40c7dbb Diego Biurrun
optimize BGR24 & BGR32
201 ff7ba856 Michael Niedermayer
add BGR4 output support
202 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
203 783e9cc9 Michael Niedermayer
*/
204 31190492 Arpi
205 b63f641e Aurelien Jacobs
#if ARCH_X86 && CONFIG_GPL
/* 8-byte aligned 64-bit constants, only built when ARCH_X86 && CONFIG_GPL.
 * NOTE(review): presumably consumed by the MMX inline assembly -- confirm. */
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;

const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
        0x0103010301030103LL,
        0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* per-channel bit masks for four packed 16-bit (5-6-5) and 15-bit (5-5-5)
 * pixels */
DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;

DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;

/* FAST_BGR2YV12 selects 7-bit coefficients instead of 15-bit ones */
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;

#endif /* ARCH_X86 && CONFIG_GPL */
261 783e9cc9 Michael Niedermayer
262
// clipping helper table for C implementations:
263
static unsigned char clip_table[768];
264
265 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
266 6a4970ab Diego Biurrun
267 d0b69b28 Diego Pettenò
/* 2-row dither patterns; each row repeats a two-value pattern across eight
 * columns. dither_2x2_4 holds values 0..3, dither_2x2_8 holds values 0..6. */
static const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8]={
{  1,   3,   1,   3,   1,   3,   1,   3, },
{  2,   0,   2,   0,   2,   0,   2,   0, },
};

static const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8]={
{  6,   2,   6,   2,   6,   2,   6,   2, },
{  0,   4,   0,   4,   0,   4,   0,   4, },
};
276
277
/* 8x8 ordered-dither matrix with values 0..31 (each value occurs twice). */
const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8]={
{ 17,   9,  23,  15,  16,   8,  22,  14, },
{  5,  29,   3,  27,   4,  28,   2,  26, },
{ 21,  13,  19,  11,  20,  12,  18,  10, },
{  0,  24,   6,  30,   1,  25,   7,  31, },
{ 16,   8,  22,  14,  17,   9,  23,  15, },
{  4,  28,   2,  26,   5,  29,   3,  27, },
{ 20,  12,  18,  10,  21,  13,  19,  11, },
{  1,  25,   7,  31,   0,  24,   6,  30, },
};

/* 8x8 matrix with values 0..63; currently compiled out. */
#if 0
const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
{  0,  48,  12,  60,   3,  51,  15,  63, },
{ 32,  16,  44,  28,  35,  19,  47,  31, },
{  8,  56,   4,  52,  11,  59,   7,  55, },
{ 40,  24,  36,  20,  43,  27,  39,  23, },
{  2,  50,  14,  62,   1,  49,  13,  61, },
{ 34,  18,  46,  30,  33,  17,  45,  29, },
{ 10,  58,   6,  54,   9,  57,   5,  53, },
{ 42,  26,  38,  22,  41,  25,  37,  21, },
};
#endif
300
301
/* 8x8 ordered-dither matrix with values 0..72. */
const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8]={
{  0,  55,  14,  68,   3,  58,  17,  72, },
{ 37,  18,  50,  32,  40,  22,  54,  35, },
{  9,  64,   5,  59,  13,  67,   8,  63, },
{ 46,  27,  41,  23,  49,  31,  44,  26, },
{  2,  57,  16,  71,   1,  56,  15,  70, },
{ 39,  21,  52,  34,  38,  19,  51,  33, },
{ 11,  66,   7,  62,  10,  65,   6,  60, },
{ 48,  30,  43,  25,  47,  29,  42,  24, },
};

/* 8x8 matrix with values 0..126; currently compiled out. */
#if 0
const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
{ 68,  36,  92,  60,  66,  34,  90,  58, },
{ 20, 116,  12, 108,  18, 114,  10, 106, },
{ 84,  52,  76,  44,  82,  50,  74,  42, },
{  0,  96,  24, 120,   6, 102,  30, 126, },
{ 64,  32,  88,  56,  70,  38,  94,  62, },
{ 16, 112,   8, 104,  22, 118,  14, 110, },
{ 80,  48,  72,  40,  86,  54,  78,  46, },
{  4, 100,  28, 124,   2,  98,  26, 122, },
};
#endif
324
325
/* 8x8 dither matrix with values up to 217. Four variants are kept in the
 * source; because the first condition is "#if 1", only the first (linear)
 * table is ever compiled. To try one of the gamma-correcting variants,
 * change the leading conditions to 0 up to the desired table. */
#if 1
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
{117,  62, 158, 103, 113,  58, 155, 100, },
{ 34, 199,  21, 186,  31, 196,  17, 182, },
{144,  89, 131,  76, 141,  86, 127,  72, },
{  0, 165,  41, 206,  10, 175,  52, 217, },
{110,  55, 151,  96, 120,  65, 162, 107, },
{ 28, 193,  14, 179,  38, 203,  24, 189, },
{138,  83, 124,  69, 148,  93, 134,  79, },
{  7, 172,  48, 213,   3, 168,  45, 210, },
};
#elif 1
// tries to correct a gamma of 1.5
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
{  0, 143,  18, 200,   2, 156,  25, 215, },
{ 78,  28, 125,  64,  89,  36, 138,  74, },
{ 10, 180,   3, 161,  16, 195,   8, 175, },
{109,  51,  93,  38, 121,  60, 105,  47, },
{  1, 152,  23, 210,   0, 147,  20, 205, },
{ 85,  33, 134,  71,  81,  30, 130,  67, },
{ 14, 190,   6, 171,  12, 185,   5, 166, },
{117,  57, 101,  44, 113,  54,  97,  41, },
};
#elif 1
// tries to correct a gamma of 2.0
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
{  0, 124,   8, 193,   0, 140,  12, 213, },
{ 55,  14, 104,  42,  66,  19, 119,  52, },
{  3, 168,   1, 145,   6, 187,   3, 162, },
{ 86,  31,  70,  21,  99,  39,  82,  28, },
{  0, 134,  11, 206,   0, 129,   9, 200, },
{ 62,  17, 114,  48,  58,  16, 109,  45, },
{  5, 181,   2, 157,   4, 175,   1, 151, },
{ 95,  36,  78,  26,  90,  34,  74,  24, },
};
#else
// tries to correct a gamma of 2.5
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8]={
{  0, 107,   3, 187,   0, 125,   6, 212, },
{ 39,   7,  86,  28,  49,  11, 102,  36, },
{  1, 158,   0, 131,   3, 180,   1, 151, },
{ 68,  19,  52,  12,  81,  25,  64,  17, },
{  0, 119,   5, 203,   0, 113,   4, 195, },
{ 45,   9,  96,  33,  42,   8,  91,  30, },
{  2, 172,   1, 144,   2, 165,   0, 137, },
{ 77,  23,  60,  15,  72,  21,  56,  14, },
};
#endif
373 5cebb24b Michael Niedermayer
374 8055ede6 Baptiste Coudurier
const char *sws_format_name(enum PixelFormat format)
375 94c4def2 Luca Abeni
{
376 e9e12f0e Luca Abeni
    switch (format) {
377
        case PIX_FMT_YUV420P:
378
            return "yuv420p";
379 79973335 Aurelien Jacobs
        case PIX_FMT_YUVA420P:
380
            return "yuva420p";
381 e9e12f0e Luca Abeni
        case PIX_FMT_YUYV422:
382
            return "yuyv422";
383
        case PIX_FMT_RGB24:
384
            return "rgb24";
385
        case PIX_FMT_BGR24:
386
            return "bgr24";
387
        case PIX_FMT_YUV422P:
388
            return "yuv422p";
389
        case PIX_FMT_YUV444P:
390
            return "yuv444p";
391
        case PIX_FMT_RGB32:
392
            return "rgb32";
393
        case PIX_FMT_YUV410P:
394
            return "yuv410p";
395
        case PIX_FMT_YUV411P:
396
            return "yuv411p";
397
        case PIX_FMT_RGB565:
398
            return "rgb565";
399
        case PIX_FMT_RGB555:
400
            return "rgb555";
401 4884b9e5 Kostya Shishkov
        case PIX_FMT_GRAY16BE:
402
            return "gray16be";
403
        case PIX_FMT_GRAY16LE:
404
            return "gray16le";
405 e9e12f0e Luca Abeni
        case PIX_FMT_GRAY8:
406
            return "gray8";
407
        case PIX_FMT_MONOWHITE:
408
            return "mono white";
409
        case PIX_FMT_MONOBLACK:
410
            return "mono black";
411
        case PIX_FMT_PAL8:
412
            return "Palette";
413
        case PIX_FMT_YUVJ420P:
414
            return "yuvj420p";
415
        case PIX_FMT_YUVJ422P:
416
            return "yuvj422p";
417
        case PIX_FMT_YUVJ444P:
418
            return "yuvj444p";
419
        case PIX_FMT_XVMC_MPEG2_MC:
420
            return "xvmc_mpeg2_mc";
421
        case PIX_FMT_XVMC_MPEG2_IDCT:
422
            return "xvmc_mpeg2_idct";
423
        case PIX_FMT_UYVY422:
424
            return "uyvy422";
425
        case PIX_FMT_UYYVYY411:
426
            return "uyyvyy411";
427
        case PIX_FMT_RGB32_1:
428
            return "rgb32x";
429
        case PIX_FMT_BGR32_1:
430
            return "bgr32x";
431
        case PIX_FMT_BGR32:
432
            return "bgr32";
433
        case PIX_FMT_BGR565:
434
            return "bgr565";
435
        case PIX_FMT_BGR555:
436
            return "bgr555";
437
        case PIX_FMT_BGR8:
438
            return "bgr8";
439
        case PIX_FMT_BGR4:
440
            return "bgr4";
441
        case PIX_FMT_BGR4_BYTE:
442
            return "bgr4 byte";
443
        case PIX_FMT_RGB8:
444
            return "rgb8";
445
        case PIX_FMT_RGB4:
446
            return "rgb4";
447
        case PIX_FMT_RGB4_BYTE:
448
            return "rgb4 byte";
449
        case PIX_FMT_NV12:
450
            return "nv12";
451
        case PIX_FMT_NV21:
452
            return "nv21";
453 9ba7fe6d Andreas Öman
        case PIX_FMT_YUV440P:
454
            return "yuv440p";
455 420169e5 Carl Eugen Hoyos
        case PIX_FMT_VDPAU_H264:
456
            return "vdpau_h264";
457 4e2b4876 NVIDIA Corporation
        case PIX_FMT_VDPAU_MPEG1:
458
            return "vdpau_mpeg1";
459
        case PIX_FMT_VDPAU_MPEG2:
460
            return "vdpau_mpeg2";
461 0ab80395 Carl Eugen Hoyos
        case PIX_FMT_VDPAU_WMV3:
462
            return "vdpau_wmv3";
463
        case PIX_FMT_VDPAU_VC1:
464
            return "vdpau_vc1";
465 e9e12f0e Luca Abeni
        default:
466
            return "Unknown format";
467
    }
468 94c4def2 Luca Abeni
}
469
470 5859233b Michael Niedermayer
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
471 221b804f Diego Biurrun
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
472
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
473 e3d2500f Michael Niedermayer
{
474 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
475 221b804f Diego Biurrun
    int i;
476
    for (i=0; i<dstW; i++)
477
    {
478
        int val=1<<18;
479
        int j;
480
        for (j=0; j<lumFilterSize; j++)
481
            val += lumSrc[j][i] * lumFilter[j];
482
483
        dest[i]= av_clip_uint8(val>>19);
484
    }
485
486 1b0a4572 Benoit Fouet
    if (uDest)
487 221b804f Diego Biurrun
        for (i=0; i<chrDstW; i++)
488
        {
489
            int u=1<<18;
490
            int v=1<<18;
491
            int j;
492
            for (j=0; j<chrFilterSize; j++)
493
            {
494
                u += chrSrc[j][i] * chrFilter[j];
495 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
496 221b804f Diego Biurrun
            }
497
498
            uDest[i]= av_clip_uint8(u>>19);
499
            vDest[i]= av_clip_uint8(v>>19);
500
        }
501 e3d2500f Michael Niedermayer
}
502
503 6118e52e Ville Syrjälä
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
504 221b804f Diego Biurrun
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
505
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
506 6118e52e Ville Syrjälä
{
507 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
508 221b804f Diego Biurrun
    int i;
509
    for (i=0; i<dstW; i++)
510
    {
511
        int val=1<<18;
512
        int j;
513
        for (j=0; j<lumFilterSize; j++)
514
            val += lumSrc[j][i] * lumFilter[j];
515
516
        dest[i]= av_clip_uint8(val>>19);
517
    }
518
519 1b0a4572 Benoit Fouet
    if (!uDest)
520 221b804f Diego Biurrun
        return;
521
522
    if (dstFormat == PIX_FMT_NV12)
523
        for (i=0; i<chrDstW; i++)
524
        {
525
            int u=1<<18;
526
            int v=1<<18;
527
            int j;
528
            for (j=0; j<chrFilterSize; j++)
529
            {
530
                u += chrSrc[j][i] * chrFilter[j];
531 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
532 221b804f Diego Biurrun
            }
533
534
            uDest[2*i]= av_clip_uint8(u>>19);
535
            uDest[2*i+1]= av_clip_uint8(v>>19);
536
        }
537
    else
538
        for (i=0; i<chrDstW; i++)
539
        {
540
            int u=1<<18;
541
            int v=1<<18;
542
            int j;
543
            for (j=0; j<chrFilterSize; j++)
544
            {
545
                u += chrSrc[j][i] * chrFilter[j];
546 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
547 221b804f Diego Biurrun
            }
548
549
            uDest[2*i]= av_clip_uint8(v>>19);
550
            uDest[2*i+1]= av_clip_uint8(u>>19);
551
        }
552 6118e52e Ville Syrjälä
}
553 46de8b73 Michael Niedermayer
554 bdf397ba Michael Niedermayer
/* Loop header for multi-tap vertical filtering into packed output, two
 * pixels per iteration. Expects i, dstW, lumSrc, lumFilter, lumFilterSize,
 * chrSrc, chrFilter and chrFilterSize in the expanding scope. Opens a
 * for-loop body that the user of the macro must close with '}'. Inside the
 * body Y1, Y2, U and V hold the filtered samples (rounded, >>19) and r, g,
 * b are declared as unset pointers to `type`. No clipping is done here. */
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;

/* Same as YSCALE_YUV_2_PACKEDX_NOCLIP_C, followed by clamping of Y1, Y2, U
 * and V to 0..255. The clamp is only entered when bit 8 is set in at least
 * one of the four values. */
#define YSCALE_YUV_2_PACKEDX_C(type) \
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }
592 6a4970ab Diego Biurrun
593 f0faee4c Michael Niedermayer
/* Loop header for multi-tap vertical filtering with one chroma sample per
 * output pixel. Expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc,
 * chrFilter and chrFilterSize in the expanding scope and opens a for-loop
 * body that the user must close with '}'. Inside the body Y, U and V hold
 * the filtered sums shifted right by 10; U and V start from -(128<<19) so
 * they end up centred on zero. R, G and B are declared but not set.
 * The bias is written as -(128<<19) because left-shifting a negative value
 * is undefined behaviour in C. */
#define YSCALE_YUV_2_PACKEDX_FULL_C \
    for (i=0; i<dstW; i++){\
        int j;\
        int Y = 0;\
        int U = -(128<<19);\
        int V = -(128<<19);\
        int R,G,B;\
        \
        for (j=0; j<lumFilterSize; j++){\
            Y += lumSrc[j][i     ] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++){\
            U += chrSrc[j][i     ] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y >>=10;\
        U >>=10;\
        V >>=10;

/* YSCALE_YUV_2_PACKEDX_FULL_C followed by the YUV -> RGB matrix taken from
 * the context `c`. rnd is added to the scaled luma before the chroma terms.
 * R, G and B are clamped to 0 .. (256<<22)-1; the clamp is only entered
 * when one of the two top bits of R|G|B is set. */
#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
    YSCALE_YUV_2_PACKEDX_FULL_C\
        Y-= c->yuv2rgb_y_offset;\
        Y*= c->yuv2rgb_y_coeff;\
        Y+= rnd;\
        R= Y + V*c->yuv2rgb_v2r_coeff;\
        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
        if ((R|G|B)&(0xC0000000)){\
            if (R>=(256<<22))   R=(256<<22)-1; \
            else if (R<0)R=0;   \
            if (G>=(256<<22))   G=(256<<22)-1; \
            else if (G<0)G=0;   \
            if (B>=(256<<22))   B=(256<<22)-1; \
            else if (B<0)B=0;   \
        }
628
629
630 e69bd294 Michael Niedermayer
/* Loop header for multi-tap vertical filtering into 16-bit gray, two
 * pixels per iteration. Expects i, dstW, lumSrc, lumFilter and
 * lumFilterSize in the expanding scope and opens a for-loop body that the
 * user must close with '}'. Inside the body Y1 and Y2 hold the filtered
 * luma (rounded with 1<<18, shifted right by 11) clamped to 0..65535; the
 * clamp is only entered when bit 16 is set in one of the tested values.
 * U and V are declared and stay at their initial value; no chroma is
 * filtered. */
#define YSCALE_YUV_2_GRAY16_C \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        \
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        Y1>>=11;\
        Y2>>=11;\
        if ((Y1|Y2|U|V)&65536)\
        {\
            if (Y1>65535)   Y1=65535; \
            else if (Y1<0)Y1=0;   \
            if (Y2>65535)   Y2=65535; \
            else if (Y2<0)Y2=0;   \
        }
654
655 46de8b73 Michael Niedermayer
/* YSCALE_YUV_2_PACKEDX_C followed by the per-channel table lookups: r, g
 * and b are set from c->table_rV, c->table_gU + c->table_gV and
 * c->table_bU, indexed by the clamped V and U. Leaves the for-loop body
 * open; the user must close it with '}'. */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];
660
661
/* Two-line blend, two pixels per iteration. Expects i, dstW, buf0, buf1,
 * uvbuf0, uvbuf1, yalpha, yalpha1, uvalpha and uvalpha1 in the expanding
 * scope and opens a for-loop body that the user must close with '}'.
 * Y1, Y2, U and V are the weighted sums of the two lines shifted right
 * by 19; V is read VOFW entries after U. */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;

/* Luma-only variant of the two-line blend for 16-bit gray: the weighted
 * sums are shifted right by 11 instead of 19. */
#define YSCALE_YUV_2_GRAY16_2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;

/* YSCALE_YUV_2_PACKED2_C followed by the r/g/b table lookups from `c`. */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
681 cf7d1c1a Michael Niedermayer
682 25593e29 Michael Niedermayer
/* Single-line (no vertical interpolation) loop header, two pixels per
 * iteration. Expects i, dstW, buf0 and uvbuf1 in the expanding scope and
 * opens a for-loop body that the user must close with '}'. Y1/Y2 come
 * from buf0 and U/V from uvbuf1, each shifted right by 7; V is read VOFW
 * entries after U. */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+VOFW])>>7;

/* Luma-only single-line variant for 16-bit gray: samples are shifted left
 * by 1. */
#define YSCALE_YUV_2_GRAY16_1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]<<1;\
        int Y2= buf0[i2+1]<<1;

/* YSCALE_YUV_2_PACKED1_C followed by the r/g/b table lookups from `c`. */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
702 cf7d1c1a Michael Niedermayer
703 25593e29 Michael Niedermayer
#define YSCALE_YUV_2_PACKED1B_C \
704 221b804f Diego Biurrun
    for (i=0; i<(dstW>>1); i++){\
705
        const int i2= 2*i;\
706
        int Y1= buf0[i2  ]>>7;\
707
        int Y2= buf0[i2+1]>>7;\
708
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
709 8b2fce0d Michael Niedermayer
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
710 46de8b73 Michael Niedermayer
711
/* Averaged-chroma variant of the RGB lookup-table setup.
 *
 * Expands YSCALE_YUV_2_PACKED1B_C (open loop providing i2, Y1, Y2, U, V),
 * then declares r, g and b and points them at the context's per-chroma
 * lookup tables, cast to the destination pixel `type`:
 *   r <- table_rV[V],  g <- table_gU[U] + table_gV[V],  b <- table_bU[U]
 * The user of the macro indexes r/g/b with the luma value and closes the
 * loop. Requires the SwsContext pointer `c` in scope. */
#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
717 cf7d1c1a Michael Niedermayer
718 ec1bca2a Michael Niedermayer
/* 1 bit per pixel output blended from two luma lines.
 *
 * Processes 8 pixels per iteration: each pixel is the buf0/buf1 blend
 * (weights yalpha1/yalpha, >>19) plus the ordered-dither value for its
 * column (row y&7 of dither_8x8_220), looked up in the neutral-chroma
 * green table (U = V = 128). `acc += acc + bit` shifts the accumulator left
 * by one and appends the new bit, so the first pixel ends up in the most
 * significant bit of the stored byte.
 * The byte is stored as is for PIX_FMT_MONOBLACK and inverted otherwise.
 * Unlike the *_PACKED* macros this one is self-contained (it closes its own
 * loop). Pixels beyond the last full group of 8 are not written.
 * Requires c, y, i, dstW, dest, buf0, buf1, yalpha and yalpha1 in scope. */
#define YSCALE_YUV_2_MONO2_C \
    const uint8_t * const d128=dither_8x8_220[y&7];\
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
    for (i=0; i<dstW-7; i+=8){\
        int acc;\
        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
        dest++;\
    }
734
735
736 ec1bca2a Michael Niedermayer
/* 1 bit per pixel output with an arbitrary-length vertical luma filter.
 *
 * Processes two pixels per iteration: each luma value is the sum of
 * lumSrc[j][x]*lumFilter[j] over the lumFilterSize taps, started at 1<<18
 * for rounding, shifted right by 19 and clamped to 0..255 (the
 * `(Y1|Y2)&256` test is a cheap check that skips the clamp when neither
 * value has bit 8 set). The dithered value (row y&7 of dither_8x8_220) is
 * looked up in the neutral-chroma green table (U = V = 128).
 * `acc += acc + bit` shifts the accumulator left and appends the new bit;
 * after every 8th pixel ((i&7)==6 with a step of 2) the low byte is stored,
 * as is for PIX_FMT_MONOBLACK and inverted otherwise.
 * The macro is self-contained (it closes its own loop).
 * Requires c, y, i, dstW, dest, lumSrc, lumFilter, lumFilterSize in scope. */
#define YSCALE_YUV_2_MONOX_C \
    const uint8_t * const d128=dither_8x8_220[y&7];\
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
    int acc=0;\
    for (i=0; i<dstW-1; i+=2){\
        int j;\
        int Y1=1<<18;\
        int Y2=1<<18;\
\
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i] * lumFilter[j];\
            Y2 += lumSrc[j][i+1] * lumFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        if ((Y1|Y2)&256)\
        {\
            if (Y1>255)   Y1=255;\
            else if (Y1<0)Y1=0;\
            if (Y2>255)   Y2=255;\
            else if (Y2<0)Y2=0;\
        }\
        acc+= acc + g[Y1+d128[(i+0)&7]];\
        acc+= acc + g[Y2+d128[(i+1)&7]];\
        if ((i&7)==6){\
            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
            dest++;\
        }\
    }
766
767
768
/* Generic packed-output writer: dispatches on c->dstFormat and stores two
 * pixels per loop iteration.
 *
 * Parameters (all are macros that are expanded inside the matching case):
 *   func(type)     - opens the per-pixel-pair loop and provides i2, Y1, Y2
 *                    and the lookup tables r, g, b typed as `type *`
 *                    (e.g. YSCALE_YUV_2_RGB1_C)
 *   func2          - opens the loop and provides i2, Y1, Y2, U, V for the
 *                    packed YUV 4:2:2 formats
 *   func_g16       - opens the loop and provides i2, Y1, Y2 for 16-bit gray
 *   func_monoblack - complete, self-contained 1 bpp writer
 * Every case except the mono one closes the loop that the parameter macro
 * opened, which is why those cases end in a lone `}`.
 *
 * The 15/16 bpp and the 8/4 bpp cases add an ordered-dither offset, chosen
 * from the output row `y` (and, for the 8x8 tables, the column), to the luma
 * index before the table lookup.
 * PIX_FMT_RGB4/BGR4 pack two pixels into one byte (second pixel in the high
 * nibble) and therefore index dest with i, not i2.
 * Formats not listed are silently ignored (there is no default case).
 * Requires c, dest, y and i in the enclosing scope. */
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
    case PIX_FMT_RGB32_1:\
    case PIX_FMT_BGR32_1:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
    case PIX_FMT_MONOWHITE:\
        {\
            func_monoblack\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16BE:\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16LE:\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
        }                \
        break;\
    }
904 cf7d1c1a Michael Niedermayer
905
906 25593e29 Michael Niedermayer
/**
 * Vertical scaling with arbitrary-length filters, written to any packed
 * output format supported by YSCALE_YUV_2_ANYRGB_C (plain C version).
 *
 * The whole body is generated by YSCALE_YUV_2_ANYRGB_C, instantiated with
 * the "X" (multi-tap) loop macros; those macros read the parameters below
 * by name, so none of them may be renamed.
 *
 * @param c             scaler context (destination format, lookup tables)
 * @param lumFilter     vertical luma filter coefficients
 * @param lumSrc        luma input lines, one pointer per filter tap
 * @param lumFilterSize number of luma filter taps
 * @param chrFilter     vertical chroma filter coefficients
 * @param chrSrc        chroma input lines, one pointer per filter tap
 * @param chrFilterSize number of chroma filter taps
 * @param dest          output line
 * @param dstW          output width in pixels
 * @param y             output row, used to select the dither pattern row
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                                  uint8_t *dest, int dstW, int y)
{
    int i; /* loop counter used by the expanded macros */
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
}
913
914 f0faee4c Michael Niedermayer
static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
915
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
916
                                    uint8_t *dest, int dstW, int y)
917
{
918
    int i;
919
    int step= fmt_depth(c->dstFormat)/8;
920 d616c8ae Michael Niedermayer
    int aidx= 3;
921 f0faee4c Michael Niedermayer
922
    switch(c->dstFormat){
923
    case PIX_FMT_ARGB:
924
        dest++;
925 d616c8ae Michael Niedermayer
        aidx= 0;
926 f0faee4c Michael Niedermayer
    case PIX_FMT_RGB24:
927 d616c8ae Michael Niedermayer
        aidx--;
928 f0faee4c Michael Niedermayer
    case PIX_FMT_RGBA:
929
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
930 d616c8ae Michael Niedermayer
            dest[aidx]= 0;
931 f0faee4c Michael Niedermayer
            dest[0]= R>>22;
932
            dest[1]= G>>22;
933
            dest[2]= B>>22;
934
            dest+= step;
935
        }
936
        break;
937
    case PIX_FMT_ABGR:
938
        dest++;
939 d616c8ae Michael Niedermayer
        aidx= 0;
940 f0faee4c Michael Niedermayer
    case PIX_FMT_BGR24:
941 d616c8ae Michael Niedermayer
        aidx--;
942 f0faee4c Michael Niedermayer
    case PIX_FMT_BGRA:
943
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
944 d616c8ae Michael Niedermayer
            dest[aidx]= 0;
945 f0faee4c Michael Niedermayer
            dest[0]= B>>22;
946
            dest[1]= G>>22;
947
            dest[2]= R>>22;
948
            dest+= step;
949
        }
950
        break;
951
    default:
952
        assert(0);
953
    }
954
}
955 e3d2500f Michael Niedermayer
956 f40c7dbb Diego Biurrun
//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one

/*
 * Decide which instantiations of swscale_template.c get compiled.
 * Each COMPILE_xyz symbol defined here enables one instantiation further
 * down. With RUNTIME_CPUDETECT every variant available for the architecture
 * is built; otherwise only the one matching the configured HAVE_xyz flags.
 * All SIMD variants additionally require CONFIG_GPL.
 */

//Plain C versions
#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
#define COMPILE_C
#endif

#if ARCH_PPC
#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
// the AltiVec instantiation replaces the plain C one
#undef COMPILE_C
#define COMPILE_ALTIVEC
#endif
#endif //ARCH_PPC

#if ARCH_X86

#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_MMX
#endif

#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_MMX2
#endif

#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// From here on the HAVE_xyz flags no longer describe the build
// configuration: they are reset to 0 and then redefined per instantiation.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#undef HAVE_ALTIVEC
#define HAVE_MMX 0
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define HAVE_ALTIVEC 0
992 726a959a Michael Niedermayer
993
/*
 * Instantiate swscale_template.c once per enabled variant. Before each
 * #include the HAVE_xyz flags are set to describe that variant and RENAME()
 * is redefined so that every function of the template gets a distinct
 * suffix (_C, _altivec, _MMX, _MMX2, _3DNow).
 */
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif

#if ARCH_X86

//x86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define HAVE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_AMD3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86

// minor note: the HAVE_xyz are messed up after this line so don't use them
1060 d604bab9 Michael Niedermayer
1061 a86c461c Michael Niedermayer
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * On the first unit segment the value is a + b*t + c*t^2 + d*t^3 with
 * t = dist. For every further unit segment the coefficients are replaced by
 *   a' = 0
 *   b' = b + 2c + 3d    (first derivative of the previous piece at t = 1)
 *   c' = c + 3d         (half its second derivative at t = 1)
 *   d' = -b - 3c - 6d   (makes the new piece evaluate to 0 at t = 1)
 * and dist is reduced by 1.
 *
 * This is the iterative form of the original tail recursion: it performs
 * the same arithmetic in the same order but uses constant stack space, and
 * a NaN dist now falls straight through to the polynomial (yielding NaN)
 * instead of recursing without end.
 *
 * @param a,b,c,d coefficients of the first segment
 * @param dist    distance from the filter centre, expected to be >= 0
 * @return value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0) {
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a     = 0.0;
        b     = nextB;
        c     = nextC;
        d     = nextD;
        dist -= 1.0;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
1071 6c7506de Michael Niedermayer
1072 bca11e75 Michael Niedermayer
/**
 * Build the coefficient table and the source start positions for scaling
 * one dimension from srcW to dstW samples.
 *
 * Steps:
 *   1. compute 64-bit coefficients (scaled so that 1.0 == fone == 2^54) for
 *      the scaler selected by flags
 *   2. convolve them with srcFilter, if given (dstFilter only widens the
 *      table, it is not applied yet - see the FIXME below)
 *   3. drop near-zero coefficients at both ends, then pad the filter length
 *      to a multiple of filterAlign
 *   4. fold coefficients that would read outside [0, srcW) back inside
 *   5. normalize every row so that it sums to `one` and store it as int16_t,
 *      carrying the rounding error from tap to tap
 *
 * @param outFilter     receives the allocated coefficient table,
 *                      (dstW+1) rows of *outFilterSize entries
 * @param filterPos     receives the allocated array of dstW+1 source
 *                      positions of the first tap of every output sample
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per output sample, 16.16 fixed point
 *                      (0x10000 means unscaled)
 * @param srcW          number of source samples
 * @param dstW          number of output samples
 * @param filterAlign   required multiple for the filter length (the masking
 *                      used assumes a power of two)
 * @param one           value the coefficients of each row should sum to
 * @param flags         SWS_* scaler selection and CPU capability flags
 * @param srcFilter     optional filter convolved with the scaling filter
 * @param dstFilter     optional filter, currently only affects table size
 * @param param         scaler tuning parameters, SWS_PARAM_DEFAULT selects
 *                      the built-in default
 * @return 0 on success, -1 if an allocation fails or the resulting filter
 *         is too large. On failure *filterPos and *outFilter may already
 *         have been allocated and remain owned by the caller.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int64_t *filter=NULL;
    int64_t *filter2=NULL;
    const int64_t fone= 1LL<<54;
    int ret= -1;
#if ARCH_X86
    if (flags & SWS_CPU_CAPS_MMX)
        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // NOTE: the +1 is for the MMX scaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
    if (!*filterPos)
        goto error;

    if (FFABS(xInc - 0x10000) <10) // unscaled
    {
        filterSize= 1;
        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
        if (!filter)
            goto error;

        for (i=0; i<dstW; i++)
        {
            filter[i*filterSize]= fone;
            (*filterPos)[i]=i;
        }

    }
    else if (flags&SWS_POINT) // lame looking point sampling mode
    {
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
        if (!filter)
            goto error;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= fone;
            xDstInSrc+= xInc;
        }
    }
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
    {
        int xDstInSrc;
        if      (flags&SWS_BICUBIC) filterSize= 4;
        else if (flags&SWS_X      ) filterSize= 4;
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
        if (!filter)
            goto error;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //bilinear upscale / linear interpolate / area averaging
            for (j=0; j<filterSize; j++)
            {
                // xx may be negative at the left border: multiply instead of shifting
                int64_t coeff= fone - FFABS(xx*(1<<16) - xDstInSrc)*(fone>>16);
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    }
    else
    {
        int xDstInSrc;
        int sizeFactor;

        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
        else if (flags&SWS_X)            sizeFactor=  8;
        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
        else {
            sizeFactor= 0; //GCC warning killer
            assert(0);
        }

        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;

        if (filterSize > srcW-2) filterSize=srcW-2;
        // very narrow sources (srcW < 3) would otherwise yield an empty or negative-sized filter
        if (filterSize < 1)      filterSize=1;

        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
        if (!filter)
            goto error;

        xDstInSrc= xInc - 0x10000;
        for (i=0; i<dstW; i++)
        {
            // multiplications instead of shifts: both operands may be negative
            int xx= (xDstInSrc - (filterSize-2)*(1<<16)) / (1<<17);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++)
            {
                // distance between tap and sample centre, 1.0 == 1<<30
                int64_t d= ((int64_t)FFABS(xx*(1<<17) - xDstInSrc))<<13;
                double floatd;
                int64_t coeff;

                if (xInc > 1<<16)
                    d= d*dstW/srcW;
                floatd= d * (1.0/(1<<30));

                if (flags & SWS_BICUBIC)
                {
                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
                    int64_t dd = ( d*d)>>30;
                    int64_t ddd= (dd*d)>>30;

                    if      (d < 1LL<<30)
                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
                    else if (d < 1LL<<31)
                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
                    else
                        coeff=0.0;
                    coeff *= fone>>(30+24);
                }
                else if (flags & SWS_X)
                {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
                    double c;

                    if (floatd<1.0)
                        c = cos(floatd*PI);
                    else
                        c=-1.0;
                    if (c<0.0)      c= -pow(-c, A);
                    else            c=  pow( c, A);
                    coeff= (c*0.5 + 0.5)*fone;
                }
                else if (flags & SWS_AREA)
                {
                    int64_t d2= d - (1<<29);
                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
                    else coeff=0.0;
                    coeff *= fone>>(30+16);
                }
                else if (flags & SWS_GAUSS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
                }
                else if (flags & SWS_SINC)
                {
                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
                }
                else if (flags & SWS_LANCZOS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
                    if (floatd>p) coeff=0;
                }
                else if (flags & SWS_BILINEAR)
                {
                    coeff= (1<<30) - d;
                    if (coeff<0) coeff=0;
                    coeff *= fone >> 30;
                }
                else if (flags & SWS_SPLINE)
                {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
                }
                else {
                    coeff= 0.0; //GCC warning killer
                    assert(0);
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= 2*xInc;
        }
    }

    /* apply src & dst Filter to filter -> filter2 */
    assert(filterSize>0);
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    assert(filter2Size>0);
    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
    if (!filter2)
        goto error;

    for (i=0; i<dstW; i++)
    {
        int j, k;

        if(srcFilter){
            for (k=0; k<srcFilter->length; k++){
                for (j=0; j<filterSize; j++)
                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
            }
        }else{
            for (j=0; j<filterSize; j++)
                filter2[i*filter2Size + j]= filter[i*filterSize + j];
        }
        //FIXME dstFilter

        // keep the widened filter centred on the same source position
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
    }
    av_freep(&filter);

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--)
    {
        int min= filter2Size;
        int j;
        int64_t cutOff=0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++)
        {
            int k;
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // move filter coefficients left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0;
            (*filterPos)[i]++;
        }

        cutOff=0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--)
        {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // We really don't want to waste our time
        // doing useless computation, so fall back on
        // the scalar C code for very small filters.
        // Vectorizing is worth it only if you have a
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    assert(minFilterSize > 0);
    // round up to the next multiple of filterAlign
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    assert(filterSize > 0);
    filter= av_malloc(filterSize*dstW*sizeof(*filter));
    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
        goto error;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++)
    {
        int j;

        for (j=0; j<filterSize; j++)
        {
            if (j>=filter2Size) filter[i*filterSize + j]= 0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
            if((flags & SWS_BITEXACT) && j>=minFilterSize)
                filter[i*filterSize + j]= 0;
        }
    }


    //FIXME try to align filterPos if possible

    //fix borders
    for (i=0; i<dstW; i++)
    {
        int j;
        if ((*filterPos)[i] < 0)
        {
            // move filter coefficients left to compensate for filterPos
            for (j=1; j<filterSize; j++)
            {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW)
        {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // move filter coefficients right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--)
            {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMX scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
    if (!*outFilter)
        goto error;

    /* normalize & store in outFilter */
    for (i=0; i<dstW; i++)
    {
        int j;
        int64_t error=0; // rounding error carried over to the next tap
        int64_t sum=0;

        for (j=0; j<filterSize; j++)
        {
            sum+= filter[i*filterSize + j];
        }
        sum= (sum + one/2)/ one;
        for (j=0; j<*outFilterSize; j++)
        {
            int64_t v= filter[i*filterSize + j] + error;
            int intV= ROUNDED_DIV(v, sum);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error= v - intV*sum;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    // duplicate the last row of coefficients into the extra row as well
    for (i=0; i<*outFilterSize; i++)
    {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret=0;
error:
    av_free(filter);
    av_free(filter2);
    return ret;
}
1455 31190492 Arpi
1456 17c613ef Uoti Urpala
#ifdef COMPILE_MMX2
1457 b7dc6f66 Michael Niedermayer
/**
 * Build the run-time generated code and the coefficient/position tables for
 * the MMX2 horizontal scaler.
 *
 * Two machine-code templates ("fragments") are assembled inline below and
 * never executed in place (each asm statement jumps over its template).
 * For every group of four output pixels one of the templates is copied into
 * funnyCode and the immediate bytes of its two pshufw instructions are
 * patched; a RET is stored after the last copied fragment.
 *
 * @param dstW       number of output pixels; dstW/numSplits of them are processed here
 * @param xInc       source step per output pixel in 16.16 fixed point (xpos>>16 is used as the source index)
 * @param funnyCode  buffer receiving the generated code; the caller must make it large enough
 *                   (NOTE(review): the required size is not visible in this function)
 * @param filter     receives one 7-bit weight per output pixel
 * @param filterPos  receives one source position per group of four output pixels, stored at index i/2,
 *                   plus one trailing entry written after the loop
 * @param numSplits  divisor applied to dstW to get the number of pixels handled
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1458 28bf81c9 Michael Niedermayer
{
1459 221b804f Diego Biurrun
    uint8_t *fragmentA;
1460
    long imm8OfPShufW1A;
1461
    long imm8OfPShufW2A;
1462
    long fragmentLengthA;
1463
    uint8_t *fragmentB;
1464
    long imm8OfPShufW1B;
1465
    long imm8OfPShufW2B;
1466
    long fragmentLengthB;
1467
    int fragmentPos;
1468
1469
    int xpos, i;
1470
1471
    // create an optimized horizontal scaling routine
1472
1473
    //code fragment
    /* Fragment A: template with two movd loads (source offset 0 into mm0 and
     * offset 1 into mm1). The code between labels 0 and 9 is only a template;
     * the instructions after label 9 compute, relative to label 0:
     *   %0 = address of the template,
     *   %1 = offset of the byte just before label 1 (last byte of the first pshufw),
     *   %2 = offset of the byte just before label 2 (last byte of the second pshufw),
     *   %3 = length of the template in bytes.
     * NOTE(review): the register roles (REG_a/REG_b/REG_c/REG_d/REG_D/REG_S)
     * are defined by the code that calls the generated routine - confirm there. */
1474
1475 7ad6469e Diego Pettenò
    __asm__ volatile(
1476 221b804f Diego Biurrun
        "jmp                         9f                 \n\t"
1477
    // Begin
1478
        "0:                                             \n\t"
1479
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1480
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1481
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1482
        "punpcklbw                %%mm7, %%mm1          \n\t"
1483
        "punpcklbw                %%mm7, %%mm0          \n\t"
1484
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1485
        "1:                                             \n\t"
1486
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1487
        "2:                                             \n\t"
1488
        "psubw                    %%mm1, %%mm0          \n\t"
1489
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1490
        "pmullw                   %%mm3, %%mm0          \n\t"
1491
        "psllw                       $7, %%mm1          \n\t"
1492
        "paddw                    %%mm1, %%mm0          \n\t"
1493
1494
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1495
1496
        "add                         $8, %%"REG_a"      \n\t"
1497
    // End
1498
        "9:                                             \n\t"
1499
//        "int $3                                         \n\t"
1500 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1501
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1502
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1503 221b804f Diego Biurrun
        "dec                         %1                 \n\t"
1504
        "dec                         %2                 \n\t"
1505
        "sub                         %0, %1             \n\t"
1506
        "sub                         %0, %2             \n\t"
1507 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1508 221b804f Diego Biurrun
        "sub                         %0, %3             \n\t"
1509
1510
1511
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1512
        "=r" (fragmentLengthA)
1513
    );
1514
    /* Fragment B: same template but with a single movd load; mm1 is produced
     * by shuffling mm0 instead of being loaded from source offset 1. The
     * address, the two patch offsets and the length are obtained exactly as
     * for fragment A. */
1515 7ad6469e Diego Pettenò
    __asm__ volatile(
1516 221b804f Diego Biurrun
        "jmp                         9f                 \n\t"
1517
    // Begin
1518
        "0:                                             \n\t"
1519
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1520
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1521
        "punpcklbw                %%mm7, %%mm0          \n\t"
1522
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1523
        "1:                                             \n\t"
1524
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1525
        "2:                                             \n\t"
1526
        "psubw                    %%mm1, %%mm0          \n\t"
1527
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1528
        "pmullw                   %%mm3, %%mm0          \n\t"
1529
        "psllw                       $7, %%mm1          \n\t"
1530
        "paddw                    %%mm1, %%mm0          \n\t"
1531
1532
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1533
1534
        "add                         $8, %%"REG_a"      \n\t"
1535
    // End
1536
        "9:                                             \n\t"
1537
//        "int                       $3                   \n\t"
1538 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1539
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1540
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1541 221b804f Diego Biurrun
        "dec                         %1                 \n\t"
1542
        "dec                         %2                 \n\t"
1543
        "sub                         %0, %1             \n\t"
1544
        "sub                         %0, %2             \n\t"
1545 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1546 221b804f Diego Biurrun
        "sub                         %0, %3             \n\t"
1547
1548
1549
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1550
        "=r" (fragmentLengthB)
1551
    );
1552
1553
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1554
    fragmentPos=0;
1555
1556
    for (i=0; i<dstW/numSplits; i++)
1557
    {
        /* integer source position of output pixel i */
1558
        int xx=xpos>>16;
1559
1560
        /* one code fragment is emitted per group of four output pixels */
        if ((i&3) == 0)
1561
        {
            /* a..d: source index of each of the four outputs relative to xx */
1562
            int a=0;
1563
            int b=((xpos+xInc)>>16) - xx;
1564
            int c=((xpos+xInc*2)>>16) - xx;
1565
            int d=((xpos+xInc*3)>>16) - xx;
1566
            /* weights: bitwise-inverted 16-bit fractional position reduced to 7 bits */
1567
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1568
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1569
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1570
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1571
            filterPos[i/2]= xx;
1572
1573
            /* d+1<4: index d+1 still fits in a 2-bit shuffle field, so the
               single-load fragment B can be used */
            if (d+1<4)
1574
            {
1575
                int maxShift= 3-(d+1);
1576
                int shift=0;
1577
1578
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1579
                /* patch the shuffle immediates: the first pshufw selects
                   indices a+1..d+1, the second selects a..d (2 bits each) */
1580
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1581
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1582
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1583
                    a | (b<<2) | (c<<4) | (d<<6);
1584
1585
                if (i+3>=dstW) shift=maxShift; //avoid overread
1586
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1587
1588
                /* 0x55*shift adds `shift` to each of the four 2-bit selectors,
                   compensating for moving the load position back by `shift` */
                if (shift && i>=shift)
1589
                {
1590
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1591
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1592
                    filterPos[i/2]-=shift;
1593
                }
1594
1595
                fragmentPos+= fragmentLengthB;
1596
            }
1597
            else
1598
            {
1599
                int maxShift= 3-d;
1600
                int shift=0;
1601
1602
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1603
                /* fragment A loads mm1 from source offset 1, so both shuffles
                   receive the same selector byte */
1604
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1605
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1606
                    a | (b<<2) | (c<<4) | (d<<6);
1607
1608
                if (i+4>=dstW) shift=maxShift; //avoid overread
1609
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1610
1611
                if (shift && i>=shift)
1612
                {
1613
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1614
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1615
                    filterPos[i/2]-=shift;
1616
                }
1617
1618
                fragmentPos+= fragmentLengthA;
1619
            }
1620
            /* terminate the generated code; overwritten by the next fragment, if any */
1621
            funnyCode[fragmentPos]= RET;
1622
        }
1623
        xpos+=xInc;
1624
    }
1625
    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1626 28bf81c9 Michael Niedermayer
}
1627 17c613ef Uoti Urpala
#endif /* COMPILE_MMX2 */
1628 28bf81c9 Michael Niedermayer
1629 9b2283cc Stefan Huehner
static void globalInit(void){
1630 31190492 Arpi
    // generating tables:
1631
    int i;
1632 221b804f Diego Biurrun
    for (i=0; i<768; i++){
1633
        int c= av_clip_uint8(i-256);
1634
        clip_table[i]=c;
1635 b18ea156 Michael Niedermayer
    }
1636 516b1f82 Michael Niedermayer
}
1637 c1b0bfb4 Michael Niedermayer
1638 516b1f82 Michael Niedermayer
static SwsFunc getSwsFunc(int flags){
1639 6a4970ab Diego Biurrun
1640 b63f641e Aurelien Jacobs
#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
1641
#if ARCH_X86
1642 c14731d8 Reimar Döffinger
    // ordered per speed fastest first
1643 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX2)
1644
        return swScale_MMX2;
1645
    else if (flags & SWS_CPU_CAPS_3DNOW)
1646
        return swScale_3DNow;
1647
    else if (flags & SWS_CPU_CAPS_MMX)
1648
        return swScale_MMX;
1649
    else
1650
        return swScale_C;
1651 28bf81c9 Michael Niedermayer
1652
#else
1653 b63f641e Aurelien Jacobs
#if ARCH_PPC
1654 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1655
        return swScale_altivec;
1656
    else
1657
        return swScale_C;
1658 a2faa401 Romain Dolbeau
#endif
1659 221b804f Diego Biurrun
    return swScale_C;
1660 b63f641e Aurelien Jacobs
#endif /* ARCH_X86 */
1661 28bf81c9 Michael Niedermayer
#else //RUNTIME_CPUDETECT
1662 b63f641e Aurelien Jacobs
#if   HAVE_MMX2
1663 221b804f Diego Biurrun
    return swScale_MMX2;
1664 f4406ec1 Diego Biurrun
#elif HAVE_AMD3DNOW
1665 221b804f Diego Biurrun
    return swScale_3DNow;
1666 b63f641e Aurelien Jacobs
#elif HAVE_MMX
1667 221b804f Diego Biurrun
    return swScale_MMX;
1668 b63f641e Aurelien Jacobs
#elif HAVE_ALTIVEC
1669 221b804f Diego Biurrun
    return swScale_altivec;
1670 28bf81c9 Michael Niedermayer
#else
1671 221b804f Diego Biurrun
    return swScale_C;
1672 28bf81c9 Michael Niedermayer
#endif
1673
#endif //!RUNTIME_CPUDETECT
1674 31190492 Arpi
}
1675 7630f2e0 Michael Niedermayer
1676 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice from planar YUV to NV12/NV21.
 *
 * The luma plane is copied unchanged. The two source chroma planes are
 * byte-interleaved into destination plane 1: src[1],src[2] when the
 * destination format is PIX_FMT_NV12, src[2],src[1] otherwise.
 *
 * @param c          scaler context (srcW and dstFormat are read)
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first line of the slice within the picture
 * @param srcSliceH  number of lines in the slice
 * @param dstParam   destination plane pointers (whole picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* The interleaved chroma lives in plane 1, so both the slice offset and
       the row pitch handed to interleaveBytes() must come from dstStride[1].
       The pitch was previously taken from the luma plane, which only worked
       when both strides happened to be equal. */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);

    return srcSliceH;
}
1702
1703 d4e24275 Michael Niedermayer
/**
 * Unscaled planar -> packed conversion of one slice, delegated to yv12toyuy2().
 * Output starts at line srcSliceY of destination plane 0.
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int outStride = dstStride[0];
    uint8_t *const out  = dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1711
1712 caeaabe7 Alex Beregszaszi
/**
 * Unscaled planar -> packed conversion of one slice, delegated to yv12touyvy().
 * Output starts at line srcSliceY of destination plane 0.
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int outStride = dstStride[0];
    uint8_t *const out  = dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1720
1721 a6100f39 Baptiste Coudurier
/**
 * Unscaled planar 4:2:2 -> packed conversion of one slice, delegated to
 * yuv422ptoyuy2(). Output starts at line srcSliceY of destination plane 0.
 *
 * @return srcSliceH
 */
static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int outStride = dstStride[0];
    uint8_t *const out  = dstParam[0] + outStride*srcSliceY;

    yuv422ptoyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1729
1730
/**
 * Unscaled planar 4:2:2 -> packed conversion of one slice, delegated to
 * yuv422ptouyvy(). Output starts at line srcSliceY of destination plane 0.
 *
 * @return srcSliceH
 */
static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int outStride = dstStride[0];
    uint8_t *const out  = dstParam[0] + outStride*srcSliceY;

    yuv422ptouyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1738
1739 49004617 Vitor Sessak
/**
 * Unscaled conversion of one slice from a palettized source to packed
 * 24/32-bit RGB/BGR, line by line, using the palette stored in c->pal_rgb.
 *
 * An unsupported destination format is reported through av_log() and the
 * slice is left unconverted (previously the NULL converter was called).
 *
 * @param c          scaler context (srcFormat, dstFormat, srcW, pal_rgb are read)
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first line of the slice within the picture
 * @param srcSliceH  number of lines in the slice
 * @param dst        destination plane pointers (whole picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
    uint8_t *srcPtr= src[0];

    if (!usePal(srcFormat))
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    switch(dstFormat){
    case PIX_FMT_RGB32  :
    case PIX_FMT_BGR32  :
    case PIX_FMT_BGR32_1:
    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB24  :
    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
    }

    /* no converter selected: the error has been logged, do not call NULL */
    if (!conv)
        return srcSliceH;

    for (i=0; i<srcSliceH; i++) {
        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }

    return srcSliceH;
}
1773
1774 9990e426 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
1775 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1776 221b804f Diego Biurrun
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1777 58e4b706 Carl Eugen Hoyos
    const enum PixelFormat srcFormat= c->srcFormat;
1778
    const enum PixelFormat dstFormat= c->dstFormat;
1779 221b804f Diego Biurrun
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1780
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1781
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1782
    const int dstId= fmt_depth(dstFormat) >> 2;
1783
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1784
1785
    /* BGR -> BGR */
1786
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
1787
       || (isRGB(srcFormat) && isRGB(dstFormat))){
1788
        switch(srcId | (dstId<<4)){
1789
        case 0x34: conv= rgb16to15; break;
1790
        case 0x36: conv= rgb24to15; break;
1791
        case 0x38: conv= rgb32to15; break;
1792
        case 0x43: conv= rgb15to16; break;
1793
        case 0x46: conv= rgb24to16; break;
1794
        case 0x48: conv= rgb32to16; break;
1795
        case 0x63: conv= rgb15to24; break;
1796
        case 0x64: conv= rgb16to24; break;
1797
        case 0x68: conv= rgb32to24; break;
1798
        case 0x83: conv= rgb15to32; break;
1799
        case 0x84: conv= rgb16to32; break;
1800
        case 0x86: conv= rgb24to32; break;
1801 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1802 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1803
        }
1804
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
1805
             || (isRGB(srcFormat) && isBGR(dstFormat))){
1806
        switch(srcId | (dstId<<4)){
1807
        case 0x33: conv= rgb15tobgr15; break;
1808
        case 0x34: conv= rgb16tobgr15; break;
1809
        case 0x36: conv= rgb24tobgr15; break;
1810
        case 0x38: conv= rgb32tobgr15; break;
1811
        case 0x43: conv= rgb15tobgr16; break;
1812
        case 0x44: conv= rgb16tobgr16; break;
1813
        case 0x46: conv= rgb24tobgr16; break;
1814
        case 0x48: conv= rgb32tobgr16; break;
1815
        case 0x63: conv= rgb15tobgr24; break;
1816
        case 0x64: conv= rgb16tobgr24; break;
1817
        case 0x66: conv= rgb24tobgr24; break;
1818
        case 0x68: conv= rgb32tobgr24; break;
1819
        case 0x83: conv= rgb15tobgr32; break;
1820
        case 0x84: conv= rgb16tobgr32; break;
1821
        case 0x86: conv= rgb24tobgr32; break;
1822
        case 0x88: conv= rgb32tobgr32; break;
1823 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1824 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1825
        }
1826
    }else{
1827 3f0bc115 Diego Biurrun
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1828 221b804f Diego Biurrun
               sws_format_name(srcFormat), sws_format_name(dstFormat));
1829
    }
1830
1831 068b0f4f Benoit Fouet
    if(conv)
1832
    {
1833 9990e426 Michael Niedermayer
        uint8_t *srcPtr= src[0];
1834
        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
1835
            srcPtr += ALT32_CORR;
1836
1837 5efaf000 Peter Schlaile
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
1838 9990e426 Michael Niedermayer
            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1839 c4ca31d0 Benoit Fouet
        else
1840 221b804f Diego Biurrun
        {
1841 c4ca31d0 Benoit Fouet
            int i;
1842
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1843
1844
            for (i=0; i<srcSliceH; i++)
1845
            {
1846
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
1847
                srcPtr+= srcStride[0];
1848
                dstPtr+= dstStride[0];
1849
            }
1850 221b804f Diego Biurrun
        }
1851
    }
1852
    return srcSliceH;
1853 0d9f3d85 Arpi
}
1854
1855 d4e24275 Michael Niedermayer
/**
 * Unscaled packed 24-bit -> planar conversion of one slice, delegated to
 * rgb24toyv12(). Plane 0 output starts at line srcSliceY, planes 1 and 2 at
 * line srcSliceY>>1.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int chromaLine = srcSliceY >> 1;
    uint8_t *const out0  = dst[0] + srcSliceY *dstStride[0];
    uint8_t *const out1  = dst[1] + chromaLine*dstStride[1];
    uint8_t *const out2  = dst[2] + chromaLine*dstStride[2];

    rgb24toyv12(src[0], out0, out1, out2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1867
1868 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion with luma copy and chroma upscaling through planar2x().
 *
 * The luma slice is copied as is. Source chroma planes 1 and 2 are passed to
 * planar2x(); they land in destination planes 1 and 2 when the destination
 * format is PIX_FMT_YUV420P and in planes 2 and 1 (swapped) otherwise.
 *
 * NOTE(review): the chroma calls use c->chrSrcH and the plane base pointers,
 * i.e. they do not depend on srcSliceY/srcSliceH - confirm this wrapper is
 * only used with whole-picture slices.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int swapped = c->dstFormat != PIX_FMT_YUV420P;
    const int first   = swapped ? 2 : 1;   /* destination plane fed from src[1] */
    const int second  = swapped ? 1 : 2;   /* destination plane fed from src[2] */
    uint8_t *lumaIn   = src[0];
    uint8_t *lumaOut  = dst[0] + dstStride[0]*srcSliceY;
    int row;

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0) {
        memcpy(lumaOut, lumaIn, srcStride[0]*srcSliceH);
    } else {
        for (row = 0; row < srcSliceH; row++) {
            memcpy(lumaOut, lumaIn, c->srcW);
            lumaIn  += srcStride[0];
            lumaOut += dstStride[0];
        }
    }

    planar2x(src[1], dst[first],  c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[first]);
    planar2x(src[2], dst[second], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[second]);

    return srcSliceH;
}
1896
1897 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
1898 2d35ae56 Luca Barbato
static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1899 bc5a0444 Luca Barbato
                      int srcSliceH, uint8_t* dst[], int dstStride[])
1900
{
1901
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1902
        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1903
    else
1904
    {
1905
        int i;
1906
        uint8_t *srcPtr= src[0];
1907
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1908
        int length=0;
1909 221b804f Diego Biurrun
1910 bc5a0444 Luca Barbato
        /* universal length finder */
1911
        while(length+c->srcW <= FFABS(dstStride[0])
1912
           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1913 fcc402b1 Luca Barbato
        assert(length!=0);
1914 2d35ae56 Luca Barbato
1915 bc5a0444 Luca Barbato
        for (i=0; i<srcSliceH; i++)
1916 221b804f Diego Biurrun
        {
1917 bc5a0444 Luca Barbato
            memcpy(dstPtr, srcPtr, length);
1918
            srcPtr+= srcStride[0];
1919
            dstPtr+= dstStride[0];
1920 221b804f Diego Biurrun
        }
1921 bc5a0444 Luca Barbato
    }
1922 2d35ae56 Luca Barbato
    return srcSliceH;
1923
}
1924 bc5a0444 Luca Barbato
1925 2d35ae56 Luca Barbato
/**
 * Copy one slice of a three-plane picture plane by plane.
 *
 * Chroma dimensions are derived from the luma ones by a rounding-up shift
 * with c->chrDstHSubSample / c->chrDstVSubSample. If either format is gray,
 * the chroma planes are not copied; when only the source is gray the
 * destination chroma rows of this slice are filled with 128.
 *
 * The fill is done per line starting at the slice's chroma row. It used to
 * be a single memset() from the top of the plane, which ignored srcSliceY
 * and broke with negative strides.
 *
 * @param c          scaler context (srcW, srcFormat, dstFormat, chroma shifts are read)
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first line of the slice within the picture
 * @param srcSliceH  number of lines in the slice
 * @param dst        destination plane pointers (whole picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int plane;
    for (plane=0; plane<3; plane++)
    {
        /* -((-x)>>s) is x/2^s rounded up */
        int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
        int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
        int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);

        if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
        {
            if (!isGray(c->dstFormat))
            {
                int i;
                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
                for (i=0; i<height; i++)
                {
                    memset(dstPtr, 128, length);
                    dstPtr+= dstStride[plane];
                }
            }
        }
        else
        {
            if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
            else
            {
                int i;
                uint8_t *srcPtr= src[plane];
                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
                for (i=0; i<height; i++)
                {
                    memcpy(dstPtr, srcPtr, length);
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            }
        }
    }
    return srcSliceH;
}
1960 28bf81c9 Michael Niedermayer
1961 4884b9e5 Kostya Shishkov
/**
 * Convert one slice of 16-bit gray to 8-bit by keeping one byte of every
 * 16-bit sample (the second byte for PIX_FMT_GRAY16LE, the first otherwise).
 *
 * If the destination is not a gray format, the chroma rows belonging to this
 * slice in planes 1 and 2 are filled with 128. The fill is done per line at
 * the slice's chroma row; it used to be one memset() from the top of each
 * plane, which ignored srcSliceY and broke with negative strides.
 *
 * @param c          scaler context (srcW, srcFormat, dstFormat, chroma shifts are read)
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first line of the slice within the picture
 * @param srcSliceH  number of lines in the slice
 * @param dst        destination plane pointers (whole picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* chroma geometry of this slice, rounded up like in planarCopy() */
        int chrLength= -((-c->srcW  )>>c->chrDstHSubSample);
        int chrY=      -((-srcSliceY)>>c->chrDstVSubSample);
        int chrHeight= -((-srcSliceH)>>c->chrDstVSubSample);
        int plane;
        for (plane=1; plane<3; plane++)
        {
            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;
            for (i=0; i<chrHeight; i++)
            {
                memset(chrPtr, 128, chrLength);
                chrPtr+= dstStride[plane];
            }
        }
    }
    /* little endian: the most significant byte is the second one */
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
1985
1986
/**
 * Expand one slice of 8-bit gray to 16-bit gray by writing every source byte
 * into both bytes of the corresponding 16-bit destination sample.
 *
 * @return srcSliceH
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int width  = c->srcW;
    uint8_t  *inRow  = src[0];
    uint8_t  *outRow = dst[0] + dstStride[0]*srcSliceY;
    int row, col;

    for (row = 0; row < srcSliceH; row++) {
        for (col = 0; col < width; col++) {
            const uint8_t sample = inRow[col];

            outRow[2*col    ] = sample;
            outRow[2*col + 1] = sample;
        }
        inRow  += srcStride[0];
        outRow += dstStride[0];
    }
    return srcSliceH;
}
2007
2008
/**
 * Byte-swap one slice of 16-bit gray samples (endianness conversion).
 *
 * Strides are in bytes; the per-line pointer steps are therefore stride/2
 * 16-bit elements.
 *
 * @param c          scaler context (srcW is read)
 * @param src        source plane pointers for this slice
 * @param srcStride  source strides in bytes
 * @param srcSliceY  first line of the slice within the picture
 * @param srcSliceH  number of lines in the slice
 * @param dst        destination plane pointers (whole picture)
 * @param dstStride  destination strides in bytes
 * @return srcSliceH
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint16_t *srcPtr= (uint16_t*)src[0];
    /* The offset is applied to the byte pointer dst[0], so it must be the full
       dstStride[0]*y bytes. It used to be halved, which placed every slice
       with srcSliceY>0 at half the correct distance from the top. */
    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        srcPtr+= srcStride[0]/2;
        dstPtr+= dstStride[0]/2;
    }
    return srcSliceH;
}
2025
2026
2027 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
2028 221b804f Diego Biurrun
    switch(format){
2029
    case PIX_FMT_UYVY422:
2030
    case PIX_FMT_YUYV422:
2031
        *h=1;
2032
        *v=0;
2033
        break;
2034
    case PIX_FMT_YUV420P:
2035 79973335 Aurelien Jacobs
    case PIX_FMT_YUVA420P:
2036 221b804f Diego Biurrun
    case PIX_FMT_GRAY16BE:
2037
    case PIX_FMT_GRAY16LE:
2038
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
2039
    case PIX_FMT_NV12:
2040
    case PIX_FMT_NV21:
2041
        *h=1;
2042
        *v=1;
2043
        break;
2044 9ba7fe6d Andreas Öman
    case PIX_FMT_YUV440P:
2045
        *h=0;
2046
        *v=1;
2047
        break;
2048 221b804f Diego Biurrun
    case PIX_FMT_YUV410P:
2049
        *h=2;
2050
        *v=2;
2051
        break;
2052
    case PIX_FMT_YUV444P:
2053
        *h=0;
2054
        *v=0;
2055
        break;
2056
    case PIX_FMT_YUV422P:
2057
        *h=1;
2058
        *v=0;
2059
        break;
2060
    case PIX_FMT_YUV411P:
2061
        *h=2;
2062
        *v=0;
2063
        break;
2064
    default:
2065
        *h=0;
2066
        *v=0;
2067
        break;
2068
    }
2069 c7a810cc Michael Niedermayer
}
2070
2071 5427e242 Michael Niedermayer
/**
 * Convert a 16.16 fixed-point value to a rounded, saturated 16-bit integer.
 *
 * The result is the two's complement bit pattern of the signed value:
 * results below -0x7FFF yield 0x8000, results above 0x7FFF yield 0x7FFF.
 *
 * The intermediate is kept in 64 bits. It used to be truncated to int before
 * clamping, so large inputs wrapped around and escaped the saturation.
 *
 * @param f value in 16.16 fixed point
 * @return (f + 0.5) >> 16, saturated as described above
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r;

    /* far outside the 16-bit range: saturate up front, which also keeps the
       rounding addition below from overflowing */
    if (f >  ((int64_t)1<<32)) return 0x7FFF;
    if (f < -((int64_t)1<<32)) return 0x8000;

    r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return r;
}
2077
2078
/**
2079 fa58ba15 Kostya Shishkov
 * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
2080 86bdf3fd Diego Biurrun
 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
2081 5427e242 Michael Niedermayer
 * @return -1 if not supported
2082 0481412a Michael Niedermayer
 */
2083 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
2084 221b804f Diego Biurrun
    int64_t crv =  inv_table[0];
2085
    int64_t cbu =  inv_table[1];
2086
    int64_t cgu = -inv_table[2];
2087
    int64_t cgv = -inv_table[3];
2088
    int64_t cy  = 1<<16;
2089
    int64_t oy  = 0;
2090
2091
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
2092
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
2093
2094
    c->brightness= brightness;
2095
    c->contrast  = contrast;
2096
    c->saturation= saturation;
2097
    c->srcRange  = srcRange;
2098
    c->dstRange  = dstRange;
2099 6bc0c792 Michael Niedermayer
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
2100 221b804f Diego Biurrun
2101
    c->uOffset=   0x0400040004000400LL;
2102
    c->vOffset=   0x0400040004000400LL;
2103
2104
    if (!srcRange){
2105
        cy= (cy*255) / 219;
2106
        oy= 16<<16;
2107
    }else{
2108
        crv= (crv*224) / 255;
2109
        cbu= (cbu*224) / 255;
2110
        cgu= (cgu*224) / 255;
2111
        cgv= (cgv*224) / 255;
2112
    }
2113 0481412a Michael Niedermayer
2114 221b804f Diego Biurrun
    cy = (cy *contrast             )>>16;
2115
    crv= (crv*contrast * saturation)>>32;
2116
    cbu= (cbu*contrast * saturation)>>32;
2117
    cgu= (cgu*contrast * saturation)>>32;
2118
    cgv= (cgv*contrast * saturation)>>32;
2119 0481412a Michael Niedermayer
2120 221b804f Diego Biurrun
    oy -= 256*brightness;
2121 0481412a Michael Niedermayer
2122 221b804f Diego Biurrun
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
2123
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
2124
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
2125
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
2126
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
2127
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
2128 5427e242 Michael Niedermayer
2129 43c16478 Michael Niedermayer
    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
2130
    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
2131
    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
2132
    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
2133
    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
2134
    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
2135 f0faee4c Michael Niedermayer
2136 e2a004ad Kostya Shishkov
    sws_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
2137 221b804f Diego Biurrun
    //FIXME factorize
2138 a31de956 Michael Niedermayer
2139 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
2140 221b804f Diego Biurrun
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
2141 e2a004ad Kostya Shishkov
        sws_yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
2142 6a4970ab Diego Biurrun
#endif
2143 221b804f Diego Biurrun
    return 0;
2144 5427e242 Michael Niedermayer
}
2145
2146
/**
2147
 * @return -1 if not supported
2148
 */
2149
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
2150 221b804f Diego Biurrun
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
2151 5427e242 Michael Niedermayer
2152 221b804f Diego Biurrun
    *inv_table = c->srcColorspaceTable;
2153
    *table     = c->dstColorspaceTable;
2154
    *srcRange  = c->srcRange;
2155
    *dstRange  = c->dstRange;
2156
    *brightness= c->brightness;
2157
    *contrast  = c->contrast;
2158
    *saturation= c->saturation;
2159 6a4970ab Diego Biurrun
2160 221b804f Diego Biurrun
    return 0;
2161 0481412a Michael Niedermayer
}
2162
2163 13394e8c Aurelien Jacobs
static int handle_jpeg(enum PixelFormat *format)
2164 44cdb423 Luca Abeni
{
2165 221b804f Diego Biurrun
    switch (*format) {
2166
        case PIX_FMT_YUVJ420P:
2167
            *format = PIX_FMT_YUV420P;
2168
            return 1;
2169
        case PIX_FMT_YUVJ422P:
2170
            *format = PIX_FMT_YUV422P;
2171
            return 1;
2172
        case PIX_FMT_YUVJ444P:
2173
            *format = PIX_FMT_YUV444P;
2174
            return 1;
2175 9ba7fe6d Andreas Öman
        case PIX_FMT_YUVJ440P:
2176
            *format = PIX_FMT_YUV440P;
2177
            return 1;
2178 221b804f Diego Biurrun
        default:
2179
            return 0;
2180
    }
2181 44cdb423 Luca Abeni
}
2182
2183 58e4b706 Carl Eugen Hoyos
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
2184 221b804f Diego Biurrun
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2185
2186
    SwsContext *c;
2187
    int i;
2188
    int usesVFilter, usesHFilter;
2189
    int unscaled, needsDither;
2190
    int srcRange, dstRange;
2191
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2192 b63f641e Aurelien Jacobs
#if ARCH_X86
2193 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX)
2194 7ad6469e Diego Pettenò
        __asm__ volatile("emms\n\t"::: "memory");
2195 5cebb24b Michael Niedermayer
#endif
2196 516b1f82 Michael Niedermayer
2197 b63f641e Aurelien Jacobs
#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
2198 d3f3eea9 Marc Hoffman
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2199 b63f641e Aurelien Jacobs
#if   HAVE_MMX2
2200 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2201 f4406ec1 Diego Biurrun
#elif HAVE_AMD3DNOW
2202 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2203 b63f641e Aurelien Jacobs
#elif HAVE_MMX
2204 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX;
2205 b63f641e Aurelien Jacobs
#elif HAVE_ALTIVEC
2206 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_ALTIVEC;
2207 b63f641e Aurelien Jacobs
#elif ARCH_BFIN
2208 d3f3eea9 Marc Hoffman
    flags |= SWS_CPU_CAPS_BFIN;
2209 516b1f82 Michael Niedermayer
#endif
2210 69796008 Diego Biurrun
#endif /* RUNTIME_CPUDETECT */
2211 221b804f Diego Biurrun
    if (clip_table[512] != 255) globalInit();
2212 1b0a4572 Benoit Fouet
    if (!rgb15to16) sws_rgb2rgb_init(flags);
2213 221b804f Diego Biurrun
2214
    unscaled = (srcW == dstW && srcH == dstH);
2215
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2216
        && (fmt_depth(dstFormat))<24
2217
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2218
2219
    srcRange = handle_jpeg(&srcFormat);
2220
    dstRange = handle_jpeg(&dstFormat);
2221
2222
    if (!isSupportedIn(srcFormat))
2223
    {
2224 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
2225 221b804f Diego Biurrun
        return NULL;
2226
    }
2227
    if (!isSupportedOut(dstFormat))
2228
    {
2229 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
2230 221b804f Diego Biurrun
        return NULL;
2231
    }
2232
2233 010c00bc Michael Niedermayer
    i= flags & ( SWS_POINT
2234
                |SWS_AREA
2235 6afc7c19 Michael Niedermayer
                |SWS_BILINEAR
2236 010c00bc Michael Niedermayer
                |SWS_FAST_BILINEAR
2237
                |SWS_BICUBIC
2238
                |SWS_X
2239
                |SWS_GAUSS
2240
                |SWS_LANCZOS
2241
                |SWS_SINC
2242
                |SWS_SPLINE
2243
                |SWS_BICUBLIN);
2244
    if(!i || (i & (i-1)))
2245
    {
2246 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
2247 010c00bc Michael Niedermayer
        return NULL;
2248
    }
2249
2250 221b804f Diego Biurrun
    /* sanity check */
2251
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2252
    {
2253
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2254
               srcW, srcH, dstW, dstH);
2255
        return NULL;
2256
    }
2257 8b2fce0d Michael Niedermayer
    if(srcW > VOFW || dstW > VOFW){
2258 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2259 8b2fce0d Michael Niedermayer
        return NULL;
2260
    }
2261 221b804f Diego Biurrun
2262
    if (!dstFilter) dstFilter= &dummyFilter;
2263
    if (!srcFilter) srcFilter= &dummyFilter;
2264
2265
    c= av_mallocz(sizeof(SwsContext));
2266
2267
    c->av_class = &sws_context_class;
2268
    c->srcW= srcW;
2269
    c->srcH= srcH;
2270
    c->dstW= dstW;
2271
    c->dstH= dstH;
2272
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2273
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2274
    c->flags= flags;
2275
    c->dstFormat= dstFormat;
2276
    c->srcFormat= srcFormat;
2277
    c->vRounder= 4* 0x0001000100010001ULL;
2278
2279
    usesHFilter= usesVFilter= 0;
2280 1b0a4572 Benoit Fouet
    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2281
    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2282
    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2283
    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2284
    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2285
    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2286
    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2287
    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2288 221b804f Diego Biurrun
2289
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2290
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2291
2292 f40c7dbb Diego Biurrun
    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
2293 221b804f Diego Biurrun
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2294
2295
    // drop some chroma lines if the user wants it
2296
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2297
    c->chrSrcVSubSample+= c->vChrDrop;
2298
2299 f40c7dbb Diego Biurrun
    // drop every other pixel for chroma calculation unless user wants full chroma
2300 221b804f Diego Biurrun
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2301
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2302
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2303 dfb09bd1 Michael Niedermayer
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2304 2f60f629 Michael Niedermayer
      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2305 221b804f Diego Biurrun
        c->chrSrcHSubSample=1;
2306
2307
    if (param){
2308
        c->param[0] = param[0];
2309
        c->param[1] = param[1];
2310
    }else{
2311
        c->param[0] =
2312
        c->param[1] = SWS_PARAM_DEFAULT;
2313
    }
2314
2315
    c->chrIntHSubSample= c->chrDstHSubSample;
2316
    c->chrIntVSubSample= c->chrSrcVSubSample;
2317
2318
    // Note the -((-x)>>y) is so that we always round toward +inf.
2319
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2320
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2321
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2322
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2323
2324 fa58ba15 Kostya Shishkov
    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2325 221b804f Diego Biurrun
2326 f40c7dbb Diego Biurrun
    /* unscaled special cases */
2327 6bc0c792 Michael Niedermayer
    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
2328 221b804f Diego Biurrun
    {
2329
        /* yv12_to_nv12 */
2330
        if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2331
        {
2332
            c->swScale= PlanarToNV12Wrapper;
2333
        }
2334
        /* yuv2bgr */
2335 4155ece5 Michael Niedermayer
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat))
2336 9b0d44ef Michael Niedermayer
            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
2337 221b804f Diego Biurrun
        {
2338 e2a004ad Kostya Shishkov
            c->swScale= sws_yuv2rgb_get_func_ptr(c);
2339 221b804f Diego Biurrun
        }
2340 6a4970ab Diego Biurrun
2341 84c12535 Michael Niedermayer
        if (srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_BITEXACT))
2342 221b804f Diego Biurrun
        {
2343
            c->swScale= yvu9toyv12Wrapper;
2344
        }
2345
2346
        /* bgr24toYV12 */
2347 08857704 Michael Niedermayer
        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
2348 221b804f Diego Biurrun
            c->swScale= bgr24toyv12Wrapper;
2349
2350 f40c7dbb Diego Biurrun
        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
2351 221b804f Diego Biurrun
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2352
           && (isBGR(dstFormat) || isRGB(dstFormat))
2353
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2354
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2355
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2356
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2357
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2358
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2359
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2360 ec1bca2a Michael Niedermayer
           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2361 9990e426 Michael Niedermayer
                                             && dstFormat != PIX_FMT_RGB32_1
2362
                                             && dstFormat != PIX_FMT_BGR32_1
2363 736143c8 Michael Niedermayer
           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2364 221b804f Diego Biurrun
             c->swScale= rgb2rgbWrapper;
2365
2366 49004617 Vitor Sessak
        if ((usePal(srcFormat) && (
2367 76e9fd01 Vitor Sessak
                 dstFormat == PIX_FMT_RGB32   ||
2368 522ce957 Vitor Sessak
                 dstFormat == PIX_FMT_RGB32_1 ||
2369 76e9fd01 Vitor Sessak
                 dstFormat == PIX_FMT_RGB24   ||
2370
                 dstFormat == PIX_FMT_BGR32   ||
2371 522ce957 Vitor Sessak
                 dstFormat == PIX_FMT_BGR32_1 ||
2372 49004617 Vitor Sessak
                 dstFormat == PIX_FMT_BGR24)))
2373
             c->swScale= pal2rgbWrapper;
2374
2375 a6100f39 Baptiste Coudurier
        if (srcFormat == PIX_FMT_YUV422P)
2376
        {
2377
            if (dstFormat == PIX_FMT_YUYV422)
2378
                c->swScale= YUV422PToYuy2Wrapper;
2379
            else if (dstFormat == PIX_FMT_UYVY422)
2380
                c->swScale= YUV422PToUyvyWrapper;
2381
        }
2382
2383 221b804f Diego Biurrun
        /* LQ converters if -sws 0 or -sws 4*/
2384
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2385
            /* yv12_to_yuy2 */
2386 3b38f1c6 Baptiste Coudurier
            if (srcFormat == PIX_FMT_YUV420P)
2387 221b804f Diego Biurrun
            {
2388
                if (dstFormat == PIX_FMT_YUYV422)
2389
                    c->swScale= PlanarToYuy2Wrapper;
2390 3b38f1c6 Baptiste Coudurier
                else if (dstFormat == PIX_FMT_UYVY422)
2391 221b804f Diego Biurrun
                    c->swScale= PlanarToUyvyWrapper;
2392
            }
2393
        }
2394 ec22603f Michael Niedermayer
2395 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
2396 221b804f Diego Biurrun
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2397 12794f73 Kostya Shishkov
            !(c->flags & SWS_BITEXACT) &&
2398 3b38f1c6 Baptiste Coudurier
            srcFormat == PIX_FMT_YUV420P) {
2399 221b804f Diego Biurrun
          // unscaled YV12 -> packed YUV, we want speed
2400
          if (dstFormat == PIX_FMT_YUYV422)
2401
              c->swScale= yv12toyuy2_unscaled_altivec;
2402 3b38f1c6 Baptiste Coudurier
          else if (dstFormat == PIX_FMT_UYVY422)
2403 221b804f Diego Biurrun
              c->swScale= yv12touyvy_unscaled_altivec;
2404
        }
2405 b71cf33c Romain Dolbeau
#endif
2406
2407 221b804f Diego Biurrun
        /* simple copy */
2408
        if (  srcFormat == dstFormat
2409
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2410 e5091488 Benoit Fouet
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
2411 221b804f Diego Biurrun
        {
2412 2d35ae56 Luca Barbato
            if (isPacked(c->srcFormat))
2413
                c->swScale= packedCopy;
2414
            else /* Planar YUV or gray */
2415
                c->swScale= planarCopy;
2416 221b804f Diego Biurrun
        }
2417
2418
        /* gray16{le,be} conversions */
2419
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2420
        {
2421
            c->swScale= gray16togray;
2422
        }
2423
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2424
        {
2425
            c->swScale= graytogray16;
2426
        }
2427
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2428
        {
2429
            c->swScale= gray16swap;
2430
        }
2431
2432 b63f641e Aurelien Jacobs
#if ARCH_BFIN
2433 1ebbfe15 Marc Hoffman
        if (flags & SWS_CPU_CAPS_BFIN)
2434
            ff_bfin_get_unscaled_swscale (c);
2435
#endif
2436
2437 221b804f Diego Biurrun
        if (c->swScale){
2438
            if (flags&SWS_PRINT_INFO)
2439 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
2440 221b804f Diego Biurrun
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2441
            return c;
2442
        }
2443
    }
2444
2445
    if (flags & SWS_CPU_CAPS_MMX2)
2446
    {
2447
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2448
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2449
        {
2450
            if (flags&SWS_PRINT_INFO)
2451 f40c7dbb Diego Biurrun
                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
2452 221b804f Diego Biurrun
        }
2453
        if (usesHFilter) c->canMMX2BeUsed=0;
2454
    }
2455
    else
2456
        c->canMMX2BeUsed=0;
2457
2458
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2459
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2460
2461
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2462
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2463
    // n-2 is the last chrominance sample available
2464 86bdf3fd Diego Biurrun
    // this is not perfect, but no one should notice the difference, the more correct variant
2465 221b804f Diego Biurrun
    // would be like the vertical one, but that would require some special code for the
2466
    // first and last pixel
2467
    if (flags&SWS_FAST_BILINEAR)
2468
    {
2469
        if (c->canMMX2BeUsed)
2470
        {
2471
            c->lumXInc+= 20;
2472
            c->chrXInc+= 20;
2473
        }
2474 f40c7dbb Diego Biurrun
        //we don't use the x86 asm scaler if MMX is available
2475 221b804f Diego Biurrun
        else if (flags & SWS_CPU_CAPS_MMX)
2476
        {
2477
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2478
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2479
        }
2480
    }
2481
2482
    /* precalculate horizontal scaler filter coefficients */
2483
    {
2484
        const int filterAlign=
2485
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2486
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2487
            1;
2488
2489
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2490
                   srcW      ,       dstW, filterAlign, 1<<14,
2491
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2492
                   srcFilter->lumH, dstFilter->lumH, c->param);
2493
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2494
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2495
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2496
                   srcFilter->chrH, dstFilter->chrH, c->param);
2497 28bf81c9 Michael Niedermayer
2498 dbdae6ec Diego Biurrun
#define MAX_FUNNY_CODE_SIZE 10000
2499 17c613ef Uoti Urpala
#if defined(COMPILE_MMX2)
2500 77a416e8 Gabucino
// can't downscale !!!
2501 221b804f Diego Biurrun
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2502
        {
2503 113ef149 Reimar Döffinger
#ifdef MAP_ANONYMOUS
2504 221b804f Diego Biurrun
            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2505
            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2506 38d5c282 Aurelien Jacobs
#else
2507 221b804f Diego Biurrun
            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2508
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2509 38d5c282 Aurelien Jacobs
#endif
2510
2511 221b804f Diego Biurrun
            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2512
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2513
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2514
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2515 b7dc6f66 Michael Niedermayer
2516 221b804f Diego Biurrun
            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2517
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2518
        }
2519 17c613ef Uoti Urpala
#endif /* defined(COMPILE_MMX2) */
2520 f40c7dbb Diego Biurrun
    } // initialize horizontal stuff
2521 28bf81c9 Michael Niedermayer
2522
2523
2524 221b804f Diego Biurrun
    /* precalculate vertical scaler filter coefficients */
2525
    {
2526
        const int filterAlign=
2527
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2528
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2529
            1;
2530 8c266f0c Romain Dolbeau
2531 221b804f Diego Biurrun
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2532 5fe4aad7 Michael Niedermayer
                   srcH      ,        dstH, filterAlign, (1<<12),
2533 221b804f Diego Biurrun
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2534
                   srcFilter->lumV, dstFilter->lumV, c->param);
2535
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2536 5fe4aad7 Michael Niedermayer
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12),
2537 221b804f Diego Biurrun
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2538
                   srcFilter->chrV, dstFilter->chrV, c->param);
2539 d33d485e Alan Curry
2540 b63f641e Aurelien Jacobs
#if HAVE_ALTIVEC
2541 221b804f Diego Biurrun
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2542
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2543
2544
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2545
            int j;
2546
            short *p = (short *)&c->vYCoeffsBank[i];
2547
            for (j=0;j<8;j++)
2548
                p[j] = c->vLumFilter[i];
2549
        }
2550
2551
        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2552
            int j;
2553
            short *p = (short *)&c->vCCoeffsBank[i];
2554
            for (j=0;j<8;j++)
2555
                p[j] = c->vChrFilter[i];
2556
        }
2557 d33d485e Alan Curry
#endif
2558 221b804f Diego Biurrun
    }
2559
2560 f40c7dbb Diego Biurrun
    // calculate buffer sizes so that they won't run out while handling these damn slices
2561 221b804f Diego Biurrun
    c->vLumBufSize= c->vLumFilterSize;
2562
    c->vChrBufSize= c->vChrFilterSize;
2563
    for (i=0; i<dstH; i++)
2564
    {
2565
        int chrI= i*c->chrDstH / dstH;
2566
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2567
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2568
2569
        nextSlice>>= c->chrSrcVSubSample;
2570
        nextSlice<<= c->chrSrcVSubSample;
2571
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2572 e5091488 Benoit Fouet
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i];
2573 221b804f Diego Biurrun
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2574
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2575
    }
2576
2577
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
    // allocate several megabytes to handle all possible cases)
2578
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2579
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2580 f40c7dbb Diego Biurrun
    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
2581 221b804f Diego Biurrun
    /* align at 16 bytes for AltiVec */
2582
    for (i=0; i<c->vLumBufSize; i++)
2583 8b2fce0d Michael Niedermayer
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2584 221b804f Diego Biurrun
    for (i=0; i<c->vChrBufSize; i++)
2585 8b2fce0d Michael Niedermayer
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
2586 221b804f Diego Biurrun
2587
    //try to avoid drawing green stuff between the right end and the stride end
2588 8b2fce0d Michael Niedermayer
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
2589
2590 5a352b14 Zuxy Meng
    assert(2*VOFW == VOF);
2591 221b804f Diego Biurrun
2592 fcc402b1 Luca Barbato
    assert(c->chrDstH <= dstH);
2593 221b804f Diego Biurrun
2594
    if (flags&SWS_PRINT_INFO)
2595
    {
2596 28bf81c9 Michael Niedermayer
#ifdef DITHER1XBPP
2597 3e62b7e3 Baptiste Coudurier
        const char *dither= " dithered";
2598 5521b193 Michael Niedermayer
#else
2599 3e62b7e3 Baptiste Coudurier
        const char *dither= "";
2600 28bf81c9 Michael Niedermayer
#endif
2601 221b804f Diego Biurrun
        if (flags&SWS_FAST_BILINEAR)
2602 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "FAST_BILINEAR scaler, ");
2603 221b804f Diego Biurrun
        else if (flags&SWS_BILINEAR)
2604 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "BILINEAR scaler, ");
2605 221b804f Diego Biurrun
        else if (flags&SWS_BICUBIC)
2606 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "BICUBIC scaler, ");
2607 221b804f Diego Biurrun
        else if (flags&SWS_X)
2608 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Experimental scaler, ");
2609 221b804f Diego Biurrun
        else if (flags&SWS_POINT)
2610 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Nearest Neighbor / POINT scaler, ");
2611 221b804f Diego Biurrun
        else if (flags&SWS_AREA)
2612 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Area Averageing scaler, ");
2613 221b804f Diego Biurrun
        else if (flags&SWS_BICUBLIN)
2614 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "luma BICUBIC / chroma BILINEAR scaler, ");
2615 221b804f Diego Biurrun
        else if (flags&SWS_GAUSS)
2616 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Gaussian scaler, ");
2617 221b804f Diego Biurrun
        else if (flags&SWS_SINC)
2618 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Sinc scaler, ");
2619 221b804f Diego Biurrun
        else if (flags&SWS_LANCZOS)
2620 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Lanczos scaler, ");
2621 221b804f Diego Biurrun
        else if (flags&SWS_SPLINE)
2622 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "Bicubic spline scaler, ");
2623 221b804f Diego Biurrun
        else
2624 4b0c30b7 Baptiste Coudurier
            av_log(c, AV_LOG_INFO, "ehh flags invalid?! ");
2625 221b804f Diego Biurrun
2626
        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2627
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2628
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2629
        else
2630
            av_log(c, AV_LOG_INFO, "from %s to %s ",
2631
                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2632
2633
        if (flags & SWS_CPU_CAPS_MMX2)
2634
            av_log(c, AV_LOG_INFO, "using MMX2\n");
2635
        else if (flags & SWS_CPU_CAPS_3DNOW)
2636
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2637
        else if (flags & SWS_CPU_CAPS_MMX)
2638
            av_log(c, AV_LOG_INFO, "using MMX\n");
2639
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2640
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2641
        else
2642
            av_log(c, AV_LOG_INFO, "using C\n");
2643
    }
2644
2645
    if (flags & SWS_PRINT_INFO)
2646
    {
2647
        if (flags & SWS_CPU_CAPS_MMX)
2648
        {
2649
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2650 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2651 221b804f Diego Biurrun
            else
2652
            {
2653
                if (c->hLumFilterSize==4)
2654 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal luminance scaling\n");
2655 221b804f Diego Biurrun
                else if (c->hLumFilterSize==8)
2656 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal luminance scaling\n");
2657 221b804f Diego Biurrun
                else
2658 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal luminance scaling\n");
2659 221b804f Diego Biurrun
2660
                if (c->hChrFilterSize==4)
2661 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using 4-tap MMX scaler for horizontal chrominance scaling\n");
2662 221b804f Diego Biurrun
                else if (c->hChrFilterSize==8)
2663 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using 8-tap MMX scaler for horizontal chrominance scaling\n");
2664 221b804f Diego Biurrun
                else
2665 4b0c30b7 Baptiste Coudurier
                    av_log(c, AV_LOG_VERBOSE, "using n-tap MMX scaler for horizontal chrominance scaling\n");
2666 221b804f Diego Biurrun
            }
2667
        }
2668
        else
2669
        {
2670 b63f641e Aurelien Jacobs
#if ARCH_X86
2671 f40c7dbb Diego Biurrun
            av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
2672 28bf81c9 Michael Niedermayer
#else
2673 221b804f Diego Biurrun
            if (flags & SWS_FAST_BILINEAR)
2674 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
2675 221b804f Diego Biurrun
            else
2676 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
2677 28bf81c9 Michael Niedermayer
#endif
2678 221b804f Diego Biurrun
        }
2679
        if (isPlanarYUV(dstFormat))
2680
        {
2681
            if (c->vLumFilterSize==1)
2682 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_VERBOSE, "using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2683 221b804f Diego Biurrun
            else