Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ 3164d25e

History | View | Annotate | Download (115 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 28bf81c9 Michael Niedermayer
/*
25 9990e426 Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
28 6a4970ab Diego Biurrun

29 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
38 b935781b Michael Niedermayer
*/
39
40 6a4970ab Diego Biurrun
/*
41 a6f6b237 Diego Biurrun
tested special converters (most are tested actually, but I did not write it down ...)
42 e09d12f4 Michael Niedermayer
 YV12 -> BGR16
43 b935781b Michael Niedermayer
 YV12 -> YV12
44 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
45 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
46 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
47 b935781b Michael Niedermayer

48
untested special converters
49 f40c7dbb Diego Biurrun
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54 ec22603f Michael Niedermayer
  BGR24 -> YV12
55 28bf81c9 Michael Niedermayer
*/
56
57 d63a2cb1 Michael Niedermayer
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58 d3f41512 Michael Niedermayer
#include <inttypes.h>
59 dda87e9f Pierre Lombard
#include <string.h>
60 077ea8a7 Michael Niedermayer
#include <math.h>
61 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
62 171d7d78 Bohdan Horst
#include <unistd.h>
63 b2d374c9 Diego Biurrun
#include "config.h"
64 81b7c056 Michael Niedermayer
#include <assert.h>
65 b63f641e Aurelien Jacobs
#if HAVE_SYS_MMAN_H
66 38d5c282 Aurelien Jacobs
#include <sys/mman.h>
67 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
68
#define MAP_ANONYMOUS MAP_ANON
69
#endif
70 38d5c282 Aurelien Jacobs
#endif
71 dd35beb2 Ramiro Polla
#if HAVE_VIRTUALALLOC
72
#define WIN32_LEAN_AND_MEAN
73
#include <windows.h>
74
#endif
75 d604bab9 Michael Niedermayer
#include "swscale.h"
76 5427e242 Michael Niedermayer
#include "swscale_internal.h"
77 37079906 Michael Niedermayer
#include "rgb2rgb.h"
78 83da2c6f Diego Biurrun
#include "libavutil/x86_cpu.h"
79
#include "libavutil/bswap.h"
80 0d9f3d85 Arpi
81 b3e03fa7 Stefano Sabatini
unsigned swscale_version(void)
82
{
83
    return LIBSWSCALE_VERSION_INT;
84
}
85
86 541c4eb9 Michael Niedermayer
#undef MOVNTQ
87 7d7f78b5 Michael Niedermayer
#undef PAVGB
88 d3f41512 Michael Niedermayer
89 783e9cc9 Michael Niedermayer
//#undef HAVE_MMX2
90 f4406ec1 Diego Biurrun
//#define HAVE_AMD3DNOW
91 d3f41512 Michael Niedermayer
//#undef HAVE_MMX
92 783e9cc9 Michael Niedermayer
//#undef ARCH_X86
93 2ba1bff0 Michael Niedermayer
//#define WORDS_BIGENDIAN
94 d604bab9 Michael Niedermayer
#define DITHER1XBPP
95 d3f41512 Michael Niedermayer
96 f40c7dbb Diego Biurrun
#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
97 ac6a2e45 Michael Niedermayer
98 f40c7dbb Diego Biurrun
#define RET 0xC3 //near return opcode for x86
99 c1b0bfb4 Michael Niedermayer
100 28bf81c9 Michael Niedermayer
/* PI: use the math library's M_PI when it is available, otherwise fall back
 * to a literal. */
#ifndef M_PI
#define PI 3.14159265358979323846
#else
#define PI M_PI
#endif
105 c1b0bfb4 Michael Niedermayer
106 9d9de37d Ivo van Poorten
/* Nonzero when pixel format x is in the list of accepted input formats.
 * x is evaluated once per comparison, so it must have no side effects.
 * (PIX_FMT_YUV440P used to be listed twice; the redundant test was dropped.) */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
    )
138
/* Nonzero when pixel format x is in the list of accepted output formats.
 * x is evaluated repeatedly, so it must have no side effects. */
#define isSupportedOut(x) (                                          \
    (x) == PIX_FMT_YUV420P  || (x) == PIX_FMT_YUVA420P ||            \
    (x) == PIX_FMT_YUYV422  || (x) == PIX_FMT_UYVY422  ||            \
    (x) == PIX_FMT_YUV444P  || (x) == PIX_FMT_YUV422P  ||            \
    (x) == PIX_FMT_YUV411P  ||                                       \
    isRGB(x)                || isBGR(x)                ||            \
    (x) == PIX_FMT_NV12     || (x) == PIX_FMT_NV21     ||            \
    (x) == PIX_FMT_GRAY16BE || (x) == PIX_FMT_GRAY16LE ||            \
    (x) == PIX_FMT_GRAY8    ||                                       \
    (x) == PIX_FMT_YUV410P  || (x) == PIX_FMT_YUV440P                \
)
156
/* Nonzero for PAL8, YUYV422, UYVY422 and every format matched by
 * isRGB()/isBGR(). x must have no side effects. */
#define isPacked(x) (                                   \
    (x) == PIX_FMT_PAL8    ||                           \
    (x) == PIX_FMT_YUYV422 ||                           \
    (x) == PIX_FMT_UYVY422 ||                           \
    isRGB(x)               ||                           \
    isBGR(x)                                            \
)
163 49004617 Vitor Sessak
/* Nonzero for PAL8 and the 4-bit-per-byte / 8-bit RGB and BGR formats.
 * x must have no side effects. */
#define usePal(x) (                                     \
    (x) == PIX_FMT_PAL8      ||                         \
    (x) == PIX_FMT_BGR4_BYTE ||                         \
    (x) == PIX_FMT_RGB4_BYTE ||                         \
    (x) == PIX_FMT_BGR8      ||                         \
    (x) == PIX_FMT_RGB8                                 \
)
170 6ff0ad6b Michael Niedermayer
171 6b79dbce Michael Niedermayer
/* Fixed-point RGB -> YUV coefficients with RGB2YUV_SHIFT fractional bits.
 * Each magnitude is scaled by range/255 (219 for the Y terms, 224 for the
 * U/V terms -- presumably the limited-range excursions) and rounded to the
 * nearest integer; the sign is applied after rounding. */
#define RGB2YUV_SHIFT 15
#define RGB2YUV_FIX(coeff, range) ((int)((coeff)*(range)/255*(1<<RGB2YUV_SHIFT)+0.5))

#define RY ( RGB2YUV_FIX(0.299, 219))
#define GY ( RGB2YUV_FIX(0.587, 219))
#define BY ( RGB2YUV_FIX(0.114, 219))

#define RU (-RGB2YUV_FIX(0.169, 224))
#define GU (-RGB2YUV_FIX(0.331, 224))
#define BU ( RGB2YUV_FIX(0.500, 224))

#define RV ( RGB2YUV_FIX(0.500, 224))
#define GV (-RGB2YUV_FIX(0.419, 224))
#define BV (-RGB2YUV_FIX(0.081, 224))
181 6c7506de Michael Niedermayer
182 fa58ba15 Kostya Shishkov
extern const int32_t ff_yuv2rgb_coeffs[8][4];
183 0481412a Michael Niedermayer
184 0f5d4aa8 Michael Niedermayer
/*
 * RGB -> YUV matrix coefficients, one row of nine values per colorspace.
 *
 * Column order, as read off the 0.587/0.114/0.299 rows (they match the
 * GY/BY/RY, GU/BU/RU and GV/BV/RV magnitudes used elsewhere in this file):
 *     G->Y, B->Y, R->Y,   G->U, B->U, R->U,   G->V, B->V, R->V
 *
 * NOTE(review): the row index presumably follows the same colorspace
 * numbering as ff_yuv2rgb_coeffs[8][4] -- confirm against the callers.
 *
 * The last row previously read "0.5  -0.116" without a comma, which folded
 * two entries into the single value 0.384, shifted the rest of the row left
 * and left the final entry implicitly 0. With the comma restored the U and V
 * coefficient triples of that row each sum to zero, like every other row.
 */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
194
195 783e9cc9 Michael Niedermayer
/*
196
NOTES
197 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
198 31190492 Arpi

199 783e9cc9 Michael Niedermayer
TODO
200 bd7c6fd5 Diego Biurrun
more intelligent misalignment avoidance for the horizontal scaler
201 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
202 f40c7dbb Diego Biurrun
optimize C code (YV12 / minmax)
203
add support for packed pixel YUV input & output
204 6ff0ad6b Michael Niedermayer
add support for Y8 output
205 f40c7dbb Diego Biurrun
optimize BGR24 & BGR32
206 ff7ba856 Michael Niedermayer
add BGR4 output support
207 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
208 783e9cc9 Michael Niedermayer
*/
209 31190492 Arpi
210 b63f641e Aurelien Jacobs
#if ARCH_X86 && CONFIG_GPL
/* 64-bit constants, 8-byte aligned, only built for x86 GPL configurations. */

/* byte-replicated (bF8, bFC) and 16-bit-word-replicated (w10, w02) values */
DECLARE_ASM_CONST(8, uint64_t, bF8) = 0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC) = 0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10) = 0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02) = 0x0002000200020002LL;

/* byte masks: each digit of the name stands for one byte (1 = 0xFF) */
DECLARE_ASM_CONST(8, uint64_t, bm00001111) = 0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111) = 0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000) = 0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101) = 0x00FF00FF00FF00FFLL;

const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
    0x0103010301030103LL,
    0x0200020002000200LL,
};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
    0x0602060206020602LL,
    0x0004000400040004LL,
};

/* per-16-bit-word field masks: 5/6/5-bit layout, then 5/5/5-bit layout */
DECLARE_ASM_CONST(8, uint64_t, b16Mask) = 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask) = 0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask) = 0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask) = 0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask) = 0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask) = 0x7C007C007C007C00LL;

/* every-third-byte masks */
DECLARE_ALIGNED(8, const uint64_t, ff_M24A) = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B) = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C) = 0x0000FF0000FF0000LL;

/* FAST_BGR2YV12 selects 7 bit coefficients instead of 15 bit ones */
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff) = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff) = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset) = 0x0040400000404000ULL;

#endif /* ARCH_X86 && CONFIG_GPL */
266 783e9cc9 Michael Niedermayer
267
// clipping helper table for C implementations:
268
static unsigned char clip_table[768];
269
270 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
271 6a4970ab Diego Biurrun
272 d0b69b28 Diego Pettenò
/* 2-row dither pattern with values 0..3; each row repeats a two-entry
 * pattern across its 8 columns. */
static const uint8_t  __attribute__((aligned(8))) dither_2x2_4[2][8] = {
    { 1, 3, 1, 3, 1, 3, 1, 3 },
    { 2, 0, 2, 0, 2, 0, 2, 0 }
};
276
277 d0b69b28 Diego Pettenò
/* 2-row dither pattern with values 0..6 in steps of 2; each row repeats a
 * two-entry pattern across its 8 columns. */
static const uint8_t  __attribute__((aligned(8))) dither_2x2_8[2][8] = {
    { 6, 2, 6, 2, 6, 2, 6, 2 },
    { 0, 4, 0, 4, 0, 4, 0, 4 }
};
281
282
/* 8x8 dither matrix with thresholds in 0..31 (each value occurs twice). */
const uint8_t  __attribute__((aligned(8))) dither_8x8_32[8][8] = {
    { 17,  9, 23, 15, 16,  8, 22, 14 },
    {  5, 29,  3, 27,  4, 28,  2, 26 },
    { 21, 13, 19, 11, 20, 12, 18, 10 },
    {  0, 24,  6, 30,  1, 25,  7, 31 },
    { 16,  8, 22, 14, 17,  9, 23, 15 },
    {  4, 28,  2, 26,  5, 29,  3, 27 },
    { 20, 12, 18, 10, 21, 13, 19, 11 },
    {  1, 25,  7, 31,  0, 24,  6, 30 }
};
292
293
#if 0
294
const uint8_t  __attribute__((aligned(8))) dither_8x8_64[8][8]={
295
{  0,  48,  12,  60,   3,  51,  15,  63, },
296
{ 32,  16,  44,  28,  35,  19,  47,  31, },
297
{  8,  56,   4,  52,  11,  59,   7,  55, },
298
{ 40,  24,  36,  20,  43,  27,  39,  23, },
299
{  2,  50,  14,  62,   1,  49,  13,  61, },
300
{ 34,  18,  46,  30,  33,  17,  45,  29, },
301
{ 10,  58,   6,  54,   9,  57,   5,  53, },
302
{ 42,  26,  38,  22,  41,  25,  37,  21, },
303
};
304
#endif
305
306
/* 8x8 dither matrix with thresholds in 0..72. */
const uint8_t  __attribute__((aligned(8))) dither_8x8_73[8][8] = {
    {  0, 55, 14, 68,  3, 58, 17, 72 },
    { 37, 18, 50, 32, 40, 22, 54, 35 },
    {  9, 64,  5, 59, 13, 67,  8, 63 },
    { 46, 27, 41, 23, 49, 31, 44, 26 },
    {  2, 57, 16, 71,  1, 56, 15, 70 },
    { 39, 21, 52, 34, 38, 19, 51, 33 },
    { 11, 66,  7, 62, 10, 65,  6, 60 },
    { 48, 30, 43, 25, 47, 29, 42, 24 }
};
316
317
#if 0
318
const uint8_t  __attribute__((aligned(8))) dither_8x8_128[8][8]={
319
{ 68,  36,  92,  60,  66,  34,  90,  58, },
320
{ 20, 116,  12, 108,  18, 114,  10, 106, },
321
{ 84,  52,  76,  44,  82,  50,  74,  42, },
322
{  0,  96,  24, 120,   6, 102,  30, 126, },
323
{ 64,  32,  88,  56,  70,  38,  94,  62, },
324
{ 16, 112,   8, 104,  22, 118,  14, 110, },
325
{ 80,  48,  72,  40,  86,  54,  78,  46, },
326
{  4, 100,  28, 124,   2,  98,  26, 122, },
327
};
328
#endif
329
330
/* 8x8 dither matrix with thresholds in 0..217.
 * Only the first variant is compiled (its "#if 1" always wins); the
 * gamma-corrected alternatives below are kept for reference. */
#if 1
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8] = {
    { 117,  62, 158, 103, 113,  58, 155, 100 },
    {  34, 199,  21, 186,  31, 196,  17, 182 },
    { 144,  89, 131,  76, 141,  86, 127,  72 },
    {   0, 165,  41, 206,  10, 175,  52, 217 },
    { 110,  55, 151,  96, 120,  65, 162, 107 },
    {  28, 193,  14, 179,  38, 203,  24, 189 },
    { 138,  83, 124,  69, 148,  93, 134,  79 },
    {   7, 172,  48, 213,   3, 168,  45, 210 }
};
#elif 1
// tries to correct a gamma of 1.5
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8] = {
    {   0, 143,  18, 200,   2, 156,  25, 215 },
    {  78,  28, 125,  64,  89,  36, 138,  74 },
    {  10, 180,   3, 161,  16, 195,   8, 175 },
    { 109,  51,  93,  38, 121,  60, 105,  47 },
    {   1, 152,  23, 210,   0, 147,  20, 205 },
    {  85,  33, 134,  71,  81,  30, 130,  67 },
    {  14, 190,   6, 171,  12, 185,   5, 166 },
    { 117,  57, 101,  44, 113,  54,  97,  41 }
};
#elif 1
// tries to correct a gamma of 2.0
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8] = {
    {   0, 124,   8, 193,   0, 140,  12, 213 },
    {  55,  14, 104,  42,  66,  19, 119,  52 },
    {   3, 168,   1, 145,   6, 187,   3, 162 },
    {  86,  31,  70,  21,  99,  39,  82,  28 },
    {   0, 134,  11, 206,   0, 129,   9, 200 },
    {  62,  17, 114,  48,  58,  16, 109,  45 },
    {   5, 181,   2, 157,   4, 175,   1, 151 },
    {  95,  36,  78,  26,  90,  34,  74,  24 }
};
#else
// tries to correct a gamma of 2.5
const uint8_t  __attribute__((aligned(8))) dither_8x8_220[8][8] = {
    {   0, 107,   3, 187,   0, 125,   6, 212 },
    {  39,   7,  86,  28,  49,  11, 102,  36 },
    {   1, 158,   0, 131,   3, 180,   1, 151 },
    {  68,  19,  52,  12,  81,  25,  64,  17 },
    {   0, 119,   5, 203,   0, 113,   4, 195 },
    {  45,   9,  96,  33,  42,   8,  91,  30 },
    {   2, 172,   1, 144,   2, 165,   0, 137 },
    {  77,  23,  60,  15,  72,  21,  56,  14 }
};
#endif
378 5cebb24b Michael Niedermayer
379 8055ede6 Baptiste Coudurier
const char *sws_format_name(enum PixelFormat format)
380 94c4def2 Luca Abeni
{
381 e9e12f0e Luca Abeni
    switch (format) {
382
        case PIX_FMT_YUV420P:
383
            return "yuv420p";
384 79973335 Aurelien Jacobs
        case PIX_FMT_YUVA420P:
385
            return "yuva420p";
386 e9e12f0e Luca Abeni
        case PIX_FMT_YUYV422:
387
            return "yuyv422";
388
        case PIX_FMT_RGB24:
389
            return "rgb24";
390
        case PIX_FMT_BGR24:
391
            return "bgr24";
392
        case PIX_FMT_YUV422P:
393
            return "yuv422p";
394
        case PIX_FMT_YUV444P:
395
            return "yuv444p";
396
        case PIX_FMT_RGB32:
397
            return "rgb32";
398
        case PIX_FMT_YUV410P:
399
            return "yuv410p";
400
        case PIX_FMT_YUV411P:
401
            return "yuv411p";
402
        case PIX_FMT_RGB565:
403
            return "rgb565";
404
        case PIX_FMT_RGB555:
405
            return "rgb555";
406 4884b9e5 Kostya Shishkov
        case PIX_FMT_GRAY16BE:
407
            return "gray16be";
408
        case PIX_FMT_GRAY16LE:
409
            return "gray16le";
410 e9e12f0e Luca Abeni
        case PIX_FMT_GRAY8:
411
            return "gray8";
412
        case PIX_FMT_MONOWHITE:
413
            return "mono white";
414
        case PIX_FMT_MONOBLACK:
415
            return "mono black";
416
        case PIX_FMT_PAL8:
417
            return "Palette";
418
        case PIX_FMT_YUVJ420P:
419
            return "yuvj420p";
420
        case PIX_FMT_YUVJ422P:
421
            return "yuvj422p";
422
        case PIX_FMT_YUVJ444P:
423
            return "yuvj444p";
424
        case PIX_FMT_XVMC_MPEG2_MC:
425
            return "xvmc_mpeg2_mc";
426
        case PIX_FMT_XVMC_MPEG2_IDCT:
427
            return "xvmc_mpeg2_idct";
428
        case PIX_FMT_UYVY422:
429
            return "uyvy422";
430
        case PIX_FMT_UYYVYY411:
431
            return "uyyvyy411";
432
        case PIX_FMT_RGB32_1:
433
            return "rgb32x";
434
        case PIX_FMT_BGR32_1:
435
            return "bgr32x";
436
        case PIX_FMT_BGR32:
437
            return "bgr32";
438
        case PIX_FMT_BGR565:
439
            return "bgr565";
440
        case PIX_FMT_BGR555:
441
            return "bgr555";
442
        case PIX_FMT_BGR8:
443
            return "bgr8";
444
        case PIX_FMT_BGR4:
445
            return "bgr4";
446
        case PIX_FMT_BGR4_BYTE:
447
            return "bgr4 byte";
448
        case PIX_FMT_RGB8:
449
            return "rgb8";
450
        case PIX_FMT_RGB4:
451
            return "rgb4";
452
        case PIX_FMT_RGB4_BYTE:
453
            return "rgb4 byte";
454
        case PIX_FMT_NV12:
455
            return "nv12";
456
        case PIX_FMT_NV21:
457
            return "nv21";
458 9ba7fe6d Andreas Öman
        case PIX_FMT_YUV440P:
459
            return "yuv440p";
460 420169e5 Carl Eugen Hoyos
        case PIX_FMT_VDPAU_H264:
461
            return "vdpau_h264";
462 4e2b4876 NVIDIA Corporation
        case PIX_FMT_VDPAU_MPEG1:
463
            return "vdpau_mpeg1";
464
        case PIX_FMT_VDPAU_MPEG2:
465
            return "vdpau_mpeg2";
466 0ab80395 Carl Eugen Hoyos
        case PIX_FMT_VDPAU_WMV3:
467
            return "vdpau_wmv3";
468
        case PIX_FMT_VDPAU_VC1:
469
            return "vdpau_vc1";
470 e9e12f0e Luca Abeni
        default:
471
            return "Unknown format";
472
    }
473 94c4def2 Luca Abeni
}
474
475 5859233b Michael Niedermayer
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
476 221b804f Diego Biurrun
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
477 6858492e Cédric Schieli
                               int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
478 e3d2500f Michael Niedermayer
{
479 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
480 221b804f Diego Biurrun
    int i;
481
    for (i=0; i<dstW; i++)
482
    {
483
        int val=1<<18;
484
        int j;
485
        for (j=0; j<lumFilterSize; j++)
486
            val += lumSrc[j][i] * lumFilter[j];
487
488
        dest[i]= av_clip_uint8(val>>19);
489
    }
490
491 1b0a4572 Benoit Fouet
    if (uDest)
492 221b804f Diego Biurrun
        for (i=0; i<chrDstW; i++)
493
        {
494
            int u=1<<18;
495
            int v=1<<18;
496
            int j;
497
            for (j=0; j<chrFilterSize; j++)
498
            {
499
                u += chrSrc[j][i] * chrFilter[j];
500 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
501 221b804f Diego Biurrun
            }
502
503
            uDest[i]= av_clip_uint8(u>>19);
504
            vDest[i]= av_clip_uint8(v>>19);
505
        }
506 6858492e Cédric Schieli
507
    if (CONFIG_SWSCALE_ALPHA && aDest)
508
        for (i=0; i<dstW; i++){
509
            int val=1<<18;
510
            int j;
511
            for (j=0; j<lumFilterSize; j++)
512
                val += alpSrc[j][i] * lumFilter[j];
513
514
            aDest[i]= av_clip_uint8(val>>19);
515
        }
516
517 e3d2500f Michael Niedermayer
}
518
519 6118e52e Ville Syrjälä
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
520 221b804f Diego Biurrun
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
521
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
522 6118e52e Ville Syrjälä
{
523 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
524 221b804f Diego Biurrun
    int i;
525
    for (i=0; i<dstW; i++)
526
    {
527
        int val=1<<18;
528
        int j;
529
        for (j=0; j<lumFilterSize; j++)
530
            val += lumSrc[j][i] * lumFilter[j];
531
532
        dest[i]= av_clip_uint8(val>>19);
533
    }
534
535 1b0a4572 Benoit Fouet
    if (!uDest)
536 221b804f Diego Biurrun
        return;
537
538
    if (dstFormat == PIX_FMT_NV12)
539
        for (i=0; i<chrDstW; i++)
540
        {
541
            int u=1<<18;
542
            int v=1<<18;
543
            int j;
544
            for (j=0; j<chrFilterSize; j++)
545
            {
546
                u += chrSrc[j][i] * chrFilter[j];
547 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
548 221b804f Diego Biurrun
            }
549
550
            uDest[2*i]= av_clip_uint8(u>>19);
551
            uDest[2*i+1]= av_clip_uint8(v>>19);
552
        }
553
    else
554
        for (i=0; i<chrDstW; i++)
555
        {
556
            int u=1<<18;
557
            int v=1<<18;
558
            int j;
559
            for (j=0; j<chrFilterSize; j++)
560
            {
561
                u += chrSrc[j][i] * chrFilter[j];
562 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
563 221b804f Diego Biurrun
            }
564
565
            uDest[2*i]= av_clip_uint8(v>>19);
566
            uDest[2*i+1]= av_clip_uint8(u>>19);
567
        }
568 6118e52e Ville Syrjälä
}
569 46de8b73 Michael Niedermayer
570 6858492e Cédric Schieli
/*
 * Loop prologue for the vertically filtered packed-output paths.
 *
 * Opens "for (i...) {" over pairs of output pixels and LEAVES IT OPEN: the
 * code that uses the macro supplies the rest of the body and the closing
 * brace. Inside the loop it provides Y1/Y2 (two luma samples), U/V (one
 * chroma pair, V read VOFW entries after U) and, when alpha is nonzero,
 * A1/A2 (filtered with lumFilter). Sums start at 1<<18 so the >>19 rounds to
 * nearest; nothing is clipped here. r/g/b are only declared, with the given
 * pointee type.
 *
 * Expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc, chrFilter,
 * chrFilterSize and (for alpha) alpSrc in the enclosing scope.
 *
 * The definition no longer ends in a line continuation, so it cannot absorb
 * whatever line happens to follow it.
 */
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        int av_unused A1, A2;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if (alpha){\
            A1 = 1<<18;\
            A2 = 1<<18;\
            for (j=0; j<lumFilterSize; j++){\
                A1 += alpSrc[j][i2  ] * lumFilter[j];\
                A2 += alpSrc[j][i2+1] * lumFilter[j];\
            }\
            A1>>=19;\
            A2>>=19;\
        }
605 bdf397ba Michael Niedermayer
606 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKEDX_NOCLIP_C followed by clipping of Y1, Y2, U, V (and
 * A1, A2 when alpha is nonzero) to 0..255. Bit 8 of the OR-ed values is used
 * as a cheap "something may be out of range" test before clipping. The loop
 * opened by the prologue is still open when the macro ends.
 */
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
        if ((Y1 | Y2 | U | V) & 256) { \
            Y1 = (Y1 > 255) ? 255 : ((Y1 < 0) ? 0 : Y1); \
            Y2 = (Y2 > 255) ? 255 : ((Y2 < 0) ? 0 : Y2); \
            U  = (U  > 255) ? 255 : ((U  < 0) ? 0 : U ); \
            V  = (V  > 255) ? 255 : ((V  < 0) ? 0 : V ); \
        } \
        if (alpha && ((A1 | A2) & 256)) { \
            A1 = av_clip_uint8(A1); \
            A2 = av_clip_uint8(A2); \
        }
623 6a4970ab Diego Biurrun
624 6858492e Cédric Schieli
/*
 * Loop prologue for the full-chroma-resolution packed paths: one Y, U, V
 * (and optionally A) per output pixel.
 *
 * Opens "for (i...) {" and LEAVES IT OPEN for the code that uses the macro.
 * Y, U and V are filter sums shifted right by 10; U and V start at
 * -(128<<19), i.e. they are re-centred around zero. When alpha is nonzero, A
 * starts at rnd, is shifted right by 19 and clipped to 0..255. R, G, B are
 * only declared.
 *
 * Changes from the previous definition:
 *  - the chroma bias is written -(128<<19) instead of -128<<19, which
 *    left-shifted a negative value (undefined behaviour in C);
 *  - rnd is parenthesized so an expression argument expands safely;
 *  - the definition no longer ends in a line continuation.
 */
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
    for (i=0; i<dstW; i++){\
        int j;\
        int Y = 0;\
        int U = -(128<<19);\
        int V = -(128<<19);\
        int av_unused A;\
        int R,G,B;\
        \
        for (j=0; j<lumFilterSize; j++){\
            Y += lumSrc[j][i     ] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++){\
            U += chrSrc[j][i     ] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y >>=10;\
        U >>=10;\
        V >>=10;\
        if (alpha){\
            A = (rnd);\
            for (j=0; j<lumFilterSize; j++)\
                A += alpSrc[j][i     ] * lumFilter[j];\
            A >>=19;\
            if (A&256)\
                A = av_clip_uint8(A);\
        }
651 f0faee4c Michael Niedermayer
652 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKEDX_FULL_C plus the YUV -> RGB matrix taken from the
 * context c (yuv2rgb_y_offset, yuv2rgb_y_coeff, yuv2rgb_v2r_coeff,
 * yuv2rgb_v2g_coeff, yuv2rgb_u2g_coeff, yuv2rgb_u2b_coeff).
 *
 * R, G and B are left as fixed-point values clipped to 0 .. (256<<22)-1; the
 * test against 0xC0000000 (the top two bits) is the quick out-of-range check.
 * The loop opened by the prologue is still open when the macro ends.
 *
 * rnd is parenthesized so an expression argument expands safely, and the
 * definition no longer ends in a line continuation.
 */
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
    YSCALE_YUV_2_PACKEDX_FULL_C((rnd)>>3,alpha)\
        Y-= c->yuv2rgb_y_offset;\
        Y*= c->yuv2rgb_y_coeff;\
        Y+= (rnd);\
        R= Y + V*c->yuv2rgb_v2r_coeff;\
        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
        if ((R|G|B)&(0xC0000000)){\
            if (R>=(256<<22))   R=(256<<22)-1; \
            else if (R<0)R=0;   \
            if (G>=(256<<22))   G=(256<<22)-1; \
            else if (G<0)G=0;   \
            if (B>=(256<<22))   B=(256<<22)-1; \
            else if (B<0)B=0;   \
        }
668
669
670 e69bd294 Michael Niedermayer
/*
 * Loop prologue for vertically filtered 16-bit gray output.
 *
 * Opens "for (i...) {" over pairs of pixels and LEAVES IT OPEN for the code
 * that uses the macro. Y1/Y2 are luma filter sums shifted right by 11 and
 * clipped to 0..65535. U and V are never filtered here; they keep their
 * initial value and only take part in the range test, where 1<<18 has bit 16
 * clear and so never triggers it.
 *
 * NOTE(review): the sums start at 1<<18, which is the rounding term for the
 * >>19 used by the 8-bit paths, not for the >>11 used here -- confirm that
 * the resulting +128 offset is intended.
 */
#define YSCALE_YUV_2_GRAY16_C \
    for (i = 0; i < (dstW >> 1); i++) { \
        int j; \
        int Y1 = 1 << 18; \
        int Y2 = 1 << 18; \
        int U  = 1 << 18; \
        int V  = 1 << 18; \
        const int i2 = 2 * i; \
        for (j = 0; j < lumFilterSize; j++) { \
            Y1 += lumSrc[j][i2    ] * lumFilter[j]; \
            Y2 += lumSrc[j][i2 + 1] * lumFilter[j]; \
        } \
        Y1 >>= 11; \
        Y2 >>= 11; \
        if ((Y1 | Y2 | U | V) & 65536) { \
            Y1 = (Y1 > 65535) ? 65535 : ((Y1 < 0) ? 0 : Y1); \
            Y2 = (Y2 > 65535) ? 65535 : ((Y2 < 0) ? 0 : Y2); \
        }
694
695 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKEDX_C followed by the per-pixel-pair table lookups:
 * r, g and b are set to the lookup rows selected by V and U in the context
 * tables c->table_rV, c->table_gU + c->table_gV and c->table_bU, cast to
 * "type *". The loop opened by the prologue is still open when the macro
 * ends.
 *
 * The definition no longer ends in a line continuation, so it cannot absorb
 * whatever line happens to follow it.
 */
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];
700
701 6858492e Cédric Schieli
/*
 * Loop prologue for the two-line (bilinear) vertical blend, packed output.
 *
 * Opens "for (i...) {" over pairs of pixels and LEAVES IT OPEN for the code
 * that uses the macro. Each sample is buf0*yalpha1 + buf1*yalpha (chroma:
 * uvbuf0*uvalpha1 + uvbuf1*uvalpha, V read VOFW entries after U), shifted
 * right by 19. A1/A2 are blended from abuf0/abuf1 with the luma weights and
 * are only assigned when alpha is nonzero. r/g/b are only declared.
 *
 * The definition no longer ends in a line continuation, so it cannot absorb
 * whatever line happens to follow it.
 */
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
        type av_unused *r, *b, *g;                                    \
        int av_unused A1, A2;                                         \
        if (alpha){\
            A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
            A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
        }
714 46de8b73 Michael Niedermayer
715 b0880d5d Michael Niedermayer
/*
 * YSCALE_YUV_2_GRAY16_2_C: two-source-line loop header for 16-bit gray output.
 * Same luma blend as YSCALE_YUV_2_PACKED2_C but shifted by 11 instead of 19,
 * i.e. 8 more low-order bits are kept in Y1/Y2. No chroma or alpha is computed.
 * The 'for' body is left open for the expansion site to close.
 */
#define YSCALE_YUV_2_GRAY16_2_C   \
716
    for (i=0; i<(dstW>>1); i++){ \
717
        const int i2= 2*i;       \
718
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
719
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
720
721 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_RGB2_C(type, alpha): YSCALE_YUV_2_PACKED2_C followed by the
 * table lookups that set r, g and b from c->table_rV[V],
 * c->table_gU[U] + c->table_gV[V] and c->table_bU[U].
 * The loop opened by YSCALE_YUV_2_PACKED2_C is still open after this macro.
 */
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
722
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
723 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
724
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
725
    b = (type *)c->table_bU[U];\
726 cf7d1c1a Michael Niedermayer
727 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKED1_C(type, alpha): loop header for output built from ONE
 * source line. Per pixel pair it declares:
 *   Y1, Y2 = buf0[i2] >> 7 and buf0[i2+1] >> 7
 *   U, V   = uvbuf1[i] >> 7 and uvbuf1[i+VOFW] >> 7 (note: uvbuf1, not uvbuf0)
 *   A1, A2 = abuf0 >> 7, only when 'alpha' is true
 * r, g, b are declared but not set. The 'for' body is left open.
 */
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
728 221b804f Diego Biurrun
    for (i=0; i<(dstW>>1); i++){\
729
        const int i2= 2*i;\
730
        int Y1= buf0[i2  ]>>7;\
731
        int Y2= buf0[i2+1]>>7;\
732
        int U= (uvbuf1[i     ])>>7;\
733 8b2fce0d Michael Niedermayer
        int V= (uvbuf1[i+VOFW])>>7;\
734 6858492e Cédric Schieli
        type av_unused *r, *b, *g;\
735
        int av_unused A1, A2;\
736
        if (alpha){\
737
            A1= abuf0[i2  ]>>7;\
738
            A2= abuf0[i2+1]>>7;\
739
        }\
740 46de8b73 Michael Niedermayer
741 b0880d5d Michael Niedermayer
/*
 * YSCALE_YUV_2_GRAY16_1_C: one-source-line loop header for 16-bit gray output.
 * Y1/Y2 are the buf0 samples shifted left by one bit. The 'for' body is left
 * open for the expansion site to close.
 */
#define YSCALE_YUV_2_GRAY16_1_C \
742
    for (i=0; i<(dstW>>1); i++){\
743
        const int i2= 2*i;\
744
        int Y1= buf0[i2  ]<<1;\
745
        int Y2= buf0[i2+1]<<1;\
746
747 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_RGB1_C(type, alpha): YSCALE_YUV_2_PACKED1_C followed by the
 * table lookups that set r, g and b from c->table_rV[V],
 * c->table_gU[U] + c->table_gV[V] and c->table_bU[U].
 * The loop opened by YSCALE_YUV_2_PACKED1_C is still open after this macro.
 */
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
748
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
749 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
750
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
751
    b = (type *)c->table_bU[U];\
752 cf7d1c1a Michael Niedermayer
753 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKED1B_C(type, alpha): like YSCALE_YUV_2_PACKED1_C for luma
 * and alpha (single line, >>7), but chroma is taken from BOTH chroma lines:
 *   U = (uvbuf0[i] + uvbuf1[i]) >> 8, V likewise at offset i+VOFW,
 * i.e. the sum of the two lines shifted one bit further than the >>7 used for luma.
 * r, g, b are declared but not set. The 'for' body is left open.
 */
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
754 221b804f Diego Biurrun
    for (i=0; i<(dstW>>1); i++){\
755
        const int i2= 2*i;\
756
        int Y1= buf0[i2  ]>>7;\
757
        int Y2= buf0[i2+1]>>7;\
758
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
759 8b2fce0d Michael Niedermayer
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
760 6858492e Cédric Schieli
        type av_unused *r, *b, *g;\
761
        int av_unused A1, A2;\
762
        if (alpha){\
763
            A1= abuf0[i2  ]>>7;\
764
            A2= abuf0[i2+1]>>7;\
765
        }\
766 46de8b73 Michael Niedermayer
767 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_RGB1B_C(type, alpha): YSCALE_YUV_2_PACKED1B_C followed by the
 * table lookups that set r, g and b from c->table_rV[V],
 * c->table_gU[U] + c->table_gV[V] and c->table_bU[U].
 * The loop opened by YSCALE_YUV_2_PACKED1B_C is still open after this macro.
 */
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
768
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
769 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
770
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
771
    b = (type *)c->table_bU[U];\
772 cf7d1c1a Michael Niedermayer
773 ec1bca2a Michael Niedermayer
/*
 * YSCALE_YUV_2_MONO2_C: complete (self-closing) loop producing 1 bit per pixel
 * output from two blended source lines.
 * - d128 is the row of dither_8x8_220 selected by y&7; g is the table at
 *   c->table_gU[128] + c->table_gV[128].
 * - Each iteration consumes 8 pixels: the blended luma
 *   (buf0*yalpha1 + buf1*yalpha) >> 19 plus the dither value indexes g, and
 *   'acc += acc + g[...]' shifts acc left by one and adds the looked-up value,
 *   so the first pixel ends up in the most significant position.
 *   NOTE(review): presumably g[] yields 0 or 1 for these formats -- confirm in the table setup.
 * - The byte is stored as acc for PIX_FMT_MONOBLACK and as ~acc otherwise, then dest advances.
 * - The loop condition is i < dstW-7, so a trailing group of fewer than 8 pixels is not written.
 */
#define YSCALE_YUV_2_MONO2_C \
774 e69bd294 Michael Niedermayer
    const uint8_t * const d128=dither_8x8_220[y&7];\
775
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
776
    for (i=0; i<dstW-7; i+=8){\
777
        int acc;\
778
        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
779
        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
780
        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
781
        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
782
        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
783
        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
784
        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
785
        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
786 ec1bca2a Michael Niedermayer
        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
787 e69bd294 Michael Niedermayer
        dest++;\
788
    }\
789
790
791 ec1bca2a Michael Niedermayer
/*
 * YSCALE_YUV_2_MONOX_C: complete (self-closing) loop producing 1 bit per pixel
 * output with a multi-tap vertical luma filter.
 * - Two pixels per iteration: Y1/Y2 start at 1<<18 (half of the 1<<19 divisor applied
 *   by the later >>19), accumulate lumSrc[j][i] * lumFilter[j] over
 *   lumFilterSize taps, then are shifted right by 19.
 * - Clipping to 0..255 is only attempted when bit 8 of (Y1|Y2) is set.
 * - acc is shifted left and extended by one table value per pixel
 *   (g[Y + d128[x&7]]); it is declared outside the loop so it carries across
 *   iterations, and a byte is emitted when (i&7)==6, i.e. after every 8th pixel.
 * - The byte is stored as acc for PIX_FMT_MONOBLACK and as ~acc otherwise.
 * - Pixels after the last complete group of 8 are accumulated but never stored.
 */
#define YSCALE_YUV_2_MONOX_C \
792 e69bd294 Michael Niedermayer
    const uint8_t * const d128=dither_8x8_220[y&7];\
793
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
794
    int acc=0;\
795
    for (i=0; i<dstW-1; i+=2){\
796
        int j;\
797
        int Y1=1<<18;\
798
        int Y2=1<<18;\
799
\
800
        for (j=0; j<lumFilterSize; j++)\
801
        {\
802
            Y1 += lumSrc[j][i] * lumFilter[j];\
803
            Y2 += lumSrc[j][i+1] * lumFilter[j];\
804
        }\
805
        Y1>>=19;\
806
        Y2>>=19;\
807
        if ((Y1|Y2)&256)\
808
        {\
809
            if (Y1>255)   Y1=255;\
810
            else if (Y1<0)Y1=0;\
811
            if (Y2>255)   Y2=255;\
812
            else if (Y2<0)Y2=0;\
813
        }\
814
        acc+= acc + g[Y1+d128[(i+0)&7]];\
815
        acc+= acc + g[Y2+d128[(i+1)&7]];\
816
        if ((i&7)==6){\
817 ec1bca2a Michael Niedermayer
            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
818 e69bd294 Michael Niedermayer
            dest++;\
819
        }\
820
    }
821
822
823
/*
 * YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack): switch on
 * c->dstFormat that writes one packed output line into dest.
 *
 *   func(type, alpha)  loop-opening macro used by all RGB/BGR cases. The case
 *                      body reads i, i2, Y1, Y2 and the r/g/b lookup pointers
 *                      (plus A1/A2 when alpha is requested) and supplies the
 *                      '}' that closes the loop.
 *   func2              loop-opening macro for YUYV422/UYVY422; the body stores
 *                      Y1, U, Y2, V as bytes in the order the format requires.
 *   func_g16           loop-opening macro for GRAY16BE/GRAY16LE; the body stores
 *                      Y1 and Y2 as two bytes each (high byte first for BE).
 *   func_monoblack     complete code for MONOBLACK/MONOWHITE, wrapped in braces.
 *
 * 32-bit cases: the pixel is r[Y]+g[Y]+b[Y]; alpha is added only when
 * CONFIG_SWSCALE_ALPHA && c->alpPixBuf, as A<<24 for RGBA/BGRA and unshifted
 * for ARGB/ABGR. With CONFIG_SMALL a single loop with a run-time needAlpha
 * flag is emitted instead of two specialised loops.
 * 24-bit cases write three bytes per pixel and advance dest by 6 per pair.
 * 16/8/4-bit cases add per-position offsets from the dither_2x2_* and
 * dither_8x8_* tables to the table index before the lookup.
 * The switch has no default case: formats not listed are left untouched.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
824 221b804f Diego Biurrun
    switch(c->dstFormat)\
825
    {\
826 6858492e Cédric Schieli
    case PIX_FMT_RGBA:\
827
    case PIX_FMT_BGRA:\
828
        if (CONFIG_SMALL){\
829
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
830
            func(uint32_t,needAlpha)\
831
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
832
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
833
            }\
834
        }else{\
835
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
836
                func(uint32_t,1)\
837
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
838
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
839
                }\
840
            }else{\
841
                func(uint32_t,0)\
842
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
843
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
844
                }\
845
            }\
846
        }\
847
        break;\
848
    case PIX_FMT_ARGB:\
849
    case PIX_FMT_ABGR:\
850
        if (CONFIG_SMALL){\
851
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
852
            func(uint32_t,needAlpha)\
853
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
854
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
855
            }\
856
        }else{\
857
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
858
                func(uint32_t,1)\
859
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
860
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
861
                }\
862
            }else{\
863
                func(uint32_t,0)\
864
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
865
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
866
                }\
867
            }\
868 221b804f Diego Biurrun
        }                \
869
        break;\
870
    case PIX_FMT_RGB24:\
871 6858492e Cédric Schieli
        func(uint8_t,0)\
872 221b804f Diego Biurrun
            ((uint8_t*)dest)[0]= r[Y1];\
873
            ((uint8_t*)dest)[1]= g[Y1];\
874
            ((uint8_t*)dest)[2]= b[Y1];\
875
            ((uint8_t*)dest)[3]= r[Y2];\
876
            ((uint8_t*)dest)[4]= g[Y2];\
877
            ((uint8_t*)dest)[5]= b[Y2];\
878
            dest+=6;\
879
        }\
880
        break;\
881
    case PIX_FMT_BGR24:\
882 6858492e Cédric Schieli
        func(uint8_t,0)\
883 221b804f Diego Biurrun
            ((uint8_t*)dest)[0]= b[Y1];\
884
            ((uint8_t*)dest)[1]= g[Y1];\
885
            ((uint8_t*)dest)[2]= r[Y1];\
886
            ((uint8_t*)dest)[3]= b[Y2];\
887
            ((uint8_t*)dest)[4]= g[Y2];\
888
            ((uint8_t*)dest)[5]= r[Y2];\
889
            dest+=6;\
890
        }\
891
        break;\
892
    case PIX_FMT_RGB565:\
893
    case PIX_FMT_BGR565:\
894
        {\
895
            const int dr1= dither_2x2_8[y&1    ][0];\
896
            const int dg1= dither_2x2_4[y&1    ][0];\
897
            const int db1= dither_2x2_8[(y&1)^1][0];\
898
            const int dr2= dither_2x2_8[y&1    ][1];\
899
            const int dg2= dither_2x2_4[y&1    ][1];\
900
            const int db2= dither_2x2_8[(y&1)^1][1];\
901 6858492e Cédric Schieli
            func(uint16_t,0)\
902 221b804f Diego Biurrun
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
903
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
904
            }\
905
        }\
906
        break;\
907
    case PIX_FMT_RGB555:\
908
    case PIX_FMT_BGR555:\
909
        {\
910
            const int dr1= dither_2x2_8[y&1    ][0];\
911
            const int dg1= dither_2x2_8[y&1    ][1];\
912
            const int db1= dither_2x2_8[(y&1)^1][0];\
913
            const int dr2= dither_2x2_8[y&1    ][1];\
914
            const int dg2= dither_2x2_8[y&1    ][0];\
915
            const int db2= dither_2x2_8[(y&1)^1][1];\
916 6858492e Cédric Schieli
            func(uint16_t,0)\
917 221b804f Diego Biurrun
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
918
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
919
            }\
920
        }\
921
        break;\
922
    case PIX_FMT_RGB8:\
923
    case PIX_FMT_BGR8:\
924
        {\
925
            const uint8_t * const d64= dither_8x8_73[y&7];\
926
            const uint8_t * const d32= dither_8x8_32[y&7];\
927 6858492e Cédric Schieli
            func(uint8_t,0)\
928 221b804f Diego Biurrun
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
929
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
930
            }\
931
        }\
932
        break;\
933
    case PIX_FMT_RGB4:\
934
    case PIX_FMT_BGR4:\
935
        {\
936
            const uint8_t * const d64= dither_8x8_73 [y&7];\
937
            const uint8_t * const d128=dither_8x8_220[y&7];\
938 6858492e Cédric Schieli
            func(uint8_t,0)\
939 221b804f Diego Biurrun
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
940
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
941
            }\
942
        }\
943
        break;\
944
    case PIX_FMT_RGB4_BYTE:\
945
    case PIX_FMT_BGR4_BYTE:\
946
        {\
947
            const uint8_t * const d64= dither_8x8_73 [y&7];\
948
            const uint8_t * const d128=dither_8x8_220[y&7];\
949 6858492e Cédric Schieli
            func(uint8_t,0)\
950 221b804f Diego Biurrun
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
951
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
952
            }\
953
        }\
954
        break;\
955
    case PIX_FMT_MONOBLACK:\
956 ec1bca2a Michael Niedermayer
    case PIX_FMT_MONOWHITE:\
957 221b804f Diego Biurrun
        {\
958 e69bd294 Michael Niedermayer
            func_monoblack\
959 221b804f Diego Biurrun
        }\
960
        break;\
961
    case PIX_FMT_YUYV422:\
962
        func2\
963
            ((uint8_t*)dest)[2*i2+0]= Y1;\
964
            ((uint8_t*)dest)[2*i2+1]= U;\
965
            ((uint8_t*)dest)[2*i2+2]= Y2;\
966
            ((uint8_t*)dest)[2*i2+3]= V;\
967
        }                \
968
        break;\
969
    case PIX_FMT_UYVY422:\
970
        func2\
971
            ((uint8_t*)dest)[2*i2+0]= U;\
972
            ((uint8_t*)dest)[2*i2+1]= Y1;\
973
            ((uint8_t*)dest)[2*i2+2]= V;\
974
            ((uint8_t*)dest)[2*i2+3]= Y2;\
975
        }                \
976
        break;\
977 b0880d5d Michael Niedermayer
    case PIX_FMT_GRAY16BE:\
978
        func_g16\
979
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
980
            ((uint8_t*)dest)[2*i2+1]= Y1;\
981
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
982
            ((uint8_t*)dest)[2*i2+3]= Y2;\
983
        }                \
984
        break;\
985
    case PIX_FMT_GRAY16LE:\
986
        func_g16\
987
            ((uint8_t*)dest)[2*i2+0]= Y1;\
988
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
989
            ((uint8_t*)dest)[2*i2+2]= Y2;\
990
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
991
        }                \
992
        break;\
993 221b804f Diego Biurrun
    }\
994 cf7d1c1a Michael Niedermayer
995
996 25593e29 Michael Niedermayer
/**
 * Write one packed output line using the multi-tap ("X") loop macros.
 *
 * The whole body is an expansion of YSCALE_YUV_2_ANYRGB_C, which switches on
 * c->dstFormat. It is instantiated with YSCALE_YUV_2_RGBX_C for the RGB/BGR
 * cases, YSCALE_YUV_2_PACKEDX_C(void,0) for YUYV422/UYVY422,
 * YSCALE_YUV_2_GRAY16_C for GRAY16BE/LE and YSCALE_YUV_2_MONOX_C for
 * MONOBLACK/MONOWHITE.
 *
 * The parameters are not referenced by name in this function; the expanded
 * macros pick them up textually (for example YSCALE_YUV_2_MONOX_C reads
 * lumSrc, lumFilter, lumFilterSize, dstW, y and dest). 'i' is the loop
 * counter those macros expect to find in scope.
 * NOTE(review): chrFilter/chrSrc/chrFilterSize/alpSrc are presumably consumed
 * by YSCALE_YUV_2_PACKEDX_C, which is defined outside this view -- confirm.
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
997 221b804f Diego Biurrun
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
998 6858492e Cédric Schieli
                                  int16_t **alpSrc, uint8_t *dest, int dstW, int y)
999 e3d2500f Michael Niedermayer
{
1000 221b804f Diego Biurrun
    int i;
1001 6858492e Cédric Schieli
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
1002 e3d2500f Michael Niedermayer
}
1003
1004 f0faee4c Michael Niedermayer
/**
 * Write one output line of 24/32-bit RGB via YSCALE_YUV_2_RGBX_FULL_C.
 *
 * Supported c->dstFormat values: ARGB, RGB24, RGBA, ABGR, BGR24, BGRA; any
 * other format reaches assert(0).
 *
 * Each pixel stores R>>22, G>>22, B>>22 into dest[0..2] (order swapped for
 * the BGR family), stores alpha at dest[aidx], then advances dest by 'step'.
 * Alpha is A when CONFIG_SWSCALE_ALPHA && c->alpPixBuf, otherwise 255. With
 * CONFIG_SMALL one loop with a run-time needAlpha flag replaces the two
 * specialised loops.
 *
 * NOTE(review): YSCALE_YUV_2_RGBX_FULL_C is defined outside this view; it
 * presumably opens the per-pixel loop (closed here by '}') and provides R, G,
 * B and A -- confirm there. The filter/source parameters are not referenced
 * by name here and are presumably consumed by that macro.
 */
static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1005
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1006 6858492e Cédric Schieli
                                    int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1007 f0faee4c Michael Niedermayer
{
1008
    int i;
1009
    /* distance in bytes between consecutive output pixels;
       NOTE(review): assumes fmt_depth() returns bits per pixel -- confirm */
    int step= fmt_depth(c->dstFormat)/8;
1010 d616c8ae Michael Niedermayer
    /* offset of the alpha byte relative to dest; 3 for RGBA/BGRA, adjusted by
       the case fall-through below for the other formats */
    int aidx= 3;
1011 f0faee4c Michael Niedermayer
1012
    switch(c->dstFormat){
1013
    case PIX_FMT_ARGB:
1014
        dest++;
1015 a3398feb Cédric Schieli
        aidx= 0;
        /* falls through: the decrement below turns aidx into -1, i.e. the
           byte that dest++ just skipped */
1016 f0faee4c Michael Niedermayer
    case PIX_FMT_RGB24:
1017 d616c8ae Michael Niedermayer
        aidx--;
        /* falls through: for plain RGB24 aidx is now 2, so the alpha store is
           overwritten by the dest[2] store that follows it */
1018 f0faee4c Michael Niedermayer
    case PIX_FMT_RGBA:
1019 6858492e Cédric Schieli
        if (CONFIG_SMALL){
1020
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1021
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1022
                dest[aidx]= needAlpha ? A : 255;
1023
                dest[0]= R>>22;
1024
                dest[1]= G>>22;
1025
                dest[2]= B>>22;
1026
                dest+= step;
1027
            }
1028
        }else{
1029
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1030
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1031
                    dest[aidx]= A;
1032
                    dest[0]= R>>22;
1033
                    dest[1]= G>>22;
1034
                    dest[2]= B>>22;
1035
                    dest+= step;
1036
                }
1037
            }else{
1038
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1039
                    dest[aidx]= 255;
1040
                    dest[0]= R>>22;
1041
                    dest[1]= G>>22;
1042
                    dest[2]= B>>22;
1043
                    dest+= step;
1044
                }
1045
            }
1046 f0faee4c Michael Niedermayer
        }
1047
        break;
1048
    case PIX_FMT_ABGR:
1049
        dest++;
1050 a3398feb Cédric Schieli
        aidx= 0;
        /* falls through: same aidx adjustment as in the ARGB case above */
1051 f0faee4c Michael Niedermayer
    case PIX_FMT_BGR24:
1052 d616c8ae Michael Niedermayer
        aidx--;
        /* falls through: same as RGB24 above, with the R and B stores swapped */
1053 f0faee4c Michael Niedermayer
    case PIX_FMT_BGRA:
1054 6858492e Cédric Schieli
        if (CONFIG_SMALL){
1055
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1056
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1057
                dest[aidx]= needAlpha ? A : 255;
1058
                dest[0]= B>>22;
1059
                dest[1]= G>>22;
1060
                dest[2]= R>>22;
1061
                dest+= step;
1062
            }
1063
        }else{
1064
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1065
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1066
                    dest[aidx]= A;
1067
                    dest[0]= B>>22;
1068
                    dest[1]= G>>22;
1069
                    dest[2]= R>>22;
1070
                    dest+= step;
1071
                }
1072
            }else{
1073
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1074
                    dest[aidx]= 255;
1075
                    dest[0]= B>>22;
1076
                    dest[1]= G>>22;
1077
                    dest[2]= R>>22;
1078
                    dest+= step;
1079
                }
1080
            }
1081 f0faee4c Michael Niedermayer
        }
1082
        break;
1083
    default:
1084
        assert(0);
1085
    }
1086
}
1087 e3d2500f Michael Niedermayer
1088 d4da3e47 Cédric Schieli
/**
 * Fill a band of rows in an 8-bit plane with a constant value.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of consecutive rows
 * @param width  number of bytes set in each row
 * @param height number of rows to fill; nothing is written when <= 0
 * @param y      index of the first row to fill
 * @param val    byte value written
 */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
    uint8_t *row = plane + stride*y;
    int remaining;

    for (remaining = height; remaining > 0; remaining--, row += stride)
        memset(row, val, width);
}
1096
1097 f40c7dbb Diego Biurrun
//Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
1098 7630f2e0 Michael Niedermayer
//Plain C versions
// The C variant is built when MMX is not enabled at compile time, when
// RUNTIME_CPUDETECT is defined, or when CONFIG_GPL is off.
1099 b63f641e Aurelien Jacobs
#if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
1100 726a959a Michael Niedermayer
#define COMPILE_C
1101
#endif
1102
1103 b63f641e Aurelien Jacobs
// On PPC with CONFIG_GPL, AltiVec (or runtime detection) replaces the C
// variant: COMPILE_C is undefined again and COMPILE_ALTIVEC is set instead.
#if ARCH_PPC
1104
#if (HAVE_ALTIVEC || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
1105 dfe44a85 Diego Biurrun
#undef COMPILE_C
1106 a2faa401 Romain Dolbeau
#define COMPILE_ALTIVEC
1107 7a24ec50 Diego Biurrun
#endif
1108 cb82a073 Diego Biurrun
#endif //ARCH_PPC
1109 a2faa401 Romain Dolbeau
1110 b63f641e Aurelien Jacobs
// On x86 each SIMD variant requires CONFIG_GPL. Without RUNTIME_CPUDETECT at
// most one of them is selected, from the compile-time HAVE_* flags; with it
// all three are built.
#if ARCH_X86
1111 726a959a Michael Niedermayer
1112 f4406ec1 Diego Biurrun
#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
1113 726a959a Michael Niedermayer
#define COMPILE_MMX
1114
#endif
1115
1116 b63f641e Aurelien Jacobs
#if (HAVE_MMX2 || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
1117 726a959a Michael Niedermayer
#define COMPILE_MMX2
1118
#endif
1119
1120 f4406ec1 Diego Biurrun
#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && CONFIG_GPL
1121 726a959a Michael Niedermayer
#define COMPILE_3DNOW
1122
#endif
1123 7a24ec50 Diego Biurrun
#endif //ARCH_X86
1124 726a959a Michael Niedermayer
1125
// From here on the configured HAVE_* values are discarded: all four are
// forced to 0 and later redefined per variant before each template inclusion.
#undef HAVE_MMX
1126
#undef HAVE_MMX2
1127 f4406ec1 Diego Biurrun
#undef HAVE_AMD3DNOW
1128 b63f641e Aurelien Jacobs
#undef HAVE_ALTIVEC
1129
#define HAVE_MMX 0
1130
#define HAVE_MMX2 0
1131 f4406ec1 Diego Biurrun
#define HAVE_AMD3DNOW 0
1132 b63f641e Aurelien Jacobs
#define HAVE_ALTIVEC 0
1133 726a959a Michael Niedermayer
1134
// swscale_template.c is included once per enabled variant. Before each
// inclusion RENAME() is redefined to paste a variant suffix onto its argument
// and the HAVE_* macros are set to describe that variant.
// NOTE(review): presumably the template wraps its function names in RENAME()
// so the variants do not collide -- confirm in swscale_template.c.
#ifdef COMPILE_C
1135 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _C
1136
#include "swscale_template.c"
1137 726a959a Michael Niedermayer
#endif
1138 397c035e Michael Niedermayer
1139 a2faa401 Romain Dolbeau
// AltiVec variant: suffix _altivec, HAVE_ALTIVEC forced to 1.
#ifdef COMPILE_ALTIVEC
1140
#undef RENAME
1141 b63f641e Aurelien Jacobs
#undef HAVE_ALTIVEC
1142
#define HAVE_ALTIVEC 1
1143 a2faa401 Romain Dolbeau
#define RENAME(a) a ## _altivec
1144
#include "swscale_template.c"
1145
#endif
1146
1147 b63f641e Aurelien Jacobs
#if ARCH_X86
1148 397c035e Michael Niedermayer
1149 f40c7dbb Diego Biurrun
//x86 versions
// (the generic _X86 variant below is disabled; it is kept inside a comment)
1150 7630f2e0 Michael Niedermayer
/*
1151
#undef RENAME
1152
#undef HAVE_MMX
1153
#undef HAVE_MMX2
1154 f4406ec1 Diego Biurrun
#undef HAVE_AMD3DNOW
1155 7630f2e0 Michael Niedermayer
#define ARCH_X86
1156
#define RENAME(a) a ## _X86
1157
#include "swscale_template.c"
1158 1faf0867 Michael Niedermayer
*/
1159 7630f2e0 Michael Niedermayer
//MMX versions
// suffix _MMX: HAVE_MMX=1, HAVE_MMX2=0, HAVE_AMD3DNOW=0
1160 726a959a Michael Niedermayer
#ifdef COMPILE_MMX
1161 7630f2e0 Michael Niedermayer
#undef RENAME
1162 b63f641e Aurelien Jacobs
#undef HAVE_MMX
1163 7630f2e0 Michael Niedermayer
#undef HAVE_MMX2
1164 f4406ec1 Diego Biurrun
#undef HAVE_AMD3DNOW
1165 b63f641e Aurelien Jacobs
#define HAVE_MMX 1
1166
#define HAVE_MMX2 0
1167 f4406ec1 Diego Biurrun
#define HAVE_AMD3DNOW 0
1168 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _MMX
1169
#include "swscale_template.c"
1170 726a959a Michael Niedermayer
#endif
1171 7630f2e0 Michael Niedermayer
1172
//MMX2 versions
// suffix _MMX2: HAVE_MMX=1, HAVE_MMX2=1, HAVE_AMD3DNOW=0
1173 726a959a Michael Niedermayer
#ifdef COMPILE_MMX2
1174 7630f2e0 Michael Niedermayer
#undef RENAME
1175 b63f641e Aurelien Jacobs
#undef HAVE_MMX
1176
#undef HAVE_MMX2
1177 f4406ec1 Diego Biurrun
#undef HAVE_AMD3DNOW
1178 b63f641e Aurelien Jacobs
#define HAVE_MMX 1
1179
#define HAVE_MMX2 1
1180 f4406ec1 Diego Biurrun
#define HAVE_AMD3DNOW 0
1181 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _MMX2
1182
#include "swscale_template.c"
1183 726a959a Michael Niedermayer
#endif
1184 7630f2e0 Michael Niedermayer
1185
//3DNOW versions
// suffix _3DNow: HAVE_MMX=1, HAVE_MMX2=0, HAVE_AMD3DNOW=1
1186 726a959a Michael Niedermayer
#ifdef COMPILE_3DNOW
1187 7630f2e0 Michael Niedermayer
#undef RENAME
1188 b63f641e Aurelien Jacobs
#undef HAVE_MMX
1189 7630f2e0 Michael Niedermayer
#undef HAVE_MMX2
1190 f4406ec1 Diego Biurrun
#undef HAVE_AMD3DNOW
1191 b63f641e Aurelien Jacobs
#define HAVE_MMX 1
1192
#define HAVE_MMX2 0
1193 f4406ec1 Diego Biurrun
#define HAVE_AMD3DNOW 1
1194 7630f2e0 Michael Niedermayer
#define RENAME(a) a ## _3DNow
1195
#include "swscale_template.c"
1196 726a959a Michael Niedermayer
#endif
1197 7630f2e0 Michael Niedermayer
1198 7a24ec50 Diego Biurrun
#endif //ARCH_X86
1199 7630f2e0 Michael Niedermayer
1200 f40c7dbb Diego Biurrun
// minor note: the HAVE_xyz are messed up after this line so don't use them
1201 d604bab9 Michael Niedermayer
1202 a86c461c Michael Niedermayer
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * On the first segment (dist <= 1.0) the value is
 * a + b*dist + c*dist^2 + d*dist^3, evaluated in Horner form. For larger
 * distances the coefficients are stepped to the next segment (a becomes 0,
 * b/c/d are recombined from the previous b/c/d) and dist is reduced by 1.0,
 * repeatedly, until dist <= 1.0.
 *
 * @param a,b,c,d cubic coefficients of the first segment
 * @param dist    distance at which to evaluate
 * @return the polynomial value on the segment containing dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    /* negated "<=" rather than ">" so the loop condition matches the
       recursive formulation for every input value */
    while (!(dist <= 1.0)) {
        const double next_b =  b + 2.0*c + 3.0*d;
        const double next_c =      c + 3.0*d;
        const double next_d = -b - 3.0*c - 6.0*d;

        a    = 0.0;
        b    = next_b;
        c    = next_c;
        d    = next_d;
        dist = dist - 1.0;
    }

    return ((d*dist + c)*dist + b)*dist + a;
}
1212 6c7506de Michael Niedermayer
1213 bca11e75 Michael Niedermayer
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
1214 221b804f Diego Biurrun
                             int srcW, int dstW, int filterAlign, int one, int flags,
1215
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
1216 28bf81c9 Michael Niedermayer
{
1217 221b804f Diego Biurrun
    int i;
1218
    int filterSize;
1219
    int filter2Size;
1220
    int minFilterSize;
1221 a64a062f Michael Niedermayer
    int64_t *filter=NULL;
1222
    int64_t *filter2=NULL;
1223
    const int64_t fone= 1LL<<54;
1224 091d3bdc Michael Niedermayer
    int ret= -1;
1225 b63f641e Aurelien Jacobs
#if ARCH_X86
1226 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX)
1227 7ad6469e Diego Pettenò
        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
1228 726a959a Michael Niedermayer
#endif
1229 31190492 Arpi
1230 f40c7dbb Diego Biurrun
    // NOTE: the +1 is for the MMX scaler which reads over the end
1231 221b804f Diego Biurrun
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
1232
1233
    if (FFABS(xInc - 0x10000) <10) // unscaled
1234
    {
1235
        int i;
1236
        filterSize= 1;
1237 8588e148 Michael Niedermayer
        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);
1238 221b804f Diego Biurrun
1239
        for (i=0; i<dstW; i++)
1240
        {
1241 a64a062f Michael Niedermayer
            filter[i*filterSize]= fone;
1242 221b804f Diego Biurrun
            (*filterPos)[i]=i;
1243
        }
1244
1245
    }
1246
    else if (flags&SWS_POINT) // lame looking point sampling mode
1247
    {
1248
        int i;
1249
        int xDstInSrc;
1250
        filterSize= 1;
1251 8588e148 Michael Niedermayer
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1252 221b804f Diego Biurrun
1253
        xDstInSrc= xInc/2 - 0x8000;
1254
        for (i=0; i<dstW; i++)
1255
        {
1256
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1257
1258
            (*filterPos)[i]= xx;
1259 a64a062f Michael Niedermayer
            filter[i]= fone;
1260 221b804f Diego Biurrun
            xDstInSrc+= xInc;
1261
        }
1262
    }
1263
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
1264
    {
1265
        int i;
1266
        int xDstInSrc;
1267
        if      (flags&SWS_BICUBIC) filterSize= 4;
1268
        else if (flags&SWS_X      ) filterSize= 4;
1269
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
1270 8588e148 Michael Niedermayer
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1271 221b804f Diego Biurrun
1272
        xDstInSrc= xInc/2 - 0x8000;
1273
        for (i=0; i<dstW; i++)
1274
        {
1275
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1276
            int j;
1277
1278
            (*filterPos)[i]= xx;
1279 f40c7dbb Diego Biurrun
                //bilinear upscale / linear interpolate / area averaging
1280 221b804f Diego Biurrun
                for (j=0; j<filterSize; j++)
1281
                {
1282 a64a062f Michael Niedermayer
                    int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
1283 221b804f Diego Biurrun
                    if (coeff<0) coeff=0;
1284
                    filter[i*filterSize + j]= coeff;
1285
                    xx++;
1286
                }
1287
            xDstInSrc+= xInc;
1288
        }
1289
    }
1290
    else
1291
    {
1292 a64a062f Michael Niedermayer
        int xDstInSrc;
1293
        int sizeFactor;
1294
1295
        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
1296
        else if (flags&SWS_X)            sizeFactor=  8;
1297
        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
1298
        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
1299
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
1300
        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
1301
        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
1302
        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
1303 221b804f Diego Biurrun
        else {
1304 a64a062f Michael Niedermayer
            sizeFactor= 0; //GCC warning killer
1305 fcc402b1 Luca Barbato
            assert(0);
1306 221b804f Diego Biurrun
        }
1307
1308 a64a062f Michael Niedermayer
        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
1309
        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;
1310 221b804f Diego Biurrun
1311
        if (filterSize > srcW-2) filterSize=srcW-2;
1312
1313 8588e148 Michael Niedermayer
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);
1314 221b804f Diego Biurrun
1315 a64a062f Michael Niedermayer
        xDstInSrc= xInc - 0x10000;
1316 221b804f Diego Biurrun
        for (i=0; i<dstW; i++)
1317
        {
1318 a64a062f Michael Niedermayer
            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
1319 221b804f Diego Biurrun
            int j;
1320
            (*filterPos)[i]= xx;
1321
            for (j=0; j<filterSize; j++)
1322
            {
1323 a64a062f Michael Niedermayer
                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
1324
                double floatd;
1325
                int64_t coeff;
1326
1327
                if (xInc > 1<<16)
1328
                    d= d*dstW/srcW;
1329
                floatd= d * (1.0/(1<<30));
1330
1331 221b804f Diego Biurrun
                if (flags & SWS_BICUBIC)
1332
                {
1333 a64a062f Michael Niedermayer
                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
1334
                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
1335
                    int64_t dd = ( d*d)>>30;
1336
                    int64_t ddd= (dd*d)>>30;
1337
1338
                    if      (d < 1LL<<30)
1339
                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
1340
                    else if (d < 1LL<<31)
1341
                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
1342 221b804f Diego Biurrun
                    else
1343
                        coeff=0.0;
1344 a64a062f Michael Niedermayer
                    coeff *= fone>>(30+24);
1345 221b804f Diego Biurrun
                }
1346
/*                else if (flags & SWS_X)
1347
                {
1348
                    double p= param ? param*0.01 : 0.3;
1349
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1350
                    coeff*= pow(2.0, - p*d*d);
1351
                }*/
1352
                else if (flags & SWS_X)
1353
                {
1354
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
1355 a64a062f Michael Niedermayer
                    double c;
1356 221b804f Diego Biurrun
1357 a64a062f Michael Niedermayer
                    if (floatd<1.0)
1358
                        c = cos(floatd*PI);
1359 221b804f Diego Biurrun
                    else
1360 a64a062f Michael Niedermayer
                        c=-1.0;
1361
                    if (c<0.0)      c= -pow(-c, A);
1362
                    else            c=  pow( c, A);
1363
                    coeff= (c*0.5 + 0.5)*fone;
1364 221b804f Diego Biurrun
                }
1365
                else if (flags & SWS_AREA)
1366
                {
1367 a64a062f Michael Niedermayer
                    int64_t d2= d - (1<<29);
1368
                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
1369
                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
1370 221b804f Diego Biurrun
                    else coeff=0.0;
1371 a64a062f Michael Niedermayer
                    coeff *= fone>>(30+16);
1372 221b804f Diego Biurrun
                }
1373
                else if (flags & SWS_GAUSS)
1374
                {
1375
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1376 a64a062f Michael Niedermayer
                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
1377 221b804f Diego Biurrun
                }
1378
                else if (flags & SWS_SINC)
1379
                {
1380 a64a062f Michael Niedermayer
                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
1381 221b804f Diego Biurrun
                }
1382
                else if (flags & SWS_LANCZOS)
1383
                {
1384
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1385 a64a062f Michael Niedermayer
                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
1386
                    if (floatd>p) coeff=0;
1387 221b804f Diego Biurrun
                }
1388
                else if (flags & SWS_BILINEAR)
1389
                {
1390 a64a062f Michael Niedermayer
                    coeff= (1<<30) - d;
1391 221b804f Diego Biurrun
                    if (coeff<0) coeff=0;
1392 a64a062f Michael Niedermayer
                    coeff *= fone >> 30;
1393 221b804f Diego Biurrun
                }
1394
                else if (flags & SWS_SPLINE)
1395
                {
1396
                    double p=-2.196152422706632;
1397 f830d824 Michael Niedermayer
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
1398 221b804f Diego Biurrun
                }
1399
                else {
1400
                    coeff= 0.0; //GCC warning killer
1401 fcc402b1 Luca Barbato
                    assert(0);
1402 221b804f Diego Biurrun
                }
1403
1404
                filter[i*filterSize + j]= coeff;
1405
                xx++;
1406
            }
1407 a64a062f Michael Niedermayer
            xDstInSrc+= 2*xInc;
1408 221b804f Diego Biurrun
        }
1409
    }
1410
1411
    /* apply src & dst Filter to filter -> filter2
1412
       av_free(filter);
1413
    */
1414 fcc402b1 Luca Barbato
    assert(filterSize>0);
1415 221b804f Diego Biurrun
    filter2Size= filterSize;
1416
    if (srcFilter) filter2Size+= srcFilter->length - 1;
1417
    if (dstFilter) filter2Size+= dstFilter->length - 1;
1418 fcc402b1 Luca Barbato
    assert(filter2Size>0);
1419 a64a062f Michael Niedermayer
    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));
1420 221b804f Diego Biurrun
1421
    for (i=0; i<dstW; i++)
1422
    {
1423 a64a062f Michael Niedermayer
        int j, k;
1424 221b804f Diego Biurrun
1425 a64a062f Michael Niedermayer
        if(srcFilter){
1426
            for (k=0; k<srcFilter->length; k++){
1427
                for (j=0; j<filterSize; j++)
1428
                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
1429
            }
1430
        }else{
1431
            for (j=0; j<filterSize; j++)
1432
                filter2[i*filter2Size + j]= filter[i*filterSize + j];
1433 221b804f Diego Biurrun
        }
1434 a64a062f Michael Niedermayer
        //FIXME dstFilter
1435 221b804f Diego Biurrun
1436
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1437
    }
1438 47b7382d Michael Niedermayer
    av_freep(&filter);
1439 221b804f Diego Biurrun
1440
    /* try to reduce the filter-size (step1 find size and shift left) */
1441 86bdf3fd Diego Biurrun
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
1442 221b804f Diego Biurrun
    minFilterSize= 0;
1443
    for (i=dstW-1; i>=0; i--)
1444
    {
1445
        int min= filter2Size;
1446
        int j;
1447 a64a062f Michael Niedermayer
        int64_t cutOff=0.0;
1448 221b804f Diego Biurrun
1449
        /* get rid of near-zero elements on the left by shifting left */
1450
        for (j=0; j<filter2Size; j++)
1451
        {
1452
            int k;
1453
            cutOff += FFABS(filter2[i*filter2Size]);
1454
1455 a64a062f Michael Niedermayer
            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1456 221b804f Diego Biurrun
1457 86bdf3fd Diego Biurrun
            /* preserve monotonicity because the core can't handle the filter otherwise */
1458 221b804f Diego Biurrun
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1459
1460 f40c7dbb Diego Biurrun
            // move filter coefficients left
1461 221b804f Diego Biurrun
            for (k=1; k<filter2Size; k++)
1462
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1463 a64a062f Michael Niedermayer
            filter2[i*filter2Size + k - 1]= 0;
1464 221b804f Diego Biurrun
            (*filterPos)[i]++;
1465
        }
1466
1467 a64a062f Michael Niedermayer
        cutOff=0;
1468 221b804f Diego Biurrun
        /* count near zeros on the right */
1469
        for (j=filter2Size-1; j>0; j--)
1470
        {
1471
            cutOff += FFABS(filter2[i*filter2Size + j]);
1472
1473 a64a062f Michael Niedermayer
            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1474 221b804f Diego Biurrun
            min--;
1475
        }
1476
1477
        if (min>minFilterSize) minFilterSize= min;
1478
    }
1479
1480
    if (flags & SWS_CPU_CAPS_ALTIVEC) {
1481
        // we can handle the special case 4,
1482
        // so we don't want to go to the full 8
1483
        if (minFilterSize < 5)
1484 8c266f0c Romain Dolbeau
            filterAlign = 4;
1485
1486 f40c7dbb Diego Biurrun
        // We really don't want to waste our time
1487
        // doing useless computation, so fall back on
1488
        // the scalar C code for very small filters.
1489
        // Vectorizing is worth it only if you have a
1490 221b804f Diego Biurrun
        // decent-sized vector.
1491
        if (minFilterSize < 3)
1492 8c266f0c Romain Dolbeau
            filterAlign = 1;
1493 221b804f Diego Biurrun
    }
1494
1495
    if (flags & SWS_CPU_CAPS_MMX) {
1496
        // special case for unscaled vertical filtering
1497
        if (minFilterSize == 1 && filterAlign == 2)
1498
            filterAlign= 1;
1499
    }
1500
1501 fcc402b1 Luca Barbato
    assert(minFilterSize > 0);
1502 221b804f Diego Biurrun
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
1503 fcc402b1 Luca Barbato
    assert(filterSize > 0);
1504 8588e148 Michael Niedermayer
    filter= av_malloc(filterSize*dstW*sizeof(*filter));
1505 1625216e Michael Niedermayer
    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
1506 091d3bdc Michael Niedermayer
        goto error;
1507 221b804f Diego Biurrun
    *outFilterSize= filterSize;
1508
1509
    if (flags&SWS_PRINT_INFO)
1510
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1511
    /* try to reduce the filter-size (step2 reduce it) */
1512
    for (i=0; i<dstW; i++)
1513
    {
1514
        int j;
1515
1516
        for (j=0; j<filterSize; j++)
1517
        {
1518 a64a062f Michael Niedermayer
            if (j>=filter2Size) filter[i*filterSize + j]= 0;
1519 221b804f Diego Biurrun
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
1520 88bc5a64 Michael Niedermayer
            if((flags & SWS_BITEXACT) && j>=minFilterSize)
1521 a64a062f Michael Niedermayer
                filter[i*filterSize + j]= 0;
1522 8c266f0c Romain Dolbeau
        }
1523 221b804f Diego Biurrun
    }
1524
1525
1526 f40c7dbb Diego Biurrun
    //FIXME try to align filterPos if possible
1527 8c266f0c Romain Dolbeau
1528 221b804f Diego Biurrun
    //fix borders
1529
    for (i=0; i<dstW; i++)
1530
    {
1531
        int j;
1532
        if ((*filterPos)[i] < 0)
1533
        {
1534 f40c7dbb Diego Biurrun
            // move filter coefficients left to compensate for filterPos
1535 221b804f Diego Biurrun
            for (j=1; j<filterSize; j++)
1536
            {
1537
                int left= FFMAX(j + (*filterPos)[i], 0);
1538
                filter[i*filterSize + left] += filter[i*filterSize + j];
1539
                filter[i*filterSize + j]=0;
1540
            }
1541
            (*filterPos)[i]= 0;
1542 bca11e75 Michael Niedermayer
        }
1543
1544 221b804f Diego Biurrun
        if ((*filterPos)[i] + filterSize > srcW)
1545
        {
1546
            int shift= (*filterPos)[i] + filterSize - srcW;
1547 f40c7dbb Diego Biurrun
            // move filter coefficients right to compensate for filterPos
1548 221b804f Diego Biurrun
            for (j=filterSize-2; j>=0; j--)
1549
            {
1550
                int right= FFMIN(j + shift, filterSize-1);
1551
                filter[i*filterSize +right] += filter[i*filterSize +j];
1552
                filter[i*filterSize +j]=0;
1553
            }
1554
            (*filterPos)[i]= srcW - filterSize;
1555
        }
1556
    }
1557
1558 f40c7dbb Diego Biurrun
    // Note the +1 is for the MMX scaler which reads over the end
1559 221b804f Diego Biurrun
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1560
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1561
1562 f40c7dbb Diego Biurrun
    /* normalize & store in outFilter */
1563 221b804f Diego Biurrun
    for (i=0; i<dstW; i++)
1564
    {
1565
        int j;
1566 a64a062f Michael Niedermayer
        int64_t error=0;
1567
        int64_t sum=0;
1568 221b804f Diego Biurrun
1569
        for (j=0; j<filterSize; j++)
1570
        {
1571
            sum+= filter[i*filterSize + j];
1572
        }
1573 a64a062f Michael Niedermayer
        sum= (sum + one/2)/ one;
1574 221b804f Diego Biurrun
        for (j=0; j<*outFilterSize; j++)
1575
        {
1576 a64a062f Michael Niedermayer
            int64_t v= filter[i*filterSize + j] + error;
1577
            int intV= ROUNDED_DIV(v, sum);
1578 221b804f Diego Biurrun
            (*outFilter)[i*(*outFilterSize) + j]= intV;
1579 a64a062f Michael Niedermayer
            error= v - intV*sum;
1580 221b804f Diego Biurrun
        }
1581
    }
1582
1583
    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1584
    for (i=0; i<*outFilterSize; i++)
1585
    {
1586
        int j= dstW*(*outFilterSize);
1587
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1588
    }
1589
1590 091d3bdc Michael Niedermayer
    ret=0;
1591
error:
1592 221b804f Diego Biurrun
    av_free(filter);
1593 091d3bdc Michael Niedermayer
    av_free(filter2);
1594
    return ret;
1595 7630f2e0 Michael Niedermayer
}
1596 31190492 Arpi
1597 17c613ef Uoti Urpala
#ifdef COMPILE_MMX2
1598 b7dc6f66 Michael Niedermayer
/**
 * Generate the MMX2 horizontal scaling code into funnyCode.
 *
 * Two machine-code templates ("fragments") are captured from the inline asm
 * statements below. For every group of four output pixels one template is
 * copied into funnyCode and the immediate bytes of its two pshufw
 * instructions are patched so that each of the four output lanes picks the
 * right source byte.
 *
 * @param dstW       destination width; dstW/numSplits output pixels are processed
 * @param xInc       source step per output pixel; xpos>>16 is used as the
 *                   integer source position and xpos&0xFFFF as the fraction
 * @param funnyCode  buffer receiving the generated code (its size is not
 *                   checked here)
 * @param filter     receives one 7-bit weight per output pixel
 * @param filterPos  receives the source start position of each group of four
 *                   pixels at index i/2, plus one trailing entry
 * @param numSplits  divisor applied to dstW (see the loop bound)
 */
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1599 28bf81c9 Michael Niedermayer
{
1600 221b804f Diego Biurrun
    uint8_t *fragmentA;
1601 d0ce212a Ramiro Polla
    x86_reg imm8OfPShufW1A;
1602
    x86_reg imm8OfPShufW2A;
1603
    x86_reg fragmentLengthA;
1604 221b804f Diego Biurrun
    uint8_t *fragmentB;
1605 d0ce212a Ramiro Polla
    x86_reg imm8OfPShufW1B;
1606
    x86_reg imm8OfPShufW2B;
1607
    x86_reg fragmentLengthB;
1608 221b804f Diego Biurrun
    int fragmentPos;
1609
1610
    int xpos, i;
1611
1612
    // create an optimized horizontal scaling routine
1613
1614
    //code fragment
1615
    /* Template A: the code between labels 0 and 9 is never executed here (the
       leading jmp skips it); it only serves as a byte template. It loads source
       bytes with two movd instructions, at offset 0 (mm0) and offset 1 (mm1),
       so the "right neighbour" samples come from a second load. */
1616 7ad6469e Diego Pettenò
    __asm__ volatile(
1617 221b804f Diego Biurrun
        "jmp                         9f                 \n\t"
1618
    // Begin
1619
        "0:                                             \n\t"
1620
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1621
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1622
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1623
        "punpcklbw                %%mm7, %%mm1          \n\t"
1624
        "punpcklbw                %%mm7, %%mm0          \n\t"
1625
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1626
        "1:                                             \n\t"
1627
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1628
        "2:                                             \n\t"
1629
        "psubw                    %%mm1, %%mm0          \n\t"
1630
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1631
        "pmullw                   %%mm3, %%mm0          \n\t"
1632
        "psllw                       $7, %%mm1          \n\t"
1633
        "paddw                    %%mm1, %%mm0          \n\t"
1634
1635
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1636
1637
        "add                         $8, %%"REG_a"      \n\t"
1638
    // End
1639
        "9:                                             \n\t"
1640
//        "int $3                                         \n\t"
    /* %0 = address of label 0 (start of the template).
       %1 / %2 = (address of label 1 / 2) - 1 - start, i.e. the offset of the
       byte just before each label, which is the last byte of the preceding
       pshufw (its imm8, going by the variable names).
       %3 = address of label 9 - start = template length in bytes. */
1641 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1642
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1643
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1644 221b804f Diego Biurrun
        "dec                         %1                 \n\t"
1645
        "dec                         %2                 \n\t"
1646
        "sub                         %0, %1             \n\t"
1647
        "sub                         %0, %2             \n\t"
1648 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1649 221b804f Diego Biurrun
        "sub                         %0, %3             \n\t"
1650
1651
1652
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1653
        "=r" (fragmentLengthA)
1654
    );
1655
    /* Template B: same arithmetic as template A, but with a single movd; mm1
       is produced by shuffling mm0, so both the left and the right samples
       come from the same four loaded bytes. Offsets and length are extracted
       exactly as for template A. */
1656 7ad6469e Diego Pettenò
    __asm__ volatile(
1657 221b804f Diego Biurrun
        "jmp                         9f                 \n\t"
1658
    // Begin
1659
        "0:                                             \n\t"
1660
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1661
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1662
        "punpcklbw                %%mm7, %%mm0          \n\t"
1663
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1664
        "1:                                             \n\t"
1665
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1666
        "2:                                             \n\t"
1667
        "psubw                    %%mm1, %%mm0          \n\t"
1668
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1669
        "pmullw                   %%mm3, %%mm0          \n\t"
1670
        "psllw                       $7, %%mm1          \n\t"
1671
        "paddw                    %%mm1, %%mm0          \n\t"
1672
1673
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1674
1675
        "add                         $8, %%"REG_a"      \n\t"
1676
    // End
1677
        "9:                                             \n\t"
1678
//        "int                       $3                   \n\t"
1679 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
1680
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
1681
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
1682 221b804f Diego Biurrun
        "dec                         %1                 \n\t"
1683
        "dec                         %2                 \n\t"
1684
        "sub                         %0, %1             \n\t"
1685
        "sub                         %0, %2             \n\t"
1686 86593486 Alexander Strange
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
1687 221b804f Diego Biurrun
        "sub                         %0, %3             \n\t"
1688
1689
1690
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1691
        "=r" (fragmentLengthB)
1692
    );
1693
1694
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1695
    fragmentPos=0;
1696
1697
    for (i=0; i<dstW/numSplits; i++)
1698
    {
        /* integer part of the current source position */
1699
        int xx=xpos>>16;
1700
        /* one code fragment is emitted per group of four output pixels */
1701
        if ((i&3) == 0)
1702
        {
            /* integer source offsets of output pixels i..i+3, relative to xx */
1703
            int a=0;
1704
            int b=((xpos+xInc)>>16) - xx;
1705
            int c=((xpos+xInc*2)>>16) - xx;
1706
            int d=((xpos+xInc*3)>>16) - xx;
1707
            /* weights: inverted 16-bit fraction of each position, reduced to 7 bits */
1708
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1709
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1710
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1711
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1712
            filterPos[i/2]= xx;
1713
            /* offsets up to d+1 all lie within 0..3, so every selector fits
               the 2-bit fields of one pshufw: use template B */
1714
            if (d+1<4)
1715
            {
1716
                int maxShift= 3-(d+1);
1717
                int shift=0;
1718
1719
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1720
                /* first shuffle selects offsets+1 (goes to mm1), second selects
                   the offsets themselves (goes to mm0); two bits per lane */
1721
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1722
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1723
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1724
                    a | (b<<2) | (c<<4) | (d<<6);
1725
1726
                if (i+3>=dstW) shift=maxShift; //avoid overread
1727
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1728
                /* start the load `shift` bytes earlier and compensate by adding
                   `shift` to each 2-bit selector (0x55 = 01 in all four fields) */
1729
                if (shift && i>=shift)
1730
                {
1731
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1732
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1733
                    filterPos[i/2]-=shift;
1734
                }
1735
1736
                fragmentPos+= fragmentLengthB;
1737
            }
1738
            else
1739
            {
                /* d == 3: offset d+1 no longer fits a 2-bit selector, so use
                   template A, whose second load at +1 supplies the neighbours */
1740
                int maxShift= 3-d;
1741
                int shift=0;
1742
1743
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1744
                /* both shuffles use the same selectors in template A */
1745
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1746
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1747
                    a | (b<<2) | (c<<4) | (d<<6);
1748
1749
                if (i+4>=dstW) shift=maxShift; //avoid overread
1750
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1751
1752
                if (shift && i>=shift)
1753
                {
1754
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1755
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1756
                    filterPos[i/2]-=shift;
1757
                }
1758
1759
                fragmentPos+= fragmentLengthA;
1760
            }
1761
            /* terminate the code emitted so far; fragmentPos is not advanced,
               so the next fragment (if any) overwrites this byte.
               RET is defined elsewhere -- presumably the x86 ret opcode. */
1762
            funnyCode[fragmentPos]= RET;
1763
        }
1764
        xpos+=xInc;
1765
    }
1766 c662e788 Cédric Schieli
    filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part
1767 28bf81c9 Michael Niedermayer
}
1768 17c613ef Uoti Urpala
#endif /* COMPILE_MMX2 */
1769 28bf81c9 Michael Niedermayer
1770 9b2283cc Stefan Huehner
static void globalInit(void){
1771 31190492 Arpi
    // generating tables:
1772
    int i;
1773 221b804f Diego Biurrun
    for (i=0; i<768; i++){
1774
        int c= av_clip_uint8(i-256);
1775
        clip_table[i]=c;
1776 b18ea156 Michael Niedermayer
    }
1777 516b1f82 Michael Niedermayer
}
1778 c1b0bfb4 Michael Niedermayer
1779 516b1f82 Michael Niedermayer
/**
 * Select the scaler entry point.
 *
 * With RUNTIME_CPUDETECT and CONFIG_GPL the choice is made at run time from
 * the SWS_CPU_CAPS_* bits in flags; otherwise it is fixed at compile time by
 * the HAVE_* configuration macros and flags is not consulted.
 *
 * @param flags  scaler flags; only the SWS_CPU_CAPS_* bits are examined
 * @return one of the swScale_* functions, swScale_C being the fallback
 */
static SwsFunc getSwsFunc(int flags){
1780 6a4970ab Diego Biurrun
1781 b63f641e Aurelien Jacobs
#if defined(RUNTIME_CPUDETECT) && CONFIG_GPL
1782
#if ARCH_X86
1783 c14731d8 Reimar Döffinger
    // ordered per speed fastest first
1784 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX2)
1785
        return swScale_MMX2;
1786
    else if (flags & SWS_CPU_CAPS_3DNOW)
1787
        return swScale_3DNow;
1788
    else if (flags & SWS_CPU_CAPS_MMX)
1789
        return swScale_MMX;
1790
    else
1791
        return swScale_C;
1792 28bf81c9 Michael Niedermayer
1793
#else
1794 b63f641e Aurelien Jacobs
#if ARCH_PPC
1795 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1796
        return swScale_altivec;
1797
    else
1798
        return swScale_C;
1799 a2faa401 Romain Dolbeau
#endif
    /* the only return when neither ARCH_X86 nor ARCH_PPC is set; on PPC both
       branches above have already returned, so this line is never reached there */
1800 221b804f Diego Biurrun
    return swScale_C;
1801 b63f641e Aurelien Jacobs
#endif /* ARCH_X86 */
1802 28bf81c9 Michael Niedermayer
#else //RUNTIME_CPUDETECT
    /* compile-time selection: the first HAVE_* macro that is set wins */
1803 b63f641e Aurelien Jacobs
#if   HAVE_MMX2
1804 221b804f Diego Biurrun
    return swScale_MMX2;
1805 f4406ec1 Diego Biurrun
#elif HAVE_AMD3DNOW
1806 221b804f Diego Biurrun
    return swScale_3DNow;
1807 b63f641e Aurelien Jacobs
#elif HAVE_MMX
1808 221b804f Diego Biurrun
    return swScale_MMX;
1809 b63f641e Aurelien Jacobs
#elif HAVE_ALTIVEC
1810 221b804f Diego Biurrun
    return swScale_altivec;
1811 28bf81c9 Michael Niedermayer
#else
1812 221b804f Diego Biurrun
    return swScale_C;
1813 28bf81c9 Michael Niedermayer
#endif
1814
#endif //!RUNTIME_CPUDETECT
1815 31190492 Arpi
}
1816 7630f2e0 Michael Niedermayer
1817 d4e24275 Michael Niedermayer
/**
 * Unscaled planar YUV -> NV12/NV21 conversion of one slice.
 *
 * The luma plane is copied as-is; source planes 1 and 2 are byte-interleaved
 * into destination plane 1 (order swapped when the destination is not
 * PIX_FMT_NV12).
 *
 * @param c          scaler context (srcW and dstFormat are read)
 * @param src        source planes
 * @param srcStride  source strides
 * @param srcSliceY  first source row of the slice
 * @param srcSliceH  number of rows in the slice
 * @param dstParam   destination planes
 * @param dstStride  destination strides
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* Interleave chroma into plane 1. The rows of that plane are spaced by
       dstStride[1] (the same stride used to compute dst just below), so that
       is the stride handed to interleaveBytes(); the luma stride would only
       be correct when both strides happen to be equal. */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);

    return srcSliceH;
}
1843
1844 d4e24275 Michael Niedermayer
/**
 * Unscaled planar -> packed YUY2 slice converter: forwards the three source
 * planes to yv12toyuy2(), writing from row srcSliceY of dstParam[0].
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int packedStride = dstStride[0];
    uint8_t *const firstRow = dstParam[0] + packedStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], firstRow,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], packedStride);

    return srcSliceH;
}
1852
1853 caeaabe7 Alex Beregszaszi
/**
 * Unscaled planar -> packed UYVY slice converter: forwards the three source
 * planes to yv12touyvy(), writing from row srcSliceY of dstParam[0].
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int packedStride = dstStride[0];
    uint8_t *const firstRow = dstParam[0] + packedStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], firstRow,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], packedStride);

    return srcSliceH;
}
1861
1862 a6100f39 Baptiste Coudurier
/**
 * Unscaled planar 4:2:2 -> packed YUY2 slice converter: forwards the three
 * source planes to yuv422ptoyuy2(), writing from row srcSliceY of dstParam[0].
 *
 * @return srcSliceH
 */
static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int packedStride = dstStride[0];
    uint8_t *const firstRow = dstParam[0] + packedStride*srcSliceY;

    yuv422ptoyuy2(src[0], src[1], src[2], firstRow,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], packedStride);

    return srcSliceH;
}
1870
1871
/**
 * Unscaled planar 4:2:2 -> packed UYVY slice converter: forwards the three
 * source planes to yuv422ptouyvy(), writing from row srcSliceY of dstParam[0].
 *
 * @return srcSliceH
 */
static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int packedStride = dstStride[0];
    uint8_t *const firstRow = dstParam[0] + packedStride*srcSliceY;

    yuv422ptouyvy(src[0], src[1], src[2], firstRow,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], packedStride);

    return srcSliceH;
}
1879
1880 0411072e Michael Niedermayer
/**
 * Unscaled packed YUYV -> planar slice converter built on yuyvtoyuv420().
 * Luma output starts at row srcSliceY of plane 0; the two chroma outputs
 * start at byte offset stride*srcSliceY/2 of planes 1 and 2.
 *
 * @return srcSliceH
 */
static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const uOut    = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const vOut    = dstParam[2] + dstStride[2]*srcSliceY/2;

    yuyvtoyuv420(lumaOut, uOut, vOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1890
1891
/**
 * Unscaled packed YUYV -> planar slice converter built on yuyvtoyuv422().
 * All three output planes start at row srcSliceY (chroma is not vertically
 * subsampled here).
 *
 * @return srcSliceH
 */
static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const uOut    = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const vOut    = dstParam[2] + dstStride[2]*srcSliceY;

    yuyvtoyuv422(lumaOut, uOut, vOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1901
1902
/**
 * Unscaled packed UYVY -> planar slice converter built on uyvytoyuv420().
 * Luma output starts at row srcSliceY of plane 0; the two chroma outputs
 * start at byte offset stride*srcSliceY/2 of planes 1 and 2.
 *
 * @return srcSliceH
 */
static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const uOut    = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const vOut    = dstParam[2] + dstStride[2]*srcSliceY/2;

    uyvytoyuv420(lumaOut, uOut, vOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1912
1913
/**
 * Unscaled packed UYVY -> planar slice converter built on uyvytoyuv422().
 * All three output planes start at row srcSliceY (chroma is not vertically
 * subsampled here).
 *
 * @return srcSliceH
 */
static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const uOut    = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const vOut    = dstParam[2] + dstStride[2]*srcSliceY;

    uyvytoyuv422(lumaOut, uOut, vOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1923
1924 49004617 Vitor Sessak
/**
 * Unscaled paletted -> packed RGB/BGR (24 or 32 bit) conversion of one slice.
 *
 * Each source row is expanded through c->pal_rgb by the converter selected
 * from the destination format.
 *
 * @param c          scaler context (srcFormat, dstFormat, srcW, pal_rgb are read)
 * @param src        source planes; only src[0] is used
 * @param srcStride  source strides
 * @param srcSliceY  first row of the slice, used to position the output
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes; only dst[0] is written
 * @param dstStride  destination strides
 * @return srcSliceH; when the destination format has no converter an error
 *         is logged and nothing is written
 */
static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
    uint8_t *srcPtr= src[0];

    if (!usePal(srcFormat))
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    switch(dstFormat){
    case PIX_FMT_RGB32  : conv = palette8topacked32; break;
    case PIX_FMT_BGR32  : conv = palette8topacked32; break;
    case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB24  : conv = palette8topacked24; break;
    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
    }

    /* No converter for this destination format (already logged above):
       calling through the NULL pointer would crash, so skip the copy, as
       rgb2rgbWrapper does in the same situation. */
    if (!conv)
        return srcSliceH;

    for (i=0; i<srcSliceH; i++) {
        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }

    return srcSliceH;
}
1958
1959 9990e426 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
1960 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1961 221b804f Diego Biurrun
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1962 58e4b706 Carl Eugen Hoyos
    const enum PixelFormat srcFormat= c->srcFormat;
1963
    const enum PixelFormat dstFormat= c->dstFormat;
1964 221b804f Diego Biurrun
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1965
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1966
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1967
    const int dstId= fmt_depth(dstFormat) >> 2;
1968
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1969
1970
    /* BGR -> BGR */
1971
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
1972
       || (isRGB(srcFormat) && isRGB(dstFormat))){
1973
        switch(srcId | (dstId<<4)){
1974
        case 0x34: conv= rgb16to15; break;
1975
        case 0x36: conv= rgb24to15; break;
1976
        case 0x38: conv= rgb32to15; break;
1977
        case 0x43: conv= rgb15to16; break;
1978
        case 0x46: conv= rgb24to16; break;
1979
        case 0x48: conv= rgb32to16; break;
1980
        case 0x63: conv= rgb15to24; break;
1981
        case 0x64: conv= rgb16to24; break;
1982
        case 0x68: conv= rgb32to24; break;
1983
        case 0x83: conv= rgb15to32; break;
1984
        case 0x84: conv= rgb16to32; break;
1985
        case 0x86: conv= rgb24to32; break;
1986 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
1987 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1988
        }
1989
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
1990
             || (isRGB(srcFormat) && isBGR(dstFormat))){
1991
        switch(srcId | (dstId<<4)){
1992
        case 0x33: conv= rgb15tobgr15; break;
1993
        case 0x34: conv= rgb16tobgr15; break;
1994
        case 0x36: conv= rgb24tobgr15; break;
1995
        case 0x38: conv= rgb32tobgr15; break;
1996
        case 0x43: conv= rgb15tobgr16; break;
1997
        case 0x44: conv= rgb16tobgr16; break;
1998
        case 0x46: conv= rgb24tobgr16; break;
1999
        case 0x48: conv= rgb32tobgr16; break;
2000
        case 0x63: conv= rgb15tobgr24; break;
2001
        case 0x64: conv= rgb16tobgr24; break;
2002
        case 0x66: conv= rgb24tobgr24; break;
2003
        case 0x68: conv= rgb32tobgr24; break;
2004
        case 0x83: conv= rgb15tobgr32; break;
2005
        case 0x84: conv= rgb16tobgr32; break;
2006
        case 0x86: conv= rgb24tobgr32; break;
2007
        case 0x88: conv= rgb32tobgr32; break;
2008 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2009 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
2010
        }
2011
    }else{
2012 3f0bc115 Diego Biurrun
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2013 221b804f Diego Biurrun
               sws_format_name(srcFormat), sws_format_name(dstFormat));
2014
    }
2015
2016 068b0f4f Benoit Fouet
    if(conv)
2017
    {
2018 9990e426 Michael Niedermayer
        uint8_t *srcPtr= src[0];
2019
        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
2020
            srcPtr += ALT32_CORR;
2021
2022 5efaf000 Peter Schlaile
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
2023 9990e426 Michael Niedermayer
            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
2024 c4ca31d0 Benoit Fouet
        else
2025 221b804f Diego Biurrun
        {
2026 c4ca31d0 Benoit Fouet
            int i;
2027
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2028
2029
            for (i=0; i<srcSliceH; i++)
2030
            {
2031
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
2032
                srcPtr+= srcStride[0];
2033
                dstPtr+= dstStride[0];
2034
            }
2035 221b804f Diego Biurrun
        }
2036
    }
2037
    return srcSliceH;
2038 0d9f3d85 Arpi
}
2039
2040 d4e24275 Michael Niedermayer
/**
 * Unscaled 24-bit packed -> planar slice converter built on rgb24toyv12().
 * Plane 0 output starts at row srcSliceY; planes 1 and 2 start at row
 * srcSliceY>>1.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int chromaRow = srcSliceY>>1;
    uint8_t *const plane0 = dst[0] + srcSliceY*dstStride[0];
    uint8_t *const plane1 = dst[1] + chromaRow*dstStride[1];
    uint8_t *const plane2 = dst[2] + chromaRow*dstStride[2];

    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
2052
2053 d4e24275 Michael Niedermayer
/**
 * Unscaled YVU9 -> YV12-family conversion of one slice.
 *
 * Plane 0 is copied unchanged; source planes 1 and 2 are passed through
 * planar2x(). For PIX_FMT_YUV420P / PIX_FMT_YUVA420P destinations the chroma
 * planes keep their index; for any other destination they are exchanged.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    uint8_t *lumaIn  = src[0];
    uint8_t *lumaOut = dst[0] + srcSliceY*dstStride[0];
    int keepOrder, first, second;

    /* copy Y */
    if (srcStride[0] == dstStride[0] && srcStride[0] > 0) {
        memcpy(lumaOut, lumaIn, srcStride[0]*srcSliceH);
    } else {
        int row;

        for (row = 0; row < srcSliceH; row++) {
            memcpy(lumaOut, lumaIn, c->srcW);
            lumaIn  += srcStride[0];
            lumaOut += dstStride[0];
        }
    }

    /* destination plane indices receiving source planes 1 and 2 */
    keepOrder = c->dstFormat == PIX_FMT_YUV420P || c->dstFormat == PIX_FMT_YUVA420P;
    first     = keepOrder ? 1 : 2;
    second    = keepOrder ? 2 : 1;

    planar2x(src[1], dst[first],  c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[first]);
    planar2x(src[2], dst[second], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[second]);

    return srcSliceH;
}
2081
2082 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
2083 2d35ae56 Luca Barbato
static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2084 bc5a0444 Luca Barbato
                      int srcSliceH, uint8_t* dst[], int dstStride[])
2085
{
2086
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
2087
        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
2088
    else
2089
    {
2090
        int i;
2091
        uint8_t *srcPtr= src[0];
2092
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2093
        int length=0;
2094 221b804f Diego Biurrun
2095 bc5a0444 Luca Barbato
        /* universal length finder */
2096
        while(length+c->srcW <= FFABS(dstStride[0])
2097
           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
2098 fcc402b1 Luca Barbato
        assert(length!=0);
2099 2d35ae56 Luca Barbato
2100 bc5a0444 Luca Barbato
        for (i=0; i<srcSliceH; i++)
2101 221b804f Diego Biurrun
        {
2102 bc5a0444 Luca Barbato
            memcpy(dstPtr, srcPtr, length);
2103
            srcPtr+= srcStride[0];
2104
            dstPtr+= dstStride[0];
2105 221b804f Diego Biurrun
        }
2106 bc5a0444 Luca Barbato
    }
2107 2d35ae56 Luca Barbato
    return srcSliceH;
2108
}
2109 bc5a0444 Luca Barbato
2110 2d35ae56 Luca Barbato
/**
 * Unscaled plane-by-plane copy of a slice for planar (YUV or gray) formats.
 *
 * All four plane slots are visited. Planes 0 and 3 use the full slice
 * geometry, planes 1 and 2 use the geometry reduced by the destination chroma
 * subsampling shifts (rounded toward +inf). For each plane:
 *  - if the destination has no such plane (dst[plane] is NULL) it is skipped,
 *  - if only the source lacks it, the destination is filled with a constant
 *    (255 for plane 3, 128 for the others),
 *  - otherwise the rows are copied, with a single memcpy() when both strides
 *    are equal and positive.
 *
 * @return srcSliceH
 */
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int plane;
    for (plane=0; plane<4; plane++)
    {
        int length= (plane==0 || plane==3) ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
        int y=      (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
        int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);

        /* nothing to write: without this check the copy branch below would
           memcpy() into a NULL destination plane */
        if (!dst[plane])
            continue;

        if (!src[plane])
            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
        else
        {
            if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
            else
            {
                int i;
                uint8_t *srcPtr= src[plane];
                uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
                for (i=0; i<height; i++)
                {
                    memcpy(dstPtr, srcPtr, length);
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            }
        }
    }
    return srcSliceH;
}
2142 28bf81c9 Michael Niedermayer
2143 4884b9e5 Kostya Shishkov
/**
 * Unscaled 16-bit gray -> 8-bit luma conversion of one slice.
 *
 * One byte of every 16-bit source sample is kept: the byte at offset 1 for
 * PIX_FMT_GRAY16LE, the byte at offset 0 otherwise. If the destination is
 * not a gray format, planes 1 and 2 are filled with 128 for the rows that
 * correspond to this slice.
 *
 * @param srcSliceY  first row of the slice
 * @param srcSliceH  number of rows in the slice
 * @return srcSliceH
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* chroma geometry of this slice, rounded toward +inf */
        int chrLength= -((-c->srcW  )>>c->chrDstHSubSample);
        int chrY=      -((-srcSliceY)>>c->chrDstVSubSample);
        int chrHeight= -((-srcSliceH)>>c->chrDstVSubSample);

        /* fill row by row at the slice position; a single memset of
           stride*height would ignore srcSliceY and break on negative strides */
        for (i=0; i<chrHeight; i++)
        {
            memset(dst[1] + dstStride[1]*(chrY+i), 128, chrLength);
            memset(dst[2] + dstStride[2]*(chrY+i), 128, chrLength);
        }
    }
    /* little endian input: step to the second byte of each sample */
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
2167
2168
/**
 * Unscaled 8-bit gray/luma -> 16-bit gray conversion of one slice.
 *
 * Every source byte of plane 0 is written twice in a row, so both bytes of
 * the resulting 16-bit sample carry the same value.
 *
 * @param c          scaler context, only c->srcW is read
 * @param srcSliceY  first row of the slice in the destination
 * @param srcSliceH  number of rows in the slice
 * @return srcSliceH
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){
    const int width= c->srcW;
    uint8_t *inRow=  src[0];
    uint8_t *outRow= dst[0] + dstStride[0]*srcSliceY;
    int row;

    for (row=0; row<srcSliceH; row++)
    {
        uint8_t *out= outRow;
        int x;

        for (x=0; x<width; x++)
        {
            const uint8_t sample= inRow[x];
            *out++ = sample;
            *out++ = sample;
        }
        inRow += srcStride[0];
        outRow+= dstStride[0];
    }
    return srcSliceH;
}
2189
2190
/**
 * Unscaled byte swap between the two 16-bit gray formats for one slice.
 *
 * Each 16-bit sample of plane 0 is passed through bswap_16(). Strides are
 * given in bytes and are halved when used to advance the 16-bit pointers.
 *
 * @param c          scaler context, only c->srcW is read
 * @param srcSliceY  first row of the slice in the destination
 * @param srcSliceH  number of rows in the slice
 * @return srcSliceH
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint16_t *srcPtr= (uint16_t*)src[0];
    /* dst[0] is a byte pointer, so the row offset is the full byte stride
       times y; it must not be halved before the cast to uint16_t* */
    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        srcPtr+= srcStride[0]/2;
        dstPtr+= dstStride[0]/2;
    }
    return srcSliceH;
}
2207
2208
2209 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
2210 221b804f Diego Biurrun
    switch(format){
2211
    case PIX_FMT_UYVY422:
2212
    case PIX_FMT_YUYV422:
2213
        *h=1;
2214
        *v=0;
2215
        break;
2216
    case PIX_FMT_YUV420P:
2217 79973335 Aurelien Jacobs
    case PIX_FMT_YUVA420P:
2218 221b804f Diego Biurrun
    case PIX_FMT_GRAY16BE:
2219
    case PIX_FMT_GRAY16LE:
2220
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
2221
    case PIX_FMT_NV12:
2222
    case PIX_FMT_NV21:
2223
        *h=1;
2224
        *v=1;
2225
        break;
2226 9ba7fe6d Andreas Öman
    case PIX_FMT_YUV440P:
2227
        *h=0;
2228
        *v=1;
2229
        break;
2230 221b804f Diego Biurrun
    case PIX_FMT_YUV410P:
2231
        *h=2;
2232
        *v=2;
2233
        break;
2234
    case PIX_FMT_YUV444P:
2235
        *h=0;
2236
        *v=0;
2237
        break;
2238
    case PIX_FMT_YUV422P:
2239
        *h=1;
2240
        *v=0;
2241
        break;
2242
    case PIX_FMT_YUV411P:
2243
        *h=2;
2244
        *v=0;
2245
        break;
2246
    default:
2247
        *h=0;
2248
        *v=0;
2249
        break;
2250
    }
2251 c7a810cc Michael Niedermayer
}
2252
2253 5427e242 Michael Niedermayer
/**
 * Convert a 16.16 fixed-point value to a rounded, saturated 16-bit integer.
 *
 * The value is rounded by adding half a unit (1<<15) before the shift and
 * then limited to [-0x7FFF, 0x7FFF]; anything below the lower limit yields
 * the bit pattern 0x8000. Negative results are returned as their 16-bit
 * two's complement bit pattern.
 *
 * The whole computation is done in 64 bits so that inputs whose shifted
 * value does not fit into an int saturate instead of wrapping around.
 */
static uint16_t roundToInt16(int64_t f){
    int64_t r;

    /* saturate first, this also keeps f + (1<<15) from overflowing */
    if (f >  ((int64_t)0x7FFF << 16)) return 0x7FFF;
    if (f < -((int64_t)0x8000 << 16)) return 0x8000;

    r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return (uint16_t)r;
}
2259
2260
/**
2261 fa58ba15 Kostya Shishkov
 * @param inv_table the yuv2rgb coefficients, normally ff_yuv2rgb_coeffs[x]
2262 86bdf3fd Diego Biurrun
 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
2263 5427e242 Michael Niedermayer
 * @return -1 if not supported
2264 0481412a Michael Niedermayer
 */
2265 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
2266 221b804f Diego Biurrun
    int64_t crv =  inv_table[0];
2267
    int64_t cbu =  inv_table[1];
2268
    int64_t cgu = -inv_table[2];
2269
    int64_t cgv = -inv_table[3];
2270
    int64_t cy  = 1<<16;
2271
    int64_t oy  = 0;
2272
2273
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
2274
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
2275
2276
    c->brightness= brightness;
2277
    c->contrast  = contrast;
2278
    c->saturation= saturation;
2279
    c->srcRange  = srcRange;
2280
    c->dstRange  = dstRange;
2281 6bc0c792 Michael Niedermayer
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return 0;
2282 221b804f Diego Biurrun
2283
    c->uOffset=   0x0400040004000400LL;
2284
    c->vOffset=   0x0400040004000400LL;
2285
2286
    if (!srcRange){
2287
        cy= (cy*255) / 219;
2288
        oy= 16<<16;
2289
    }else{
2290
        crv= (crv*224) / 255;
2291
        cbu= (cbu*224) / 255;
2292
        cgu= (cgu*224) / 255;
2293
        cgv= (cgv*224) / 255;
2294
    }
2295 0481412a Michael Niedermayer
2296 221b804f Diego Biurrun
    cy = (cy *contrast             )>>16;
2297
    crv= (crv*contrast * saturation)>>32;
2298
    cbu= (cbu*contrast * saturation)>>32;
2299
    cgu= (cgu*contrast * saturation)>>32;
2300
    cgv= (cgv*contrast * saturation)>>32;
2301 0481412a Michael Niedermayer
2302 221b804f Diego Biurrun
    oy -= 256*brightness;
2303 0481412a Michael Niedermayer
2304 221b804f Diego Biurrun
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
2305
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
2306
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
2307
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
2308
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
2309
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
2310 5427e242 Michael Niedermayer
2311 43c16478 Michael Niedermayer
    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
2312
    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
2313
    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
2314
    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
2315
    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
2316
    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);
2317 f0faee4c Michael Niedermayer
2318 780daf2b Diego Biurrun
    ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
2319 221b804f Diego Biurrun
    //FIXME factorize
2320 a31de956 Michael Niedermayer
2321 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
2322 221b804f Diego Biurrun
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
2323 780daf2b Diego Biurrun
        ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
2324 6a4970ab Diego Biurrun
#endif
2325 221b804f Diego Biurrun
    return 0;
2326 5427e242 Michael Niedermayer
}
2327
2328
/**
2329
 * @return -1 if not supported
2330
 */
2331
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
2332 221b804f Diego Biurrun
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
2333 5427e242 Michael Niedermayer
2334 221b804f Diego Biurrun
    *inv_table = c->srcColorspaceTable;
2335
    *table     = c->dstColorspaceTable;
2336
    *srcRange  = c->srcRange;
2337
    *dstRange  = c->dstRange;
2338
    *brightness= c->brightness;
2339
    *contrast  = c->contrast;
2340
    *saturation= c->saturation;
2341 6a4970ab Diego Biurrun
2342 221b804f Diego Biurrun
    return 0;
2343 0481412a Michael Niedermayer
}
2344
2345 13394e8c Aurelien Jacobs
static int handle_jpeg(enum PixelFormat *format)
2346 44cdb423 Luca Abeni
{
2347 221b804f Diego Biurrun
    switch (*format) {
2348
        case PIX_FMT_YUVJ420P:
2349
            *format = PIX_FMT_YUV420P;
2350
            return 1;
2351
        case PIX_FMT_YUVJ422P:
2352
            *format = PIX_FMT_YUV422P;
2353
            return 1;
2354
        case PIX_FMT_YUVJ444P:
2355
            *format = PIX_FMT_YUV444P;
2356
            return 1;
2357 9ba7fe6d Andreas Öman
        case PIX_FMT_YUVJ440P:
2358
            *format = PIX_FMT_YUV440P;
2359
            return 1;
2360 221b804f Diego Biurrun
        default:
2361
            return 0;
2362
    }
2363 44cdb423 Luca Abeni
}
2364
2365 58e4b706 Carl Eugen Hoyos
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
2366 221b804f Diego Biurrun
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2367
2368
    SwsContext *c;
2369
    int i;
2370
    int usesVFilter, usesHFilter;
2371
    int unscaled, needsDither;
2372
    int srcRange, dstRange;
2373
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2374 b63f641e Aurelien Jacobs
#if ARCH_X86
2375 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX)
2376 7ad6469e Diego Pettenò
        __asm__ volatile("emms\n\t"::: "memory");
2377 5cebb24b Michael Niedermayer
#endif
2378 516b1f82 Michael Niedermayer
2379 b63f641e Aurelien Jacobs
#if !defined(RUNTIME_CPUDETECT) || !CONFIG_GPL //ensure that the flags match the compiled variant if cpudetect is off
2380 d3f3eea9 Marc Hoffman
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2381 b63f641e Aurelien Jacobs
#if   HAVE_MMX2
2382 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2383 f4406ec1 Diego Biurrun
#elif HAVE_AMD3DNOW
2384 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2385 b63f641e Aurelien Jacobs
#elif HAVE_MMX
2386 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX;
2387 b63f641e Aurelien Jacobs
#elif HAVE_ALTIVEC
2388 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_ALTIVEC;
2389 b63f641e Aurelien Jacobs
#elif ARCH_BFIN
2390 d3f3eea9 Marc Hoffman
    flags |= SWS_CPU_CAPS_BFIN;
2391 516b1f82 Michael Niedermayer
#endif
2392 69796008 Diego Biurrun
#endif /* RUNTIME_CPUDETECT */
2393 221b804f Diego Biurrun
    if (clip_table[512] != 255) globalInit();
2394 1b0a4572 Benoit Fouet
    if (!rgb15to16) sws_rgb2rgb_init(flags);
2395 221b804f Diego Biurrun
2396
    unscaled = (srcW == dstW && srcH == dstH);
2397
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2398
        && (fmt_depth(dstFormat))<24
2399
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2400
2401
    srcRange = handle_jpeg(&srcFormat);
2402
    dstRange = handle_jpeg(&dstFormat);
2403
2404
    if (!isSupportedIn(srcFormat))
2405
    {
2406 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
2407 221b804f Diego Biurrun
        return NULL;
2408
    }
2409
    if (!isSupportedOut(dstFormat))
2410
    {
2411 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
2412 221b804f Diego Biurrun
        return NULL;
2413
    }
2414
2415 010c00bc Michael Niedermayer
    i= flags & ( SWS_POINT
2416
                |SWS_AREA
2417 6afc7c19 Michael Niedermayer
                |SWS_BILINEAR
2418 010c00bc Michael Niedermayer
                |SWS_FAST_BILINEAR
2419
                |SWS_BICUBIC
2420
                |SWS_X
2421
                |SWS_GAUSS
2422
                |SWS_LANCZOS
2423
                |SWS_SINC
2424
                |SWS_SPLINE
2425
                |SWS_BICUBLIN);
2426
    if(!i || (i & (i-1)))
2427
    {
2428 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
2429 010c00bc Michael Niedermayer
        return NULL;
2430
    }
2431
2432 221b804f Diego Biurrun
    /* sanity check */
2433
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2434
    {
2435
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2436
               srcW, srcH, dstW, dstH);
2437
        return NULL;
2438
    }
2439 8b2fce0d Michael Niedermayer
    if(srcW > VOFW || dstW > VOFW){
2440 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2441 8b2fce0d Michael Niedermayer
        return NULL;
2442
    }
2443 221b804f Diego Biurrun
2444
    if (!dstFilter) dstFilter= &dummyFilter;
2445
    if (!srcFilter) srcFilter= &dummyFilter;
2446
2447
    c= av_mallocz(sizeof(SwsContext));
2448
2449
    c->av_class = &sws_context_class;
2450
    c->srcW= srcW;
2451
    c->srcH= srcH;
2452
    c->dstW= dstW;
2453
    c->dstH= dstH;
2454
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2455
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2456
    c->flags= flags;
2457
    c->dstFormat= dstFormat;
2458
    c->srcFormat= srcFormat;
2459
    c->vRounder= 4* 0x0001000100010001ULL;
2460
2461
    usesHFilter= usesVFilter= 0;
2462 1b0a4572 Benoit Fouet
    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2463
    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2464
    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2465
    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2466
    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2467
    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2468
    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2469
    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2470 221b804f Diego Biurrun
2471
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2472
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2473
2474 f40c7dbb Diego Biurrun
    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
2475 221b804f Diego Biurrun
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2476
2477
    // drop some chroma lines if the user wants it
2478
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2479
    c->chrSrcVSubSample+= c->vChrDrop;
2480
2481 f40c7dbb Diego Biurrun
    // drop every other pixel for chroma calculation unless user wants full chroma
2482 221b804f Diego Biurrun
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2483
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2484
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2485 dfb09bd1 Michael Niedermayer
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2486 2f60f629 Michael Niedermayer
      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2487 221b804f Diego Biurrun
        c->chrSrcHSubSample=1;
2488
2489
    if (param){
2490
        c->param[0] = param[0];
2491
        c->param[1] = param[1];
2492
    }else{
2493
        c->param[0] =
2494
        c->param[1] = SWS_PARAM_DEFAULT;
2495
    }
2496
2497
    c->chrIntHSubSample= c->chrDstHSubSample;
2498
    c->chrIntVSubSample= c->chrSrcVSubSample;
2499
2500
    // Note the -((-x)>>y) is so that we always round toward +inf.
2501
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2502
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2503
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2504
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2505
2506 fa58ba15 Kostya Shishkov
    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2507 221b804f Diego Biurrun
2508 f40c7dbb Diego Biurrun
    /* unscaled special cases */
2509 6bc0c792 Michael Niedermayer
    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
2510 221b804f Diego Biurrun
    {
2511
        /* yv12_to_nv12 */
2512 2c897342 Cédric Schieli
        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2513 221b804f Diego Biurrun
        {
2514
            c->swScale= PlanarToNV12Wrapper;
2515
        }
2516
        /* yuv2bgr */
2517 2c897342 Cédric Schieli
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
2518 9b0d44ef Michael Niedermayer
            && !(flags & SWS_ACCURATE_RND) && !(dstH&1))
2519 221b804f Diego Biurrun
        {
2520 780daf2b Diego Biurrun
            c->swScale= ff_yuv2rgb_get_func_ptr(c);
2521 221b804f Diego Biurrun
        }
2522 6a4970ab Diego Biurrun
2523 6268f55b Cédric Schieli
        if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT))
2524 221b804f Diego Biurrun
        {
2525
            c->swScale= yvu9toyv12Wrapper;
2526
        }
2527
2528
        /* bgr24toYV12 */
2529 6268f55b Cédric Schieli
        if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
2530 221b804f Diego Biurrun
            c->swScale= bgr24toyv12Wrapper;
2531
2532 f40c7dbb Diego Biurrun
        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
2533 221b804f Diego Biurrun
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2534
           && (isBGR(dstFormat) || isRGB(dstFormat))
2535
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2536
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2537
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2538
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2539
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2540
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2541
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2542 ec1bca2a Michael Niedermayer
           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2543 9990e426 Michael Niedermayer
                                             && dstFormat != PIX_FMT_RGB32_1
2544
                                             && dstFormat != PIX_FMT_BGR32_1
2545 736143c8 Michael Niedermayer
           && (!needsDither || (c->flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2546 221b804f Diego Biurrun
             c->swScale= rgb2rgbWrapper;
2547
2548 49004617 Vitor Sessak
        if ((usePal(srcFormat) && (
2549 76e9fd01 Vitor Sessak
                 dstFormat == PIX_FMT_RGB32   ||
2550 522ce957 Vitor Sessak
                 dstFormat == PIX_FMT_RGB32_1 ||
2551 76e9fd01 Vitor Sessak
                 dstFormat == PIX_FMT_RGB24   ||
2552
                 dstFormat == PIX_FMT_BGR32   ||
2553 522ce957 Vitor Sessak
                 dstFormat == PIX_FMT_BGR32_1 ||
2554 49004617 Vitor Sessak
                 dstFormat == PIX_FMT_BGR24)))
2555
             c->swScale= pal2rgbWrapper;
2556
2557 a6100f39 Baptiste Coudurier
        if (srcFormat == PIX_FMT_YUV422P)
2558
        {
2559
            if (dstFormat == PIX_FMT_YUYV422)
2560
                c->swScale= YUV422PToYuy2Wrapper;
2561
            else if (dstFormat == PIX_FMT_UYVY422)
2562
                c->swScale= YUV422PToUyvyWrapper;
2563
        }
2564
2565 221b804f Diego Biurrun
        /* LQ converters if -sws 0 or -sws 4*/
2566
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2567
            /* yv12_to_yuy2 */
2568 2c897342 Cédric Schieli
            if (srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P)
2569 221b804f Diego Biurrun
            {
2570
                if (dstFormat == PIX_FMT_YUYV422)
2571
                    c->swScale= PlanarToYuy2Wrapper;
2572 3b38f1c6 Baptiste Coudurier
                else if (dstFormat == PIX_FMT_UYVY422)
2573 221b804f Diego Biurrun
                    c->swScale= PlanarToUyvyWrapper;
2574
            }
2575 0411072e Michael Niedermayer
2576
            if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV420P)
2577
                c->swScale= YUYV2YUV420Wrapper;
2578
            if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV420P)
2579
                c->swScale= UYVY2YUV420Wrapper;
2580 221b804f Diego Biurrun
        }
2581 0411072e Michael Niedermayer
        if(srcFormat == PIX_FMT_YUYV422 && dstFormat == PIX_FMT_YUV422P)
2582
            c->swScale= YUYV2YUV422Wrapper;
2583
        if(srcFormat == PIX_FMT_UYVY422 && dstFormat == PIX_FMT_YUV422P)
2584
            c->swScale= UYVY2YUV422Wrapper;
2585 ec22603f Michael Niedermayer
2586 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
2587 221b804f Diego Biurrun
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2588 12794f73 Kostya Shishkov
            !(c->flags & SWS_BITEXACT) &&
2589 3b38f1c6 Baptiste Coudurier
            srcFormat == PIX_FMT_YUV420P) {
2590 221b804f Diego Biurrun
          // unscaled YV12 -> packed YUV, we want speed
2591
          if (dstFormat == PIX_FMT_YUYV422)
2592
              c->swScale= yv12toyuy2_unscaled_altivec;
2593 3b38f1c6 Baptiste Coudurier
          else if (dstFormat == PIX_FMT_UYVY422)
2594 221b804f Diego Biurrun
              c->swScale= yv12touyvy_unscaled_altivec;
2595
        }
2596 b71cf33c Romain Dolbeau
#endif
2597
2598 221b804f Diego Biurrun
        /* simple copy */
2599
        if (  srcFormat == dstFormat
2600 2c897342 Cédric Schieli
            || (srcFormat == PIX_FMT_YUVA420P && dstFormat == PIX_FMT_YUV420P)
2601 6268f55b Cédric Schieli
            || (srcFormat == PIX_FMT_YUV420P && dstFormat == PIX_FMT_YUVA420P)
2602 221b804f Diego Biurrun
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2603 e5091488 Benoit Fouet
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)))
2604 221b804f Diego Biurrun
        {
2605 2d35ae56 Luca Barbato
            if (isPacked(c->srcFormat))
2606
                c->swScale= packedCopy;
2607
            else /* Planar YUV or gray */
2608
                c->swScale= planarCopy;
2609 221b804f Diego Biurrun
        }
2610
2611
        /* gray16{le,be} conversions */
2612
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2613
        {
2614
            c->swScale= gray16togray;
2615
        }
2616
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2617
        {
2618
            c->swScale= graytogray16;
2619
        }
2620
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2621
        {
2622
            c->swScale= gray16swap;
2623
        }
2624
2625 b63f641e Aurelien Jacobs
#if ARCH_BFIN
2626 1ebbfe15 Marc Hoffman
        if (flags & SWS_CPU_CAPS_BFIN)
2627
            ff_bfin_get_unscaled_swscale (c);
2628
#endif
2629
2630 221b804f Diego Biurrun
        if (c->swScale){
2631
            if (flags&SWS_PRINT_INFO)
2632 4b0c30b7 Baptiste Coudurier
                av_log(c, AV_LOG_INFO, "using unscaled %s -> %s special converter\n",
2633 221b804f Diego Biurrun
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2634
            return c;
2635
        }
2636
    }
2637
2638
    if (flags & SWS_CPU_CAPS_MMX2)
2639
    {
2640
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2641
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2642
        {
2643
            if (flags&SWS_PRINT_INFO)
2644 f40c7dbb Diego Biurrun
                av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
2645 221b804f Diego Biurrun
        }
2646
        if (usesHFilter) c->canMMX2BeUsed=0;
2647
    }
2648
    else
2649
        c->canMMX2BeUsed=0;
2650
2651
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2652
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2653
2654
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2655
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2656
    // n-2 is the last chrominance sample available
2657 86bdf3fd Diego Biurrun
    // this is not perfect, but no one should notice the difference, the more correct variant
2658 221b804f Diego Biurrun
    // would be like the vertical one, but that would require some special code for the
2659
    // first and last pixel
2660
    if (flags&SWS_FAST_BILINEAR)
2661
    {
2662
        if (c->canMMX2BeUsed)
2663
        {
2664
            c->lumXInc+= 20;
2665
            c->chrXInc+= 20;
2666
        }
2667 f40c7dbb Diego Biurrun
        //we don't use the x86 asm scaler if MMX is available
2668 221b804f Diego Biurrun
        else if (flags & SWS_CPU_CAPS_MMX)
2669
        {
2670
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2671
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2672
        }
2673
    }
2674
2675
    /* precalculate horizontal scaler filter coefficients */
2676
    {
2677
        const int filterAlign=
2678
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2679
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2680
            1;
2681
2682
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2683
                   srcW      ,       dstW, filterAlign, 1<<14,
2684
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2685
                   srcFilter->lumH, dstFilter->lumH, c->param);
2686
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2687
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2688
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2689
                   srcFilter->chrH, dstFilter->chrH, c->param);
2690 28bf81c9 Michael Niedermayer
2691 dbdae6ec Diego Biurrun
#define MAX_FUNNY_CODE_SIZE 10000
2692 17c613ef Uoti Urpala
#if defined(COMPILE_MMX2)
2693 77a416e8 Gabucino
// can't downscale !!!
2694 221b804f Diego Biurrun
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2695
        {
2696 113ef149 Reimar Döffinger
#ifdef MAP_ANONYMOUS
2697 16a3717f Ramiro Polla
            c->funnyYCode  = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2698
            c->funnyUVCode = mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2699 dd35beb2 Ramiro Polla
#elif HAVE_VIRTUALALLOC
2700
            c->funnyYCode  = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
2701
            c->funnyUVCode = VirtualAlloc(NULL, MAX_FUNNY_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
2702 38d5c282 Aurelien Jacobs
#else
2703 16a3717f Ramiro Polla
            c->funnyYCode  = av_malloc(MAX_FUNNY_CODE_SIZE);
2704 221b804f Diego Biurrun
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2705 38d5c282 Aurelien Jacobs
#endif