Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ 00c081a8

History | View | Annotate | Download (122 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 28bf81c9 Michael Niedermayer
/*
25 9990e426 Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
28 6a4970ab Diego Biurrun

29 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
38 b935781b Michael Niedermayer
*/
39
40 6a4970ab Diego Biurrun
/*
41 a6f6b237 Diego Biurrun
tested special converters (most are tested actually, but I did not write it down ...)
42 e09d12f4 Michael Niedermayer
 YV12 -> BGR16
43 b935781b Michael Niedermayer
 YV12 -> YV12
44 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
45 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
46 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
47 b935781b Michael Niedermayer

48
untested special converters
49 f40c7dbb Diego Biurrun
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54 ec22603f Michael Niedermayer
  BGR24 -> YV12
55 28bf81c9 Michael Niedermayer
*/
56
57 d63a2cb1 Michael Niedermayer
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58 d3f41512 Michael Niedermayer
#include <inttypes.h>
59 dda87e9f Pierre Lombard
#include <string.h>
60 077ea8a7 Michael Niedermayer
#include <math.h>
61 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
62 b2d374c9 Diego Biurrun
#include "config.h"
63 81b7c056 Michael Niedermayer
#include <assert.h>
64 b63f641e Aurelien Jacobs
#if HAVE_SYS_MMAN_H
65 38d5c282 Aurelien Jacobs
#include <sys/mman.h>
66 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69 38d5c282 Aurelien Jacobs
#endif
70 dd35beb2 Ramiro Polla
#if HAVE_VIRTUALALLOC
71
#define WIN32_LEAN_AND_MEAN
72
#include <windows.h>
73
#endif
74 d604bab9 Michael Niedermayer
#include "swscale.h"
75 5427e242 Michael Niedermayer
#include "swscale_internal.h"
76 37079906 Michael Niedermayer
#include "rgb2rgb.h"
77 52154148 Ramiro Polla
#include "libavutil/intreadwrite.h"
78 83da2c6f Diego Biurrun
#include "libavutil/x86_cpu.h"
79 7248797c Ramiro Polla
#include "libavutil/avutil.h"
80 83da2c6f Diego Biurrun
#include "libavutil/bswap.h"
81 a9af75ae Stefano Sabatini
#include "libavutil/pixdesc.h"
82 0d9f3d85 Arpi
83 b3e03fa7 Stefano Sabatini
unsigned swscale_version(void)
84
{
85
    return LIBSWSCALE_VERSION_INT;
86
}
87
88 997980f0 Stefano Sabatini
const char *swscale_configuration(void)
89 bd381fd3 Diego Biurrun
{
90
    return FFMPEG_CONFIGURATION;
91
}
92
93 997980f0 Stefano Sabatini
const char *swscale_license(void)
94 bd381fd3 Diego Biurrun
{
95
#define LICENSE_PREFIX "libswscale license: "
96
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
97
}
98
99 541c4eb9 Michael Niedermayer
#undef MOVNTQ
#undef PAVGB

/* Manual overrides, currently commented out. */
//#undef HAVE_MMX2
//#define HAVE_AMD3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
#define DITHER1XBPP

/* Use 7 bit coefficients instead of 15 bit. */
#define FAST_BGR2YV12

/* Near return opcode for x86. */
#define RET 0xC3

/* PI: take M_PI when the headers provide it, otherwise a literal. */
#ifndef M_PI
#define PI 3.14159265358979323846
#else
#define PI M_PI
#endif
117 c1b0bfb4 Michael Niedermayer
118 9d9de37d Ivo van Poorten
/*
 * Non-zero when pixel format x is accepted as scaler input.
 * x is evaluated several times, so it must be free of side effects.
 * (PIX_FMT_YUV440P used to be listed twice; the redundant test was dropped.)
 */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUVA420P    \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB48BE     \
        || (x)==PIX_FMT_RGB48LE     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_RGB32_1     \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_BGR32_1     \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_YUV440P     \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_MONOWHITE   \
        || (x)==PIX_FMT_MONOBLACK   \
        || (x)==PIX_FMT_YUV420P16LE \
        || (x)==PIX_FMT_YUV422P16LE \
        || (x)==PIX_FMT_YUV444P16LE \
        || (x)==PIX_FMT_YUV420P16BE \
        || (x)==PIX_FMT_YUV422P16BE \
        || (x)==PIX_FMT_YUV444P16BE \
    )
160 8e9767f6 Stefano Sabatini
161
/**
 * Function form of the isSupportedIn() macro.
 *
 * @param pix_fmt pixel format to test
 * @return non-zero when isSupportedIn(pix_fmt) holds, 0 otherwise
 */
int sws_isSupportedInput(enum PixelFormat pix_fmt)
{
    const int supported = isSupportedIn(pix_fmt);

    return supported;
}
165
166 9d9de37d Ivo van Poorten
/*
 * Non-zero when pixel format x is accepted as scaler output.
 * x is evaluated several times, so it must be free of side effects.
 */
#define isSupportedOut(x) (                                             \
       (x)==PIX_FMT_YUV420P     || (x)==PIX_FMT_YUVA420P                \
    || (x)==PIX_FMT_YUYV422     || (x)==PIX_FMT_UYVY422                 \
    || (x)==PIX_FMT_YUV444P     || (x)==PIX_FMT_YUV422P                 \
    || (x)==PIX_FMT_YUV411P                                             \
    || isRGB(x)                 || isBGR(x)                             \
    || (x)==PIX_FMT_NV12        || (x)==PIX_FMT_NV21                    \
    || (x)==PIX_FMT_GRAY16BE    || (x)==PIX_FMT_GRAY16LE                \
    || (x)==PIX_FMT_GRAY8                                               \
    || (x)==PIX_FMT_YUV410P     || (x)==PIX_FMT_YUV440P                 \
    || (x)==PIX_FMT_YUV420P16LE || (x)==PIX_FMT_YUV422P16LE             \
    || (x)==PIX_FMT_YUV444P16LE                                         \
    || (x)==PIX_FMT_YUV420P16BE || (x)==PIX_FMT_YUV422P16BE             \
    || (x)==PIX_FMT_YUV444P16BE                                         \
)
190 8e9767f6 Stefano Sabatini
191
/**
 * Function form of the isSupportedOut() macro.
 *
 * @param pix_fmt pixel format to test
 * @return non-zero when isSupportedOut(pix_fmt) holds, 0 otherwise
 */
int sws_isSupportedOutput(enum PixelFormat pix_fmt)
{
    const int supported = isSupportedOut(pix_fmt);

    return supported;
}
195
196 9d9de37d Ivo van Poorten
/*
 * Non-zero for PAL8, YUYV422, UYVY422 and every format for which isRGB()
 * or isBGR() holds. x is evaluated several times.
 */
#define isPacked(x) (                                   \
       (x)==PIX_FMT_PAL8                                \
    || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422     \
    || isRGB(x)             || isBGR(x)                 \
)
203 f412d5e9 Stefano Sabatini
/* Non-zero when the descriptor of pixel format x has the PIX_FMT_PAL flag set. */
#define usePal(x) (av_pix_fmt_descriptors[x].flags & PIX_FMT_PAL)
204 6ff0ad6b Michael Niedermayer
205 6b79dbce Michael Niedermayer
/*
 * Fixed-point RGB -> YUV weights with RGB2YUV_SHIFT fractional bits.
 * Each weight is rounded to the nearest integer (+0.5 before truncation);
 * luma weights are scaled by 219/255 and chroma weights by 224/255.
 * Naming: first letter = source component, second letter = destination.
 */
#define RGB2YUV_SHIFT 15

/* contributions to Y */
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))

/* contributions to U */
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))

/* contributions to V */
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
215 6c7506de Michael Niedermayer
216 fa58ba15 Kostya Shishkov
extern const int32_t ff_yuv2rgb_coeffs[8][4];
217 0481412a Michael Niedermayer
218 0f5d4aa8 Michael Niedermayer
/*
 * Floating-point RGB -> YUV weights, one row of nine per colorspace index.
 * The 0.587/0.114/0.299 rows list their weights in the order
 * {GY, BY, RY, GU, BU, RU, GV, BV, RV} (same values as the integer macros
 * of those names); the other rows presumably use the same layout.
 *
 * Fix: the SMPTE 240M row was missing the comma between 0.5 and -0.116, so
 * the two were folded into the single value 0.384, every later entry moved
 * one column to the left and the ninth entry was implicitly 0.
 */
static const double rgb2yuv_table[8][9]={
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
};
228
229 783e9cc9 Michael Niedermayer
/*
230
NOTES
231 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
232 31190492 Arpi

233 783e9cc9 Michael Niedermayer
TODO
234 bd7c6fd5 Diego Biurrun
more intelligent misalignment avoidance for the horizontal scaler
235 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
236 f40c7dbb Diego Biurrun
optimize C code (YV12 / minmax)
237
add support for packed pixel YUV input & output
238 6ff0ad6b Michael Niedermayer
add support for Y8 output
239 f40c7dbb Diego Biurrun
optimize BGR24 & BGR32
240 ff7ba856 Michael Niedermayer
add BGR4 output support
241 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
242 783e9cc9 Michael Niedermayer
*/
243 31190492 Arpi
244 b63f641e Aurelien Jacobs
#if ARCH_X86 && CONFIG_GPL
/* 64-bit constants, only built when ARCH_X86 and CONFIG_GPL are both set. */

/* byte / word patterns */
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;

/* two-row dither patterns, four 16-bit lanes per row */
const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
        0x0103010301030103LL,
        0x0200020002000200LL,};

const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* per-component masks for 16 bit (5-6-5) and 15 bit (5-5-5) pixels */
DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;

/* every-third-byte masks */
DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;

/* BGR -> YUV coefficients: short form with FAST_BGR2YV12, long form without */
#ifdef FAST_BGR2YV12
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
#else
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;

/* 24 bit RGB/BGR -> Y coefficients and offset */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;

/* 24 bit -> UV coefficients, two sets of four words, and offset */
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};

DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;

#endif /* ARCH_X86 && CONFIG_GPL */
300 783e9cc9 Michael Niedermayer
301 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
302 6a4970ab Diego Biurrun
303 92db6235 Pavel Pavlov
/* Two-row dither pattern with values 0..3; each row repeats a pair. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4[2][8])={
    { 1, 3, 1, 3, 1, 3, 1, 3 },
    { 2, 0, 2, 0, 2, 0, 2, 0 },
};
307
308 92db6235 Pavel Pavlov
/* Two-row dither pattern with values 0..6; each row repeats a pair. */
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8[2][8])={
    { 6, 2, 6, 2, 6, 2, 6, 2 },
    { 0, 4, 0, 4, 0, 4, 0, 4 },
};
312
313 92db6235 Pavel Pavlov
/* 8x8 dither matrix with values 0..31. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32[8][8])={
    { 17,  9, 23, 15, 16,  8, 22, 14 },
    {  5, 29,  3, 27,  4, 28,  2, 26 },
    { 21, 13, 19, 11, 20, 12, 18, 10 },
    {  0, 24,  6, 30,  1, 25,  7, 31 },
    { 16,  8, 22, 14, 17,  9, 23, 15 },
    {  4, 28,  2, 26,  5, 29,  3, 27 },
    { 20, 12, 18, 10, 21, 13, 19, 11 },
    {  1, 25,  7, 31,  0, 24,  6, 30 },
};
323
324 92db6235 Pavel Pavlov
/* 8x8 dither matrix with values 0..72. */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73[8][8])={
    {  0, 55, 14, 68,  3, 58, 17, 72 },
    { 37, 18, 50, 32, 40, 22, 54, 35 },
    {  9, 64,  5, 59, 13, 67,  8, 63 },
    { 46, 27, 41, 23, 49, 31, 44, 26 },
    {  2, 57, 16, 71,  1, 56, 15, 70 },
    { 39, 21, 52, 34, 38, 19, 51, 33 },
    { 11, 66,  7, 62, 10, 65,  6, 60 },
    { 48, 30, 43, 25, 47, 29, 42, 24 },
};
334
335
#if 1
336 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
337 45e18be8 Michael Niedermayer
{117,  62, 158, 103, 113,  58, 155, 100, },
338
{ 34, 199,  21, 186,  31, 196,  17, 182, },
339
{144,  89, 131,  76, 141,  86, 127,  72, },
340
{  0, 165,  41, 206,  10, 175,  52, 217, },
341
{110,  55, 151,  96, 120,  65, 162, 107, },
342
{ 28, 193,  14, 179,  38, 203,  24, 189, },
343
{138,  83, 124,  69, 148,  93, 134,  79, },
344
{  7, 172,  48, 213,   3, 168,  45, 210, },
345
};
346
#elif 1
347
// tries to correct a gamma of 1.5
348 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
349 45e18be8 Michael Niedermayer
{  0, 143,  18, 200,   2, 156,  25, 215, },
350
{ 78,  28, 125,  64,  89,  36, 138,  74, },
351
{ 10, 180,   3, 161,  16, 195,   8, 175, },
352
{109,  51,  93,  38, 121,  60, 105,  47, },
353
{  1, 152,  23, 210,   0, 147,  20, 205, },
354
{ 85,  33, 134,  71,  81,  30, 130,  67, },
355
{ 14, 190,   6, 171,  12, 185,   5, 166, },
356
{117,  57, 101,  44, 113,  54,  97,  41, },
357
};
358
#elif 1
359
// tries to correct a gamma of 2.0
360 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
361 45e18be8 Michael Niedermayer
{  0, 124,   8, 193,   0, 140,  12, 213, },
362
{ 55,  14, 104,  42,  66,  19, 119,  52, },
363
{  3, 168,   1, 145,   6, 187,   3, 162, },
364
{ 86,  31,  70,  21,  99,  39,  82,  28, },
365
{  0, 134,  11, 206,   0, 129,   9, 200, },
366
{ 62,  17, 114,  48,  58,  16, 109,  45, },
367
{  5, 181,   2, 157,   4, 175,   1, 151, },
368
{ 95,  36,  78,  26,  90,  34,  74,  24, },
369
};
370
#else
371
// tries to correct a gamma of 2.5
372 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
373 45e18be8 Michael Niedermayer
{  0, 107,   3, 187,   0, 125,   6, 212, },
374
{ 39,   7,  86,  28,  49,  11, 102,  36, },
375
{  1, 158,   0, 131,   3, 180,   1, 151, },
376
{ 68,  19,  52,  12,  81,  25,  64,  17, },
377
{  0, 119,   5, 203,   0, 113,   4, 195, },
378
{ 45,   9,  96,  33,  42,   8,  91,  30, },
379
{  2, 172,   1, 144,   2, 165,   0, 137, },
380
{ 77,  23,  60,  15,  72,  21,  56,  14, },
381
};
382
#endif
383 5cebb24b Michael Niedermayer
384 8055ede6 Baptiste Coudurier
const char *sws_format_name(enum PixelFormat format)
385 94c4def2 Luca Abeni
{
386 a9af75ae Stefano Sabatini
    if ((unsigned)format < PIX_FMT_NB && av_pix_fmt_descriptors[format].name)
387
        return av_pix_fmt_descriptors[format].name;
388
    else
389 9b734d44 Ramiro Polla
        return "Unknown format";
390 94c4def2 Luca Abeni
}
391
392 52154148 Ramiro Polla
static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
393
                                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
394
                                                    const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
395
                                                    int dstW, int chrDstW, int big_endian)
396
{
397
    //FIXME Optimize (just quickly written not optimized..)
398
    int i;
399
400
    for (i = 0; i < dstW; i++) {
401
        int val = 1 << 10;
402
        int j;
403
404
        for (j = 0; j < lumFilterSize; j++)
405
            val += lumSrc[j][i] * lumFilter[j];
406
407
        if (big_endian) {
408
            AV_WB16(&dest[i], av_clip_uint16(val >> 11));
409
        } else {
410
            AV_WL16(&dest[i], av_clip_uint16(val >> 11));
411
        }
412
    }
413
414
    if (uDest) {
415
        for (i = 0; i < chrDstW; i++) {
416
            int u = 1 << 10;
417
            int v = 1 << 10;
418
            int j;
419
420
            for (j = 0; j < chrFilterSize; j++) {
421
                u += chrSrc[j][i       ] * chrFilter[j];
422
                v += chrSrc[j][i + VOFW] * chrFilter[j];
423
            }
424
425
            if (big_endian) {
426
                AV_WB16(&uDest[i], av_clip_uint16(u >> 11));
427
                AV_WB16(&vDest[i], av_clip_uint16(v >> 11));
428
            } else {
429
                AV_WL16(&uDest[i], av_clip_uint16(u >> 11));
430
                AV_WL16(&vDest[i], av_clip_uint16(v >> 11));
431
            }
432
        }
433
    }
434
435
    if (CONFIG_SWSCALE_ALPHA && aDest) {
436
        for (i = 0; i < dstW; i++) {
437
            int val = 1 << 10;
438
            int j;
439
440
            for (j = 0; j < lumFilterSize; j++)
441
                val += alpSrc[j][i] * lumFilter[j];
442
443
            if (big_endian) {
444
                AV_WB16(&aDest[i], av_clip_uint16(val >> 11));
445
            } else {
446
                AV_WL16(&aDest[i], av_clip_uint16(val >> 11));
447
            }
448
        }
449
    }
450
}
451
452
static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
453
                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
454
                                 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
455
                                 enum PixelFormat dstFormat)
456
{
457
    if (isBE(dstFormat)) {
458
        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
459
                               chrFilter, chrSrc, chrFilterSize,
460
                               alpSrc,
461
                               dest, uDest, vDest, aDest,
462
                               dstW, chrDstW, 1);
463
    } else {
464
        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
465
                               chrFilter, chrSrc, chrFilterSize,
466
                               alpSrc,
467
                               dest, uDest, vDest, aDest,
468
                               dstW, chrDstW, 0);
469
    }
470
}
471
472 7ac40327 Ramiro Polla
static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
473
                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
474
                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
475 e3d2500f Michael Niedermayer
{
476 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
477 221b804f Diego Biurrun
    int i;
478 dd68318c Ramiro Polla
    for (i=0; i<dstW; i++) {
479 221b804f Diego Biurrun
        int val=1<<18;
480
        int j;
481
        for (j=0; j<lumFilterSize; j++)
482
            val += lumSrc[j][i] * lumFilter[j];
483
484
        dest[i]= av_clip_uint8(val>>19);
485
    }
486
487 1b0a4572 Benoit Fouet
    if (uDest)
488 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
489 221b804f Diego Biurrun
            int u=1<<18;
490
            int v=1<<18;
491
            int j;
492 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
493 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
494 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
495 221b804f Diego Biurrun
            }
496
497
            uDest[i]= av_clip_uint8(u>>19);
498
            vDest[i]= av_clip_uint8(v>>19);
499
        }
500 6858492e Cédric Schieli
501
    if (CONFIG_SWSCALE_ALPHA && aDest)
502 dd68318c Ramiro Polla
        for (i=0; i<dstW; i++) {
503 6858492e Cédric Schieli
            int val=1<<18;
504
            int j;
505
            for (j=0; j<lumFilterSize; j++)
506
                val += alpSrc[j][i] * lumFilter[j];
507
508
            aDest[i]= av_clip_uint8(val>>19);
509
        }
510
511 e3d2500f Michael Niedermayer
}
512
513 7ac40327 Ramiro Polla
static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
514
                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
515 221b804f Diego Biurrun
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
516 6118e52e Ville Syrjälä
{
517 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written not optimized..)
518 221b804f Diego Biurrun
    int i;
519 dd68318c Ramiro Polla
    for (i=0; i<dstW; i++) {
520 221b804f Diego Biurrun
        int val=1<<18;
521
        int j;
522
        for (j=0; j<lumFilterSize; j++)
523
            val += lumSrc[j][i] * lumFilter[j];
524
525
        dest[i]= av_clip_uint8(val>>19);
526
    }
527
528 1b0a4572 Benoit Fouet
    if (!uDest)
529 221b804f Diego Biurrun
        return;
530
531
    if (dstFormat == PIX_FMT_NV12)
532 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
533 221b804f Diego Biurrun
            int u=1<<18;
534
            int v=1<<18;
535
            int j;
536 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
537 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
538 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
539 221b804f Diego Biurrun
            }
540
541
            uDest[2*i]= av_clip_uint8(u>>19);
542
            uDest[2*i+1]= av_clip_uint8(v>>19);
543
        }
544
    else
545 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
546 221b804f Diego Biurrun
            int u=1<<18;
547
            int v=1<<18;
548
            int j;
549 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
550 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
551 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
552 221b804f Diego Biurrun
            }
553
554
            uDest[2*i]= av_clip_uint8(v>>19);
555
            uDest[2*i+1]= av_clip_uint8(u>>19);
556
        }
557 6118e52e Ville Syrjälä
}
558 46de8b73 Michael Niedermayer
559 6858492e Cédric Schieli
/*
 * Opens a loop over pixel pairs (i = 0 .. (dstW>>1)-1) and filters two luma
 * samples (Y1, Y2), one chroma pair (U, V) and, when alpha is non-zero, two
 * alpha samples (A1, A2): bias 1<<18, shift right by 19, no clipping.
 * Also declares the pointers r, g, b of the given type for the user.
 * The loop body is left open; the code using the macro must close it.
 * Expects i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc, chrFilter,
 * chrFilterSize and (for alpha) alpSrc in scope.
 */
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
    for (i=0; i<(dstW>>1); i++) {\
        int j;\
        int Y1 = 1<<18, Y2 = 1<<18;\
        int U  = 1<<18, V  = 1<<18;\
        int av_unused A1, A2;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++) {\
            Y1 += lumSrc[j][i2  ] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++) {\
            U += chrSrc[j][i     ] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y1 >>= 19;\
        Y2 >>= 19;\
        U  >>= 19;\
        V  >>= 19;\
        if (alpha) {\
            A1 = 1<<18;\
            A2 = 1<<18;\
            for (j=0; j<lumFilterSize; j++) {\
                A1 += alpSrc[j][i2  ] * lumFilter[j];\
                A2 += alpSrc[j][i2+1] * lumFilter[j];\
            }\
            A1 >>= 19;\
            A2 >>= 19;\
        }
592 bdf397ba Michael Niedermayer
593 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKEDX_NOCLIP_C followed by clipping to 0..255.
 * The clamps only run when bit 8 of the OR of the values is set (cheap
 * test that catches small overshoots and all negative values). A1/A2 are
 * clipped only when alpha is non-zero.
 * alpha is parenthesised so that an argument such as "a || b" cannot
 * regroup with the && that follows it.
 * The loop opened by the NOCLIP macro is still open afterwards.
 */
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
        if ((Y1|Y2|U|V)&256) {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }\
        if ((alpha) && ((A1|A2)&256)) {\
            A1=av_clip_uint8(A1);\
            A2=av_clip_uint8(A2);\
        }
609 6a4970ab Diego Biurrun
610 6858492e Cédric Schieli
/*
 * Opens a loop over single pixels (i = 0 .. dstW-1) and filters one luma
 * sample Y and one chroma pair U, V, each shifted right by 10 only. U and
 * V start from -(128<<19), Y from 0. When alpha is non-zero, A is filtered
 * with the luma coefficients starting from rnd, shifted right by 19 and
 * clipped to 0..255. Also declares R, G, B for the user.
 * The chroma start value is written as -(128<<19): the former -128<<19
 * left-shifted a negative value, which is undefined behaviour in C.
 * The loop body is left open; the code using the macro must close it.
 */
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
    for (i=0; i<dstW; i++) {\
        int j;\
        int Y = 0;\
        int U = -(128<<19);\
        int V = -(128<<19);\
        int av_unused A;\
        int R,G,B;\
        \
        for (j=0; j<lumFilterSize; j++) {\
            Y += lumSrc[j][i     ] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++) {\
            U += chrSrc[j][i     ] * chrFilter[j];\
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
        }\
        Y >>=10;\
        U >>=10;\
        V >>=10;\
        if (alpha) {\
            A = rnd;\
            for (j=0; j<lumFilterSize; j++)\
                A += alpSrc[j][i     ] * lumFilter[j];\
            A >>=19;\
            if (A&256)\
                A = av_clip_uint8(A);\
        }
637 f0faee4c Michael Niedermayer
638 6858492e Cédric Schieli
/*
 * YSCALE_YUV_2_PACKEDX_FULL_C (with rnd>>3 as its rounding value) followed
 * by the conversion of Y, U, V to R, G, B using the offset and coefficients
 * stored in c. rnd is added to the scaled luma. The results are clamped to
 * 0 .. (256<<22)-1, but only when one of the two top bits of R|G|B is set.
 * The loop opened by the inner macro is still open afterwards.
 */
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
        Y -= c->yuv2rgb_y_offset;\
        Y *= c->yuv2rgb_y_coeff;\
        Y += rnd;\
        B = Y +                          U*c->yuv2rgb_u2b_coeff;\
        G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
        R = Y + V*c->yuv2rgb_v2r_coeff;\
        if ((R|G|B)&(0xC0000000)) {\
            R = R>=(256<<22) ? (256<<22)-1 : R<0 ? 0 : R;\
            G = G>=(256<<22) ? (256<<22)-1 : G<0 ? 0 : G;\
            B = B>=(256<<22) ? (256<<22)-1 : B<0 ? 0 : B;\
        }
654
655
656 e69bd294 Michael Niedermayer
/*
 * Opens a loop over pixel pairs (i = 0 .. (dstW>>1)-1) and filters two
 * luma samples Y1, Y2: bias 1<<18, shift right by 11, then clamp to
 * 0..65535 when bit 16 of the OR of the values is set.
 * U and V are only initialised here, never accumulated; with their value
 * of 1<<18 they do not influence the bit 16 test.
 * NOTE(review): after the shift by 11 the 1<<18 bias amounts to +128
 * rather than half a step - confirm this is intended.
 * The loop body is left open; the code using the macro must close it.
 */
#define YSCALE_YUV_2_GRAY16_C \
    for (i=0; i<(dstW>>1); i++) {\
        int j;\
        int Y1 = 1<<18, Y2 = 1<<18;\
        int U  = 1<<18, V  = 1<<18;\
        \
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++) {\
            Y1 += lumSrc[j][i2  ] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        Y1 >>= 11;\
        Y2 >>= 11;\
        if ((Y1|Y2|U|V)&65536) {\
            Y1 = Y1>65535 ? 65535 : Y1<0 ? 0 : Y1;\
            Y2 = Y2>65535 ? 65535 : Y2<0 ? 0 : Y2;\
        }
678
679 6858492e Cédric Schieli
/*
 * Multi-tap variant: expands YSCALE_YUV_2_PACKEDX_C (defined elsewhere) and
 * then selects the per-pixel-pair lookup tables r, g and b from the context,
 * indexed by the chroma values V and U.
 */
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
    b = (type *) c->table_bU[U];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    r = (type *) c->table_rV[V];
684
685 6858492e Cédric Schieli
/*
 * Two-line blend: opens a loop over pixel pairs and computes Y1, Y2, U, V
 * (and A1, A2 when `alpha` is non-zero) as the weighted sum of two source
 * lines (buf0/buf1, uvbuf0/uvbuf1, abuf0/abuf1), scaled down by 19 bits.
 * The V samples are read VOFW entries after the U samples.
 * r, g, b are declared for the caller but not assigned here.
 * The user of the macro closes the loop with `}`.
 */
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = i * 2;\
        int Y1 = (buf0[i2    ] * yalpha1 + buf1[i2    ] * yalpha) >> 19;\
        int Y2 = (buf0[i2 + 1] * yalpha1 + buf1[i2 + 1] * yalpha) >> 19;\
        int U  = (uvbuf0[i       ] * uvalpha1 + uvbuf1[i       ] * uvalpha) >> 19;\
        int V  = (uvbuf0[i + VOFW] * uvalpha1 + uvbuf1[i + VOFW] * uvalpha) >> 19;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = (abuf0[i2    ] * yalpha1 + abuf1[i2    ] * yalpha) >> 19;\
            A2 = (abuf0[i2 + 1] * yalpha1 + abuf1[i2 + 1] * yalpha) >> 19;\
        }
698 46de8b73 Michael Niedermayer
699 b0880d5d Michael Niedermayer
/*
 * Two-line blend for 16-bit gray: opens a loop over pixel pairs and computes
 * Y1/Y2 as the weighted sum of buf0 and buf1, scaled down by 11 bits.
 * No clamping is done here. The user of the macro closes the loop with `}`.
 */
#define YSCALE_YUV_2_GRAY16_2_C   \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = i * 2;\
        int Y1 = (buf0[i2    ] * yalpha1 + buf1[i2    ] * yalpha) >> 11;\
        int Y2 = (buf0[i2 + 1] * yalpha1 + buf1[i2 + 1] * yalpha) >> 11;
704
705 6858492e Cédric Schieli
/*
 * Two-line blend variant: expands YSCALE_YUV_2_PACKED2_C and then selects the
 * lookup tables r, g and b from the context, indexed by V and U.
 */
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
    b = (type *) c->table_bU[U];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    r = (type *) c->table_rV[V];
710 cf7d1c1a Michael Niedermayer
711 6858492e Cédric Schieli
/*
 * Single-line variant: opens a loop over pixel pairs and takes Y1/Y2 from
 * buf0, U/V from uvbuf1 (V is VOFW entries after U) and, when `alpha` is
 * non-zero, A1/A2 from abuf0. Every value is scaled down by 7 bits.
 * r, g, b are declared for the caller but not assigned here.
 * The user of the macro closes the loop with `}`.
 */
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = i * 2;\
        int Y1 = buf0[i2    ] >> 7;\
        int Y2 = buf0[i2 + 1] >> 7;\
        int U  = (uvbuf1[i       ]) >> 7;\
        int V  = (uvbuf1[i + VOFW]) >> 7;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = abuf0[i2    ] >> 7;\
            A2 = abuf0[i2 + 1] >> 7;\
        }
724 46de8b73 Michael Niedermayer
725 b0880d5d Michael Niedermayer
/*
 * Single-line variant for 16-bit gray: opens a loop over pixel pairs and
 * sets Y1/Y2 to the buf0 samples shifted left by one bit.
 * The user of the macro closes the loop with `}`.
 */
#define YSCALE_YUV_2_GRAY16_1_C \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = i * 2;\
        int Y1 = buf0[i2    ] << 1;\
        int Y2 = buf0[i2 + 1] << 1;
730
731 6858492e Cédric Schieli
/*
 * Single-line variant: expands YSCALE_YUV_2_PACKED1_C and then selects the
 * lookup tables r, g and b from the context, indexed by V and U.
 */
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
    b = (type *) c->table_bU[U];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    r = (type *) c->table_rV[V];
736 cf7d1c1a Michael Niedermayer
737 6858492e Cédric Schieli
/*
 * Single luma line with chroma taken from two lines: like
 * YSCALE_YUV_2_PACKED1_C, except that U and V are the sum of the uvbuf0 and
 * uvbuf1 samples scaled down by 8 bits (i.e. their average, >>7).
 * The user of the macro closes the loop with `}`.
 */
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
    for (i = 0; i < (dstW >> 1); i++) {\
        const int i2 = i * 2;\
        int Y1 = buf0[i2    ] >> 7;\
        int Y2 = buf0[i2 + 1] >> 7;\
        int U  = (uvbuf0[i       ] + uvbuf1[i       ]) >> 8;\
        int V  = (uvbuf0[i + VOFW] + uvbuf1[i + VOFW]) >> 8;\
        type av_unused *r, *b, *g;\
        int av_unused A1, A2;\
        if (alpha) {\
            A1 = abuf0[i2    ] >> 7;\
            A2 = abuf0[i2 + 1] >> 7;\
        }
750 46de8b73 Michael Niedermayer
751 6858492e Cédric Schieli
/*
 * Averaged-chroma single-line variant: expands YSCALE_YUV_2_PACKED1B_C and
 * then selects the lookup tables r, g and b from the context, indexed by
 * V and U.
 */
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
    b = (type *) c->table_bU[U];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    r = (type *) c->table_rV[V];
756 cf7d1c1a Michael Niedermayer
757 ec1bca2a Michael Niedermayer
/*
 * Two-line blend to 1 bit per pixel.
 *
 * For every complete group of 8 output pixels the blended luma (>>19) plus
 * an entry of the dither row selected by y&7 indexes the table g; the eight
 * table values are shifted into one accumulator, first pixel ending up in
 * the most significant position. The byte is stored as is for
 * PIX_FMT_MONOBLACK and bit-inverted otherwise.
 * Pixels beyond the last complete group of 8 are not written.
 */
#define YSCALE_YUV_2_MONO2_C \
    const uint8_t * const d128 = dither_8x8_220[y & 7];\
    uint8_t *g = c->table_gU[128] + c->table_gV[128];\
    for (i = 0; i < dstW - 7; i += 8) {\
        int acc = 0;\
        int bit;\
        for (bit = 0; bit < 8; bit++)\
            acc += acc + g[((buf0[i + bit] * yalpha1 + buf1[i + bit] * yalpha) >> 19) + d128[bit]];\
        ((uint8_t*)dest)[0] = c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
        dest++;\
    }
773
774
775 ec1bca2a Michael Niedermayer
/*
 * Multi-tap vertical luma filter to 1 bit per pixel.
 *
 * Two pixels are filtered per iteration (>>19, clamped to 0..255), dithered
 * with the row of dither_8x8_220 selected by y&7 and looked up in table g.
 * The table values are shifted into `acc`; after every 8th pixel the low
 * byte is stored, as is for PIX_FMT_MONOBLACK and bit-inverted otherwise.
 *
 * `acc` is cleared after each store: only its low 8 bits are ever written,
 * and without the reset the signed accumulator would keep doubling and
 * eventually overflow on wide rows. The stored bytes are unchanged.
 * Pixels beyond the last complete group of 8 are not written.
 */
#define YSCALE_YUV_2_MONOX_C \
    const uint8_t * const d128=dither_8x8_220[y&7];\
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
    int acc=0;\
    for (i=0; i<dstW-1; i+=2) {\
        int j;\
        int Y1=1<<18;\
        int Y2=1<<18;\
\
        for (j=0; j<lumFilterSize; j++) {\
            Y1 += lumSrc[j][i] * lumFilter[j];\
            Y2 += lumSrc[j][i+1] * lumFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        /* bit 8 set: at least one value is outside 0..255 */\
        if ((Y1|Y2)&256) {\
            if (Y1>255)   Y1=255;\
            else if (Y1<0)Y1=0;\
            if (Y2>255)   Y2=255;\
            else if (Y2<0)Y2=0;\
        }\
        acc+= acc + g[Y1+d128[(i+0)&7]];\
        acc+= acc + g[Y2+d128[(i+1)&7]];\
        if ((i&7)==6) {\
            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
            dest++;\
            acc= 0; /* start the next byte from a clean accumulator */\
        }\
    }
803
804
805
/*
 * Output stage shared by the C packed-pixel writers: switches on
 * c->dstFormat and stores one pixel pair per loop iteration.
 *
 * func           - macro taking (type, alpha) that opens the pixel-pair loop
 *                  and provides i2, Y1, Y2, the tables r/g/b and, when alpha
 *                  is non-zero, A1/A2; used for all RGB-like formats
 * func2          - loop opener providing i2, Y1, Y2, U, V; used for the
 *                  packed YUV 4:2:2 formats
 * func_g16       - loop opener providing i2, Y1, Y2; used for 16-bit gray
 * func_monoblack - complete statement sequence for the 1 bpp formats
 *
 * Each loop opened by func/func2/func_g16 is closed by the `}` in the
 * corresponding case. The alpha value is converted to uint32_t before being
 * shifted into the top byte, because shifting an int value >= 128 left by
 * 24 bits would overflow the signed type.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
    switch(c->dstFormat) {\
    case PIX_FMT_RGB48BE:\
    case PIX_FMT_RGB48LE:\
        /* every table value is written to both bytes of its component */\
        func(uint8_t,0)\
            ((uint8_t*)dest)[ 0]= r[Y1];\
            ((uint8_t*)dest)[ 1]= r[Y1];\
            ((uint8_t*)dest)[ 2]= g[Y1];\
            ((uint8_t*)dest)[ 3]= g[Y1];\
            ((uint8_t*)dest)[ 4]= b[Y1];\
            ((uint8_t*)dest)[ 5]= b[Y1];\
            ((uint8_t*)dest)[ 6]= r[Y2];\
            ((uint8_t*)dest)[ 7]= r[Y2];\
            ((uint8_t*)dest)[ 8]= g[Y2];\
            ((uint8_t*)dest)[ 9]= g[Y2];\
            ((uint8_t*)dest)[10]= b[Y2];\
            ((uint8_t*)dest)[11]= b[Y2];\
            dest+=12;\
        }\
        break;\
    case PIX_FMT_RGBA:\
    case PIX_FMT_BGRA:\
        /* alpha goes into bits 24..31 of the 32-bit word */\
        if (CONFIG_SMALL) {\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? ((uint32_t)A1<<24) : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? ((uint32_t)A2<<24) : 0);\
            }\
        } else {\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + ((uint32_t)A1<<24);\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + ((uint32_t)A2<<24);\
                }\
            } else {\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }\
        break;\
    case PIX_FMT_ARGB:\
    case PIX_FMT_ABGR:\
        /* alpha goes into bits 0..7 of the 32-bit word */\
        if (CONFIG_SMALL) {\
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
            func(uint32_t,needAlpha)\
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
            }\
        } else {\
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
                func(uint32_t,1)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
                }\
            } else {\
                func(uint32_t,0)\
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
                }\
            }\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t,0)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            /* 2x2 ordered dither, offsets chosen by line parity and pixel parity */\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            /* as above, but green uses the 8-level table with swapped columns */\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t,0)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            /* 8x8 ordered dither, row chosen by y&7, column by the pixel index */\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            /* two 4-bit pixels per byte: first pixel in the low nibble */\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t,0)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
    case PIX_FMT_MONOWHITE:\
        {\
            func_monoblack\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16BE:\
        /* most significant byte first */\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    case PIX_FMT_GRAY16LE:\
        /* least significant byte first */\
        func_g16\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
        }                \
        break;\
    }
993 cf7d1c1a Michael Niedermayer
994
995 7ac40327 Ramiro Polla
/**
 * Multi-tap vertical scale of one output line to a packed format.
 *
 * All the work is done by YSCALE_YUV_2_ANYRGB_C, instantiated with the
 * multi-tap ("X") building blocks; the parameters are consumed by name
 * inside those macros.
 */
static inline void yuv2packedXinC(SwsContext *c,
                                  const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
                                  const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
{
    int i; /* loop counter used by the macros below */

    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C,
                          YSCALE_YUV_2_PACKEDX_C(void,0),
                          YSCALE_YUV_2_GRAY16_C,
                          YSCALE_YUV_2_MONOX_C)
}
1002
1003 7ac40327 Ramiro Polla
static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1004
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1005
                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1006 f0faee4c Michael Niedermayer
{
1007
    int i;
1008
    int step= fmt_depth(c->dstFormat)/8;
1009 d616c8ae Michael Niedermayer
    int aidx= 3;
1010 f0faee4c Michael Niedermayer
1011 dd68318c Ramiro Polla
    switch(c->dstFormat) {
1012 f0faee4c Michael Niedermayer
    case PIX_FMT_ARGB:
1013
        dest++;
1014 a3398feb Cédric Schieli
        aidx= 0;
1015 f0faee4c Michael Niedermayer
    case PIX_FMT_RGB24:
1016 d616c8ae Michael Niedermayer
        aidx--;
1017 f0faee4c Michael Niedermayer
    case PIX_FMT_RGBA:
1018 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {
1019 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1020
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1021
                dest[aidx]= needAlpha ? A : 255;
1022
                dest[0]= R>>22;
1023
                dest[1]= G>>22;
1024
                dest[2]= B>>22;
1025
                dest+= step;
1026
            }
1027 dd68318c Ramiro Polla
        } else {
1028
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1029 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1030
                    dest[aidx]= A;
1031
                    dest[0]= R>>22;
1032
                    dest[1]= G>>22;
1033
                    dest[2]= B>>22;
1034
                    dest+= step;
1035
                }
1036 dd68318c Ramiro Polla
            } else {
1037 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1038
                    dest[aidx]= 255;
1039
                    dest[0]= R>>22;
1040
                    dest[1]= G>>22;
1041
                    dest[2]= B>>22;
1042
                    dest+= step;
1043
                }
1044
            }
1045 f0faee4c Michael Niedermayer
        }
1046
        break;
1047
    case PIX_FMT_ABGR:
1048
        dest++;
1049 a3398feb Cédric Schieli
        aidx= 0;
1050 f0faee4c Michael Niedermayer
    case PIX_FMT_BGR24:
1051 d616c8ae Michael Niedermayer
        aidx--;
1052 f0faee4c Michael Niedermayer
    case PIX_FMT_BGRA:
1053 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {
1054 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1055
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1056
                dest[aidx]= needAlpha ? A : 255;
1057
                dest[0]= B>>22;
1058
                dest[1]= G>>22;
1059
                dest[2]= R>>22;
1060
                dest+= step;
1061
            }
1062 dd68318c Ramiro Polla
        } else {
1063
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1064 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1065
                    dest[aidx]= A;
1066
                    dest[0]= B>>22;
1067
                    dest[1]= G>>22;
1068
                    dest[2]= R>>22;
1069
                    dest+= step;
1070
                }
1071 dd68318c Ramiro Polla
            } else {
1072 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1073
                    dest[aidx]= 255;
1074
                    dest[0]= B>>22;
1075
                    dest[1]= G>>22;
1076
                    dest[2]= R>>22;
1077
                    dest+= step;
1078
                }
1079
            }
1080 f0faee4c Michael Niedermayer
        }
1081
        break;
1082
    default:
1083
        assert(0);
1084
    }
1085
}
1086 e3d2500f Michael Niedermayer
1087 dd68318c Ramiro Polla
/**
 * Fill a rectangle of a plane with a constant byte.
 *
 * @param plane  start of the plane
 * @param stride distance in bytes between the starts of two rows
 * @param width  number of bytes set in each row
 * @param height number of rows to fill
 * @param y      index of the first row to fill
 * @param val    byte value to store
 */
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
{
    uint8_t *row  = plane + stride*y;
    int rowsLeft  = height;

    while (rowsLeft-- > 0) {
        memset(row, val, width);
        row += stride;
    }
}
1096
1097 a3e35e28 Ramiro Polla
static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width,
1098
                            uint32_t *unused)
1099 e8417235 Kostya Shishkov
{
1100
    int i;
1101
    for (i = 0; i < width; i++) {
1102
        int r = src[i*6+0];
1103
        int g = src[i*6+2];
1104
        int b = src[i*6+4];
1105
1106
        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1107
    }
1108
}
1109
1110
static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
1111 efc034cc Ramiro Polla
                             const uint8_t *src1, const uint8_t *src2,
1112
                             int width, uint32_t *unused)
1113 e8417235 Kostya Shishkov
{
1114
    int i;
1115
    assert(src1==src2);
1116
    for (i = 0; i < width; i++) {
1117
        int r = src1[6*i + 0];
1118
        int g = src1[6*i + 2];
1119
        int b = src1[6*i + 4];
1120
1121
        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1122
        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1123
    }
1124
}
1125
1126
static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
1127 efc034cc Ramiro Polla
                                  const uint8_t *src1, const uint8_t *src2,
1128
                                  int width, uint32_t *unused)
1129 e8417235 Kostya Shishkov
{
1130
    int i;
1131
    assert(src1==src2);
1132
    for (i = 0; i < width; i++) {
1133
        int r= src1[12*i + 0] + src1[12*i + 6];
1134
        int g= src1[12*i + 2] + src1[12*i + 8];
1135
        int b= src1[12*i + 4] + src1[12*i + 10];
1136
1137
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1138
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1139
    }
1140
}
1141
1142 80704c47 Kostya Shishkov
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
1143
static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
1144
{\
1145
    int i;\
1146 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {\
1147 80704c47 Kostya Shishkov
        int b= (((const type*)src)[i]>>shb)&maskb;\
1148
        int g= (((const type*)src)[i]>>shg)&maskg;\
1149
        int r= (((const type*)src)[i]>>shr)&maskr;\
1150
\
1151
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
1152
    }\
1153
}
1154
1155
BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1156
BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
1157
BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
1158
BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
1159
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1160
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1161
1162 dd68318c Ramiro Polla
/**
 * Extract the alpha plane from a line of 4-byte pixels whose first byte is
 * the alpha value.
 *
 * @param dst    output, one byte per pixel
 * @param src    input line, 4 bytes per pixel
 * @param width  number of pixels
 * @param unused not referenced
 */
static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    const uint8_t *alpha = src;
    int i;

    for (i = 0; i < width; i++, alpha += 4)
        dst[i] = *alpha;
}
1169
1170
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
1171
static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
1172
{\
1173
    int i;\
1174 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {\
1175 80704c47 Kostya Shishkov
        int b= (((const type*)src)[i]&maskb)>>shb;\
1176
        int g= (((const type*)src)[i]&maskg)>>shg;\
1177
        int r= (((const type*)src)[i]&maskr)>>shr;\
1178
\
1179
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
1180
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
1181
    }\
1182
}\
1183
static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
1184
{\
1185
    int i;\
1186 dd68318c Ramiro Polla
    for (i=0; i<width; i++) {\
1187 80704c47 Kostya Shishkov
        int pix0= ((const type*)src)[2*i+0];\
1188
        int pix1= ((const type*)src)[2*i+1];\
1189
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
1190
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
1191
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
1192
        g&= maskg|(2*maskg);\
1193
\
1194
        g>>=shg;\
1195
\
1196
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
1197
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
1198
    }\
1199
}
1200
1201
BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1202
BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
1203
BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
1204
BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
1205
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
1206
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
1207
1208
/**
 * Convert a line of palette indices to 8-bit luma by taking the lowest
 * byte of each palette entry.
 *
 * @param dst   output, one byte per pixel
 * @param src   input line, one palette index per pixel
 * @param width number of pixels
 * @param pal   palette, indexed by the source byte
 */
static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int i;

    for (i = 0; i < width; i++)
        dst[i] = pal[src[i]] & 0xFF;
}
1217
1218
/**
 * Convert a line of palette indices to 8-bit U and V: U is bits 8..15 and
 * V is bits 16..23 of the palette entry.
 *
 * @param dstU  output U samples
 * @param dstV  output V samples
 * @param src1  input line, one palette index per pixel
 * @param src2  must be equal to src1 (asserted)
 * @param width number of pixels
 * @param pal   palette, indexed by the source byte
 */
static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
                           const uint8_t *src1, const uint8_t *src2,
                           long width, uint32_t *pal)
{
    int i;

    assert(src1 == src2);
    for (i = 0; i < width; i++) {
        const int entry = pal[src1[i]];

        dstU[i] = entry >> 8;
        dstV[i] = entry >> 16;
    }
}
1231
1232
/**
 * Expand a 1 bit per pixel line to 8-bit luma, a set bit giving 0 and a
 * cleared bit giving 255. Bits are consumed most significant first.
 *
 * All `width` pixels are produced: when width is not a multiple of 8 the
 * remaining pixels are taken from the leading bits of one extra source byte.
 *
 * @param dst    output, one byte per pixel
 * @param src    input line, 8 pixels per byte
 * @param width  number of pixels
 * @param unused not referenced
 */
static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    int i, j;
    /* pixels left over after the last complete source byte */
    const int tail = width > 0 ? (int)(width & 7) : 0;

    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (tail) {
        int d= ~src[i];
        for(j=0; j<tail; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1241
1242
/**
 * Expand a 1 bit per pixel line to 8-bit luma, a set bit giving 255 and a
 * cleared bit giving 0. Bits are consumed most significant first.
 *
 * All `width` pixels are produced: when width is not a multiple of 8 the
 * remaining pixels are taken from the leading bits of one extra source byte.
 *
 * @param dst    output, one byte per pixel
 * @param src    input line, 8 pixels per byte
 * @param width  number of pixels
 * @param unused not referenced
 */
static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    int i, j;
    /* pixels left over after the last complete source byte */
    const int tail = width > 0 ? (int)(width & 7) : 0;

    for (i=0; i<width/8; i++) {
        int d= src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
    if (tail) {
        int d= src[i];
        for(j=0; j<tail; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}
1251
1252
1253 dd409025 Ramiro Polla
/*
 * Selection and instantiation of the scaler template.
 *
 * swscale_template.c is included once per enabled CPU flavour. Before each
 * inclusion RENAME() is set to append a flavour suffix to every templated
 * symbol and the COMPILE_TEMPLATE_* switches are set to describe the flavour.
 */
//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one

/* ---- which flavours get compiled ---- */

//Plain C versions
#if CONFIG_RUNTIME_CPUDETECT || ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC)
#define COMPILE_C
#endif

#if ARCH_PPC && (HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT)
#define COMPILE_ALTIVEC
#endif //ARCH_PPC

/* every x86 SIMD flavour additionally requires CONFIG_GPL */
#if ARCH_X86 && CONFIG_GPL

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif

#endif //ARCH_X86 && CONFIG_GPL

/* ---- defaults seen by the plain C instantiation ---- */
#define COMPILE_TEMPLATE_MMX 0
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_ALTIVEC 0

#if defined(COMPILE_C)
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#if defined(COMPILE_ALTIVEC)
#undef RENAME
#undef COMPILE_TEMPLATE_ALTIVEC
#define COMPILE_TEMPLATE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif

#if ARCH_X86

//MMX versions
#if defined(COMPILE_MMX)
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#if defined(COMPILE_MMX2)
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 1
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#if defined(COMPILE_3DNOW)
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86
1340 7630f2e0 Michael Niedermayer
1341 a86c461c Michael Niedermayer
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is a + b*dist + c*dist^2 + d*dist^3 (evaluated
 * in Horner form). For larger distances the coefficients are transformed
 * for the next unit-length piece and dist is reduced by 1.0, repeatedly,
 * until dist <= 1.0.
 *
 * @param a,b,c,d coefficients of the first piece
 * @param dist    distance at which to evaluate
 * @return value of the piecewise polynomial
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (!(dist <= 1.0)) {
        /* coefficients of the following piece, derived from the current ones */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a     = 0.0;
        b     = nextB;
        c     = nextC;
        d     = nextD;
        dist  = dist - 1.0;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
1351 6c7506de Michael Niedermayer
1352 bca11e75 Michael Niedermayer
/**
 * Build the per-output-sample filter coefficients and source positions for
 * one scaling direction.
 *
 * The coefficients are first computed in 64-bit fixed point (unity == 1<<54),
 * optionally convolved with srcFilter, trimmed of near-zero taps, padded to a
 * multiple of filterAlign, corrected at the image borders and finally
 * normalized so that every row sums to (approximately) `one`.
 *
 * @param outFilter     receives a newly allocated array of
 *                      (*outFilterSize)*(dstW+1) int16_t coefficients
 * @param filterPos     receives a newly allocated array of dstW+1 int16_t
 *                      source start positions
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per destination sample; values within 10
 *                      of 0x10000 are treated as "unscaled"
 * @param srcW          source length
 * @param dstW          destination length
 * @param filterAlign   tap count is rounded up to a multiple of this
 *                      (assumed to be a power of two by the mask arithmetic)
 * @param one           value each row of coefficients is normalized to
 * @param flags         SWS_* scaler selection and CPU capability flags
 * @param srcFilter     optional vector convolved with the scaling filter
 * @param dstFilter     only its length is used to size the work buffer
 *                      (see FIXME below)
 * @param param         scaler tuning parameters, SWS_PARAM_DEFAULT selects
 *                      the built-in default
 * @return 0 on success, -1 on failure. On failure *filterPos / *outFilter may
 *         already have been allocated; they are not released here
 *         (NOTE(review): presumably the caller frees them -- confirm).
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int64_t *filter=NULL;
    int64_t *filter2=NULL;
    const int64_t fone= 1LL<<54;
    int ret= -1;
#if ARCH_X86
    if (flags & SWS_CPU_CAPS_MMX)
        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // NOTE: the +1 is for the MMX scaler which reads over the end
    FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+1)*sizeof(int16_t), fail);

    if (FFABS(xInc - 0x10000) <10) { // unscaled
        filterSize= 1;
        FF_ALLOCZ_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail);

        for (i=0; i<dstW; i++) {
            filter[i*filterSize]= fone;
            (*filterPos)[i]=i;
        }

    } else if (flags&SWS_POINT) { // lame looking point sampling mode
        int xDstInSrc;
        filterSize= 1;
        FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= fone;
            xDstInSrc+= xInc;
        }
    } else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) { // bilinear upscale
        int xDstInSrc;
        filterSize= 2;
        FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //bilinear upscale / linear interpolate / area averaging
            for (j=0; j<filterSize; j++) {
                int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    } else {
        int xDstInSrc;
        int sizeFactor;

        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
        else if (flags&SWS_X)            sizeFactor=  8;
        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
        else {
            sizeFactor= 0; //GCC warning killer
            assert(0);
        }

        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;

        if (filterSize > srcW-2) filterSize=srcW-2;
        // For srcW <= 2 the clamp above yields a size <= 0; keep at least one
        // tap so the allocation size and the filterSize>0 invariant hold.
        if (filterSize < 1)      filterSize= 1;

        FF_ALLOC_OR_GOTO(NULL, filter, dstW*sizeof(*filter)*filterSize, fail);

        xDstInSrc= xInc - 0x10000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++) {
                // distance between this tap and the sample centre, 2.30 fixed point
                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
                double floatd;
                int64_t coeff;

                if (xInc > 1<<16)
                    d= d*dstW/srcW;
                floatd= d * (1.0/(1<<30));

                if (flags & SWS_BICUBIC) {
                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
                    int64_t dd = ( d*d)>>30;
                    int64_t ddd= (dd*d)>>30;

                    if      (d < 1LL<<30)
                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
                    else if (d < 1LL<<31)
                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
                    else
                        coeff=0.0;
                    coeff *= fone>>(30+24);
                }
                else if (flags & SWS_X) {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
                    double c;

                    if (floatd<1.0)
                        c = cos(floatd*PI);
                    else
                        c=-1.0;
                    if (c<0.0)      c= -pow(-c, A);
                    else            c=  pow( c, A);
                    coeff= (c*0.5 + 0.5)*fone;
                } else if (flags & SWS_AREA) {
                    int64_t d2= d - (1<<29);
                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
                    else coeff=0.0;
                    coeff *= fone>>(30+16);
                } else if (flags & SWS_GAUSS) {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
                } else if (flags & SWS_SINC) {
                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
                } else if (flags & SWS_LANCZOS) {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
                    if (floatd>p) coeff=0;
                } else if (flags & SWS_BILINEAR) {
                    coeff= (1<<30) - d;
                    if (coeff<0) coeff=0;
                    coeff *= fone >> 30;
                } else if (flags & SWS_SPLINE) {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
                } else {
                    coeff= 0.0; //GCC warning killer
                    assert(0);
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= 2*xInc;
        }
    }

    /* apply src & dst Filter to filter -> filter2
       av_free(filter);
    */
    assert(filterSize>0);
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    assert(filter2Size>0);
    FF_ALLOCZ_OR_GOTO(NULL, filter2, filter2Size*dstW*sizeof(*filter2), fail);

    for (i=0; i<dstW; i++) {
        int j, k;

        if(srcFilter) {
            for (k=0; k<srcFilter->length; k++) {
                for (j=0; j<filterSize; j++)
                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
            }
        } else {
            for (j=0; j<filterSize; j++)
                filter2[i*filter2Size + j]= filter[i*filterSize + j];
        }
        //FIXME dstFilter

        // keep the widened filter centred on the same source position
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
    }
    av_freep(&filter);

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--) {
        int min= filter2Size;
        int j;
        int64_t cutOff=0.0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++) {
            int k;
            // always element 0: the row is shifted left on every iteration
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // move filter coefficients left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0;
            (*filterPos)[i]++;
        }

        cutOff=0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--) {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // We really don't want to waste our time
        // doing useless computation, so fall back on
        // the scalar C code for very small filters.
        // Vectorizing is worth it only if you have a
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    assert(minFilterSize > 0);
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    assert(filterSize > 0);
    filter= av_malloc(filterSize*dstW*sizeof(*filter));
    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
        goto fail;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++) {
        int j;

        for (j=0; j<filterSize; j++) {
            if (j>=filter2Size) filter[i*filterSize + j]= 0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
            if((flags & SWS_BITEXACT) && j>=minFilterSize)
                filter[i*filterSize + j]= 0;
        }
    }


    //FIXME try to align filterPos if possible

    //fix borders
    for (i=0; i<dstW; i++) {
        int j;
        if ((*filterPos)[i] < 0) {
            // move filter coefficients left to compensate for filterPos
            for (j=1; j<filterSize; j++) {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW) {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // move filter coefficients right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--) {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMX scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    FF_ALLOCZ_OR_GOTO(NULL, *outFilter, *outFilterSize*(dstW+1)*sizeof(int16_t), fail);

    /* normalize & store in outFilter */
    for (i=0; i<dstW; i++) {
        int j;
        int64_t error=0;
        int64_t sum=0;

        for (j=0; j<filterSize; j++) {
            sum+= filter[i*filterSize + j];
        }
        sum= (sum + one/2)/ one;
        if (!sum) {
            // an all-zero (or cancelling) row would otherwise divide by zero below
            av_log(NULL, AV_LOG_WARNING, "SwScaler: zero vector in scaling\n");
            sum= 1;
        }
        for (j=0; j<*outFilterSize; j++) {
            // carry the rounding error into the next tap so the row sum is preserved
            int64_t v= filter[i*filterSize + j] + error;
            int intV= ROUNDED_DIV(v, sum);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error= v - intV*sum;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++) {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret=0;
fail:
    av_free(filter);
    av_free(filter2);
    return ret;
}
1689 31190492 Arpi
1690 17c613ef Uoti Urpala
#ifdef COMPILE_MMX2
/**
 * Generate (or just measure) the runtime MMX2 horizontal scaler code.
 *
 * Two template code fragments are embedded in this function as inline asm
 * that is jumped over; the asm only reports each fragment's start address,
 * its length and the byte offsets of the two pshufw immediates. One fragment
 * per group of four output pixels is then copied into filterCode and its
 * immediates are patched.
 *
 * @param dstW       destination width
 * @param xInc       source step per destination pixel (16.16 fixed point,
 *                   as implied by the >>16 / &0xFFFF arithmetic)
 * @param filterCode buffer receiving the generated code, or NULL to only
 *                   compute the required size
 * @param filter     receives the per-pixel weights (written only when
 *                   filterCode is non-NULL)
 * @param filterPos  receives source positions (written only when filterCode
 *                   is non-NULL)
 * @param numSplits  code is generated for dstW/numSplits output pixels
 * @return number of code bytes needed, including the trailing RET
 */
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
    uint8_t *fragmentA;
    x86_reg imm8OfPShufW1A;
    x86_reg imm8OfPShufW2A;
    x86_reg fragmentLengthA;
    uint8_t *fragmentB;
    x86_reg imm8OfPShufW1B;
    x86_reg imm8OfPShufW2B;
    x86_reg fragmentLengthB;
    int fragmentPos;

    int xpos, i;

    // create an optimized horizontal scaling routine
    /* This scaler is made of runtime-generated MMX2 code using specially
     * tuned pshufw instructions. For every four output pixels, if four
     * input pixels are enough for the fast bilinear scaling, then a chunk
     * of fragmentB is used. If five input pixels are needed, then a chunk
     * of fragmentA is used.
     */

    // fragment A: loads from offset 0 and offset 1 of the source
    __asm__ volatile(
        "jmp                         9f                 \n\t"
    // Begin
        "0:                                             \n\t"
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
        "punpcklbw                %%mm7, %%mm1          \n\t"
        "punpcklbw                %%mm7, %%mm0          \n\t"
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
        "1:                                             \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
        "2:                                             \n\t"
        "psubw                    %%mm1, %%mm0          \n\t"
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
        "pmullw                   %%mm3, %%mm0          \n\t"
        "psllw                       $7, %%mm1          \n\t"
        "paddw                    %%mm1, %%mm0          \n\t"

        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add                         $8, %%"REG_a"      \n\t"
    // End
        "9:                                             \n\t"
        // %0 = fragment start; %1/%2 = offset of the byte just before labels
        // 1/2 (the pshufw immediates); %3 = fragment length
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
        "dec                         %1                 \n\t"
        "dec                         %2                 \n\t"
        "sub                         %0, %1             \n\t"
        "sub                         %0, %2             \n\t"
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
        "sub                         %0, %3             \n\t"


        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
        "=r" (fragmentLengthA)
    );

    // fragment B: a single source load feeds both pshufw instructions
    __asm__ volatile(
        "jmp                         9f                 \n\t"
    // Begin
        "0:                                             \n\t"
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
        "punpcklbw                %%mm7, %%mm0          \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
        "1:                                             \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
        "2:                                             \n\t"
        "psubw                    %%mm1, %%mm0          \n\t"
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
        "pmullw                   %%mm3, %%mm0          \n\t"
        "psllw                       $7, %%mm1          \n\t"
        "paddw                    %%mm1, %%mm0          \n\t"

        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add                         $8, %%"REG_a"      \n\t"
    // End
        "9:                                             \n\t"
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
        "dec                         %1                 \n\t"
        "dec                         %2                 \n\t"
        "sub                         %0, %1             \n\t"
        "sub                         %0, %2             \n\t"
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
        "sub                         %0, %3             \n\t"


        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
        "=r" (fragmentLengthB)
    );

    xpos        = 0; //lumXInc/2 - 0x8000; // difference between pixel centers
    fragmentPos = 0;

    for (i=0; i<dstW/numSplits; i++, xpos+=xInc) {
        int xx, a, b, c, d, inc, maxShift, shift;
        uint8_t *fragment;
        x86_reg imm8OfPShufW1, imm8OfPShufW2, fragmentLength;

        // one code fragment covers four output pixels
        if ((i&3) != 0)
            continue;

        xx = xpos>>16;
        a  = 0;
        b  = ((xpos+xInc  )>>16) - xx;
        c  = ((xpos+xInc*2)>>16) - xx;
        d  = ((xpos+xInc*3)>>16) - xx;

        // inc is 1 when the four-input fragment B is selected, 0 for fragment A
        inc = (d+1<4);
        if (inc) {
            fragment       = fragmentB;
            imm8OfPShufW1  = imm8OfPShufW1B;
            imm8OfPShufW2  = imm8OfPShufW2B;
            fragmentLength = fragmentLengthB;
        } else {
            fragment       = fragmentA;
            imm8OfPShufW1  = imm8OfPShufW1A;
            imm8OfPShufW2  = imm8OfPShufW2A;
            fragmentLength = fragmentLengthA;
        }
        maxShift = 3-(d+inc);
        shift    = 0;

        if (filterCode) {
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
            filterPos[i/2]= xx;

            memcpy(filterCode + fragmentPos, fragment, fragmentLength);

            // patch the two pshufw immediates of the copied fragment
            filterCode[fragmentPos + imm8OfPShufW1]=
                (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
            filterCode[fragmentPos + imm8OfPShufW2]=
                a | (b<<2) | (c<<4) | (d<<6);

            if (i+4-inc>=dstW) shift=maxShift; //avoid overread
            else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align

            if (shift && i>=shift) {
                filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
                filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
                filterPos[i/2]-=shift;
            }
        }

        fragmentPos+= fragmentLength;

        // keep the code terminated; overwritten by the next fragment, if any
        if (filterCode)
            filterCode[fragmentPos]= RET;
    }
    if (filterCode)
        filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part

    return fragmentPos + 1;
}
#endif /* COMPILE_MMX2 */
1849 28bf81c9 Michael Niedermayer
1850 40fa5140 Ramiro Polla
/**
 * Select the swScale implementation for a context and run its init hook.
 *
 * With CONFIG_RUNTIME_CPUDETECT the choice is made from c->flags at run
 * time; otherwise it is fixed at compile time by the COMPILE_TEMPLATE_*
 * macros. In every configuration the plain C version is the fallback.
 *
 * @param c scaler context; its flags are read and it is passed to the
 *          chosen sws_init_swScale_* function
 * @return the selected swScale function
 */
static SwsFunc getSwsFunc(SwsContext *c)
{
#if CONFIG_RUNTIME_CPUDETECT
    int flags = c->flags;

#if ARCH_X86 && CONFIG_GPL
    // ordered per speed fastest first
    if (flags & SWS_CPU_CAPS_MMX2) {
        sws_init_swScale_MMX2(c);
        return swScale_MMX2;
    }
    if (flags & SWS_CPU_CAPS_3DNOW) {
        sws_init_swScale_3DNow(c);
        return swScale_3DNow;
    }
    if (flags & SWS_CPU_CAPS_MMX) {
        sws_init_swScale_MMX(c);
        return swScale_MMX;
    }
#elif ARCH_PPC
    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        sws_init_swScale_altivec(c);
        return swScale_altivec;
    }
#endif /* ARCH_X86 && CONFIG_GPL */
    // no matching accelerated version: plain C
    sws_init_swScale_C(c);
    return swScale_C;
#else //CONFIG_RUNTIME_CPUDETECT
#if   COMPILE_TEMPLATE_MMX2
    sws_init_swScale_MMX2(c);
    return swScale_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
    sws_init_swScale_3DNow(c);
    return swScale_3DNow;
#elif COMPILE_TEMPLATE_MMX
    sws_init_swScale_MMX(c);
    return swScale_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
    sws_init_swScale_altivec(c);
    return swScale_altivec;
#else
    sws_init_swScale_C(c);
    return swScale_C;
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}
1903 7630f2e0 Michael Niedermayer
1904 d4e24275 Michael Niedermayer
/**
 * Unscaled conversion of one slice from three separate planes to a luma
 * plane plus one interleaved chroma plane.
 *
 * Plane 0 is copied unchanged into dstParam[0]. Planes 1 and 2 are
 * interleaved into dstParam[1]: src[1] first when c->dstFormat is
 * PIX_FMT_NV12, src[2] first otherwise.
 *
 * @param c         scaler context (srcW and dstFormat are read)
 * @param src       source plane pointers
 * @param srcStride source line sizes, one per plane
 * @param srcSliceY first row of the slice
 * @param srcSliceH number of rows in the slice
 * @param dstParam  destination plane pointers
 * @param dstStride destination line sizes, one per plane
 * @return srcSliceH
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        /* identical positive strides: the slice is one contiguous run */
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else {
        int i;
        const uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++) {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* The interleaved rows live in plane 1, so both the start address and the
     * row pitch handed to interleaveBytes must come from dstStride[1]. */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[1]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[1]);

    return srcSliceH;
}
1929
1930 d4e24275 Michael Niedermayer
/**
 * Slice wrapper around yv12toyuy2(): feeds the three source planes to the
 * converter and writes into destination plane 0 starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const packedOut = dstParam[0] + dstStride[0]*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], packedOut,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
1939
1940 caeaabe7 Alex Beregszaszi
/**
 * Slice wrapper around yv12touyvy(): feeds the three source planes to the
 * converter and writes into destination plane 0 starting at row srcSliceY.
 *
 * @return srcSliceH
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const packedOut = dstParam[0] + dstStride[0]*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], packedOut,
               c->srcW, srcSliceH,
               srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
1949
1950 a6100f39 Baptiste Coudurier
/**
 * Slice wrapper around yuv422ptoyuy2(): feeds the three source planes to
 * the converter and writes into destination plane 0 starting at row
 * srcSliceY.
 *
 * @return srcSliceH
 */
static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const packedOut = dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptoyuy2(src[0], src[1], src[2], packedOut,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
1959
1960
/**
 * Slice wrapper around yuv422ptouyvy(): feeds the three source planes to
 * the converter and writes into destination plane 0 starting at row
 * srcSliceY.
 *
 * @return srcSliceH
 */
static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const packedOut = dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptouyvy(src[0], src[1], src[2], packedOut,
                  c->srcW, srcSliceH,
                  srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}
1969
1970 0411072e Michael Niedermayer
/**
 * Slice wrapper around yuyvtoyuv420(): reads packed source plane 0 and
 * writes destination planes 0..2. When a fourth destination plane is
 * present it is filled with 255 for the rows of this slice.
 *
 * @return srcSliceH
 */
static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* Chroma offsets are computed as (stride*row)/2, exactly as written. */
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY/2;

    yuyvtoyuv420(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    if (dstParam[3]) {
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    }

    return srcSliceH;
}
1984
1985
/**
 * Slice wrapper around yuyvtoyuv422(): reads packed source plane 0 and
 * writes destination planes 0..2, all three offset by srcSliceY rows.
 *
 * @return srcSliceH
 */
static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY;

    yuyvtoyuv422(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
1996
1997
/**
 * Slice wrapper around uyvytoyuv420(): reads packed source plane 0 and
 * writes destination planes 0..2. When a fourth destination plane is
 * present it is filled with 255 for the rows of this slice.
 *
 * @return srcSliceH
 */
static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    /* Chroma offsets are computed as (stride*row)/2, exactly as written. */
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY/2;

    uyvytoyuv420(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    if (dstParam[3]) {
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    }

    return srcSliceH;
}
2011
2012
/**
 * Slice wrapper around uyvytoyuv422(): reads packed source plane 0 and
 * writes destination planes 0..2, all three offset by srcSliceY rows.
 *
 * @return srcSliceH
 */
static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *const lumaOut = dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *const cbOut   = dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *const crOut   = dstParam[2] + dstStride[2]*srcSliceY;

    uyvytoyuv422(lumaOut, cbOut, crOut, src[0],
                 c->srcW, srcSliceH,
                 dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}
2023
2024 49004617 Vitor Sessak
/**
 * Unscaled paletted -> packed RGB/BGR (24 or 32 bit) slice converter.
 *
 * Selects palette8topacked32 or palette8topacked24 from the destination
 * format and applies it to each row of the slice using c->pal_rgb.
 * An unsupported source or destination format is reported via av_log();
 * if no converter could be selected the slice is left untouched instead
 * of calling through a NULL function pointer.
 *
 * @return srcSliceH
 */
static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
    uint8_t *srcPtr= src[0];

    if (!usePal(srcFormat))
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    switch(dstFormat) {
    case PIX_FMT_RGB32  :
    case PIX_FMT_BGR32  :
    case PIX_FMT_BGR32_1:
    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB24  :
    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
    }

    /* The error was already logged above; never call a NULL converter. */
    if (!conv)
        return srcSliceH;

    for (i=0; i<srcSliceH; i++) {
        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }

    return srcSliceH;
}
2059
2060 9990e426 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
2061 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2062 dd68318c Ramiro Polla
                          int srcSliceH, uint8_t* dst[], int dstStride[])
2063
{
2064 58e4b706 Carl Eugen Hoyos
    const enum PixelFormat srcFormat= c->srcFormat;
2065
    const enum PixelFormat dstFormat= c->dstFormat;
2066 221b804f Diego Biurrun
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
2067
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
2068
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
2069
    const int dstId= fmt_depth(dstFormat) >> 2;
2070
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
2071
2072
    /* BGR -> BGR */
2073
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
2074 dd68318c Ramiro Polla
       || (isRGB(srcFormat) && isRGB(dstFormat))) {
2075
        switch(srcId | (dstId<<4)) {
2076 221b804f Diego Biurrun
        case 0x34: conv= rgb16to15; break;
2077
        case 0x36: conv= rgb24to15; break;
2078
        case 0x38: conv= rgb32to15; break;
2079
        case 0x43: conv= rgb15to16; break;
2080
        case 0x46: conv= rgb24to16; break;
2081
        case 0x48: conv= rgb32to16; break;
2082
        case 0x63: conv= rgb15to24; break;
2083
        case 0x64: conv= rgb16to24; break;
2084
        case 0x68: conv= rgb32to24; break;
2085
        case 0x83: conv= rgb15to32; break;
2086
        case 0x84: conv= rgb16to32; break;
2087
        case 0x86: conv= rgb24to32; break;
2088 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2089 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
2090
        }
2091 dd68318c Ramiro Polla
    } else if (  (isBGR(srcFormat) && isRGB(dstFormat))
2092
             || (isRGB(srcFormat) && isBGR(dstFormat))) {
2093
        switch(srcId | (dstId<<4)) {
2094 221b804f Diego Biurrun
        case 0x33: conv= rgb15tobgr15; break;
2095
        case 0x34: conv= rgb16tobgr15; break;
2096
        case 0x36: conv= rgb24tobgr15; break;
2097
        case 0x38: conv= rgb32tobgr15; break;
2098
        case 0x43: conv= rgb15tobgr16; break;
2099
        case 0x44: conv= rgb16tobgr16; break;
2100
        case 0x46: conv= rgb24tobgr16; break;
2101
        case 0x48: conv= rgb32tobgr16; break;
2102
        case 0x63: conv= rgb15tobgr24; break;
2103
        case 0x64: conv= rgb16tobgr24; break;
2104
        case 0x66: conv= rgb24tobgr24; break;
2105
        case 0x68: conv= rgb32tobgr24; break;
2106
        case 0x83: conv= rgb15tobgr32; break;
2107
        case 0x84: conv= rgb16tobgr32; break;
2108
        case 0x86: conv= rgb24tobgr32; break;
2109
        case 0x88: conv= rgb32tobgr32; break;
2110 3f0bc115 Diego Biurrun
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2111 221b804f Diego Biurrun
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
2112
        }
2113 dd68318c Ramiro Polla
    } else {
2114 3f0bc115 Diego Biurrun
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
2115 221b804f Diego Biurrun
               sws_format_name(srcFormat), sws_format_name(dstFormat));
2116
    }
2117
2118 dd68318c Ramiro Polla
    if(conv) {
2119 9990e426 Michael Niedermayer
        uint8_t *srcPtr= src[0];
2120
        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
2121
            srcPtr += ALT32_CORR;
2122
2123 5efaf000 Peter Schlaile
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
2124 9990e426 Michael Niedermayer
            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
2125 dd68318c Ramiro Polla
        else {
2126 c4ca31d0 Benoit Fouet
            int i;
2127
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2128
2129 dd68318c Ramiro Polla
            for (i=0; i<srcSliceH; i++) {
2130 c4ca31d0 Benoit Fouet
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
2131
                srcPtr+= srcStride[0];
2132
                dstPtr+= dstStride[0];
2133
            }
2134 221b804f Diego Biurrun
        }
2135
    }
2136
    return srcSliceH;
2137 0d9f3d85 Arpi
}
2138
2139 d4e24275 Michael Niedermayer
/**
 * Slice wrapper around rgb24toyv12(): converts packed source plane 0 into
 * destination planes 0..2 (planes 1 and 2 are offset by srcSliceY>>1 rows).
 * When a fourth destination plane is present it is filled with 255.
 *
 * @return srcSliceH
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int chromaRow = srcSliceY >> 1;
    uint8_t *const lumaOut = dst[0] + srcSliceY * dstStride[0];
    uint8_t *const cbOut   = dst[1] + chromaRow * dstStride[1];
    uint8_t *const crOut   = dst[2] + chromaRow * dstStride[2];

    rgb24toyv12(src[0], lumaOut, cbOut, crOut,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);

    if (dst[3]) {
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    }

    return srcSliceH;
}
2154
2155 d4e24275 Michael Niedermayer
/**
 * Unscaled slice converter that copies plane 0 unchanged and runs both
 * chroma planes through planar2x().
 *
 * For PIX_FMT_YUV420P / PIX_FMT_YUVA420P destinations source plane 1 goes
 * to destination plane 1 and source plane 2 to destination plane 2; for
 * any other destination the two chroma planes are swapped. A fourth
 * destination plane, when present, is filled with 255.
 *
 * @return srcSliceH
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    uint8_t *const lumaOut = dst[0] + srcSliceY*dstStride[0];
    const int chromaRow    = srcSliceY >> 1;
    const int chromaLines  = srcSliceH >> 2;
    int firstDst, secondDst;

    /* copy Y */
    if (srcStride[0] == dstStride[0] && srcStride[0] > 0) {
        memcpy(lumaOut, src[0], srcStride[0]*srcSliceH);
    } else {
        const uint8_t *in = src[0];
        uint8_t *out      = lumaOut;
        int row;

        for (row = 0; row < srcSliceH; row++) {
            memcpy(out, in, c->srcW);
            in  += srcStride[0];
            out += dstStride[0];
        }
    }

    /* pick which destination plane receives source plane 1 and which
     * receives source plane 2 */
    if (c->dstFormat == PIX_FMT_YUV420P || c->dstFormat == PIX_FMT_YUVA420P) {
        firstDst  = 1;
        secondDst = 2;
    } else {
        firstDst  = 2;
        secondDst = 1;
    }

    planar2x(src[1], dst[firstDst] + dstStride[firstDst]*chromaRow, c->chrSrcW,
             chromaLines, srcStride[1], dstStride[firstDst]);
    planar2x(src[2], dst[secondDst] + dstStride[secondDst]*chromaRow, c->chrSrcW,
             chromaLines, srcStride[2], dstStride[secondDst]);

    if (dst[3]) {
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    }

    return srcSliceH;
}
2189
2190 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
2191 2d35ae56 Luca Barbato
static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2192 bc5a0444 Luca Barbato
                      int srcSliceH, uint8_t* dst[], int dstStride[])
2193
{
2194
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
2195
        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
2196 dd68318c Ramiro Polla
    else {
2197 bc5a0444 Luca Barbato
        int i;
2198
        uint8_t *srcPtr= src[0];
2199
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
2200
        int length=0;
2201 221b804f Diego Biurrun
2202 bc5a0444 Luca Barbato
        /* universal length finder */
2203
        while(length+c->srcW <= FFABS(dstStride[0])
2204
           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
2205 fcc402b1 Luca Barbato
        assert(length!=0);
2206 2d35ae56 Luca Barbato
2207 dd68318c Ramiro Polla
        for (i=0; i<srcSliceH; i++) {
2208 bc5a0444 Luca Barbato
            memcpy(dstPtr, srcPtr, length);
2209
            srcPtr+= srcStride[0];
2210
            dstPtr+= dstStride[0];
2211 221b804f Diego Biurrun
        }
2212 bc5a0444 Luca Barbato
    }
2213 2d35ae56 Luca Barbato
    return srcSliceH;
2214
}
2215 bc5a0444 Luca Barbato
2216 2d35ae56 Luca Barbato
/**
 * Unscaled plane-by-plane copy for up to four planes.
 *
 * Planes 0 and 3 use the full slice geometry; planes 1 and 2 use the
 * geometry shifted right by the destination chroma subsampling factors
 * (rounded up via the negate/shift/negate idiom).
 *
 * Per plane:
 *  - skipped if the destination plane is absent, or for plane 1 when
 *    destination plane 2 is absent;
 *  - filled with a constant (255 for plane 3, 128 otherwise) when the
 *    source plane is absent, or for plane 1 when source plane 2 is absent;
 *  - otherwise copied, converting between 8 and 16 bits per sample or
 *    byte-swapping 16-bit samples when source and destination differ.
 *
 * @return srcSliceH
 */
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int plane, i, j;
    for (plane=0; plane<4; plane++) {
        const int fullRes = (plane==0 || plane==3);
        int length= fullRes ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
        int y=      fullRes ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
        int height= fullRes ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
        uint8_t *srcPtr;
        uint8_t *dstPtr;

        if (!dst[plane]) continue;
        // ignore palette for GRAY8
        if (plane == 1 && !dst[2]) continue;

        if (!src[plane] || (plane == 1 && !src[2])) {
            if(is16BPS(c->dstFormat))
                length*=2;
            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
            continue;
        }

        /* Only form the row pointers once both planes are known to exist;
         * offsetting a NULL plane pointer is undefined behaviour. */
        srcPtr= src[plane];
        dstPtr= dst[plane] + dstStride[plane]*y;

        if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
            /* 16 -> 8 bit: keep one byte of each sample; for non-BE
             * sources that is the second byte of the pair */
            if (!isBE(c->srcFormat)) srcPtr++;
            for (i=0; i<height; i++) {
                for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
                srcPtr+= srcStride[plane];
                dstPtr+= dstStride[plane];
            }
        } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
            /* 8 -> 16 bit: replicate the byte into both halves */
            for (i=0; i<height; i++) {
                for (j=0; j<length; j++) {
                    dstPtr[ j<<1   ] = srcPtr[j];
                    dstPtr[(j<<1)+1] = srcPtr[j];
                }
                srcPtr+= srcStride[plane];
                dstPtr+= dstStride[plane];
            }
        } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
              && isBE(c->srcFormat) != isBE(c->dstFormat)) {
            /* 16 -> 16 bit with differing endianness: swap bytes */
            for (i=0; i<height; i++) {
                for (j=0; j<length; j++)
                    ((uint16_t*)dstPtr)[j] = bswap_16(((uint16_t*)srcPtr)[j]);
                srcPtr+= srcStride[plane];
                dstPtr+= dstStride[plane];
            }
        } else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0) {
            memcpy(dstPtr, srcPtr, height*dstStride[plane]);
        } else {
            if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
                length*=2;
            for (i=0; i<height; i++) {
                memcpy(dstPtr, srcPtr, length);
                srcPtr+= srcStride[plane];
                dstPtr+= dstStride[plane];
            }
        }
    }
    return srcSliceH;
}
2275 28bf81c9 Michael Niedermayer
2276 4884b9e5 Kostya Shishkov
2277 634116df Stefano Sabatini
static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
2278 dd68318c Ramiro Polla
{
2279 b3cf7cb1 Stefano Sabatini
    *h = av_pix_fmt_descriptors[format].log2_chroma_w;
2280
    *v = av_pix_fmt_descriptors[format].log2_chroma_h;
2281 c7a810cc Michael Niedermayer
}
2282
2283 dd68318c Ramiro Polla
/**
 * Converts a 48.16 fixed-point value to a saturated 16-bit integer.
 *
 * The value is rounded to nearest by adding 1<<15 before the shift, then
 * clamped: results below -0x7FFF yield 0x8000, results above 0x7FFF yield
 * 0x7FFF. In-range negative results are returned as their 16-bit
 * two's-complement bit pattern.
 *
 * @param f fixed-point input with 16 fractional bits
 * @return the rounded and saturated value as a 16-bit pattern
 */
static uint16_t roundToInt16(int64_t f)
{
    /* Keep the shifted value in 64 bits: narrowing to int before the range
     * checks would let large magnitudes wrap around and escape saturation. */
    const int64_t r = (f + (1<<15)) >> 16;

    if (r < -0x7FFF)
        return 0x8000;
    if (r >  0x7FFF)
        return 0x7FFF;
    return (uint16_t)r;
}
2290
2291 dd68318c Ramiro Polla
/**
 * Stores the colorspace parameters in the context and derives the
 * YUV->RGB coefficients from them.
 *
 * The tables, ranges, brightness, contrast and saturation are always
 * written to the context. If the destination format is YUV or gray the
 * function then returns -1 without touching the coefficients.
 *
 * @param inv_table  four source colorspace coefficients
 * @param srcRange   non-zero selects the branch that rescales the chroma
 *                   coefficients by 224/255; zero selects the branch that
 *                   rescales luma by 255/219 and applies a 16<<16 offset
 * @param table      four destination colorspace coefficients
 * @param dstRange   stored in the context
 * @param brightness 16.16 fixed point, per the shifts used below
 * @param contrast   16.16 fixed point, per the shifts used below
 * @param saturation 16.16 fixed point, per the shifts used below
 * @return 0 on success, -1 for YUV/gray destination formats
 */
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
{
    /* replicates a 16-bit value into all four 16-bit lanes of a 64-bit word */
    const uint64_t lanes = 0x0001000100010001ULL;
    int64_t crv =  inv_table[0];
    int64_t cbu =  inv_table[1];
    int64_t cgu = -inv_table[2];
    int64_t cgv = -inv_table[3];
    int64_t cy  = 1<<16;
    int64_t oy  = 0;

    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);

    c->brightness = brightness;
    c->contrast   = contrast;
    c->saturation = saturation;
    c->srcRange   = srcRange;
    c->dstRange   = dstRange;

    /* the settings above are recorded even when no coefficients are set up */
    if (isYUV(c->dstFormat) || isGray(c->dstFormat))
        return -1;

    c->uOffset = 0x0400040004000400LL;
    c->vOffset = 0x0400040004000400LL;

    if (srcRange) {
        crv = (crv*224) / 255;
        cbu = (cbu*224) / 255;
        cgu = (cgu*224) / 255;
        cgv = (cgv*224) / 255;
    } else {
        cy = (cy*255) / 219;
        oy = 16<<16;
    }

    /* apply contrast to luma, contrast*saturation to chroma */
    cy  = (cy *contrast             )>>16;
    crv = (crv*contrast * saturation)>>32;
    cbu = (cbu*contrast * saturation)>>32;
    cgu = (cgu*contrast * saturation)>>32;
    cgv = (cgv*contrast * saturation)>>32;

    oy -= 256*brightness;

    /* packed 4x16-bit coefficients */
    c->yCoeff  = roundToInt16(cy *8192) * lanes;
    c->vrCoeff = roundToInt16(crv*8192) * lanes;
    c->ubCoeff = roundToInt16(cbu*8192) * lanes;
    c->vgCoeff = roundToInt16(cgv*8192) * lanes;
    c->ugCoeff = roundToInt16(cgu*8192) * lanes;
    c->yOffset = roundToInt16(oy *   8) * lanes;

    /* scalar coefficients */
    c->yuv2rgb_y_coeff   = (int16_t)roundToInt16(cy <<13);
    c->yuv2rgb_y_offset  = (int16_t)roundToInt16(oy << 9);
    c->yuv2rgb_v2r_coeff = (int16_t)roundToInt16(crv<<13);
    c->yuv2rgb_v2g_coeff = (int16_t)roundToInt16(cgv<<13);
    c->yuv2rgb_u2g_coeff = (int16_t)roundToInt16(cgu<<13);
    c->yuv2rgb_u2b_coeff = (int16_t)roundToInt16(cbu<<13);

    ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
    //FIXME factorize

#ifdef COMPILE_ALTIVEC
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
        ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
#endif
    return 0;
}
2354
2355 dd68318c Ramiro Polla
/**
 * Reads back the colorspace parameters stored in the context.
 *
 * On success *inv_table and *table point at the context's own
 * srcColorspaceTable / dstColorspaceTable arrays (no copy is made), and
 * the remaining outputs receive the stored scalar settings.
 *
 * @return 0 on success, -1 (outputs untouched) when the destination
 *         format is YUV or gray
 */
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation)
{
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) {
        return -1;
    }

    /* tables are handed out by reference */
    *inv_table  = c->srcColorspaceTable;
    *table      = c->dstColorspaceTable;

    *srcRange   = c->srcRange;
    *dstRange   = c->dstRange;

    *brightness = c->brightness;
    *contrast   = c->contrast;
    *saturation = c->saturation;

    return 0;
}
2369
2370 13394e8c Aurelien Jacobs
static int handle_jpeg(enum PixelFormat *format)
2371 44cdb423 Luca Abeni
{
2372 221b804f Diego Biurrun
    switch (*format) {
2373 9b734d44 Ramiro Polla
    case PIX_FMT_YUVJ420P:
2374
        *format = PIX_FMT_YUV420P;
2375
        return 1;
2376
    case PIX_FMT_YUVJ422P:
2377
        *format = PIX_FMT_YUV422P;
2378
        return 1;
2379
    case PIX_FMT_YUVJ444P:
2380
        *format = PIX_FMT_YUV444P;
2381
        return 1;
2382
    case PIX_FMT_YUVJ440P:
2383
        *format = PIX_FMT_YUV440P;
2384
        return 1;
2385
    default:
2386
        return 0;
2387 221b804f Diego Biurrun
    }
2388 44cdb423 Luca Abeni
}
2389
2390 58e4b706 Carl Eugen Hoyos
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
2391 0020c54c Diego Biurrun
                           SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
2392
{
2393 221b804f Diego Biurrun
2394
    SwsContext *c;
2395
    int i;
2396
    int usesVFilter, usesHFilter;
2397
    int unscaled, needsDither;
2398
    int srcRange, dstRange;
2399
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2400 b63f641e Aurelien Jacobs
#if ARCH_X86
2401 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX)
2402 7ad6469e Diego Pettenò
        __asm__ volatile("emms\n\t"::: "memory");
2403 5cebb24b Michael Niedermayer
#endif
2404 516b1f82 Michael Niedermayer
2405 8b1a6441 Diego Biurrun
#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
2406 d3f3eea9 Marc Hoffman
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2407 94daf2e9 Ramiro Polla
#if   COMPILE_TEMPLATE_MMX2
2408 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2409 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_AMD3DNOW
2410 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2411 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_MMX
2412 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX;
2413 94daf2e9 Ramiro Polla
#elif COMPILE_TEMPLATE_ALTIVEC
2414 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_ALTIVEC;
2415 b63f641e Aurelien Jacobs
#elif ARCH_BFIN
2416 d3f3eea9 Marc Hoffman
    flags |= SWS_CPU_CAPS_BFIN;
2417 516b1f82 Michael Niedermayer
#endif
2418 10a7d216 Ramiro Polla
#endif /* CONFIG_RUNTIME_CPUDETECT */
2419 1b0a4572 Benoit Fouet
    if (!rgb15to16) sws_rgb2rgb_init(flags);
2420 221b804f Diego Biurrun
2421
    unscaled = (srcW == dstW && srcH == dstH);
2422
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2423
        && (fmt_depth(dstFormat))<24
2424
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2425
2426
    srcRange = handle_jpeg(&srcFormat);
2427
    dstRange = handle_jpeg(&dstFormat);
2428
2429 dd68318c Ramiro Polla
    if (!isSupportedIn(srcFormat)) {
2430 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
2431 221b804f Diego Biurrun
        return NULL;
2432
    }
2433 dd68318c Ramiro Polla
    if (!isSupportedOut(dstFormat)) {
2434 0d6fd5ec Stefano Sabatini
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
2435 221b804f Diego Biurrun
        return NULL;
2436
    }
2437
2438 010c00bc Michael Niedermayer
    i= flags & ( SWS_POINT
2439
                |SWS_AREA
2440 6afc7c19 Michael Niedermayer
                |SWS_BILINEAR
2441 010c00bc Michael Niedermayer
                |SWS_FAST_BILINEAR
2442
                |SWS_BICUBIC
2443
                |SWS_X
2444
                |SWS_GAUSS
2445
                |SWS_LANCZOS
2446
                |SWS_SINC
2447
                |SWS_SPLINE
2448
                |SWS_BICUBLIN);
2449 dd68318c Ramiro Polla
    if(!i || (i & (i-1))) {
2450 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
2451 010c00bc Michael Niedermayer
        return NULL;
2452
    }
2453
2454 221b804f Diego Biurrun
    /* sanity check */
2455 dd68318c Ramiro Polla
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) { //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
2456 221b804f Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2457
               srcW, srcH, dstW, dstH);
2458
        return NULL;
2459
    }
2460 dd68318c Ramiro Polla
    if(srcW > VOFW || dstW > VOFW) {
2461 f40c7dbb Diego Biurrun
        av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2462 8b2fce0d Michael Niedermayer
        return NULL;
2463
    }
2464 221b804f Diego Biurrun
2465
    if (!dstFilter) dstFilter= &dummyFilter;
2466
    if (!srcFilter) srcFilter= &dummyFilter;
2467
2468 9cf484d0 Ramiro Polla
    FF_ALLOCZ_OR_GOTO(NULL, c, sizeof(SwsContext), fail);
2469 221b804f Diego Biurrun
2470
    c->av_class = &sws_context_class;
2471
    c->srcW= srcW;
2472
    c->srcH= srcH;
2473
    c->dstW= dstW;
2474
    c->dstH= dstH;
2475
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2476
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2477
    c->flags= flags;
2478
    c->dstFormat= dstFormat;
2479
    c->srcFormat= srcFormat;
2480
    c->vRounder= 4* 0x0001000100010001ULL;
2481
2482
    usesHFilter= usesVFilter= 0;
2483 1b0a4572 Benoit Fouet
    if (dstFilter->lumV && dstFilter->lumV->length>1) usesVFilter=1;
2484
    if (dstFilter->lumH && dstFilter->lumH->length>1) usesHFilter=1;
2485
    if (dstFilter->chrV && dstFilter->chrV->length>1) usesVFilter=1;
2486
    if (dstFilter->chrH && dstFilter->chrH->length>1) usesHFilter=1;
2487
    if (srcFilter->lumV && srcFilter->lumV->length>1) usesVFilter=1;
2488
    if (srcFilter->lumH && srcFilter->lumH->length>1) usesHFilter=1;
2489
    if (srcFilter->chrV && srcFilter->chrV->length>1) usesVFilter=1;
2490
    if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2491 221b804f Diego Biurrun
2492
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2493
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2494
2495 f40c7dbb Diego Biurrun
    // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
2496 221b804f Diego Biurrun
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2497
2498
    // drop some chroma lines if the user wants it
2499
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2500
    c->chrSrcVSubSample+= c->vChrDrop;
2501
2502 f40c7dbb Diego Biurrun
    // drop every other pixel for chroma calculation unless user wants full chroma
2503 221b804f Diego Biurrun
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2504
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2505
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2506 dfb09bd1 Michael Niedermayer
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2507 2f60f629 Michael Niedermayer
      && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2508 221b804f Diego Biurrun
        c->chrSrcHSubSample=1;
2509
2510 dd68318c Ramiro Polla
    if (param) {
2511 221b804f Diego Biurrun
        c->param[0] = param[0];
2512
        c->param[1] = param[1];
2513 dd68318c Ramiro Polla
    } else {
2514 221b804f Diego Biurrun
        c->param[0] =
2515
        c->param[1] = SWS_PARAM_DEFAULT;
2516
    }
2517
2518
    // Note the -((-x)>>y) is so that we always round toward +inf.
2519
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2520
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2521
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2522
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2523
2524 fa58ba15 Kostya Shishkov
    sws_setColorspaceDetails(c, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT], srcRange, ff_yuv2rgb_coeffs[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2525 221b804f Diego Biurrun
2526 f40c7dbb Diego Biurrun
    /* unscaled special cases */
2527 dd68318c Ramiro Polla
    if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat))) {
2528 221b804f Diego Biurrun
        /* yv12_to_nv12 */
2529 dd68318c Ramiro Polla
        if ((srcFormat == PIX_FMT_YUV420P || srcFormat == PIX_FMT_YUVA420P) && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) {
2530 221b804f Diego Biurrun
            c->swScale= PlanarToNV12Wrapper;
2531
        }
2532
        /* yuv2bgr */
2533 2c897342 Cédric Schieli
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P || srcFormat==PIX_FMT_YUVA420P) && (isBGR(dstFormat) || isRGB(dstFormat))
2534 dd68318c Ramiro Polla
            && !(flags & SWS_ACCURATE_RND) && !(dstH&1)) {
2535 780daf2b Diego Biurrun
            c->swScale= ff_yuv2rgb_get_func_ptr(c);
2536 221b804f Diego Biurrun
        }
2537 6a4970ab Diego Biurrun
2538 dd68318c Ramiro Polla
        if (srcFormat==PIX_FMT_YUV410P && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_BITEXACT)) {
2539 221b804f Diego Biurrun
            c->swScale= yvu9toyv12Wrapper;
2540
        }
2541
2542
        /* bgr24toYV12 */
2543 6268f55b Cédric Schieli
        if (srcFormat==PIX_FMT_BGR24 && (dstFormat==PIX_FMT_YUV420P || dstFormat==PIX_FMT_YUVA420P) && !(flags & SWS_ACCURATE_RND))
2544 221b804f Diego Biurrun
            c->swScale= bgr24toyv12Wrapper;
2545
2546 f40c7dbb Diego Biurrun
        /* RGB/BGR -> RGB/BGR (no dither needed forms) */
2547 221b804f Diego Biurrun
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2548
           && (isBGR(dstFormat) || isRGB(dstFormat))
2549
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2550
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2551
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2552
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2553
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2554
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2555
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2556 ec1bca2a Michael Niedermayer
           && srcFormat != PIX_FMT_MONOWHITE && dstFormat != PIX_FMT_MONOWHITE
2557