ffmpeg / libswscale / swscale.c @ 0607b090

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 28bf81c9 Michael Niedermayer
/*
25 9990e426 Michael Niedermayer
  supported input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR32_1, BGR24, BGR16, BGR15, RGB32, RGB32_1, RGB24, Y8/Y800, YVU9/IF09, PAL8
26 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
28 6a4970ab Diego Biurrun

29 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
38 b935781b Michael Niedermayer
*/
39
40 6a4970ab Diego Biurrun
/*
41 a6f6b237 Diego Biurrun
tested special converters (most are actually tested, but I did not write them all down ...)
42 e09d12f4 Michael Niedermayer
 YV12 -> BGR16
43 b935781b Michael Niedermayer
 YV12 -> YV12
44 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
45 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
46 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
47 b935781b Michael Niedermayer

48
untested special converters
49 f40c7dbb Diego Biurrun
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54 ec22603f Michael Niedermayer
  BGR24 -> YV12
55 28bf81c9 Michael Niedermayer
*/
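/*
  Illustrative usage sketch (caller-side code; the variable names are
  assumptions and the exact const qualifiers on the sws_scale() arguments vary
  between libswscale versions).  Converting one YV12 (PIX_FMT_YUV420P) frame to
  BGR24 through the public API looks roughly like this:

      struct SwsContext *ctx = sws_getContext(srcW, srcH, PIX_FMT_YUV420P,
                                              dstW, dstH, PIX_FMT_BGR24,
                                              SWS_BILINEAR, NULL, NULL, NULL);
      if (ctx) {
          sws_scale(ctx, src, srcStride, 0, srcH, dst, dstStride);
          sws_freeContext(ctx);
      }

  When the dimensions match and the format pair is one of the unscaled special
  converters listed above, sws_scale() takes the corresponding fast path
  instead of the generic scale/convert code.
*/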
56
57 d63a2cb1 Michael Niedermayer
#define _SVID_SOURCE //needed for MAP_ANONYMOUS
58 d3f41512 Michael Niedermayer
#include <inttypes.h>
59 dda87e9f Pierre Lombard
#include <string.h>
60 077ea8a7 Michael Niedermayer
#include <math.h>
61 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
62 b2d374c9 Diego Biurrun
#include "config.h"
63 81b7c056 Michael Niedermayer
#include <assert.h>
64 b63f641e Aurelien Jacobs
#if HAVE_SYS_MMAN_H
65 38d5c282 Aurelien Jacobs
#include <sys/mman.h>
66 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69 38d5c282 Aurelien Jacobs
#endif
70 dd35beb2 Ramiro Polla
#if HAVE_VIRTUALALLOC
71
#define WIN32_LEAN_AND_MEAN
72
#include <windows.h>
73
#endif
74 d604bab9 Michael Niedermayer
#include "swscale.h"
75 5427e242 Michael Niedermayer
#include "swscale_internal.h"
76 37079906 Michael Niedermayer
#include "rgb2rgb.h"
77 52154148 Ramiro Polla
#include "libavutil/intreadwrite.h"
78 83da2c6f Diego Biurrun
#include "libavutil/x86_cpu.h"
79
#include "libavutil/bswap.h"
80 0d9f3d85 Arpi
81 b3e03fa7 Stefano Sabatini
unsigned swscale_version(void)
82
{
83
    return LIBSWSCALE_VERSION_INT;
84
}
85
86 541c4eb9 Michael Niedermayer
#undef MOVNTQ
87 7d7f78b5 Michael Niedermayer
#undef PAVGB
88 d3f41512 Michael Niedermayer
89 783e9cc9 Michael Niedermayer
//#undef HAVE_MMX2
90 f4406ec1 Diego Biurrun
//#define HAVE_AMD3DNOW
91 d3f41512 Michael Niedermayer
//#undef HAVE_MMX
92 783e9cc9 Michael Niedermayer
//#undef ARCH_X86
93 d604bab9 Michael Niedermayer
#define DITHER1XBPP
94 d3f41512 Michael Niedermayer
95 f40c7dbb Diego Biurrun
#define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
96 ac6a2e45 Michael Niedermayer
97 f40c7dbb Diego Biurrun
#define RET 0xC3 //near return opcode for x86
98 c1b0bfb4 Michael Niedermayer
99 28bf81c9 Michael Niedermayer
#ifdef M_PI
100
#define PI M_PI
101
#else
102
#define PI 3.14159265358979323846
103
#endif
104 c1b0bfb4 Michael Niedermayer
105 9d9de37d Ivo van Poorten
#define isSupportedIn(x)    (       \
106
           (x)==PIX_FMT_YUV420P     \
107 79973335 Aurelien Jacobs
        || (x)==PIX_FMT_YUVA420P    \
108 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_YUYV422     \
109
        || (x)==PIX_FMT_UYVY422     \
110 e8417235 Kostya Shishkov
        || (x)==PIX_FMT_RGB48BE     \
111
        || (x)==PIX_FMT_RGB48LE     \
112 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_RGB32       \
113 9990e426 Michael Niedermayer
        || (x)==PIX_FMT_RGB32_1     \
114 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_BGR24       \
115
        || (x)==PIX_FMT_BGR565      \
116
        || (x)==PIX_FMT_BGR555      \
117
        || (x)==PIX_FMT_BGR32       \
118 9990e426 Michael Niedermayer
        || (x)==PIX_FMT_BGR32_1     \
119 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_RGB24       \
120
        || (x)==PIX_FMT_RGB565      \
121
        || (x)==PIX_FMT_RGB555      \
122
        || (x)==PIX_FMT_GRAY8       \
123
        || (x)==PIX_FMT_YUV410P     \
124 6c80eb16 Michael Niedermayer
        || (x)==PIX_FMT_YUV440P     \
125 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_GRAY16BE    \
126
        || (x)==PIX_FMT_GRAY16LE    \
127
        || (x)==PIX_FMT_YUV444P     \
128
        || (x)==PIX_FMT_YUV422P     \
129
        || (x)==PIX_FMT_YUV411P     \
130
        || (x)==PIX_FMT_PAL8        \
131
        || (x)==PIX_FMT_BGR8        \
132
        || (x)==PIX_FMT_RGB8        \
133
        || (x)==PIX_FMT_BGR4_BYTE   \
134
        || (x)==PIX_FMT_RGB4_BYTE   \
135 9ba7fe6d Andreas Öman
        || (x)==PIX_FMT_YUV440P     \
136 3d05e078 Michael Niedermayer
        || (x)==PIX_FMT_MONOWHITE   \
137
        || (x)==PIX_FMT_MONOBLACK   \
138 de1275d5 Michael Niedermayer
        || (x)==PIX_FMT_YUV420PLE   \
139
        || (x)==PIX_FMT_YUV422PLE   \
140
        || (x)==PIX_FMT_YUV444PLE   \
141
        || (x)==PIX_FMT_YUV420PBE   \
142
        || (x)==PIX_FMT_YUV422PBE   \
143
        || (x)==PIX_FMT_YUV444PBE   \
144 9d9de37d Ivo van Poorten
    )
145
#define isSupportedOut(x)   (       \
146
           (x)==PIX_FMT_YUV420P     \
147 6268f55b Cédric Schieli
        || (x)==PIX_FMT_YUVA420P    \
148 9d9de37d Ivo van Poorten
        || (x)==PIX_FMT_YUYV422     \
149
        || (x)==PIX_FMT_UYVY422     \
150
        || (x)==PIX_FMT_YUV444P     \
151
        || (x)==PIX_FMT_YUV422P     \
152
        || (x)==PIX_FMT_YUV411P     \
153
        || isRGB(x)                 \
154
        || isBGR(x)                 \
155
        || (x)==PIX_FMT_NV12        \
156
        || (x)==PIX_FMT_NV21        \
157
        || (x)==PIX_FMT_GRAY16BE    \
158
        || (x)==PIX_FMT_GRAY16LE    \
159
        || (x)==PIX_FMT_GRAY8       \
160
        || (x)==PIX_FMT_YUV410P     \
161 6c80eb16 Michael Niedermayer
        || (x)==PIX_FMT_YUV440P     \
162 de1275d5 Michael Niedermayer
        || (x)==PIX_FMT_YUV420PLE   \
163
        || (x)==PIX_FMT_YUV422PLE   \
164
        || (x)==PIX_FMT_YUV444PLE   \
165
        || (x)==PIX_FMT_YUV420PBE   \
166
        || (x)==PIX_FMT_YUV422PBE   \
167
        || (x)==PIX_FMT_YUV444PBE   \
168 9d9de37d Ivo van Poorten
    )
169
#define isPacked(x)         (       \
170
           (x)==PIX_FMT_PAL8        \
171
        || (x)==PIX_FMT_YUYV422     \
172
        || (x)==PIX_FMT_UYVY422     \
173
        || isRGB(x)                 \
174
        || isBGR(x)                 \
175
    )
176 49004617 Vitor Sessak
#define usePal(x)           (       \
177
           (x)==PIX_FMT_PAL8        \
178
        || (x)==PIX_FMT_BGR4_BYTE   \
179
        || (x)==PIX_FMT_RGB4_BYTE   \
180
        || (x)==PIX_FMT_BGR8        \
181
        || (x)==PIX_FMT_RGB8        \
182
    )
183 6ff0ad6b Michael Niedermayer
184 6b79dbce Michael Niedermayer
#define RGB2YUV_SHIFT 15
185 7b5d7b9e Michael Niedermayer
#define BY ( (int)(0.114*219/255*(1<<RGB2YUV_SHIFT)+0.5))
186
#define BV (-(int)(0.081*224/255*(1<<RGB2YUV_SHIFT)+0.5))
187
#define BU ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
188
#define GY ( (int)(0.587*219/255*(1<<RGB2YUV_SHIFT)+0.5))
189
#define GV (-(int)(0.419*224/255*(1<<RGB2YUV_SHIFT)+0.5))
190
#define GU (-(int)(0.331*224/255*(1<<RGB2YUV_SHIFT)+0.5))
191
#define RY ( (int)(0.299*219/255*(1<<RGB2YUV_SHIFT)+0.5))
192
#define RV ( (int)(0.500*224/255*(1<<RGB2YUV_SHIFT)+0.5))
193
#define RU (-(int)(0.169*224/255*(1<<RGB2YUV_SHIFT)+0.5))
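/*
  Worked example of the fixed-point conversion above (RGB2YUV_SHIFT == 15):
      BY = (int)(0.114*219/255 * 32768 + 0.5) =  3208
      GY = (int)(0.587*219/255 * 32768 + 0.5) = 16519
      RY = (int)(0.299*219/255 * 32768 + 0.5) =  8414
  The 219/255 (and 224/255 for chroma) factors rescale full-range 8-bit RGB to
  the limited ranges Y=16..235 and U/V=16..240; the three luma coefficients sum
  to about 28141 = (219/255)<<15, so a full-white RGB pixel ends up at Y = 235
  once the +16 offset is added by the conversion functions further down
  (the 33<<(RGB2YUV_SHIFT-1) terms).
*/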
194 6c7506de Michael Niedermayer
195 fa58ba15 Kostya Shishkov
extern const int32_t ff_yuv2rgb_coeffs[8][4];
196 0481412a Michael Niedermayer
197 0f5d4aa8 Michael Niedermayer
static const double rgb2yuv_table[8][9]={
198
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
199
    {0.7152, 0.0722, 0.2126, -0.386, 0.5, -0.115, -0.454, -0.046, 0.5},
200
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
201
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
202
    {0.59  , 0.11  , 0.30  , -0.331, 0.5, -0.169, -0.421, -0.079, 0.5}, //FCC
203
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5},
204
    {0.587 , 0.114 , 0.299 , -0.331, 0.5, -0.169, -0.419, -0.081, 0.5}, //SMPTE 170M
205
    {0.701 , 0.087 , 0.212 , -0.384, 0.5, -0.116, -0.445, -0.055, 0.5}, //SMPTE 240M
206
};
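/*
  Each rgb2yuv_table row holds the coefficients in the order
  {gy, by, ry,  gu, bu, ru,  gv, bv, rv}: rows with 0.7152/0.0722/0.2126 are
  ITU-R BT.709, rows with 0.587/0.114/0.299 are BT.601 derived, and the
  commented rows are FCC and SMPTE 170M/240M, presumably using the same
  colorspace indexing as ff_yuv2rgb_coeffs declared above.
*/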
207
208 783e9cc9 Michael Niedermayer
/*
209
NOTES
210 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
211 31190492 Arpi

212 783e9cc9 Michael Niedermayer
TODO
213 bd7c6fd5 Diego Biurrun
more intelligent misalignment avoidance for the horizontal scaler
214 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
215 f40c7dbb Diego Biurrun
optimize C code (YV12 / minmax)
216
add support for packed pixel YUV input & output
217 6ff0ad6b Michael Niedermayer
add support for Y8 output
218 f40c7dbb Diego Biurrun
optimize BGR24 & BGR32
219 ff7ba856 Michael Niedermayer
add BGR4 output support
220 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
221 783e9cc9 Michael Niedermayer
*/
222 31190492 Arpi
223 b63f641e Aurelien Jacobs
#if ARCH_X86 && CONFIG_GPL
224 d334c7c2 Reimar Döffinger
DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
225
DECLARE_ASM_CONST(8, uint64_t, bFC)=       0xFCFCFCFCFCFCFCFCLL;
226
DECLARE_ASM_CONST(8, uint64_t, w10)=       0x0010001000100010LL;
227
DECLARE_ASM_CONST(8, uint64_t, w02)=       0x0002000200020002LL;
228
DECLARE_ASM_CONST(8, uint64_t, bm00001111)=0x00000000FFFFFFFFLL;
229
DECLARE_ASM_CONST(8, uint64_t, bm00000111)=0x0000000000FFFFFFLL;
230
DECLARE_ASM_CONST(8, uint64_t, bm11111000)=0xFFFFFFFFFF000000LL;
231
DECLARE_ASM_CONST(8, uint64_t, bm01010101)=0x00FF00FF00FF00FFLL;
232 d604bab9 Michael Niedermayer
233 0cb25594 Carl Eugen Hoyos
const DECLARE_ALIGNED(8, uint64_t, ff_dither4[2]) = {
234 221b804f Diego Biurrun
        0x0103010301030103LL,
235
        0x0200020002000200LL,};
236 d8fa3c54 Michael Niedermayer
237 0cb25594 Carl Eugen Hoyos
const DECLARE_ALIGNED(8, uint64_t, ff_dither8[2]) = {
238 221b804f Diego Biurrun
        0x0602060206020602LL,
239
        0x0004000400040004LL,};
240 d604bab9 Michael Niedermayer
241 d334c7c2 Reimar Döffinger
DECLARE_ASM_CONST(8, uint64_t, b16Mask)=   0x001F001F001F001FLL;
242
DECLARE_ASM_CONST(8, uint64_t, g16Mask)=   0x07E007E007E007E0LL;
243
DECLARE_ASM_CONST(8, uint64_t, r16Mask)=   0xF800F800F800F800LL;
244
DECLARE_ASM_CONST(8, uint64_t, b15Mask)=   0x001F001F001F001FLL;
245
DECLARE_ASM_CONST(8, uint64_t, g15Mask)=   0x03E003E003E003E0LL;
246
DECLARE_ASM_CONST(8, uint64_t, r15Mask)=   0x7C007C007C007C00LL;
247 d604bab9 Michael Niedermayer
248 5802683a Reimar Döffinger
DECLARE_ALIGNED(8, const uint64_t, ff_M24A)         = 0x00FF0000FF0000FFLL;
249
DECLARE_ALIGNED(8, const uint64_t, ff_M24B)         = 0xFF0000FF0000FF00LL;
250
DECLARE_ALIGNED(8, const uint64_t, ff_M24C)         = 0x0000FF0000FF0000LL;
251 99d2cb72 Michael Niedermayer
252 ac6a2e45 Michael Niedermayer
#ifdef FAST_BGR2YV12
253 5802683a Reimar Döffinger
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000000210041000DULL;
254
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000FFEEFFDC0038ULL;
255
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00000038FFD2FFF8ULL;
256 ac6a2e45 Michael Niedermayer
#else
257 5802683a Reimar Döffinger
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YCoeff)   = 0x000020E540830C8BULL;
258
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UCoeff)   = 0x0000ED0FDAC23831ULL;
259
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2VCoeff)   = 0x00003831D0E6F6EAULL;
260 69796008 Diego Biurrun
#endif /* FAST_BGR2YV12 */
261 5802683a Reimar Döffinger
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2YOffset)  = 0x1010101010101010ULL;
262
DECLARE_ALIGNED(8, const uint64_t, ff_bgr2UVOffset) = 0x8080808080808080ULL;
263
DECLARE_ALIGNED(8, const uint64_t, ff_w1111)        = 0x0001000100010001ULL;
264 dfb09bd1 Michael Niedermayer
265 b5c44b1a Diego Pettenò
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
266
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
267
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
268
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
269
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
270 dfb09bd1 Michael Niedermayer
271 b5c44b1a Diego Pettenò
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV[2][4]) = {
272 dfb09bd1 Michael Niedermayer
    {0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
273
    {0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
274
};
275
276 b5c44b1a Diego Pettenò
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
277 dfb09bd1 Michael Niedermayer
278 7a24ec50 Diego Biurrun
#endif /* ARCH_X86 && CONFIG_GPL */
279 783e9cc9 Michael Niedermayer
280
// clipping helper table for C implementations:
281
static unsigned char clip_table[768];
282
283 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
284 6a4970ab Diego Biurrun
285 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_4[2][8])={
286 45e18be8 Michael Niedermayer
{  1,   3,   1,   3,   1,   3,   1,   3, },
287
{  2,   0,   2,   0,   2,   0,   2,   0, },
288
};
289
290 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, static const uint8_t, dither_2x2_8[2][8])={
291 45e18be8 Michael Niedermayer
{  6,   2,   6,   2,   6,   2,   6,   2, },
292
{  0,   4,   0,   4,   0,   4,   0,   4, },
293
};
294
295 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_32[8][8])={
296 45e18be8 Michael Niedermayer
{ 17,   9,  23,  15,  16,   8,  22,  14, },
297
{  5,  29,   3,  27,   4,  28,   2,  26, },
298
{ 21,  13,  19,  11,  20,  12,  18,  10, },
299
{  0,  24,   6,  30,   1,  25,   7,  31, },
300
{ 16,   8,  22,  14,  17,   9,  23,  15, },
301
{  4,  28,   2,  26,   5,  29,   3,  27, },
302
{ 20,  12,  18,  10,  21,  13,  19,  11, },
303
{  1,  25,   7,  31,   0,  24,   6,  30, },
304
};
305
306 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_73[8][8])={
307 45e18be8 Michael Niedermayer
{  0,  55,  14,  68,   3,  58,  17,  72, },
308
{ 37,  18,  50,  32,  40,  22,  54,  35, },
309
{  9,  64,   5,  59,  13,  67,   8,  63, },
310
{ 46,  27,  41,  23,  49,  31,  44,  26, },
311
{  2,  57,  16,  71,   1,  56,  15,  70, },
312
{ 39,  21,  52,  34,  38,  19,  51,  33, },
313
{ 11,  66,   7,  62,  10,  65,   6,  60, },
314
{ 48,  30,  43,  25,  47,  29,  42,  24, },
315
};
316
317
#if 1
318 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
319 45e18be8 Michael Niedermayer
{117,  62, 158, 103, 113,  58, 155, 100, },
320
{ 34, 199,  21, 186,  31, 196,  17, 182, },
321
{144,  89, 131,  76, 141,  86, 127,  72, },
322
{  0, 165,  41, 206,  10, 175,  52, 217, },
323
{110,  55, 151,  96, 120,  65, 162, 107, },
324
{ 28, 193,  14, 179,  38, 203,  24, 189, },
325
{138,  83, 124,  69, 148,  93, 134,  79, },
326
{  7, 172,  48, 213,   3, 168,  45, 210, },
327
};
328
#elif 1
329
// tries to correct a gamma of 1.5
330 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
331 45e18be8 Michael Niedermayer
{  0, 143,  18, 200,   2, 156,  25, 215, },
332
{ 78,  28, 125,  64,  89,  36, 138,  74, },
333
{ 10, 180,   3, 161,  16, 195,   8, 175, },
334
{109,  51,  93,  38, 121,  60, 105,  47, },
335
{  1, 152,  23, 210,   0, 147,  20, 205, },
336
{ 85,  33, 134,  71,  81,  30, 130,  67, },
337
{ 14, 190,   6, 171,  12, 185,   5, 166, },
338
{117,  57, 101,  44, 113,  54,  97,  41, },
339
};
340
#elif 1
341
// tries to correct a gamma of 2.0
342 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
343 45e18be8 Michael Niedermayer
{  0, 124,   8, 193,   0, 140,  12, 213, },
344
{ 55,  14, 104,  42,  66,  19, 119,  52, },
345
{  3, 168,   1, 145,   6, 187,   3, 162, },
346
{ 86,  31,  70,  21,  99,  39,  82,  28, },
347
{  0, 134,  11, 206,   0, 129,   9, 200, },
348
{ 62,  17, 114,  48,  58,  16, 109,  45, },
349
{  5, 181,   2, 157,   4, 175,   1, 151, },
350
{ 95,  36,  78,  26,  90,  34,  74,  24, },
351
};
352
#else
353
// tries to correct a gamma of 2.5
354 92db6235 Pavel Pavlov
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220[8][8])={
355 45e18be8 Michael Niedermayer
{  0, 107,   3, 187,   0, 125,   6, 212, },
356
{ 39,   7,  86,  28,  49,  11, 102,  36, },
357
{  1, 158,   0, 131,   3, 180,   1, 151, },
358
{ 68,  19,  52,  12,  81,  25,  64,  17, },
359
{  0, 119,   5, 203,   0, 113,   4, 195, },
360
{ 45,   9,  96,  33,  42,   8,  91,  30, },
361
{  2, 172,   1, 144,   2, 165,   0, 137, },
362
{ 77,  23,  60,  15,  72,  21,  56,  14, },
363
};
364
#endif
365 5cebb24b Michael Niedermayer
366 8055ede6 Baptiste Coudurier
const char *sws_format_name(enum PixelFormat format)
367 94c4def2 Luca Abeni
{
368 e9e12f0e Luca Abeni
    switch (format) {
369 9b734d44 Ramiro Polla
    case PIX_FMT_YUV420P:
370
        return "yuv420p";
371
    case PIX_FMT_YUVA420P:
372
        return "yuva420p";
373
    case PIX_FMT_YUYV422:
374
        return "yuyv422";
375
    case PIX_FMT_RGB24:
376
        return "rgb24";
377
    case PIX_FMT_BGR24:
378
        return "bgr24";
379
    case PIX_FMT_YUV422P:
380
        return "yuv422p";
381
    case PIX_FMT_YUV444P:
382
        return "yuv444p";
383
    case PIX_FMT_RGB32:
384
        return "rgb32";
385
    case PIX_FMT_YUV410P:
386
        return "yuv410p";
387
    case PIX_FMT_YUV411P:
388
        return "yuv411p";
389
    case PIX_FMT_RGB565:
390
        return "rgb565";
391
    case PIX_FMT_RGB555:
392
        return "rgb555";
393
    case PIX_FMT_GRAY16BE:
394
        return "gray16be";
395
    case PIX_FMT_GRAY16LE:
396
        return "gray16le";
397
    case PIX_FMT_GRAY8:
398
        return "gray8";
399
    case PIX_FMT_MONOWHITE:
400
        return "mono white";
401
    case PIX_FMT_MONOBLACK:
402
        return "mono black";
403
    case PIX_FMT_PAL8:
404
        return "Palette";
405
    case PIX_FMT_YUVJ420P:
406
        return "yuvj420p";
407
    case PIX_FMT_YUVJ422P:
408
        return "yuvj422p";
409
    case PIX_FMT_YUVJ444P:
410
        return "yuvj444p";
411
    case PIX_FMT_XVMC_MPEG2_MC:
412
        return "xvmc_mpeg2_mc";
413
    case PIX_FMT_XVMC_MPEG2_IDCT:
414
        return "xvmc_mpeg2_idct";
415
    case PIX_FMT_UYVY422:
416
        return "uyvy422";
417
    case PIX_FMT_UYYVYY411:
418
        return "uyyvyy411";
419
    case PIX_FMT_RGB32_1:
420
        return "rgb32x";
421
    case PIX_FMT_BGR32_1:
422
        return "bgr32x";
423
    case PIX_FMT_BGR32:
424
        return "bgr32";
425
    case PIX_FMT_BGR565:
426
        return "bgr565";
427
    case PIX_FMT_BGR555:
428
        return "bgr555";
429
    case PIX_FMT_BGR8:
430
        return "bgr8";
431
    case PIX_FMT_BGR4:
432
        return "bgr4";
433
    case PIX_FMT_BGR4_BYTE:
434
        return "bgr4 byte";
435
    case PIX_FMT_RGB8:
436
        return "rgb8";
437
    case PIX_FMT_RGB4:
438
        return "rgb4";
439
    case PIX_FMT_RGB4_BYTE:
440
        return "rgb4 byte";
441
    case PIX_FMT_RGB48BE:
442
        return "rgb48be";
443
    case PIX_FMT_RGB48LE:
444
        return "rgb48le";
445
    case PIX_FMT_NV12:
446
        return "nv12";
447
    case PIX_FMT_NV21:
448
        return "nv21";
449
    case PIX_FMT_YUV440P:
450
        return "yuv440p";
451
    case PIX_FMT_VDPAU_H264:
452
        return "vdpau_h264";
453
    case PIX_FMT_VDPAU_MPEG1:
454
        return "vdpau_mpeg1";
455
    case PIX_FMT_VDPAU_MPEG2:
456
        return "vdpau_mpeg2";
457
    case PIX_FMT_VDPAU_WMV3:
458
        return "vdpau_wmv3";
459
    case PIX_FMT_VDPAU_VC1:
460
        return "vdpau_vc1";
461
    case PIX_FMT_YUV420PLE:
462
        return "yuv420ple";
463
    case PIX_FMT_YUV422PLE:
464
        return "yuv422ple";
465
    case PIX_FMT_YUV444PLE:
466
        return "yuv444ple";
467
    case PIX_FMT_YUV420PBE:
468
        return "yuv420pbe";
469
    case PIX_FMT_YUV422PBE:
470
        return "yuv422pbe";
471
    case PIX_FMT_YUV444PBE:
472
        return "yuv444pbe";
473
    default:
474
        return "Unknown format";
475 e9e12f0e Luca Abeni
    }
476 94c4def2 Luca Abeni
}
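/*
  Typical internal use is diagnostic logging, along the lines of
      av_log(c, AV_LOG_INFO, "%s -> %s\n",
             sws_format_name(srcFormat), sws_format_name(dstFormat));
  the returned strings are static literals, so they must not be freed or
  modified.
*/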
477
478 52154148 Ramiro Polla
static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
479
                                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
480
                                                    const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest,
481
                                                    int dstW, int chrDstW, int big_endian)
482
{
483
    //FIXME Optimize (just quickly written, not optimized...)
484
    int i;
485
486
    for (i = 0; i < dstW; i++) {
487
        int val = 1 << 10;
488
        int j;
489
490
        for (j = 0; j < lumFilterSize; j++)
491
            val += lumSrc[j][i] * lumFilter[j];
492
493
        if (big_endian) {
494
            AV_WB16(&dest[i], av_clip_uint16(val >> 11));
495
        } else {
496
            AV_WL16(&dest[i], av_clip_uint16(val >> 11));
497
        }
498
    }
499
500
    if (uDest) {
501
        for (i = 0; i < chrDstW; i++) {
502
            int u = 1 << 10;
503
            int v = 1 << 10;
504
            int j;
505
506
            for (j = 0; j < chrFilterSize; j++) {
507
                u += chrSrc[j][i       ] * chrFilter[j];
508
                v += chrSrc[j][i + VOFW] * chrFilter[j];
509
            }
510
511
            if (big_endian) {
512
                AV_WB16(&uDest[i], av_clip_uint16(u >> 11));
513
                AV_WB16(&vDest[i], av_clip_uint16(v >> 11));
514
            } else {
515
                AV_WL16(&uDest[i], av_clip_uint16(u >> 11));
516
                AV_WL16(&vDest[i], av_clip_uint16(v >> 11));
517
            }
518
        }
519
    }
520
521
    if (CONFIG_SWSCALE_ALPHA && aDest) {
522
        for (i = 0; i < dstW; i++) {
523
            int val = 1 << 10;
524
            int j;
525
526
            for (j = 0; j < lumFilterSize; j++)
527
                val += alpSrc[j][i] * lumFilter[j];
528
529
            if (big_endian) {
530
                AV_WB16(&aDest[i], av_clip_uint16(val >> 11));
531
            } else {
532
                AV_WL16(&aDest[i], av_clip_uint16(val >> 11));
533
            }
534
        }
535
    }
536
}
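/*
  The 1<<10 seed is a rounding bias, not part of the filter: the accumulator is
  later shifted right by 11, so pre-adding half the divisor (1<<10) turns the
  truncating shift into round-to-nearest.  For example a weighted sum of 5200
  would truncate to 5200>>11 == 2, but (5200 + 1024)>>11 == 3, which is the
  nearest value.  The same pattern (a 1<<18 bias before a >>19 shift) appears
  in the 8-bit output functions below.
*/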
537
538
static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
539
                                 const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
540
                                 const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW,
541
                                 enum PixelFormat dstFormat)
542
{
543
    if (isBE(dstFormat)) {
544
        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
545
                               chrFilter, chrSrc, chrFilterSize,
546
                               alpSrc,
547
                               dest, uDest, vDest, aDest,
548
                               dstW, chrDstW, 1);
549
    } else {
550
        yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize,
551
                               chrFilter, chrSrc, chrFilterSize,
552
                               alpSrc,
553
                               dest, uDest, vDest, aDest,
554
                               dstW, chrDstW, 0);
555
    }
556
}
557
558 7ac40327 Ramiro Polla
static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
559
                               const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
560
                               const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
561 e3d2500f Michael Niedermayer
{
562 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written, not optimized...)
563 221b804f Diego Biurrun
    int i;
564 dd68318c Ramiro Polla
    for (i=0; i<dstW; i++) {
565 221b804f Diego Biurrun
        int val=1<<18;
566
        int j;
567
        for (j=0; j<lumFilterSize; j++)
568
            val += lumSrc[j][i] * lumFilter[j];
569
570
        dest[i]= av_clip_uint8(val>>19);
571
    }
572
573 1b0a4572 Benoit Fouet
    if (uDest)
574 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
575 221b804f Diego Biurrun
            int u=1<<18;
576
            int v=1<<18;
577
            int j;
578 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
579 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
580 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
581 221b804f Diego Biurrun
            }
582
583
            uDest[i]= av_clip_uint8(u>>19);
584
            vDest[i]= av_clip_uint8(v>>19);
585
        }
586 6858492e Cédric Schieli
587
    if (CONFIG_SWSCALE_ALPHA && aDest)
588 dd68318c Ramiro Polla
        for (i=0; i<dstW; i++) {
589 6858492e Cédric Schieli
            int val=1<<18;
590
            int j;
591
            for (j=0; j<lumFilterSize; j++)
592
                val += alpSrc[j][i] * lumFilter[j];
593
594
            aDest[i]= av_clip_uint8(val>>19);
595
        }
596
597 e3d2500f Michael Niedermayer
}
598
599 7ac40327 Ramiro Polla
static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
600
                                const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
601 221b804f Diego Biurrun
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
602 6118e52e Ville Syrjälä
{
603 f40c7dbb Diego Biurrun
    //FIXME Optimize (just quickly written, not optimized...)
604 221b804f Diego Biurrun
    int i;
605 dd68318c Ramiro Polla
    for (i=0; i<dstW; i++) {
606 221b804f Diego Biurrun
        int val=1<<18;
607
        int j;
608
        for (j=0; j<lumFilterSize; j++)
609
            val += lumSrc[j][i] * lumFilter[j];
610
611
        dest[i]= av_clip_uint8(val>>19);
612
    }
613
614 1b0a4572 Benoit Fouet
    if (!uDest)
615 221b804f Diego Biurrun
        return;
616
617
    if (dstFormat == PIX_FMT_NV12)
618 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
619 221b804f Diego Biurrun
            int u=1<<18;
620
            int v=1<<18;
621
            int j;
622 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
623 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
624 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
625 221b804f Diego Biurrun
            }
626
627
            uDest[2*i]= av_clip_uint8(u>>19);
628
            uDest[2*i+1]= av_clip_uint8(v>>19);
629
        }
630
    else
631 dd68318c Ramiro Polla
        for (i=0; i<chrDstW; i++) {
632 221b804f Diego Biurrun
            int u=1<<18;
633
            int v=1<<18;
634
            int j;
635 dd68318c Ramiro Polla
            for (j=0; j<chrFilterSize; j++) {
636 221b804f Diego Biurrun
                u += chrSrc[j][i] * chrFilter[j];
637 8b2fce0d Michael Niedermayer
                v += chrSrc[j][i + VOFW] * chrFilter[j];
638 221b804f Diego Biurrun
            }
639
640
            uDest[2*i]= av_clip_uint8(v>>19);
641
            uDest[2*i+1]= av_clip_uint8(u>>19);
642
        }
643 6118e52e Ville Syrjälä
}
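/*
  For the semi-planar targets the only difference between NV12 and NV21 is the
  byte order within the interleaved chroma plane: NV12 stores U in uDest[2*i]
  and V in uDest[2*i+1], NV21 the other way round, which is exactly what the
  two loops above implement.
*/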
644 46de8b73 Michael Niedermayer
645 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
646 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) {\
647 221b804f Diego Biurrun
        int j;\
648
        int Y1 = 1<<18;\
649
        int Y2 = 1<<18;\
650
        int U  = 1<<18;\
651
        int V  = 1<<18;\
652 6858492e Cédric Schieli
        int av_unused A1, A2;\
653 2db27aad Carl Eugen Hoyos
        type av_unused *r, *b, *g;\
654 221b804f Diego Biurrun
        const int i2= 2*i;\
655
        \
656 dd68318c Ramiro Polla
        for (j=0; j<lumFilterSize; j++) {\
657 221b804f Diego Biurrun
            Y1 += lumSrc[j][i2] * lumFilter[j];\
658
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
659
        }\
660 dd68318c Ramiro Polla
        for (j=0; j<chrFilterSize; j++) {\
661 221b804f Diego Biurrun
            U += chrSrc[j][i] * chrFilter[j];\
662 8b2fce0d Michael Niedermayer
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
663 221b804f Diego Biurrun
        }\
664
        Y1>>=19;\
665
        Y2>>=19;\
666
        U >>=19;\
667
        V >>=19;\
668 dd68318c Ramiro Polla
        if (alpha) {\
669 6858492e Cédric Schieli
            A1 = 1<<18;\
670
            A2 = 1<<18;\
671 dd68318c Ramiro Polla
            for (j=0; j<lumFilterSize; j++) {\
672 6858492e Cédric Schieli
                A1 += alpSrc[j][i2  ] * lumFilter[j];\
673
                A2 += alpSrc[j][i2+1] * lumFilter[j];\
674
            }\
675
            A1>>=19;\
676
            A2>>=19;\
677
        }\
678 bdf397ba Michael Niedermayer
679 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
680
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
681 dd68318c Ramiro Polla
        if ((Y1|Y2|U|V)&256) {\
682 221b804f Diego Biurrun
            if (Y1>255)   Y1=255; \
683
            else if (Y1<0)Y1=0;   \
684
            if (Y2>255)   Y2=255; \
685
            else if (Y2<0)Y2=0;   \
686
            if (U>255)    U=255;  \
687
            else if (U<0) U=0;    \
688
            if (V>255)    V=255;  \
689
            else if (V<0) V=0;    \
690 6858492e Cédric Schieli
        }\
691 dd68318c Ramiro Polla
        if (alpha && ((A1|A2)&256)) {\
692 6858492e Cédric Schieli
            A1=av_clip_uint8(A1);\
693
            A2=av_clip_uint8(A2);\
694 221b804f Diego Biurrun
        }
695 6a4970ab Diego Biurrun
696 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
697 dd68318c Ramiro Polla
    for (i=0; i<dstW; i++) {\
698 f0faee4c Michael Niedermayer
        int j;\
699
        int Y = 0;\
700
        int U = -128<<19;\
701
        int V = -128<<19;\
702 6858492e Cédric Schieli
        int av_unused A;\
703 f0faee4c Michael Niedermayer
        int R,G,B;\
704
        \
705 dd68318c Ramiro Polla
        for (j=0; j<lumFilterSize; j++) {\
706 f0faee4c Michael Niedermayer
            Y += lumSrc[j][i     ] * lumFilter[j];\
707
        }\
708 dd68318c Ramiro Polla
        for (j=0; j<chrFilterSize; j++) {\
709 f0faee4c Michael Niedermayer
            U += chrSrc[j][i     ] * chrFilter[j];\
710
            V += chrSrc[j][i+VOFW] * chrFilter[j];\
711
        }\
712
        Y >>=10;\
713
        U >>=10;\
714
        V >>=10;\
715 dd68318c Ramiro Polla
        if (alpha) {\
716 6858492e Cédric Schieli
            A = rnd;\
717
            for (j=0; j<lumFilterSize; j++)\
718
                A += alpSrc[j][i     ] * lumFilter[j];\
719
            A >>=19;\
720
            if (A&256)\
721
                A = av_clip_uint8(A);\
722
        }\
723 f0faee4c Michael Niedermayer
724 6858492e Cédric Schieli
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
725
    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
726 43c16478 Michael Niedermayer
        Y-= c->yuv2rgb_y_offset;\
727
        Y*= c->yuv2rgb_y_coeff;\
728 f0faee4c Michael Niedermayer
        Y+= rnd;\
729 43c16478 Michael Niedermayer
        R= Y + V*c->yuv2rgb_v2r_coeff;\
730
        G= Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff;\
731
        B= Y +                          U*c->yuv2rgb_u2b_coeff;\
732 dd68318c Ramiro Polla
        if ((R|G|B)&(0xC0000000)) {\
733 f0faee4c Michael Niedermayer
            if (R>=(256<<22))   R=(256<<22)-1; \
734
            else if (R<0)R=0;   \
735
            if (G>=(256<<22))   G=(256<<22)-1; \
736
            else if (G<0)G=0;   \
737
            if (B>=(256<<22))   B=(256<<22)-1; \
738
            else if (B<0)B=0;   \
739
        }\
740
741
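/*
  For reference, the floating-point BT.601 conversion that the fixed-point
  arithmetic above approximates (with the coefficients stored in the context by
  sws_setColorspaceDetails()) is, for limited-range input:
      R = 1.164*(Y-16)                 + 1.596*(V-128)
      G = 1.164*(Y-16) - 0.392*(U-128) - 0.813*(V-128)
      B = 1.164*(Y-16) + 2.017*(U-128)
  Other colorspaces (BT.709, SMPTE 240M, ...) only change the four chroma
  factors.
*/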
742 e69bd294 Michael Niedermayer
#define YSCALE_YUV_2_GRAY16_C \
743 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) {\
744 b0880d5d Michael Niedermayer
        int j;\
745
        int Y1 = 1<<18;\
746
        int Y2 = 1<<18;\
747
        int U  = 1<<18;\
748
        int V  = 1<<18;\
749 e69bd294 Michael Niedermayer
        \
750 b0880d5d Michael Niedermayer
        const int i2= 2*i;\
751
        \
752 dd68318c Ramiro Polla
        for (j=0; j<lumFilterSize; j++) {\
753 b0880d5d Michael Niedermayer
            Y1 += lumSrc[j][i2] * lumFilter[j];\
754
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
755
        }\
756
        Y1>>=11;\
757
        Y2>>=11;\
758 dd68318c Ramiro Polla
        if ((Y1|Y2|U|V)&65536) {\
759 b0880d5d Michael Niedermayer
            if (Y1>65535)   Y1=65535; \
760
            else if (Y1<0)Y1=0;   \
761
            if (Y2>65535)   Y2=65535; \
762
            else if (Y2<0)Y2=0;   \
763
        }
764
765 6858492e Cédric Schieli
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
766
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
767 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];   \
768
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
769
    b = (type *)c->table_bU[U];   \
770
771 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
772 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) { \
773 221b804f Diego Biurrun
        const int i2= 2*i;       \
774
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
775
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
776
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
777 8b2fce0d Michael Niedermayer
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
778 6858492e Cédric Schieli
        type av_unused *r, *b, *g;                                    \
779
        int av_unused A1, A2;                                         \
780 dd68318c Ramiro Polla
        if (alpha) {\
781 6858492e Cédric Schieli
            A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
782
            A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
783
        }\
784 46de8b73 Michael Niedermayer
785 b0880d5d Michael Niedermayer
#define YSCALE_YUV_2_GRAY16_2_C   \
786 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) { \
787 b0880d5d Michael Niedermayer
        const int i2= 2*i;       \
788
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
789
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
790
791 6858492e Cédric Schieli
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
792
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
793 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
794
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
795
    b = (type *)c->table_bU[U];\
796 cf7d1c1a Michael Niedermayer
797 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
798 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) {\
799 221b804f Diego Biurrun
        const int i2= 2*i;\
800
        int Y1= buf0[i2  ]>>7;\
801
        int Y2= buf0[i2+1]>>7;\
802
        int U= (uvbuf1[i     ])>>7;\
803 8b2fce0d Michael Niedermayer
        int V= (uvbuf1[i+VOFW])>>7;\
804 6858492e Cédric Schieli
        type av_unused *r, *b, *g;\
805
        int av_unused A1, A2;\
806 dd68318c Ramiro Polla
        if (alpha) {\
807 6858492e Cédric Schieli
            A1= abuf0[i2  ]>>7;\
808
            A2= abuf0[i2+1]>>7;\
809
        }\
810 46de8b73 Michael Niedermayer
811 b0880d5d Michael Niedermayer
#define YSCALE_YUV_2_GRAY16_1_C \
812 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) {\
813 b0880d5d Michael Niedermayer
        const int i2= 2*i;\
814
        int Y1= buf0[i2  ]<<1;\
815
        int Y2= buf0[i2+1]<<1;\
816
817 6858492e Cédric Schieli
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
818
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
819 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
820
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
821
    b = (type *)c->table_bU[U];\
822 cf7d1c1a Michael Niedermayer
823 6858492e Cédric Schieli
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
824 dd68318c Ramiro Polla
    for (i=0; i<(dstW>>1); i++) {\
825 221b804f Diego Biurrun
        const int i2= 2*i;\
826
        int Y1= buf0[i2  ]>>7;\
827
        int Y2= buf0[i2+1]>>7;\
828
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
829 8b2fce0d Michael Niedermayer
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
830 6858492e Cédric Schieli
        type av_unused *r, *b, *g;\
831
        int av_unused A1, A2;\
832 dd68318c Ramiro Polla
        if (alpha) {\
833 6858492e Cédric Schieli
            A1= abuf0[i2  ]>>7;\
834
            A2= abuf0[i2+1]>>7;\
835
        }\
836 46de8b73 Michael Niedermayer
837 6858492e Cédric Schieli
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
838
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
839 221b804f Diego Biurrun
    r = (type *)c->table_rV[V];\
840
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
841
    b = (type *)c->table_bU[U];\
842 cf7d1c1a Michael Niedermayer
843 ec1bca2a Michael Niedermayer
#define YSCALE_YUV_2_MONO2_C \
844 e69bd294 Michael Niedermayer
    const uint8_t * const d128=dither_8x8_220[y&7];\
845
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
846 dd68318c Ramiro Polla
    for (i=0; i<dstW-7; i+=8) {\
847 e69bd294 Michael Niedermayer
        int acc;\
848
        acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
849
        acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
850
        acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
851
        acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
852
        acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
853
        acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
854
        acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
855
        acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
856 ec1bca2a Michael Niedermayer
        ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
857 e69bd294 Michael Niedermayer
        dest++;\
858
    }\
859
860
861 ec1bca2a Michael Niedermayer
#define YSCALE_YUV_2_MONOX_C \
862 e69bd294 Michael Niedermayer
    const uint8_t * const d128=dither_8x8_220[y&7];\
863
    uint8_t *g= c->table_gU[128] + c->table_gV[128];\
864
    int acc=0;\
865 dd68318c Ramiro Polla
    for (i=0; i<dstW-1; i+=2) {\
866 e69bd294 Michael Niedermayer
        int j;\
867
        int Y1=1<<18;\
868
        int Y2=1<<18;\
869
\
870 dd68318c Ramiro Polla
        for (j=0; j<lumFilterSize; j++) {\
871 e69bd294 Michael Niedermayer
            Y1 += lumSrc[j][i] * lumFilter[j];\
872
            Y2 += lumSrc[j][i+1] * lumFilter[j];\
873
        }\
874
        Y1>>=19;\
875
        Y2>>=19;\
876 dd68318c Ramiro Polla
        if ((Y1|Y2)&256) {\
877 e69bd294 Michael Niedermayer
            if (Y1>255)   Y1=255;\
878
            else if (Y1<0)Y1=0;\
879
            if (Y2>255)   Y2=255;\
880
            else if (Y2<0)Y2=0;\
881
        }\
882
        acc+= acc + g[Y1+d128[(i+0)&7]];\
883
        acc+= acc + g[Y2+d128[(i+1)&7]];\
884 dd68318c Ramiro Polla
        if ((i&7)==6) {\
885 ec1bca2a Michael Niedermayer
            ((uint8_t*)dest)[0]= c->dstFormat == PIX_FMT_MONOBLACK ? acc : ~acc;\
886 e69bd294 Michael Niedermayer
            dest++;\
887
        }\
888
    }
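/*
  The monochrome writers above pack eight output pixels into one byte: each
  "acc += acc + g[...]" doubles the accumulator (a left shift by one) and
  appends the next 1-bit table-lookup result in the LSB, so after eight pixels
  the low byte of acc holds them MSB-first and is stored; PIX_FMT_MONOWHITE
  simply stores the inverted byte (~acc).  The d128 row of dither_8x8_220
  provides the per-pixel threshold offsets.
*/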
889
890
891
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
892 dd68318c Ramiro Polla
    switch(c->dstFormat) {\
893 68e7f482 Kostya Shishkov
    case PIX_FMT_RGB48BE:\
894
    case PIX_FMT_RGB48LE:\
895
        func(uint8_t,0)\
896
            ((uint8_t*)dest)[ 0]= r[Y1];\
897
            ((uint8_t*)dest)[ 1]= r[Y1];\
898
            ((uint8_t*)dest)[ 2]= g[Y1];\
899
            ((uint8_t*)dest)[ 3]= g[Y1];\
900
            ((uint8_t*)dest)[ 4]= b[Y1];\
901
            ((uint8_t*)dest)[ 5]= b[Y1];\
902
            ((uint8_t*)dest)[ 6]= r[Y2];\
903
            ((uint8_t*)dest)[ 7]= r[Y2];\
904
            ((uint8_t*)dest)[ 8]= g[Y2];\
905
            ((uint8_t*)dest)[ 9]= g[Y2];\
906
            ((uint8_t*)dest)[10]= b[Y2];\
907
            ((uint8_t*)dest)[11]= b[Y2];\
908
            dest+=12;\
909
        }\
910
        break;\
911 6858492e Cédric Schieli
    case PIX_FMT_RGBA:\
912
    case PIX_FMT_BGRA:\
913 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {\
914 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
915
            func(uint32_t,needAlpha)\
916
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
917
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
918
            }\
919 dd68318c Ramiro Polla
        } else {\
920
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
921 6858492e Cédric Schieli
                func(uint32_t,1)\
922
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
923
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
924
                }\
925 dd68318c Ramiro Polla
            } else {\
926 6858492e Cédric Schieli
                func(uint32_t,0)\
927
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
928
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
929
                }\
930
            }\
931
        }\
932
        break;\
933
    case PIX_FMT_ARGB:\
934
    case PIX_FMT_ABGR:\
935 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {\
936 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
937
            func(uint32_t,needAlpha)\
938
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
939
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
940
            }\
941 dd68318c Ramiro Polla
        } else {\
942
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {\
943 6858492e Cédric Schieli
                func(uint32_t,1)\
944
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
945
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
946
                }\
947 dd68318c Ramiro Polla
            } else {\
948 6858492e Cédric Schieli
                func(uint32_t,0)\
949
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
950
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
951
                }\
952
            }\
953 221b804f Diego Biurrun
        }                \
954
        break;\
955
    case PIX_FMT_RGB24:\
956 6858492e Cédric Schieli
        func(uint8_t,0)\
957 221b804f Diego Biurrun
            ((uint8_t*)dest)[0]= r[Y1];\
958
            ((uint8_t*)dest)[1]= g[Y1];\
959
            ((uint8_t*)dest)[2]= b[Y1];\
960
            ((uint8_t*)dest)[3]= r[Y2];\
961
            ((uint8_t*)dest)[4]= g[Y2];\
962
            ((uint8_t*)dest)[5]= b[Y2];\
963
            dest+=6;\
964
        }\
965
        break;\
966
    case PIX_FMT_BGR24:\
967 6858492e Cédric Schieli
        func(uint8_t,0)\
968 221b804f Diego Biurrun
            ((uint8_t*)dest)[0]= b[Y1];\
969
            ((uint8_t*)dest)[1]= g[Y1];\
970
            ((uint8_t*)dest)[2]= r[Y1];\
971
            ((uint8_t*)dest)[3]= b[Y2];\
972
            ((uint8_t*)dest)[4]= g[Y2];\
973
            ((uint8_t*)dest)[5]= r[Y2];\
974
            dest+=6;\
975
        }\
976
        break;\
977
    case PIX_FMT_RGB565:\
978
    case PIX_FMT_BGR565:\
979
        {\
980
            const int dr1= dither_2x2_8[y&1    ][0];\
981
            const int dg1= dither_2x2_4[y&1    ][0];\
982
            const int db1= dither_2x2_8[(y&1)^1][0];\
983
            const int dr2= dither_2x2_8[y&1    ][1];\
984
            const int dg2= dither_2x2_4[y&1    ][1];\
985
            const int db2= dither_2x2_8[(y&1)^1][1];\
986 6858492e Cédric Schieli
            func(uint16_t,0)\
987 221b804f Diego Biurrun
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
988
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
989
            }\
990
        }\
991
        break;\
992
    case PIX_FMT_RGB555:\
993
    case PIX_FMT_BGR555:\
994
        {\
995
            const int dr1= dither_2x2_8[y&1    ][0];\
996
            const int dg1= dither_2x2_8[y&1    ][1];\
997
            const int db1= dither_2x2_8[(y&1)^1][0];\
998
            const int dr2= dither_2x2_8[y&1    ][1];\
999
            const int dg2= dither_2x2_8[y&1    ][0];\
1000
            const int db2= dither_2x2_8[(y&1)^1][1];\
1001 6858492e Cédric Schieli
            func(uint16_t,0)\
1002 221b804f Diego Biurrun
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
1003
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
1004
            }\
1005
        }\
1006
        break;\
1007
    case PIX_FMT_RGB8:\
1008
    case PIX_FMT_BGR8:\
1009
        {\
1010
            const uint8_t * const d64= dither_8x8_73[y&7];\
1011
            const uint8_t * const d32= dither_8x8_32[y&7];\
1012 6858492e Cédric Schieli
            func(uint8_t,0)\
1013 221b804f Diego Biurrun
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
1014
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
1015
            }\
1016
        }\
1017
        break;\
1018
    case PIX_FMT_RGB4:\
1019
    case PIX_FMT_BGR4:\
1020
        {\
1021
            const uint8_t * const d64= dither_8x8_73 [y&7];\
1022
            const uint8_t * const d128=dither_8x8_220[y&7];\
1023 6858492e Cédric Schieli
            func(uint8_t,0)\
1024 221b804f Diego Biurrun
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
1025
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
1026
            }\
1027
        }\
1028
        break;\
1029
    case PIX_FMT_RGB4_BYTE:\
1030
    case PIX_FMT_BGR4_BYTE:\
1031
        {\
1032
            const uint8_t * const d64= dither_8x8_73 [y&7];\
1033
            const uint8_t * const d128=dither_8x8_220[y&7];\
1034 6858492e Cédric Schieli
            func(uint8_t,0)\
1035 221b804f Diego Biurrun
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
1036
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
1037
            }\
1038
        }\
1039
        break;\
1040
    case PIX_FMT_MONOBLACK:\
1041 ec1bca2a Michael Niedermayer
    case PIX_FMT_MONOWHITE:\
1042 221b804f Diego Biurrun
        {\
1043 e69bd294 Michael Niedermayer
            func_monoblack\
1044 221b804f Diego Biurrun
        }\
1045
        break;\
1046
    case PIX_FMT_YUYV422:\
1047
        func2\
1048
            ((uint8_t*)dest)[2*i2+0]= Y1;\
1049
            ((uint8_t*)dest)[2*i2+1]= U;\
1050
            ((uint8_t*)dest)[2*i2+2]= Y2;\
1051
            ((uint8_t*)dest)[2*i2+3]= V;\
1052
        }                \
1053
        break;\
1054
    case PIX_FMT_UYVY422:\
1055
        func2\
1056
            ((uint8_t*)dest)[2*i2+0]= U;\
1057
            ((uint8_t*)dest)[2*i2+1]= Y1;\
1058
            ((uint8_t*)dest)[2*i2+2]= V;\
1059
            ((uint8_t*)dest)[2*i2+3]= Y2;\
1060
        }                \
1061
        break;\
1062 b0880d5d Michael Niedermayer
    case PIX_FMT_GRAY16BE:\
1063
        func_g16\
1064
            ((uint8_t*)dest)[2*i2+0]= Y1>>8;\
1065
            ((uint8_t*)dest)[2*i2+1]= Y1;\
1066
            ((uint8_t*)dest)[2*i2+2]= Y2>>8;\
1067
            ((uint8_t*)dest)[2*i2+3]= Y2;\
1068
        }                \
1069
        break;\
1070
    case PIX_FMT_GRAY16LE:\
1071
        func_g16\
1072
            ((uint8_t*)dest)[2*i2+0]= Y1;\
1073
            ((uint8_t*)dest)[2*i2+1]= Y1>>8;\
1074
            ((uint8_t*)dest)[2*i2+2]= Y2;\
1075
            ((uint8_t*)dest)[2*i2+3]= Y2>>8;\
1076
        }                \
1077
        break;\
1078 221b804f Diego Biurrun
    }\
1079 cf7d1c1a Michael Niedermayer
1080
1081 7ac40327 Ramiro Polla
static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1082
                                  const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1083
                                  const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1084 e3d2500f Michael Niedermayer
{
1085 221b804f Diego Biurrun
    int i;
1086 6858492e Cédric Schieli
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
1087 e3d2500f Michael Niedermayer
}
1088
1089 7ac40327 Ramiro Polla
static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize,
1090
                                    const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize,
1091
                                    const int16_t **alpSrc, uint8_t *dest, int dstW, int y)
1092 f0faee4c Michael Niedermayer
{
1093
    int i;
1094
    int step= fmt_depth(c->dstFormat)/8;
1095 d616c8ae Michael Niedermayer
    int aidx= 3;
1096 f0faee4c Michael Niedermayer
1097 dd68318c Ramiro Polla
    switch(c->dstFormat) {
1098 f0faee4c Michael Niedermayer
    case PIX_FMT_ARGB:
1099
        dest++;
1100 a3398feb Cédric Schieli
        aidx= 0;
1101 f0faee4c Michael Niedermayer
    case PIX_FMT_RGB24:
1102 d616c8ae Michael Niedermayer
        aidx--;
1103 f0faee4c Michael Niedermayer
    case PIX_FMT_RGBA:
1104 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {
1105 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1106
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1107
                dest[aidx]= needAlpha ? A : 255;
1108
                dest[0]= R>>22;
1109
                dest[1]= G>>22;
1110
                dest[2]= B>>22;
1111
                dest+= step;
1112
            }
1113 dd68318c Ramiro Polla
        } else {
1114
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1115 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1116
                    dest[aidx]= A;
1117
                    dest[0]= R>>22;
1118
                    dest[1]= G>>22;
1119
                    dest[2]= B>>22;
1120
                    dest+= step;
1121
                }
1122 dd68318c Ramiro Polla
            } else {
1123 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1124
                    dest[aidx]= 255;
1125
                    dest[0]= R>>22;
1126
                    dest[1]= G>>22;
1127
                    dest[2]= B>>22;
1128
                    dest+= step;
1129
                }
1130
            }
1131 f0faee4c Michael Niedermayer
        }
1132
        break;
1133
    case PIX_FMT_ABGR:
1134
        dest++;
1135 a3398feb Cédric Schieli
        aidx= 0;
1136 f0faee4c Michael Niedermayer
    case PIX_FMT_BGR24:
1137 d616c8ae Michael Niedermayer
        aidx--;
1138 f0faee4c Michael Niedermayer
    case PIX_FMT_BGRA:
1139 dd68318c Ramiro Polla
        if (CONFIG_SMALL) {
1140 6858492e Cédric Schieli
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1141
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1142
                dest[aidx]= needAlpha ? A : 255;
1143
                dest[0]= B>>22;
1144
                dest[1]= G>>22;
1145
                dest[2]= R>>22;
1146
                dest+= step;
1147
            }
1148 dd68318c Ramiro Polla
        } else {
1149
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
1150 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1151
                    dest[aidx]= A;
1152
                    dest[0]= B>>22;
1153
                    dest[1]= G>>22;
1154
                    dest[2]= R>>22;
1155
                    dest+= step;
1156
                }
1157 dd68318c Ramiro Polla
            } else {
1158 6858492e Cédric Schieli
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1159
                    dest[aidx]= 255;
1160
                    dest[0]= B>>22;
1161
                    dest[1]= G>>22;
1162
                    dest[2]= R>>22;
1163
                    dest+= step;
1164
                }
1165
            }
1166 f0faee4c Michael Niedermayer
        }
1167
        break;
1168
    default:
1169
        assert(0);
1170
    }
1171
}
1172 e3d2500f Michael Niedermayer
1173 dd68318c Ramiro Polla
static void fillPlane(uint8_t* plane, int stride, int width, int height, int y, uint8_t val)
1174
{
1175 d4da3e47 Cédric Schieli
    int i;
1176
    uint8_t *ptr = plane + stride*y;
1177 dd68318c Ramiro Polla
    for (i=0; i<height; i++) {
1178 d4da3e47 Cédric Schieli
        memset(ptr, val, width);
1179
        ptr += stride;
1180
    }
1181
}
1182
1183 e8417235 Kostya Shishkov
static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, int width)
1184
{
1185
    int i;
1186
    for (i = 0; i < width; i++) {
1187
        int r = src[i*6+0];
1188
        int g = src[i*6+2];
1189
        int b = src[i*6+4];
1190
1191
        dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1192
    }
1193
}
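/*
  The (33<<(RGB2YUV_SHIFT-1)) constant folds two things into one addend:
  16<<RGB2YUV_SHIFT (the limited-range luma offset) plus 1<<(RGB2YUV_SHIFT-1)
  (the round-to-nearest bias for the final shift), since 33 == 2*16 + 1.
  Also note that only one byte per 16-bit component is read (offsets 0, 2, 4 of
  each 6-byte pixel), so the conversion effectively works with 8-bit precision.
*/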
1194
1195
static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
1196
                             uint8_t *src1, uint8_t *src2, int width)
1197
{
1198
    int i;
1199
    assert(src1==src2);
1200
    for (i = 0; i < width; i++) {
1201
        int r = src1[6*i + 0];
1202
        int g = src1[6*i + 2];
1203
        int b = src1[6*i + 4];
1204
1205
        dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1206
        dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
1207
    }
1208
}
1209
1210
static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
1211
                                  uint8_t *src1, uint8_t *src2, int width)
1212
{
1213
    int i;
1214
    assert(src1==src2);
1215
    for (i = 0; i < width; i++) {
1216
        int r= src1[12*i + 0] + src1[12*i + 6];
1217
        int g= src1[12*i + 2] + src1[12*i + 8];
1218
        int b= src1[12*i + 4] + src1[12*i + 10];
1219
1220
        dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1221
        dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
1222
    }
1223
}
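/*
  Same idea for chroma: 257<<(RGB2YUV_SHIFT-1) equals 128<<RGB2YUV_SHIFT plus
  the rounding bias (257 == 2*128 + 1), which centres U/V on 128.  The _half
  variant sums two horizontally adjacent pixels first, so it shifts by
  RGB2YUV_SHIFT+1 and uses 257<<RGB2YUV_SHIFT to keep the same offset and
  rounding after the extra division by two.
*/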
1224
1225 80704c47 Kostya Shishkov
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= (((const type*)src)[i]>>shb)&maskb;\
        int g= (((const type*)src)[i]>>shg)&maskg;\
        int r= (((const type*)src)[i]>>shr)&maskr;\
\
        dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
    }\
}

BGR2Y(uint32_t, bgr32ToY,16, 0, 0, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint32_t, rgb32ToY, 0, 0,16, 0x00FF, 0xFF00, 0x00FF, RY<< 8, GY   , BY<< 8, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr16ToY, 0, 0, 0, 0x001F, 0x07E0, 0xF800, RY<<11, GY<<5, BY    , RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY    , RGB2YUV_SHIFT+7)
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)

static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    int i;
    for (i=0; i<width; i++) {
        dst[i]= src[4*i];
    }
}

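/* BGR2UV generates two chroma readers per format: 'name' produces one U/V
 * sample per input pixel, while 'name ## _half' averages two horizontally
 * adjacent pixels first (hence the extra +1 in its final shift), for the
 * horizontally subsampled chroma case.  The 257<<(S-1) (resp. 257<<S) bias
 * is the +128 chroma offset plus 0.5 for rounding. */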
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int b= (((const type*)src)[i]&maskb)>>shb;\
        int g= (((const type*)src)[i]&maskg)>>shg;\
        int r= (((const type*)src)[i]&maskr)>>shr;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
    }\
}\
static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
    int i;\
    for (i=0; i<width; i++) {\
        int pix0= ((const type*)src)[2*i+0];\
        int pix1= ((const type*)src)[2*i+1];\
        int g= (pix0&~(maskr|maskb))+(pix1&~(maskr|maskb));\
        int b= ((pix0+pix1-g)&(maskb|(2*maskb)))>>shb;\
        int r= ((pix0+pix1-g)&(maskr|(2*maskr)))>>shr;\
        g&= maskg|(2*maskg);\
\
        g>>=shg;\
\
        dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
        dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
    }\
}

BGR2UV(uint32_t, bgr32ToUV,16, 0, 0, 0xFF000000, 0xFF0000, 0xFF00,   0x00FF, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint32_t, rgb32ToUV, 0, 0,16, 0xFF000000,   0x00FF, 0xFF00, 0xFF0000, RU<< 8, GU   , BU<< 8, RV<< 8, GV   , BV<< 8, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr16ToUV, 0, 0, 0,          0,   0x001F, 0x07E0,   0xF800, RU<<11, GU<<5, BU    , RV<<11, GV<<5, BV    , RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0,          0,   0x001F, 0x03E0,   0x7C00, RU<<10, GU<<5, BU    , RV<<10, GV<<5, BV    , RGB2YUV_SHIFT+7)
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0,          0,   0xF800, 0x07E0,   0x001F, RU    , GU<<5, BU<<11, RV    , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0,          0,   0x7C00, 0x03E0,   0x001F, RU    , GU<<5, BU<<10, RV    , GV<<5, BV<<10, RGB2YUV_SHIFT+7)

static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
    int i;
    for (i=0; i<width; i++) {
        int d= src[i];

        dst[i]= pal[d] & 0xFF;
    }
}

static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
                           const uint8_t *src1, const uint8_t *src2,
                           long width, uint32_t *pal)
{
    int i;
    assert(src1 == src2);
    for (i=0; i<width; i++) {
        int p= pal[src1[i]];

        dstU[i]= p>>8;
        dstV[i]= p>>16;
    }
}

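/* 1 bpp -> 8 bpp luma expansion: each input byte produces 8 output pixels
 * (0 or 255).  monowhite2Y inverts the byte first because in that format a
 * 0 bit means white. */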
static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= ~src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}

static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
    int i, j;
    for (i=0; i<width/8; i++) {
        int d= src[i];
        for(j=0; j<8; j++)
            dst[8*i+j]= ((d>>(7-j))&1)*255;
    }
}


//Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
//Plain C versions
#if ((!HAVE_MMX || !CONFIG_GPL) && !HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if ARCH_PPC
#if HAVE_ALTIVEC || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_ALTIVEC
#endif
#endif //ARCH_PPC

#if ARCH_X86

#if ((HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX
#endif

#if (HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_MMX2
#endif

#if ((HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT) && CONFIG_GPL
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

#define COMPILE_TEMPLATE_MMX 0
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define COMPILE_TEMPLATE_ALTIVEC 0

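/* swscale_template.c is included once per CPU flavor below; RENAME() maps
 * each function in the template to a flavor-specific name (e.g. swScale_C,
 * swScale_MMX2) and the COMPILE_TEMPLATE_* macros select the corresponding
 * code paths inside the template. */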
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef COMPILE_TEMPLATE_ALTIVEC
#define COMPILE_TEMPLATE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif

#if ARCH_X86

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 1
#define COMPILE_TEMPLATE_AMD3DNOW 0
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef COMPILE_TEMPLATE_MMX
#undef COMPILE_TEMPLATE_MMX2
#undef COMPILE_TEMPLATE_AMD3DNOW
#define COMPILE_TEMPLATE_MMX 1
#define COMPILE_TEMPLATE_MMX2 0
#define COMPILE_TEMPLATE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86

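/* Evaluates the piecewise cubic kernel used for SWS_SPLINE: the polynomial
 * a + b*dist + c*dist^2 + d*dist^3 covers the first unit interval; for
 * dist > 1 the recursion switches to the next interval with coefficients
 * chosen so that the first and second derivatives match at the joint. */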
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
//    printf("%f %f %f %f %f\n", a,b,c,d,dist);
    if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
    else           return getSplineCoeff(        0.0,
                                          b+ 2.0*c + 3.0*d,
                                                 c + 3.0*d,
                                         -b- 3.0*c - 6.0*d,
                                         dist-1.0);
}

static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    int64_t *filter=NULL;
    int64_t *filter2=NULL;
    const int64_t fone= 1LL<<54;
    int ret= -1;
#if ARCH_X86
    if (flags & SWS_CPU_CAPS_MMX)
        __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // NOTE: the +1 is for the MMX scaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));

    if (FFABS(xInc - 0x10000) <10) { // unscaled
        int i;
        filterSize= 1;
        filter= av_mallocz(dstW*sizeof(*filter)*filterSize);

        for (i=0; i<dstW; i++) {
            filter[i*filterSize]= fone;
            (*filterPos)[i]=i;
        }

    } else if (flags&SWS_POINT) { // lame looking point sampling mode
        int i;
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= fone;
            xDstInSrc+= xInc;
        }
    } else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) { // bilinear upscale
        int i;
        int xDstInSrc;
        filterSize= 2;
        filter= av_malloc(dstW*sizeof(*filter)*filterSize);

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //bilinear upscale / linear interpolate / area averaging
            for (j=0; j<filterSize; j++) {
                int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    } else {
        int xDstInSrc;
        int sizeFactor;

        if      (flags&SWS_BICUBIC)      sizeFactor=  4;
        else if (flags&SWS_X)            sizeFactor=  8;
        else if (flags&SWS_AREA)         sizeFactor=  1; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? ceil(2*param[0]) : 6;
        else if (flags&SWS_SINC)         sizeFactor= 20; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2;
        else {
            sizeFactor= 0; //GCC warning killer
            assert(0);
        }

        if (xInc <= 1<<16)      filterSize= 1 + sizeFactor; // upscale
        else                    filterSize= 1 + (sizeFactor*srcW + dstW - 1)/ dstW;

        if (filterSize > srcW-2) filterSize=srcW-2;

        filter= av_malloc(dstW*sizeof(*filter)*filterSize);

        xDstInSrc= xInc - 0x10000;
        for (i=0; i<dstW; i++) {
            int xx= (xDstInSrc - ((filterSize-2)<<16)) / (1<<17);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++) {
                int64_t d= ((int64_t)FFABS((xx<<17) - xDstInSrc))<<13;
                double floatd;
                int64_t coeff;

                if (xInc > 1<<16)
                    d= d*dstW/srcW;
                floatd= d * (1.0/(1<<30));

                if (flags & SWS_BICUBIC) {
                    int64_t B= (param[0] != SWS_PARAM_DEFAULT ? param[0] :   0) * (1<<24);
                    int64_t C= (param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6) * (1<<24);
                    int64_t dd = ( d*d)>>30;
                    int64_t ddd= (dd*d)>>30;

                    if      (d < 1LL<<30)
                        coeff = (12*(1<<24)-9*B-6*C)*ddd + (-18*(1<<24)+12*B+6*C)*dd + (6*(1<<24)-2*B)*(1<<30);
                    else if (d < 1LL<<31)
                        coeff = (-B-6*C)*ddd + (6*B+30*C)*dd + (-12*B-48*C)*d + (8*B+24*C)*(1<<30);
                    else
                        coeff=0.0;
                    coeff *= fone>>(30+24);
                }
/*                else if (flags & SWS_X) {
                    double p= param ? param*0.01 : 0.3;
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                    coeff*= pow(2.0, - p*d*d);
                }*/
                else if (flags & SWS_X) {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
                    double c;

                    if (floatd<1.0)
                        c = cos(floatd*PI);
                    else
                        c=-1.0;
                    if (c<0.0)      c= -pow(-c, A);
                    else            c=  pow( c, A);
                    coeff= (c*0.5 + 0.5)*fone;
                } else if (flags & SWS_AREA) {
                    int64_t d2= d - (1<<29);
                    if      (d2*xInc < -(1LL<<(29+16))) coeff= 1.0 * (1LL<<(30+16));
                    else if (d2*xInc <  (1LL<<(29+16))) coeff= -d2*xInc + (1LL<<(29+16));
                    else coeff=0.0;
                    coeff *= fone>>(30+16);
                } else if (flags & SWS_GAUSS) {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (pow(2.0, - p*floatd*floatd))*fone;
                } else if (flags & SWS_SINC) {
                    coeff = (d ? sin(floatd*PI)/(floatd*PI) : 1.0)*fone;
                } else if (flags & SWS_LANCZOS) {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = (d ? sin(floatd*PI)*sin(floatd*PI/p)/(floatd*floatd*PI*PI/p) : 1.0)*fone;
                    if (floatd>p) coeff=0;
                } else if (flags & SWS_BILINEAR) {
                    coeff= (1<<30) - d;
                    if (coeff<0) coeff=0;
                    coeff *= fone >> 30;
                } else if (flags & SWS_SPLINE) {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, floatd) * fone;
                } else {
                    coeff= 0.0; //GCC warning killer
                    assert(0);
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= 2*xInc;
        }
    }

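    /* filter2 is the designed scaler filter convolved with the optional
     * user-supplied srcFilter; dstFilter is not applied here (see the FIXME
     * below). */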
    /* apply src & dst Filter to filter -> filter2
       av_free(filter);
    */
    assert(filterSize>0);
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    assert(filter2Size>0);
    filter2= av_mallocz(filter2Size*dstW*sizeof(*filter2));

    for (i=0; i<dstW; i++) {
        int j, k;

        if(srcFilter) {
            for (k=0; k<srcFilter->length; k++) {
                for (j=0; j<filterSize; j++)
                    filter2[i*filter2Size + k + j] += srcFilter->coeff[k]*filter[i*filterSize + j];
            }
        } else {
            for (j=0; j<filterSize; j++)
                filter2[i*filter2Size + j]= filter[i*filterSize + j];
        }
        //FIXME dstFilter

        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
    }
    av_freep(&filter);

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--) {
        int min= filter2Size;
        int j;
        int64_t cutOff=0.0;

        /* get rid of near-zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++) {
            int k;
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // move filter coefficients left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0;
            (*filterPos)[i]++;
        }

        cutOff=0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--) {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // We really don't want to waste our time
        // doing useless computation, so fall back on
        // the scalar C code for very small filters.
        // Vectorizing is worth it only if you have a
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    assert(minFilterSize > 0);
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    assert(filterSize > 0);
    filter= av_malloc(filterSize*dstW*sizeof(*filter));
    if (filterSize >= MAX_FILTER_SIZE*16/((flags&SWS_ACCURATE_RND) ? APCK_SIZE : 16) || !filter)
        goto error;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++) {
        int j;

        for (j=0; j<filterSize; j++) {
            if (j>=filter2Size) filter[i*filterSize + j]= 0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
            if((flags & SWS_BITEXACT) && j>=minFilterSize)
                filter[i*filterSize + j]= 0;
        }
    }


    //FIXME try to align filterPos if possible

    //fix borders
    for (i=0; i<dstW; i++) {
        int j;
        if ((*filterPos)[i] < 0) {
            // move filter coefficients left to compensate for filterPos
            for (j=1; j<filterSize; j++) {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW) {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // move filter coefficients right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--) {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMX scaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));

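    /* Rescale the 54-bit fixed-point coefficients so that each output row
     * sums to the caller-supplied fixed-point 'one'.  The 'error' variable
     * carries the rounding remainder into the next coefficient so rounding
     * errors do not accumulate across the row. */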
    /* normalize & store in outFilter */
    for (i=0; i<dstW; i++) {
        int j;
        int64_t error=0;
        int64_t sum=0;

        for (j=0; j<filterSize; j++) {
            sum+= filter[i*filterSize + j];
        }
        sum= (sum + one/2)/ one;
        for (j=0; j<*outFilterSize; j++) {
            int64_t v= filter[i*filterSize + j] + error;
            int intV= ROUNDED_DIV(v, sum);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error= v - intV*sum;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++) {
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret=0;
error:
    av_free(filter);
    av_free(filter2);
    return ret;
}

#ifdef COMPILE_MMX2
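/* Generates the run-time assembled MMX2 horizontal scaler.  The routine is
 * stitched together from two code fragments captured with inline asm below
 * (fragmentA for the general case, fragmentB for a narrower source span),
 * one group of 4 output pixels at a time.  When filterCode is NULL nothing
 * is written and only the size of the code that would be generated is
 * returned, presumably so the caller can size the buffer first. */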
static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, int16_t *filter, int32_t *filterPos, int numSplits)
{
    uint8_t *fragmentA;
    x86_reg imm8OfPShufW1A;
    x86_reg imm8OfPShufW2A;
    x86_reg fragmentLengthA;
    uint8_t *fragmentB;
    x86_reg imm8OfPShufW1B;
    x86_reg imm8OfPShufW2B;
    x86_reg fragmentLengthB;
    int fragmentPos;

    int xpos, i;

    // create an optimized horizontal scaling routine

    //code fragment

    __asm__ volatile(
        "jmp                         9f                 \n\t"
    // Begin
        "0:                                             \n\t"
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
        "punpcklbw                %%mm7, %%mm1          \n\t"
        "punpcklbw                %%mm7, %%mm0          \n\t"
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
        "1:                                             \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
        "2:                                             \n\t"
        "psubw                    %%mm1, %%mm0          \n\t"
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
        "pmullw                   %%mm3, %%mm0          \n\t"
        "psllw                       $7, %%mm1          \n\t"
        "paddw                    %%mm1, %%mm0          \n\t"

        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add                         $8, %%"REG_a"      \n\t"
    // End
        "9:                                             \n\t"
//        "int $3                                         \n\t"
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
        "dec                         %1                 \n\t"
        "dec                         %2                 \n\t"
        "sub                         %0, %1             \n\t"
        "sub                         %0, %2             \n\t"
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
        "sub                         %0, %3             \n\t"


        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
        "=r" (fragmentLengthA)
    );

    __asm__ volatile(
        "jmp                         9f                 \n\t"
    // Begin
        "0:                                             \n\t"
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
        "punpcklbw                %%mm7, %%mm0          \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
        "1:                                             \n\t"
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
        "2:                                             \n\t"
        "psubw                    %%mm1, %%mm0          \n\t"
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
        "pmullw                   %%mm3, %%mm0          \n\t"
        "psllw                       $7, %%mm1          \n\t"
        "paddw                    %%mm1, %%mm0          \n\t"

        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"

        "add                         $8, %%"REG_a"      \n\t"
    // End
        "9:                                             \n\t"
//        "int                       $3                   \n\t"
        "lea                 " LOCAL_MANGLE(0b) ", %0   \n\t"
        "lea                 " LOCAL_MANGLE(1b) ", %1   \n\t"
        "lea                 " LOCAL_MANGLE(2b) ", %2   \n\t"
        "dec                         %1                 \n\t"
        "dec                         %2                 \n\t"
        "sub                         %0, %1             \n\t"
        "sub                         %0, %2             \n\t"
        "lea                 " LOCAL_MANGLE(9b) ", %3   \n\t"
        "sub                         %0, %3             \n\t"


        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
        "=r" (fragmentLengthB)
    );

    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
    fragmentPos=0;

    for (i=0; i<dstW/numSplits; i++) {
        int xx=xpos>>16;

        if ((i&3) == 0) {
            int a=0;
            int b=((xpos+xInc)>>16) - xx;
            int c=((xpos+xInc*2)>>16) - xx;
            int d=((xpos+xInc*3)>>16) - xx;
            int inc                = (d+1<4);
            uint8_t *fragment      = (d+1<4) ? fragmentB       : fragmentA;
            x86_reg imm8OfPShufW1  = (d+1<4) ? imm8OfPShufW1B  : imm8OfPShufW1A;
            x86_reg imm8OfPShufW2  = (d+1<4) ? imm8OfPShufW2B  : imm8OfPShufW2A;
            x86_reg fragmentLength = (d+1<4) ? fragmentLengthB : fragmentLengthA;
            int maxShift= 3-(d+inc);
            int shift=0;

            if (filterCode) {
                filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
                filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
                filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
                filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
                filterPos[i/2]= xx;

                memcpy(filterCode + fragmentPos, fragment, fragmentLength);

                filterCode[fragmentPos + imm8OfPShufW1]=
                    (a+inc) | ((b+inc)<<2) | ((c+inc)<<4) | ((d+inc)<<6);
                filterCode[fragmentPos + imm8OfPShufW2]=
                    a | (b<<2) | (c<<4) | (d<<6);

                if (i+4-inc>=dstW) shift=maxShift; //avoid overread
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align

                if (shift && i>=shift) {
                    filterCode[fragmentPos + imm8OfPShufW1]+= 0x55*shift;
                    filterCode[fragmentPos + imm8OfPShufW2]+= 0x55*shift;
                    filterPos[i/2]-=shift;
                }
            }

            fragmentPos+= fragmentLength;

            if (filterCode)
                filterCode[fragmentPos]= RET;
        }
        xpos+=xInc;
    }
    if (filterCode)
        filterPos[((i/2)+1)&(~1)]= xpos>>16; // needed to jump to the next part

    return fragmentPos + 1;
}
#endif /* COMPILE_MMX2 */

static void globalInit(void)
{
    // generating tables:
    int i;
    for (i=0; i<768; i++) {
        int c= av_clip_uint8(i-256);
        clip_table[i]=c;
    }
}

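/* Selects the swScale implementation: with CONFIG_RUNTIME_CPUDETECT the
 * choice is made from c->flags at run time (fastest first), otherwise the
 * single flavor selected by the COMPILE_TEMPLATE_* macros is returned. */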
static SwsFunc getSwsFunc(SwsContext *c)
{
#if CONFIG_RUNTIME_CPUDETECT
    int flags = c->flags;

#if ARCH_X86 && CONFIG_GPL
    // ordered by speed, fastest first
    if (flags & SWS_CPU_CAPS_MMX2) {
        sws_init_swScale_MMX2(c);
        return swScale_MMX2;
    } else if (flags & SWS_CPU_CAPS_3DNOW) {
        sws_init_swScale_3DNow(c);
        return swScale_3DNow;
    } else if (flags & SWS_CPU_CAPS_MMX) {
        sws_init_swScale_MMX(c);
        return swScale_MMX;
    } else {
        sws_init_swScale_C(c);
        return swScale_C;
    }

#else
#if ARCH_PPC
    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        sws_init_swScale_altivec(c);
        return swScale_altivec;
    } else {
        sws_init_swScale_C(c);
        return swScale_C;
    }
#endif
    sws_init_swScale_C(c);
    return swScale_C;
#endif /* ARCH_X86 && CONFIG_GPL */
#else //CONFIG_RUNTIME_CPUDETECT
#if   COMPILE_TEMPLATE_MMX2
    sws_init_swScale_MMX2(c);
    return swScale_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
    sws_init_swScale_3DNow(c);
    return swScale_3DNow;
#elif COMPILE_TEMPLATE_MMX
    sws_init_swScale_MMX(c);
    return swScale_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
    sws_init_swScale_altivec(c);
    return swScale_altivec;
#else
    sws_init_swScale_C(c);
    return swScale_C;
#endif
#endif //!CONFIG_RUNTIME_CPUDETECT
}

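/* The *Wrapper functions below are the unscaled special-case converters;
 * each converts one slice without scaling and returns the number of source
 * lines consumed (srcSliceH). */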
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else {
        int i;
        const uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++) {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes(src[1], src[2], dst, c->srcW/2, srcSliceH/2, srcStride[1], srcStride[2], dstStride[0]);
    else
        interleaveBytes(src[2], src[1], dst, c->srcW/2, srcSliceH/2, srcStride[2], srcStride[1], dstStride[0]);

    return srcSliceH;
}

static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}

static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);

    return srcSliceH;
}

static int YUV422PToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptoyuy2(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);

    return srcSliceH;
}

static int YUV422PToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;

    yuv422ptouyvy(src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0]);

    return srcSliceH;
}

static int YUYV2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;

    yuyvtoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);

    if (dstParam[3])
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);

    return srcSliceH;
}

static int YUYV2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;

    yuyvtoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}

static int UYVY2YUV420Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY/2;
    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY/2;

    uyvytoyuv420(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);

    if (dstParam[3])
        fillPlane(dstParam[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);

    return srcSliceH;
}

static int UYVY2YUV422Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    uint8_t *ydst=dstParam[0] + dstStride[0]*srcSliceY;
    uint8_t *udst=dstParam[1] + dstStride[1]*srcSliceY;
    uint8_t *vdst=dstParam[2] + dstStride[2]*srcSliceY;

    uyvytoyuv422(ydst, udst, vdst, src[0], c->srcW, srcSliceH, dstStride[0], dstStride[1], srcStride[0]);

    return srcSliceH;
}

static int pal2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    void (*conv)(const uint8_t *src, uint8_t *dst, long num_pixels,
                 const uint8_t *palette)=NULL;
    int i;
    uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
    uint8_t *srcPtr= src[0];

    if (!usePal(srcFormat))
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));

    switch(dstFormat) {
    case PIX_FMT_RGB32  : conv = palette8topacked32; break;
    case PIX_FMT_BGR32  : conv = palette8topacked32; break;
    case PIX_FMT_BGR32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB32_1: conv = palette8topacked32; break;
    case PIX_FMT_RGB24  : conv = palette8topacked24; break;
    case PIX_FMT_BGR24  : conv = palette8topacked24; break;
    default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                    sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
    }


    for (i=0; i<srcSliceH; i++) {
        conv(srcPtr, dstPtr, c->srcW, (uint8_t *) c->pal_rgb);
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }

    return srcSliceH;
}

/* {RGB,BGR}{15,16,24,32,32_1} -> {RGB,BGR}{15,16,24,32} */
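/* The conversion routine is looked up via the key srcId | (dstId<<4), where
 * the ids are the bit depths shifted right by 2 (see the table next to their
 * definition below); e.g. 0x46 selects rgb24to16 (24-bit -> 16-bit). */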
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                          int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const enum PixelFormat srcFormat= c->srcFormat;
    const enum PixelFormat dstFormat= c->dstFormat;
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
    const int dstId= fmt_depth(dstFormat) >> 2;
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;

    /* BGR -> BGR */
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
       || (isRGB(srcFormat) && isRGB(dstFormat))) {
        switch(srcId | (dstId<<4)) {
        case 0x34: conv= rgb16to15; break;
        case 0x36: conv= rgb24to15; break;
        case 0x38: conv= rgb32to15; break;
        case 0x43: conv= rgb15to16; break;
        case 0x46: conv= rgb24to16; break;
        case 0x48: conv= rgb32to16; break;
        case 0x63: conv= rgb15to24; break;
        case 0x64: conv= rgb16to24; break;
        case 0x68: conv= rgb32to24; break;
        case 0x83: conv= rgb15to32; break;
        case 0x84: conv= rgb16to32; break;
        case 0x86: conv= rgb24to32; break;
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
        }
    } else if (  (isBGR(srcFormat) && isRGB(dstFormat))
             || (isRGB(srcFormat) && isBGR(dstFormat))) {
        switch(srcId | (dstId<<4)) {
        case 0x33: conv= rgb15tobgr15; break;
        case 0x34: conv= rgb16tobgr15; break;
        case 0x36: conv= rgb24tobgr15; break;
        case 0x38: conv= rgb32tobgr15; break;
        case 0x43: conv= rgb15tobgr16; break;
        case 0x44: conv= rgb16tobgr16; break;
        case 0x46: conv= rgb24tobgr16; break;
        case 0x48: conv= rgb32tobgr16; break;
        case 0x63: conv= rgb15tobgr24; break;
        case 0x64: conv= rgb16tobgr24; break;
        case 0x66: conv= rgb24tobgr24; break;
        case 0x68: conv= rgb32tobgr24; break;
        case 0x83: conv= rgb15tobgr32; break;
        case 0x84: conv= rgb16tobgr32; break;
        case 0x86: conv= rgb24tobgr32; break;
        case 0x88: conv= rgb32tobgr32; break;
        default: av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
        }
    } else {
        av_log(c, AV_LOG_ERROR, "internal error %s -> %s converter\n",
               sws_format_name(srcFormat), sws_format_name(dstFormat));
    }

    if(conv) {
        uint8_t *srcPtr= src[0];
        if(srcFormat == PIX_FMT_RGB32_1 || srcFormat == PIX_FMT_BGR32_1)
            srcPtr += ALT32_CORR;

        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp && srcStride[0] > 0)
            conv(srcPtr, dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
        else {
            int i;
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

            for (i=0; i<srcSliceH; i++) {
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
                srcPtr+= srcStride[0];
                dstPtr+= dstStride[0];
            }
        }
    }
    return srcSliceH;
}

static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[])
{

    rgb24toyv12(
        src[0],
        dst[0]+ srcSliceY    *dstStride[0],
        dst[1]+(srcSliceY>>1)*dstStride[1],
        dst[2]+(srcSliceY>>1)*dstStride[2],
        c->srcW, srcSliceH,
        dstStride[0], dstStride[1], srcStride[0]);
    if (dst[3])
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    return srcSliceH;
}

static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int i;

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    else {
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        for (i=0; i<srcSliceH; i++) {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    if (c->dstFormat==PIX_FMT_YUV420P || c->dstFormat==PIX_FMT_YUVA420P) {
        planar2x(src[1], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
                 srcSliceH >> 2, srcStride[1], dstStride[1]);
        planar2x(src[2], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
                 srcSliceH >> 2, srcStride[2], dstStride[2]);
    } else {
        planar2x(src[1], dst[2] + dstStride[2]*(srcSliceY >> 1), c->chrSrcW,
                 srcSliceH >> 2, srcStride[1], dstStride[2]);
        planar2x(src[2], dst[1] + dstStride[1]*(srcSliceY >> 1), c->chrSrcW,
                 srcSliceH >> 2, srcStride[2], dstStride[1]);
    }
    if (dst[3])
        fillPlane(dst[3], dstStride[3], c->srcW, srcSliceH, srcSliceY, 255);
    return srcSliceH;
}

/* unscaled copy-like stuff (assumes nearly identical formats) */
static int packedCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
    else {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
        int length=0;

        /* universal length finder */
        while(length+c->srcW <= FFABS(dstStride[0])
           && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
        assert(length!=0);

        for (i=0; i<srcSliceH; i++) {
            memcpy(dstPtr, srcPtr, length);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    return srcSliceH;
}

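/* Plane-by-plane copy.  Missing source planes are synthesized (alpha filled
 * with 255, chroma with 128) and 8-bit <-> 16-bit plane layouts are converted
 * on the fly, including a byte swap when source and destination 16-bit
 * formats differ in endianness. */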
static int planarCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[])
{
    int plane, i, j;
    for (plane=0; plane<4; plane++) {
        int length= (plane==0 || plane==3) ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
        int y=      (plane==0 || plane==3) ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
        int height= (plane==0 || plane==3) ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
        uint8_t *srcPtr= src[plane];
        uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;

        if (!dst[plane]) continue;
        // ignore palette for GRAY8
        if (plane == 1 && !dst[2]) continue;
        if (!src[plane] || (plane == 1 && !src[2])) {
            if(is16BPS(c->dstFormat))
                length*=2;
            fillPlane(dst[plane], dstStride[plane], length, height, y, (plane==3) ? 255 : 128);
        } else {
            if(is16BPS(c->srcFormat) && !is16BPS(c->dstFormat)) {
                if (!isBE(c->srcFormat)) srcPtr++;
                for (i=0; i<height; i++) {
                    for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            } else if(!is16BPS(c->srcFormat) && is16BPS(c->dstFormat)) {
                for (i=0; i<height; i++) {
                    for (j=0; j<length; j++) {
                        dstPtr[ j<<1   ] = srcPtr[j];
                        dstPtr[(j<<1)+1] = srcPtr[j];
                    }
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            } else if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat)
                  && isBE(c->srcFormat) != isBE(c->dstFormat)) {

                for (i=0; i<height; i++) {
                    for (j=0; j<length; j++)
                        ((uint16_t*)dstPtr)[j] = bswap_16(((uint16_t*)srcPtr)[j]);
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            } else if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
                memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
            else {
                if(is16BPS(c->srcFormat) && is16BPS(c->dstFormat))
                    length*=2;
                for (i=0; i<height; i++) {
                    memcpy(dstPtr, srcPtr, length);
                    srcPtr+= srcStride[plane];
                    dstPtr+= dstStride[plane];
                }
            }
        }
    }
    return srcSliceH;
}

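/* The 16 bit -> 8 bit branch in planarCopy() keeps only the most significant byte of
 * each sample (srcPtr is advanced by one for little-endian input so that srcPtr[j<<1]
 * lands on the MSB), while the 8 bit -> 16 bit branch writes the source byte into
 * both halves of the 16 bit sample, i.e. v becomes v*0x101, which yields the same
 * value for either destination endianness.  A scalar sketch of that expansion, with
 * hypothetical src8[]/dst16[] buffers:
 *
 *     for (j=0; j<length; j++)
 *         dst16[j] = src8[j] * 0x101;  // 0x00 -> 0x0000, 0xFF -> 0xFFFF
 */
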
static void getSubSampleFactors(int *h, int *v, int format)
{
    switch(format) {
    case PIX_FMT_UYVY422:
    case PIX_FMT_YUYV422:
        *h=1;
        *v=0;
        break;
    case PIX_FMT_YUV420P:
    case PIX_FMT_YUV420PLE:
    case PIX_FMT_YUV420PBE:
    case PIX_FMT_YUVA420P:
    case PIX_FMT_GRAY16BE:
    case PIX_FMT_GRAY16LE:
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
    case PIX_FMT_NV12:
    case PIX_FMT_NV21:
        *h=1;
        *v=1;
        break;
    case PIX_FMT_YUV440P:
        *h=0;
        *v=1;
        break;
    case PIX_FMT_YUV410P:
        *h=2;
        *v=2;
        break;
    case PIX_FMT_YUV444P:
    case PIX_FMT_YUV444PLE:
    case PIX_FMT_YUV444PBE:
        *h=0;
        *v=0;
        break;
    case PIX_FMT_YUV422P:
    case PIX_FMT_YUV422PLE:
    case PIX_FMT_YUV422PBE:
        *h=1;
        *v=0;
        break;
    case PIX_FMT_YUV411P:
        *h=2;
        *v=0;
        break;
    default:
        *h=0;
        *v=0;
        break;
    }
}

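/* getSubSampleFactors() returns log2 shift amounts, not divisors: PIX_FMT_YUV420P
 * yields *h == 1, *v == 1, so its chroma planes measure (lumaW >> 1) x (lumaH >> 1),
 * while PIX_FMT_YUV410P yields *h == 2, *v == 2, i.e. (lumaW >> 2) x (lumaH >> 2). */
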
static uint16_t roundToInt16(int64_t f)
{
    int r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return r;
}

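/* roundToInt16() rounds a 16.16 fixed-point value to the nearest integer and
 * saturates it to the int16_t range (0x8000 is the bit pattern of -32768 in the
 * uint16_t return type).  For example f == 0x18000 (1.5 in 16.16) gives
 * (0x18000 + 0x8000) >> 16 == 2, and anything at or above 0x7FFF8000 clips to
 * 0x7FFF. */
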
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation)
{
    int64_t crv =  inv_table[0];
    int64_t cbu =  inv_table[1];
    int64_t cgu = -inv_table[2];
    int64_t cgv = -inv_table[3];
    int64_t cy  = 1<<16;
    int64_t oy  = 0;

    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);

    c->brightness= brightness;
    c->contrast  = contrast;
    c->saturation= saturation;
    c->srcRange  = srcRange;
    c->dstRange  = dstRange;
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;

    c->uOffset=   0x0400040004000400LL;
    c->vOffset=   0x0400040004000400LL;

    if (!srcRange) {
        cy= (cy*255) / 219;
        oy= 16<<16;
    } else {
        crv= (crv*224) / 255;
        cbu= (cbu*224) / 255;
        cgu= (cgu*224) / 255;
        cgv= (cgv*224) / 255;
    }

    cy = (cy *contrast             )>>16;
    crv= (crv*contrast * saturation)>>32;
    cbu= (cbu*contrast * saturation)>>32;
    cgu= (cgu*contrast * saturation)>>32;
    cgv= (cgv*contrast * saturation)>>32;

    oy -= 256*brightness;

    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;

    c->yuv2rgb_y_coeff  = (int16_t)roundToInt16(cy <<13);
    c->yuv2rgb_y_offset = (int16_t)roundToInt16(oy << 9);
    c->yuv2rgb_v2r_coeff= (int16_t)roundToInt16(crv<<13);
    c->yuv2rgb_v2g_coeff= (int16_t)roundToInt16(cgv<<13);
    c->yuv2rgb_u2g_coeff= (int16_t)roundToInt16(cgu<<13);
    c->yuv2rgb_u2b_coeff= (int16_t)roundToInt16(cbu<<13);

    ff_yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
    //FIXME factorize

#ifdef COMPILE_ALTIVEC
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
        ff_yuv2rgb_init_tables_altivec(c, inv_table, brightness, contrast, saturation);
#endif
    return 0;
}

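/* Illustrative call of sws_setColorspaceDetails() (a sketch, not part of this file):
 * the inv_table/table entries are the V->R, U->B, U->G and V->G contributions in
 * 16.16 fixed point, and brightness/contrast/saturation are 16.16 as well
 * (0, 1<<16, 1<<16 being neutral).  Assuming sws_getCoefficients() and SWS_CS_ITU601
 * are available in this build of libswscale, and ctx comes from sws_getContext():
 *
 *     const int *coefs = sws_getCoefficients(SWS_CS_ITU601);
 *     int ret = sws_setColorspaceDetails(ctx, coefs, 0,     // MPEG-range input
 *                                        coefs, 0,          // MPEG-range output
 *                                        0, 1<<16, 1<<16);  // neutral B/C/S
 *     // ret is -1 when the destination is a YUV or gray format (RGB output only).
 */
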
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation)
{
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;

    *inv_table = c->srcColorspaceTable;
    *table     = c->dstColorspaceTable;
    *srcRange  = c->srcRange;
    *dstRange  = c->dstRange;
    *brightness= c->brightness;
    *contrast  = c->contrast;
    *saturation= c->saturation;

    return 0;
}

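/* Replaces the full-range "J" (JPEG) pixel formats with their limited-range
 * counterparts and returns 1 if the original format was full range, 0 otherwise,
 * so the caller can keep the range in srcRange/dstRange instead of the format. */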
static int handle_jpeg(enum PixelFormat *format)
{
    switch (*format) {
    case PIX_FMT_YUVJ420P:
        *format = PIX_FMT_YUV420P;
        return 1;
    case PIX_FMT_YUVJ422P:
        *format = PIX_FMT_YUV422P;
        return 1;
    case PIX_FMT_YUVJ444P:
        *format = PIX_FMT_YUV444P;
        return 1;
    case PIX_FMT_YUVJ440P:
        *format = PIX_FMT_YUV440P;
        return 1;
    default:
        return 0;
    }
}

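/* Illustrative call of sws_getContext() (a sketch; error checking and the later
 * sws_scale()/sws_freeContext() calls are omitted):
 *
 *     struct SwsContext *sc = sws_getContext(1920, 1080, PIX_FMT_YUV420P,  // source
 *                                            1280,  720, PIX_FMT_RGB24,    // destination
 *                                            SWS_BICUBIC, NULL, NULL, NULL);
 */
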
SwsContext *sws_getContext(int srcW, int srcH, enum PixelFormat srcFormat, int dstW, int dstH, enum PixelFormat dstFormat, int flags,
                           SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
{

    SwsContext *c;
    int i;
    int usesVFilter, usesHFilter;
    int unscaled, needsDither;
    int srcRange, dstRange;
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#if ARCH_X86
    if (flags & SWS_CPU_CAPS_MMX)
        __asm__ volatile("emms\n\t"::: "memory");
#endif

#if !CONFIG_RUNTIME_CPUDETECT //ensure that the flags match the compiled variant if cpudetect is off
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
#if   COMPILE_TEMPLATE_MMX2
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
#elif COMPILE_TEMPLATE_AMD3DNOW
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
#elif COMPILE_TEMPLATE_MMX
    flags |= SWS_CPU_CAPS_MMX;
#elif COMPILE_TEMPLATE_ALTIVEC
    flags |= SWS_CPU_CAPS_ALTIVEC;
#elif ARCH_BFIN
    flags |= SWS_CPU_CAPS_BFIN;
#endif
#endif /* CONFIG_RUNTIME_CPUDETECT */
    if (clip_table[512] != 255) globalInit();
    if (!rgb15to16) sws_rgb2rgb_init(flags);

    unscaled = (srcW == dstW && srcH == dstH);
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
        && (fmt_depth(dstFormat))<24
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));

    srcRange = handle_jpeg(&srcFormat);
    dstRange = handle_jpeg(&dstFormat);

    if (!isSupportedIn(srcFormat)) {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input pixel format\n", sws_format_name(srcFormat));
        return NULL;
    }
    if (!isSupportedOut(dstFormat)) {
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output pixel format\n", sws_format_name(dstFormat));
        return NULL;
    }

    i= flags & ( SWS_POINT
                |SWS_AREA
                |SWS_BILINEAR
                |SWS_FAST_BILINEAR
                |SWS_BICUBIC
                |SWS_X
                |SWS_GAUSS
                |SWS_LANCZOS
                |SWS_SINC
                |SWS_SPLINE