Statistics
| Branch: | Revision:

ffmpeg / libswscale / swscale.c @ 8f58a4c9

History | View | Annotate | Download (101 KB)

1 fe8054c0 Michael Niedermayer
/*
2 d026b45e Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; either version 2 of the License, or
9
 * (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with FFmpeg; if not, write to the Free Software
18 b19bcbaa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 d026b45e Diego Biurrun
 *
20 807e0c66 Luca Abeni
 * the C code (not assembly, mmx, ...) of this file can be used
21
 * under the LGPL license too
22 d026b45e Diego Biurrun
 */
23 783e9cc9 Michael Niedermayer
24 28bf81c9 Michael Niedermayer
/*
25 e28630fc Michael Niedermayer
  supported Input formats: YV12, I420/IYUV, YUY2, UYVY, BGR32, BGR24, BGR16, BGR15, RGB32, RGB24, Y8/Y800, YVU9/IF09, PAL8
26 caeaabe7 Alex Beregszaszi
  supported output formats: YV12, I420/IYUV, YUY2, UYVY, {BGR,RGB}{1,4,8,15,16,24,32}, Y8/Y800, YVU9/IF09
27 e09d12f4 Michael Niedermayer
  {BGR,RGB}{1,4,8,15,16} support dithering
28 6a4970ab Diego Biurrun

29 e09d12f4 Michael Niedermayer
  unscaled special converters (YV12=I420=IYUV, Y800=Y8)
30
  YV12 -> {BGR,RGB}{1,4,8,15,16,24,32}
31
  x -> x
32
  YUV9 -> YV12
33
  YUV9/YV12 -> Y800
34
  Y800 -> YUV9/YV12
35 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
36
  BGR32 -> BGR24 & RGB32 -> RGB24
37 4bb3fa5e Michael Niedermayer
  BGR15 -> BGR16
38 b935781b Michael Niedermayer
*/
39
40 6a4970ab Diego Biurrun
/*
41 e09d12f4 Michael Niedermayer
tested special converters (most are actually tested, but I didn't write it down ...)
42
 YV12 -> BGR16
43 b935781b Michael Niedermayer
 YV12 -> YV12
44 4bb3fa5e Michael Niedermayer
 BGR15 -> BGR16
45 1e1c4fe9 Michael Niedermayer
 BGR16 -> BGR16
46 e09d12f4 Michael Niedermayer
 YVU9 -> YV12
47 b935781b Michael Niedermayer

48
untested special converters
49 86bdf3fd Diego Biurrun
  YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok)
50 1e1c4fe9 Michael Niedermayer
  YV12/I420 -> YV12/I420
51
  YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 b935781b Michael Niedermayer
  BGR24 -> BGR32 & RGB24 -> RGB32
53
  BGR32 -> BGR24 & RGB32 -> RGB24
54 ec22603f Michael Niedermayer
  BGR24 -> YV12
55 28bf81c9 Michael Niedermayer
*/
56
57 d3f41512 Michael Niedermayer
#include <inttypes.h>
58 dda87e9f Pierre Lombard
#include <string.h>
59 077ea8a7 Michael Niedermayer
#include <math.h>
60 c1b0bfb4 Michael Niedermayer
#include <stdio.h>
61 171d7d78 Bohdan Horst
#include <unistd.h>
62 b2d374c9 Diego Biurrun
#include "config.h"
63 81b7c056 Michael Niedermayer
#include <assert.h>
64 38d5c282 Aurelien Jacobs
#ifdef HAVE_SYS_MMAN_H
65
#include <sys/mman.h>
66 113ef149 Reimar Döffinger
#if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
67
#define MAP_ANONYMOUS MAP_ANON
68
#endif
69 38d5c282 Aurelien Jacobs
#endif
70 d604bab9 Michael Niedermayer
#include "swscale.h"
71 5427e242 Michael Niedermayer
#include "swscale_internal.h"
72 94c4def2 Luca Abeni
#include "x86_cpu.h"
73 b2d374c9 Diego Biurrun
#include "bswap.h"
74 37079906 Michael Niedermayer
#include "rgb2rgb.h"
75 d61845e8 Ivo van Poorten
#include "libavcodec/opt.h"
76 0d9f3d85 Arpi
77 541c4eb9 Michael Niedermayer
#undef MOVNTQ
#undef PAVGB

/* Commented-out overrides, presumably kept so a developer can force or
 * disable a code path while debugging -- left untouched. */
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
//#undef ARCH_X86
//#define WORDS_BIGENDIAN
#define DITHER1XBPP

#define FAST_BGR2YV12 // use 7-bit coefficients instead of 15-bit

#define RET 0xC3 // near return opcode for x86
90 c1b0bfb4 Michael Niedermayer
91 28bf81c9 Michael Niedermayer
/*
 * ASSERT(x): active only when MP_DEBUG is defined. Note that the expansion
 * already carries its own trailing ';' (and is a lone ';' in release
 * builds), so it always forms a complete statement.
 */
#ifdef MP_DEBUG
#define ASSERT(x) assert(x);
#else
#define ASSERT(x) ;
#endif

/* PI: use the libm constant when the headers provide it, else a literal. */
#ifdef M_PI
#define PI M_PI
#else
#define PI 3.14159265358979323846
#endif
102 c1b0bfb4 Michael Niedermayer
103 9d9de37d Ivo van Poorten
/* Non-zero when pixel format x is in the list of accepted input formats.
 * The argument is evaluated once per comparison, so it must be free of
 * side effects. */
#define isSupportedIn(x)    (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_RGB32       \
        || (x)==PIX_FMT_BGR24       \
        || (x)==PIX_FMT_BGR565      \
        || (x)==PIX_FMT_BGR555      \
        || (x)==PIX_FMT_BGR32       \
        || (x)==PIX_FMT_RGB24       \
        || (x)==PIX_FMT_RGB565      \
        || (x)==PIX_FMT_RGB555      \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_BGR8        \
        || (x)==PIX_FMT_RGB8        \
        || (x)==PIX_FMT_BGR4_BYTE   \
        || (x)==PIX_FMT_RGB4_BYTE   \
        || (x)==PIX_FMT_YUV440P     \
    )
129
/* Non-zero when pixel format x is in the list of accepted output formats.
 * Relies on the isRGB()/isBGR() predicates defined outside this macro.
 * The argument is expanded several times; keep it free of side effects. */
#define isSupportedOut(x)   (       \
           (x)==PIX_FMT_YUV420P     \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || (x)==PIX_FMT_YUV444P     \
        || (x)==PIX_FMT_YUV422P     \
        || (x)==PIX_FMT_YUV411P     \
        || isRGB(x)                 \
        || isBGR(x)                 \
        || (x)==PIX_FMT_NV12        \
        || (x)==PIX_FMT_NV21        \
        || (x)==PIX_FMT_GRAY16BE    \
        || (x)==PIX_FMT_GRAY16LE    \
        || (x)==PIX_FMT_GRAY8       \
        || (x)==PIX_FMT_YUV410P     \
    )
145
/* Non-zero for PAL8, YUYV422, UYVY422 and anything isRGB()/isBGR() accept.
 * The argument is expanded several times; keep it free of side effects. */
#define isPacked(x)         (       \
           (x)==PIX_FMT_PAL8        \
        || (x)==PIX_FMT_YUYV422     \
        || (x)==PIX_FMT_UYVY422     \
        || isRGB(x)                 \
        || isBGR(x)                 \
    )
152 6ff0ad6b Michael Niedermayer
153
/*
 * Fixed-point RGB -> YUV coefficients, scaled by 2^RGB2YUV_SHIFT.
 * Naming: first letter is the source channel (B/G/R), second the
 * destination component (Y/U/V).
 *
 * Each value is computed as (int)(coeff * 2^shift + 0.5). The int cast
 * truncates toward zero, so positive coefficients are rounded to nearest
 * while negative ones end up one step closer to zero than round-to-nearest
 * would give. The values are left as they are so results stay unchanged.
 */
#define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
#define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
#define GV ((int)(-0.368*(1<<RGB2YUV_SHIFT)+0.5))
#define GU ((int)(-0.291*(1<<RGB2YUV_SHIFT)+0.5))
#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
#define RU ((int)(-0.148*(1<<RGB2YUV_SHIFT)+0.5))
163 6c7506de Michael Niedermayer
164 0481412a Michael Niedermayer
extern const int32_t Inverse_Table_6_9[8][4];
165
166 783e9cc9 Michael Niedermayer
/*
167
NOTES
168 d604bab9 Michael Niedermayer
Special versions: fast Y 1:1 scaling (no interpolation in y direction)
169 31190492 Arpi

170 783e9cc9 Michael Niedermayer
TODO
171 d604bab9 Michael Niedermayer
more intelligent misalignment avoidance for the horizontal scaler
172 c1b0bfb4 Michael Niedermayer
write special vertical cubic upscale version
173
Optimize C code (yv12 / minmax)
174 ff7ba856 Michael Niedermayer
add support for packed pixel yuv input & output
175 6ff0ad6b Michael Niedermayer
add support for Y8 output
176
optimize bgr24 & bgr32
177 ff7ba856 Michael Niedermayer
add BGR4 output support
178 1e621b18 Michael Niedermayer
write special BGR->BGR scaler
179 783e9cc9 Michael Niedermayer
*/
180 31190492 Arpi
181 9bde778e Luca Abeni
#if defined(ARCH_X86) && defined (CONFIG_GPL)
/*
 * 8-byte aligned 64-bit constants and scratch words.
 * NOTE(review): every one of them is read by
 * in_asm_used_var_warning_killer(), whose name suggests they are consumed
 * from inline assembly elsewhere in the file -- confirm before renaming or
 * removing any of them.
 */

/* The same byte or 16-bit word repeated across the 64-bit value. */
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
static uint64_t                __attribute__((aligned(8))) w10=       0x0010001000100010LL;
static uint64_t attribute_used __attribute__((aligned(8))) w02=       0x0002000200020002LL;

/* Byte masks; in each name the digits mark the selected bytes, most
 * significant byte first. */
static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;

/* Mutable dither words; not initialised here, so they start out zero. */
static volatile uint64_t attribute_used __attribute__((aligned(8))) b5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;

static uint64_t __attribute__((aligned(8))) dither4[2]={
        0x0103010301030103LL,
        0x0200020002000200LL,};

static uint64_t __attribute__((aligned(8))) dither8[2]={
        0x0602060206020602LL,
        0x0004000400040004LL,};

/* Per-16-bit-word field masks (the same mask repeated in all four words). */
static uint64_t                __attribute__((aligned(8))) b16Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g16Mask=   0x07E007E007E007E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r16Mask=   0xF800F800F800F800LL;
static uint64_t                __attribute__((aligned(8))) b15Mask=   0x001F001F001F001FLL;
static uint64_t attribute_used __attribute__((aligned(8))) g15Mask=   0x03E003E003E003E0LL;
static uint64_t attribute_used __attribute__((aligned(8))) r15Mask=   0x7C007C007C007C00LL;

/* Masks selecting every third byte, at three different phases. */
static uint64_t attribute_used __attribute__((aligned(8))) M24A=      0x00FF0000FF0000FFLL;
static uint64_t attribute_used __attribute__((aligned(8))) M24B=      0xFF0000FF0000FF00LL;
static uint64_t attribute_used __attribute__((aligned(8))) M24C=      0x0000FF0000FF0000LL;

#ifdef FAST_BGR2YV12
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
#else
static const uint64_t bgr2YCoeff   attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
static const uint64_t bgr2UCoeff   attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
static const uint64_t bgr2VCoeff   attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
#endif /* FAST_BGR2YV12 */
static const uint64_t bgr2YOffset  attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8))) = 0x8080808080808080ULL;
static const uint64_t w1111        attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
#endif /* defined(ARCH_X86) && defined(CONFIG_GPL) */
228 783e9cc9 Michael Niedermayer
229
// clipping helper table for C implementations:
230
static unsigned char clip_table[768];
231
232 d4e24275 Michael Niedermayer
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b);
233 6a4970ab Diego Biurrun
234 cf7d1c1a Michael Niedermayer
extern const uint8_t dither_2x2_4[2][8];
235
extern const uint8_t dither_2x2_8[2][8];
236
extern const uint8_t dither_8x8_32[8][8];
237
extern const uint8_t dither_8x8_73[8][8];
238
extern const uint8_t dither_8x8_220[8][8];
239 5cebb24b Michael Niedermayer
240 2d529db5 Luca Abeni
/**
 * Name callback stored in sws_context_class.
 *
 * @param ptr ignored
 * @return the constant string "swscaler" (static storage; do not free)
 */
static const char * sws_context_to_name(void * ptr) {
    (void)ptr; /* every context reports the same name */
    return "swscaler";
}
243
244 d61845e8 Ivo van Poorten
#define OFFSET(x) offsetof(SwsContext, x)
245
#define DEFAULT 0
246
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
247
248
static const AVOption options[] = {
249
    { "sws_flags", "scaler/cpu flags", OFFSET(flags), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, VE, "sws_flags" },
250
    { "fast_bilinear", "fast bilinear", 0, FF_OPT_TYPE_CONST, SWS_FAST_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
251
    { "bilinear", "bilinear", 0, FF_OPT_TYPE_CONST, SWS_BILINEAR, INT_MIN, INT_MAX, VE, "sws_flags" },
252
    { "bicubic", "bicubic", 0, FF_OPT_TYPE_CONST, SWS_BICUBIC, INT_MIN, INT_MAX, VE, "sws_flags" },
253
    { "experimental", "experimental", 0, FF_OPT_TYPE_CONST, SWS_X, INT_MIN, INT_MAX, VE, "sws_flags" },
254
    { "neighbor", "nearest neighbor", 0, FF_OPT_TYPE_CONST, SWS_POINT, INT_MIN, INT_MAX, VE, "sws_flags" },
255
    { "area", "averaging area", 0, FF_OPT_TYPE_CONST, SWS_AREA, INT_MIN, INT_MAX, VE, "sws_flags" },
256
    { "bicublin", "luma bicubic, chroma bilinear", 0, FF_OPT_TYPE_CONST, SWS_BICUBLIN, INT_MIN, INT_MAX, VE, "sws_flags" },
257
    { "gauss", "gaussian", 0, FF_OPT_TYPE_CONST, SWS_GAUSS, INT_MIN, INT_MAX, VE, "sws_flags" },
258
    { "sinc", "sinc", 0, FF_OPT_TYPE_CONST, SWS_SINC, INT_MIN, INT_MAX, VE, "sws_flags" },
259
    { "lanczos", "lanczos", 0, FF_OPT_TYPE_CONST, SWS_LANCZOS, INT_MIN, INT_MAX, VE, "sws_flags" },
260
    { "spline", "natural bicubic spline", 0, FF_OPT_TYPE_CONST, SWS_SPLINE, INT_MIN, INT_MAX, VE, "sws_flags" },
261
    { "print_info", "print info", 0, FF_OPT_TYPE_CONST, SWS_PRINT_INFO, INT_MIN, INT_MAX, VE, "sws_flags" },
262
    { "accurate_rnd", "accurate rounding", 0, FF_OPT_TYPE_CONST, SWS_ACCURATE_RND, INT_MIN, INT_MAX, VE, "sws_flags" },
263
    { "mmx", "MMX SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX, INT_MIN, INT_MAX, VE, "sws_flags" },
264
    { "mmx2", "MMX2 SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_MMX2, INT_MIN, INT_MAX, VE, "sws_flags" },
265
    { "3dnow", "3DNOW SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_3DNOW, INT_MIN, INT_MAX, VE, "sws_flags" },
266
    { "altivec", "AltiVec SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_ALTIVEC, INT_MIN, INT_MAX, VE, "sws_flags" },
267
    { "bfin", "Blackfin SIMD acceleration", 0, FF_OPT_TYPE_CONST, SWS_CPU_CAPS_BFIN, INT_MIN, INT_MAX, VE, "sws_flags" },
268
    { "full_chroma_int", "full chroma interpolation", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INT, INT_MIN, INT_MAX, VE, "sws_flags" },
269
    { "full_chroma_inp", "full chroma input", 0 , FF_OPT_TYPE_CONST, SWS_FULL_CHR_H_INP, INT_MIN, INT_MAX, VE, "sws_flags" },
270
    { NULL }
271
};
272
273
#undef VE
274
#undef DEFAULT
275
276
static AVClass sws_context_class = { "SWScaler", sws_context_to_name, options };
277 2d529db5 Luca Abeni
278 e9e12f0e Luca Abeni
char *sws_format_name(enum PixelFormat format)
279 94c4def2 Luca Abeni
{
280 e9e12f0e Luca Abeni
    switch (format) {
281
        case PIX_FMT_YUV420P:
282
            return "yuv420p";
283
        case PIX_FMT_YUYV422:
284
            return "yuyv422";
285
        case PIX_FMT_RGB24:
286
            return "rgb24";
287
        case PIX_FMT_BGR24:
288
            return "bgr24";
289
        case PIX_FMT_YUV422P:
290
            return "yuv422p";
291
        case PIX_FMT_YUV444P:
292
            return "yuv444p";
293
        case PIX_FMT_RGB32:
294
            return "rgb32";
295
        case PIX_FMT_YUV410P:
296
            return "yuv410p";
297
        case PIX_FMT_YUV411P:
298
            return "yuv411p";
299
        case PIX_FMT_RGB565:
300
            return "rgb565";
301
        case PIX_FMT_RGB555:
302
            return "rgb555";
303 4884b9e5 Kostya Shishkov
        case PIX_FMT_GRAY16BE:
304
            return "gray16be";
305
        case PIX_FMT_GRAY16LE:
306
            return "gray16le";
307 e9e12f0e Luca Abeni
        case PIX_FMT_GRAY8:
308
            return "gray8";
309
        case PIX_FMT_MONOWHITE:
310
            return "mono white";
311
        case PIX_FMT_MONOBLACK:
312
            return "mono black";
313
        case PIX_FMT_PAL8:
314
            return "Palette";
315
        case PIX_FMT_YUVJ420P:
316
            return "yuvj420p";
317
        case PIX_FMT_YUVJ422P:
318
            return "yuvj422p";
319
        case PIX_FMT_YUVJ444P:
320
            return "yuvj444p";
321
        case PIX_FMT_XVMC_MPEG2_MC:
322
            return "xvmc_mpeg2_mc";
323
        case PIX_FMT_XVMC_MPEG2_IDCT:
324
            return "xvmc_mpeg2_idct";
325
        case PIX_FMT_UYVY422:
326
            return "uyvy422";
327
        case PIX_FMT_UYYVYY411:
328
            return "uyyvyy411";
329
        case PIX_FMT_RGB32_1:
330
            return "rgb32x";
331
        case PIX_FMT_BGR32_1:
332
            return "bgr32x";
333
        case PIX_FMT_BGR32:
334
            return "bgr32";
335
        case PIX_FMT_BGR565:
336
            return "bgr565";
337
        case PIX_FMT_BGR555:
338
            return "bgr555";
339
        case PIX_FMT_BGR8:
340
            return "bgr8";
341
        case PIX_FMT_BGR4:
342
            return "bgr4";
343
        case PIX_FMT_BGR4_BYTE:
344
            return "bgr4 byte";
345
        case PIX_FMT_RGB8:
346
            return "rgb8";
347
        case PIX_FMT_RGB4:
348
            return "rgb4";
349
        case PIX_FMT_RGB4_BYTE:
350
            return "rgb4 byte";
351
        case PIX_FMT_NV12:
352
            return "nv12";
353
        case PIX_FMT_NV21:
354
            return "nv21";
355 9ba7fe6d Andreas Öman
        case PIX_FMT_YUV440P:
356
            return "yuv440p";
357 e9e12f0e Luca Abeni
        default:
358
            return "Unknown format";
359
    }
360 94c4def2 Luca Abeni
}
361
362 9bde778e Luca Abeni
#if defined(ARCH_X86) && defined (CONFIG_GPL)
363 96034638 Michael Niedermayer
void in_asm_used_var_warning_killer()
364
{
365 221b804f Diego Biurrun
    volatile int i= bF8+bFC+w10+
366
    bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
367
    M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
368
    if (i) i=0;
369 96034638 Michael Niedermayer
}
370
#endif
371 d604bab9 Michael Niedermayer
372 5859233b Michael Niedermayer
/**
 * Vertical filter producing planar 8-bit output (plain C version).
 *
 * Every output sample is sum_j(src[j][i] * filter[j]), rounded by adding
 * 1<<18 before shifting right by 19, then clipped to 0..255.
 *
 * @param lumFilter     luma filter coefficients, lumFilterSize entries
 * @param lumSrc        lumFilterSize pointers to luma input lines
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chroma filter coefficients, chrFilterSize entries
 * @param chrSrc        chrFilterSize pointers to chroma input lines; in each
 *                      line the U sample i is at [i] and the V sample at
 *                      [i + 2048]
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, chrDstW bytes; NULL skips chroma entirely
 * @param vDest         V output, chrDstW bytes; only uDest is NULL-checked,
 *                      so it must be valid whenever uDest is
 * @param dstW          luma output width
 * @param chrDstW       chroma output width
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
{
    //FIXME Optimize (just quickly written, not optimized)
    int i;
    for (i=0; i<dstW; i++)
    {
        int val=1<<18; /* rounding term: half of 1<<19 */
        int j;
        for (j=0; j<lumFilterSize; j++)
            val += lumSrc[j][i] * lumFilter[j];

        dest[i]= av_clip_uint8(val>>19);
    }

    if (uDest != NULL)
    {
        for (i=0; i<chrDstW; i++)
        {
            int u=1<<18;
            int v=1<<18;
            int j;
            for (j=0; j<chrFilterSize; j++)
            {
                u += chrSrc[j][i] * chrFilter[j];
                v += chrSrc[j][i + 2048] * chrFilter[j];
            }

            uDest[i]= av_clip_uint8(u>>19);
            vDest[i]= av_clip_uint8(v>>19);
        }
    }
}
404
405 6118e52e Ville Syrjälä
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
406 221b804f Diego Biurrun
                                int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
407
                                uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
408 6118e52e Ville Syrjälä
{
409 221b804f Diego Biurrun
    //FIXME Optimize (just quickly writen not opti..)
410
    int i;
411
    for (i=0; i<dstW; i++)
412
    {
413
        int val=1<<18;
414
        int j;
415
        for (j=0; j<lumFilterSize; j++)
416
            val += lumSrc[j][i] * lumFilter[j];
417
418
        dest[i]= av_clip_uint8(val>>19);
419
    }
420
421
    if (uDest == NULL)
422
        return;
423
424
    if (dstFormat == PIX_FMT_NV12)
425
        for (i=0; i<chrDstW; i++)
426
        {
427
            int u=1<<18;
428
            int v=1<<18;
429
            int j;
430
            for (j=0; j<chrFilterSize; j++)
431
            {
432
                u += chrSrc[j][i] * chrFilter[j];
433
                v += chrSrc[j][i + 2048] * chrFilter[j];
434
            }
435
436
            uDest[2*i]= av_clip_uint8(u>>19);
437
            uDest[2*i+1]= av_clip_uint8(v>>19);
438
        }
439
    else
440
        for (i=0; i<chrDstW; i++)
441
        {
442
            int u=1<<18;
443
            int v=1<<18;
444
            int j;
445
            for (j=0; j<chrFilterSize; j++)
446
            {
447
                u += chrSrc[j][i] * chrFilter[j];
448
                v += chrSrc[j][i + 2048] * chrFilter[j];
449
            }
450
451
            uDest[2*i]= av_clip_uint8(v>>19);
452
            uDest[2*i+1]= av_clip_uint8(u>>19);
453
        }
454 6118e52e Ville Syrjälä
}
455 46de8b73 Michael Niedermayer
456 25593e29 Michael Niedermayer
/*
 * Loop header for the N-tap vertical filter with packed output.
 *
 * Opens `for (i...) {` over pairs of output pixels and LEAVES THE BRACE
 * OPEN: the code following the macro is the loop body and must supply the
 * closing '}'. Inside the body these are available:
 *   i2      = 2*i (index of the first pixel of the pair)
 *   Y1, Y2  filtered luma of pixels i2 and i2+1
 *   U, V    filtered chroma shared by the pair (V is read at offset 2048)
 *   r, g, b uninitialised `type *` pointers for the user of the macro
 * Expects in scope: i, dstW, lumSrc, lumFilter, lumFilterSize, chrSrc,
 * chrFilter, chrFilterSize.
 *
 * The four values are clamped to 0..255 only when bit 8 of (Y1|Y2|U|V) is
 * set, i.e. the test is a fast path that assumes out-of-range results
 * always show up in that bit.
 */
#define YSCALE_YUV_2_PACKEDX_C(type) \
    for (i=0; i<(dstW>>1); i++){\
        int j;\
        int Y1 = 1<<18;\
        int Y2 = 1<<18;\
        int U  = 1<<18;\
        int V  = 1<<18;\
        type av_unused *r, *b, *g;\
        const int i2= 2*i;\
        \
        for (j=0; j<lumFilterSize; j++)\
        {\
            Y1 += lumSrc[j][i2] * lumFilter[j];\
            Y2 += lumSrc[j][i2+1] * lumFilter[j];\
        }\
        for (j=0; j<chrFilterSize; j++)\
        {\
            U += chrSrc[j][i] * chrFilter[j];\
            V += chrSrc[j][i+2048] * chrFilter[j];\
        }\
        Y1>>=19;\
        Y2>>=19;\
        U >>=19;\
        V >>=19;\
        if ((Y1|Y2|U|V)&256)\
        {\
            if (Y1>255)   Y1=255; \
            else if (Y1<0)Y1=0;   \
            if (Y2>255)   Y2=255; \
            else if (Y2<0)Y2=0;   \
            if (U>255)    U=255;  \
            else if (U<0) U=0;    \
            if (V>255)    V=255;  \
            else if (V<0) V=0;    \
        }
491 6a4970ab Diego Biurrun
492 46de8b73 Michael Niedermayer
/*
 * YSCALE_YUV_2_PACKEDX_C(type) plus lookup-table setup: r, g and b are
 * pointed into the context tables selected by V, U+V and U respectively,
 * so that r[Y] + g[Y] + b[Y] yields one output pixel. The loop brace
 * opened by the inner macro is still left open for the caller to close.
 */
#define YSCALE_YUV_2_RGBX_C(type) \
    YSCALE_YUV_2_PACKEDX_C(type)  \
    r = (type *)c->table_rV[V];   \
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
    b = (type *)c->table_bU[U];
497
498
/*
 * Loop header for blending two source lines into packed output.
 *
 * Opens `for (i...) {` over pairs of output pixels and LEAVES THE BRACE
 * OPEN for the code that follows. Provides i2 (= 2*i), Y1, Y2, U and V,
 * each computed as (line0*alpha1 + line1*alpha) >> 19 with no clamping.
 * Expects in scope: i, dstW, buf0, buf1, yalpha, yalpha1, uvbuf0, uvbuf1,
 * uvalpha, uvalpha1. V samples are read at offset 2048 in the chroma lines.
 */
#define YSCALE_YUV_2_PACKED2_C   \
    for (i=0; i<(dstW>>1); i++){ \
        const int i2= 2*i;       \
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
        int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
505 46de8b73 Michael Niedermayer
506
/*
 * YSCALE_YUV_2_PACKED2_C plus lookup-table setup: declares `type *r, *g, *b`
 * and points them into the context tables selected by V, U+V and U. The
 * loop brace opened by the inner macro is left open for the caller.
 */
#define YSCALE_YUV_2_RGB2_C(type) \
    YSCALE_YUV_2_PACKED2_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
512 cf7d1c1a Michael Niedermayer
513 25593e29 Michael Niedermayer
/*
 * Loop header for the single-line (no vertical blend) packed output path.
 *
 * Opens `for (i...) {` over pairs of output pixels and LEAVES THE BRACE
 * OPEN for the code that follows. Provides i2 (= 2*i), Y1/Y2 taken from
 * buf0 and U/V taken from uvbuf1 only, each shifted right by 7, with no
 * clamping. Expects in scope: i, dstW, buf0, uvbuf1.
 */
#define YSCALE_YUV_2_PACKED1_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf1[i     ])>>7;\
        int V= (uvbuf1[i+2048])>>7;
520 46de8b73 Michael Niedermayer
521
/*
 * YSCALE_YUV_2_PACKED1_C plus lookup-table setup: declares `type *r, *g, *b`
 * and points them into the context tables selected by V, U+V and U. The
 * loop brace opened by the inner macro is left open for the caller.
 */
#define YSCALE_YUV_2_RGB1_C(type) \
    YSCALE_YUV_2_PACKED1_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
527 cf7d1c1a Michael Niedermayer
528 25593e29 Michael Niedermayer
/*
 * Like YSCALE_YUV_2_PACKED1_C, but chroma is the average of two lines:
 * U and V are (uvbuf0 + uvbuf1) >> 8, while Y1/Y2 still come from buf0 >> 7.
 *
 * Opens `for (i...) {` over pairs of output pixels and LEAVES THE BRACE
 * OPEN for the code that follows. Expects in scope: i, dstW, buf0, uvbuf0,
 * uvbuf1.
 */
#define YSCALE_YUV_2_PACKED1B_C \
    for (i=0; i<(dstW>>1); i++){\
        const int i2= 2*i;\
        int Y1= buf0[i2  ]>>7;\
        int Y2= buf0[i2+1]>>7;\
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
        int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;
535 46de8b73 Michael Niedermayer
536
/*
 * YSCALE_YUV_2_PACKED1B_C plus lookup-table setup: declares
 * `type *r, *g, *b` and points them into the context tables selected by
 * V, U+V and U. The loop brace opened by the inner macro is left open for
 * the caller.
 */
#define YSCALE_YUV_2_RGB1B_C(type) \
    YSCALE_YUV_2_PACKED1B_C\
    type *r, *b, *g;\
    r = (type *)c->table_rV[V];\
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
    b = (type *)c->table_bU[U];
542 cf7d1c1a Michael Niedermayer
543 46de8b73 Michael Niedermayer
/*
 * Complete `switch (c->dstFormat)` that writes one output line in the
 * selected packed format.
 *
 *   func   function-like macro taking a C type (invoked as func(uint32_t)
 *          etc.); it must open a per-pixel-pair loop and leave the brace
 *          open, providing i2, Y1, Y2 and the r/g/b lookup pointers.
 *   func2  object-like macro (invoked without arguments) with the same
 *          open-loop contract, providing i2, Y1, Y2, U and V; used for the
 *          YUYV422 / UYVY422 cases.
 *
 * Each case supplies the loop body and the closing '}' for the loop that
 * func/func2 opened. Expects in scope: c, dest, dstW, i, y, plus whatever
 * func/func2 need.
 *
 * Notes:
 *  - The RGB24/BGR24 cases advance `dest` while writing.
 *  - The 16/15/8/4-bit cases add a dither value, chosen from y (and for the
 *    8x8 tables from the pixel index), to the luma index before the table
 *    lookup.
 *  - PIX_FMT_MONOBLACK ignores func/func2 and reads buf0, buf1, yalpha and
 *    yalpha1 directly, so it only compiles where those names exist. It
 *    packs 8 pixels per output byte, first pixel in the most significant
 *    bit, and handles only whole groups of 8 pixels.
 *  - Formats not listed fall through the switch without writing anything.
 */
#define YSCALE_YUV_2_ANYRGB_C(func, func2)\
    switch(c->dstFormat)\
    {\
    case PIX_FMT_RGB32:\
    case PIX_FMT_BGR32:\
        func(uint32_t)\
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
        }                \
        break;\
    case PIX_FMT_RGB24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= r[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= b[Y1];\
            ((uint8_t*)dest)[3]= r[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= b[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_BGR24:\
        func(uint8_t)\
            ((uint8_t*)dest)[0]= b[Y1];\
            ((uint8_t*)dest)[1]= g[Y1];\
            ((uint8_t*)dest)[2]= r[Y1];\
            ((uint8_t*)dest)[3]= b[Y2];\
            ((uint8_t*)dest)[4]= g[Y2];\
            ((uint8_t*)dest)[5]= r[Y2];\
            dest+=6;\
        }\
        break;\
    case PIX_FMT_RGB565:\
    case PIX_FMT_BGR565:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_4[y&1    ][0];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_4[y&1    ][1];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB555:\
    case PIX_FMT_BGR555:\
        {\
            const int dr1= dither_2x2_8[y&1    ][0];\
            const int dg1= dither_2x2_8[y&1    ][1];\
            const int db1= dither_2x2_8[(y&1)^1][0];\
            const int dr2= dither_2x2_8[y&1    ][1];\
            const int dg2= dither_2x2_8[y&1    ][0];\
            const int db2= dither_2x2_8[(y&1)^1][1];\
            func(uint16_t)\
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
            }\
        }\
        break;\
    case PIX_FMT_RGB8:\
    case PIX_FMT_BGR8:\
        {\
            const uint8_t * const d64= dither_8x8_73[y&7];\
            const uint8_t * const d32= dither_8x8_32[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_RGB4:\
    case PIX_FMT_BGR4:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
            }\
        }\
        break;\
    case PIX_FMT_RGB4_BYTE:\
    case PIX_FMT_BGR4_BYTE:\
        {\
            const uint8_t * const d64= dither_8x8_73 [y&7];\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            func(uint8_t)\
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
            }\
        }\
        break;\
    case PIX_FMT_MONOBLACK:\
        {\
            const uint8_t * const d128=dither_8x8_220[y&7];\
            uint8_t *g= c->table_gU[128] + c->table_gV[128];\
            for (i=0; i<dstW-7; i+=8){\
                int acc;\
                acc =       g[((buf0[i  ]*yalpha1+buf1[i  ]*yalpha)>>19) + d128[0]];\
                acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
                acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
                acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
                acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
                acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
                acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
                acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
                ((uint8_t*)dest)[0]= acc;\
                dest++;\
            }\
        }\
        break;\
    case PIX_FMT_YUYV422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= Y1;\
            ((uint8_t*)dest)[2*i2+1]= U;\
            ((uint8_t*)dest)[2*i2+2]= Y2;\
            ((uint8_t*)dest)[2*i2+3]= V;\
        }                \
        break;\
    case PIX_FMT_UYVY422:\
        func2\
            ((uint8_t*)dest)[2*i2+0]= U;\
            ((uint8_t*)dest)[2*i2+1]= Y1;\
            ((uint8_t*)dest)[2*i2+2]= V;\
            ((uint8_t*)dest)[2*i2+3]= Y2;\
        }                \
        break;\
    }
719 cf7d1c1a Michael Niedermayer
720
721 25593e29 Michael Niedermayer
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
722 221b804f Diego Biurrun
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
723
                                  uint8_t *dest, int dstW, int y)
724 e3d2500f Michael Niedermayer
{
725 221b804f Diego Biurrun
    int i;
726
    switch(c->dstFormat)
727
    {
728
    case PIX_FMT_BGR32:
729
    case PIX_FMT_RGB32:
730
        YSCALE_YUV_2_RGBX_C(uint32_t)
731
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
732
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
733
        }
734
        break;
735
    case PIX_FMT_RGB24:
736
        YSCALE_YUV_2_RGBX_C(uint8_t)
737
            ((uint8_t*)dest)[0]= r[Y1];
738
            ((uint8_t*)dest)[1]= g[Y1];
739
            ((uint8_t*)dest)[2]= b[Y1];
740
            ((uint8_t*)dest)[3]= r[Y2];
741
            ((uint8_t*)dest)[4]= g[Y2];
742
            ((uint8_t*)dest)[5]= b[Y2];
743
            dest+=6;
744
        }
745
        break;
746
    case PIX_FMT_BGR24:
747
        YSCALE_YUV_2_RGBX_C(uint8_t)
748
            ((uint8_t*)dest)[0]= b[Y1];
749
            ((uint8_t*)dest)[1]= g[Y1];
750
            ((uint8_t*)dest)[2]= r[Y1];
751
            ((uint8_t*)dest)[3]= b[Y2];
752
            ((uint8_t*)dest)[4]= g[Y2];
753
            ((uint8_t*)dest)[5]= r[Y2];
754
            dest+=6;
755
        }
756
        break;
757
    case PIX_FMT_RGB565:
758
    case PIX_FMT_BGR565:
759
        {
760
            const int dr1= dither_2x2_8[y&1    ][0];
761
            const int dg1= dither_2x2_4[y&1    ][0];
762
            const int db1= dither_2x2_8[(y&1)^1][0];
763
            const int dr2= dither_2x2_8[y&1    ][1];
764
            const int dg2= dither_2x2_4[y&1    ][1];
765
            const int db2= dither_2x2_8[(y&1)^1][1];
766
            YSCALE_YUV_2_RGBX_C(uint16_t)
767
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
768
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
769
            }
770
        }
771
        break;
772
    case PIX_FMT_RGB555:
773
    case PIX_FMT_BGR555:
774
        {
775
            const int dr1= dither_2x2_8[y&1    ][0];
776
            const int dg1= dither_2x2_8[y&1    ][1];
777
            const int db1= dither_2x2_8[(y&1)^1][0];
778
            const int dr2= dither_2x2_8[y&1    ][1];
779
            const int dg2= dither_2x2_8[y&1    ][0];
780
            const int db2= dither_2x2_8[(y&1)^1][1];
781
            YSCALE_YUV_2_RGBX_C(uint16_t)
782
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
783
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
784
            }
785
        }
786
        break;
787
    case PIX_FMT_RGB8:
788
    case PIX_FMT_BGR8:
789
        {
790
            const uint8_t * const d64= dither_8x8_73[y&7];
791
            const uint8_t * const d32= dither_8x8_32[y&7];
792
            YSCALE_YUV_2_RGBX_C(uint8_t)
793
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
794
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
795
            }
796
        }
797
        break;
798
    case PIX_FMT_RGB4:
799
    case PIX_FMT_BGR4:
800
        {
801
            const uint8_t * const d64= dither_8x8_73 [y&7];
802
            const uint8_t * const d128=dither_8x8_220[y&7];
803
            YSCALE_YUV_2_RGBX_C(uint8_t)
804
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
805
                                  +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
806
            }
807
        }
808
        break;
809
    case PIX_FMT_RGB4_BYTE:
810
    case PIX_FMT_BGR4_BYTE:
811
        {
812
            const uint8_t * const d64= dither_8x8_73 [y&7];
813
            const uint8_t * const d128=dither_8x8_220[y&7];
814
            YSCALE_YUV_2_RGBX_C(uint8_t)
815
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
816
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
817
            }
818
        }
819
        break;
820
    case PIX_FMT_MONOBLACK:
821
        {
822
            const uint8_t * const d128=dither_8x8_220[y&7];
823
            uint8_t *g= c->table_gU[128] + c->table_gV[128];
824
            int acc=0;
825
            for (i=0; i<dstW-1; i+=2){
826
                int j;
827
                int Y1=1<<18;
828
                int Y2=1<<18;
829
830
                for (j=0; j<lumFilterSize; j++)
831
                {
832
                    Y1 += lumSrc[j][i] * lumFilter[j];
833
                    Y2 += lumSrc[j][i+1] * lumFilter[j];
834
                }
835
                Y1>>=19;
836
                Y2>>=19;
837
                if ((Y1|Y2)&256)
838
                {
839
                    if (Y1>255)   Y1=255;
840
                    else if (Y1<0)Y1=0;
841
                    if (Y2>255)   Y2=255;
842
                    else if (Y2<0)Y2=0;
843
                }
844
                acc+= acc + g[Y1+d128[(i+0)&7]];
845
                acc+= acc + g[Y2+d128[(i+1)&7]];
846
                if ((i&7)==6){
847
                    ((uint8_t*)dest)[0]= acc;
848
                    dest++;
849
                }
850
            }
851
        }
852
        break;
853
    case PIX_FMT_YUYV422:
854
        YSCALE_YUV_2_PACKEDX_C(void)
855
            ((uint8_t*)dest)[2*i2+0]= Y1;
856
            ((uint8_t*)dest)[2*i2+1]= U;
857
            ((uint8_t*)dest)[2*i2+2]= Y2;
858
            ((uint8_t*)dest)[2*i2+3]= V;
859
        }
860
        break;
861
    case PIX_FMT_UYVY422:
862
        YSCALE_YUV_2_PACKEDX_C(void)
863
            ((uint8_t*)dest)[2*i2+0]= U;
864
            ((uint8_t*)dest)[2*i2+1]= Y1;
865
            ((uint8_t*)dest)[2*i2+2]= V;
866
            ((uint8_t*)dest)[2*i2+3]= Y2;
867
        }
868
        break;
869
    }
870 e3d2500f Michael Niedermayer
}
871
872
873 7630f2e0 Michael Niedermayer
/*
 * Select which CPU-specific variants of the scaler get compiled, then
 * include swscale_template.c once per selected variant. RENAME() appends a
 * per-variant suffix, and HAVE_MMX / HAVE_MMX2 / HAVE_3DNOW / HAVE_ALTIVEC
 * are redefined before each inclusion.
 */
//Note: we have C, X86, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) || !defined(CONFIG_GPL)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

#if ((defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_MMX
#endif

#if (defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_MMX2
#endif

#if ((defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// start from a clean slate; each variant below defines exactly what it needs
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#define RENAME(a) a ## _C
#include "swscale_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "swscale_template.c"
#endif
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

//X86 versions
/*
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _X86
#include "swscale_template.c"
*/
//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "swscale_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "swscale_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "swscale_template.c"
#endif

#endif //ARCH_X86

// minor note: the HAVE_xyz macros no longer reflect the build configuration after this point, so don't use them
967 d604bab9 Michael Niedermayer
968 a86c461c Michael Niedermayer
/**
 * Evaluate a piecewise cubic at distance dist from its origin.
 *
 * On the first unit interval the value is
 *     a + b*dist + c*dist^2 + d*dist^3.
 * For every further unit interval the coefficients are transformed
 * (a becomes 0, b/c/d are recombined as below) and dist is reduced by 1.
 *
 * Implemented as a loop instead of recursion so that stack use does not
 * grow with dist and a NaN distance terminates instead of recursing forever.
 * dist is expected to be finite; the loop runs roughly dist times.
 *
 * @param a,b,c,d cubic coefficients for the first interval
 * @param dist    distance at which to evaluate
 * @return        value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    while (dist > 1.0)
    {
        /* all three new coefficients are derived from the old b, c, d */
        const double nextB=  b + 2.0*c + 3.0*d;
        const double nextC=      c + 3.0*d;
        const double nextD= -b - 3.0*c - 6.0*d;

        a= 0.0;
        b= nextB;
        c= nextC;
        d= nextD;
        dist-= 1.0;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
978 6c7506de Michael Niedermayer
979 bca11e75 Michael Niedermayer
/**
 * Build the integer scaling filter for one axis.
 *
 * Steps: compute floating point coefficients for the algorithm selected in
 * flags, optionally convolve them with srcFilter, trim near-zero taps, pad
 * the tap count to filterAlign, fold taps that fall outside the source back
 * inside, then normalize each row so that it sums to `one` and store it as
 * int16_t with error diffusion.
 *
 * @param outFilter     receives an av_mallocz()ed array of
 *                      (*outFilterSize)*(dstW+1) coefficients
 * @param filterPos     receives an av_malloc()ed array of dstW+1 source
 *                      positions (first tap of each output sample)
 * @param outFilterSize receives the number of taps per output sample
 * @param xInc          source step per output sample, 16.16 fixed point
 * @param srcW          source size along this axis
 * @param dstW          destination size along this axis
 * @param filterAlign   tap count alignment; the rounding below assumes a
 *                      power of two
 * @param one           value each normalized coefficient row sums to
 * @param flags         SWS_* algorithm and CPU capability flags
 * @param srcFilter     optional vector convolved with every row, may be NULL
 * @param dstFilter     may be NULL. NOTE(review): it only enlarges
 *                      filter2Size and is never applied (see FIXME below)
 * @param param         algorithm tuning values, SWS_PARAM_DEFAULT selects
 *                      the built-in default
 * @return 0 on success, -1 if the filter would reach MAX_FILTER_SIZE or an
 *         allocation fails. On failure *filterPos may already be allocated
 *         and is left for the caller to release.
 */
static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
                             int srcW, int dstW, int filterAlign, int one, int flags,
                             SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
{
    int i;
    int ret= -1;
    int filterSize;
    int filter2Size;
    int minFilterSize;
    double *filter=NULL;
    double *filter2=NULL;
#if defined(ARCH_X86)
    if (flags & SWS_CPU_CAPS_MMX)
        asm volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
#endif

    // Note the +1 is for the MMXscaler which reads over the end
    *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
    if (!*filterPos) goto error;

    if (FFABS(xInc - 0x10000) <10) // unscaled
    {
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto error;
        for (i=0; i<dstW*filterSize; i++) filter[i]=0;

        for (i=0; i<dstW; i++)
        {
            filter[i*filterSize]=1;
            (*filterPos)[i]=i;
        }
    }
    else if (flags&SWS_POINT) // lame looking point sampling mode
    {
        int xDstInSrc;
        filterSize= 1;
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto error;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;

            (*filterPos)[i]= xx;
            filter[i]= 1.0;
            xDstInSrc+= xInc;
        }
    }
    else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
    {
        int xDstInSrc;
        if      (flags&SWS_BICUBIC) filterSize= 4;
        else if (flags&SWS_X      ) filterSize= 4;
        else                        filterSize= 2; // SWS_BILINEAR / SWS_AREA
        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto error;

        xDstInSrc= xInc/2 - 0x8000;
        for (i=0; i<dstW; i++)
        {
            int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
            int j;

            (*filterPos)[i]= xx;
            //Bilinear upscale / linear interpolate / Area averaging
            for (j=0; j<filterSize; j++)
            {
                // xx may be negative here; multiply instead of left-shifting a negative value
                double d= FFABS(xx*(1<<16) - xDstInSrc)/(double)(1<<16);
                double coeff= 1.0 - d;
                if (coeff<0) coeff=0;
                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc;
        }
    }
    else
    {
        double xDstInSrc;
        double sizeFactor, filterSizeInSrc;
        const double xInc1= (double)xInc / (double)(1<<16);

        if      (flags&SWS_BICUBIC)      sizeFactor=  4.0;
        else if (flags&SWS_X)            sizeFactor=  8.0;
        else if (flags&SWS_AREA)         sizeFactor=  1.0; //downscale only, for upscale it is bilinear
        else if (flags&SWS_GAUSS)        sizeFactor=  8.0;   // infinite ;)
        else if (flags&SWS_LANCZOS)      sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
        else if (flags&SWS_SINC)         sizeFactor= 20.0; // infinite ;)
        else if (flags&SWS_SPLINE)       sizeFactor= 20.0;  // infinite ;)
        else if (flags&SWS_BILINEAR)     sizeFactor=  2.0;
        else {
            sizeFactor= 0.0; //GCC warning killer
            ASSERT(0)
        }

        if (xInc1 <= 1.0)       filterSizeInSrc= sizeFactor; // upscale
        else                    filterSizeInSrc= sizeFactor*srcW / (double)dstW;

        filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
        if (filterSize > srcW-2) filterSize=srcW-2;
        if (filterSize < 1)      filterSize=1; // srcW <= 2 would otherwise give a non-positive tap count

        filter= av_malloc(dstW*sizeof(double)*filterSize);
        if (!filter) goto error;

        xDstInSrc= xInc1 / 2.0 - 0.5;
        for (i=0; i<dstW; i++)
        {
            int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
            int j;
            (*filterPos)[i]= xx;
            for (j=0; j<filterSize; j++)
            {
                // distance of this tap from the sample centre, in units where the kernel support is sizeFactor
                double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
                double coeff;
                if (flags & SWS_BICUBIC)
                {
                    double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
                    double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;

                    if (d<1.0)
                        coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
                    else if (d<2.0)
                        coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
                    else
                        coeff=0.0;
                }
                else if (flags & SWS_X)
                {
                    double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;

                    if (d<1.0)
                        coeff = cos(d*PI);
                    else
                        coeff=-1.0;
                    if (coeff<0.0)      coeff= -pow(-coeff, A);
                    else                coeff=  pow( coeff, A);
                    coeff= coeff*0.5 + 0.5;
                }
                else if (flags & SWS_AREA)
                {
                    double srcPixelSize= 1.0/xInc1;
                    if      (d + srcPixelSize/2 < 0.5) coeff= 1.0;
                    else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
                    else coeff=0.0;
                }
                else if (flags & SWS_GAUSS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = pow(2.0, - p*d*d);
                }
                else if (flags & SWS_SINC)
                {
                    coeff = d ? sin(d*PI)/(d*PI) : 1.0;
                }
                else if (flags & SWS_LANCZOS)
                {
                    double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
                    coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
                    if (d>p) coeff=0;
                }
                else if (flags & SWS_BILINEAR)
                {
                    coeff= 1.0 - d;
                    if (coeff<0) coeff=0;
                }
                else if (flags & SWS_SPLINE)
                {
                    double p=-2.196152422706632;
                    coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
                }
                else {
                    coeff= 0.0; //GCC warning killer
                    ASSERT(0)
                }

                filter[i*filterSize + j]= coeff;
                xx++;
            }
            xDstInSrc+= xInc1;
        }
    }

    /* apply src & dst Filter to filter -> filter2, then release filter */
    ASSERT(filterSize>0)
    filter2Size= filterSize;
    if (srcFilter) filter2Size+= srcFilter->length - 1;
    if (dstFilter) filter2Size+= dstFilter->length - 1;
    ASSERT(filter2Size>0)
    filter2= av_malloc(filter2Size*dstW*sizeof(double));
    if (!filter2) goto error;

    for (i=0; i<dstW; i++)
    {
        int j;
        SwsVector scaleFilter;
        SwsVector *outVec;

        // view of row i of filter as a vector, no copy is made
        scaleFilter.coeff= filter + i*filterSize;
        scaleFilter.length= filterSize;

        if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
        else           outVec= &scaleFilter;

        ASSERT(outVec->length == filter2Size)
        //FIXME dstFilter

        for (j=0; j<outVec->length; j++)
        {
            filter2[i*filter2Size + j]= outVec->coeff[j];
        }

        // keep the row centred after it grew from filterSize to filter2Size taps
        (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;

        if (outVec != &scaleFilter) sws_freeVec(outVec);
    }
    av_free(filter); filter=NULL;

    /* try to reduce the filter-size (step1 find size and shift left) */
    // Assume it is near normalized (*0.5 or *2.0 is OK but * 0.001 is not).
    minFilterSize= 0;
    for (i=dstW-1; i>=0; i--)
    {
        int min= filter2Size;
        int j;
        double cutOff=0.0;

        /* get rid off near zero elements on the left by shifting left */
        for (j=0; j<filter2Size; j++)
        {
            int k;
            // always element 0: the row is shifted left on every pass
            cutOff += FFABS(filter2[i*filter2Size]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;

            /* preserve monotonicity because the core can't handle the filter otherwise */
            if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;

            // Move filter coeffs left
            for (k=1; k<filter2Size; k++)
                filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
            filter2[i*filter2Size + k - 1]= 0.0;
            (*filterPos)[i]++;
        }

        cutOff=0.0;
        /* count near zeros on the right */
        for (j=filter2Size-1; j>0; j--)
        {
            cutOff += FFABS(filter2[i*filter2Size + j]);

            if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
            min--;
        }

        if (min>minFilterSize) minFilterSize= min;
    }

    if (flags & SWS_CPU_CAPS_ALTIVEC) {
        // we can handle the special case 4,
        // so we don't want to go to the full 8
        if (minFilterSize < 5)
            filterAlign = 4;

        // we really don't want to waste our time
        // doing useless computation, so fall-back on
        // the scalar C code for very small filter.
        // vectorizing is worth it only if you have
        // decent-sized vector.
        if (minFilterSize < 3)
            filterAlign = 1;
    }

    if (flags & SWS_CPU_CAPS_MMX) {
        // special case for unscaled vertical filtering
        if (minFilterSize == 1 && filterAlign == 2)
            filterAlign= 1;
    }

    ASSERT(minFilterSize > 0)
    // round the tap count up to a multiple of filterAlign
    filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
    ASSERT(filterSize > 0)
    // reject oversized filters before allocating, so nothing is leaked on this path
    if (filterSize >= MAX_FILTER_SIZE)
        goto error;
    filter= av_malloc(filterSize*dstW*sizeof(double));
    if (!filter) goto error;
    *outFilterSize= filterSize;

    if (flags&SWS_PRINT_INFO)
        av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
    /* try to reduce the filter-size (step2 reduce it) */
    for (i=0; i<dstW; i++)
    {
        int j;

        for (j=0; j<filterSize; j++)
        {
            if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
            else               filter[i*filterSize + j]= filter2[i*filter2Size + j];
        }
    }
    av_free(filter2); filter2=NULL;

    //FIXME try to align filterpos if possible

    //fix borders
    for (i=0; i<dstW; i++)
    {
        int j;
        if ((*filterPos)[i] < 0)
        {
            // Move filter coeffs left to compensate for filterPos
            for (j=1; j<filterSize; j++)
            {
                int left= FFMAX(j + (*filterPos)[i], 0);
                filter[i*filterSize + left] += filter[i*filterSize + j];
                filter[i*filterSize + j]=0;
            }
            (*filterPos)[i]= 0;
        }

        if ((*filterPos)[i] + filterSize > srcW)
        {
            int shift= (*filterPos)[i] + filterSize - srcW;
            // Move filter coeffs right to compensate for filterPos
            for (j=filterSize-2; j>=0; j--)
            {
                int right= FFMIN(j + shift, filterSize-1);
                filter[i*filterSize +right] += filter[i*filterSize +j];
                filter[i*filterSize +j]=0;
            }
            (*filterPos)[i]= srcW - filterSize;
        }
    }

    // Note the +1 is for the MMXscaler which reads over the end
    /* align at 16 for AltiVec (needed by hScale_altivec_real) */
    *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
    if (!*outFilter) goto error;

    /* Normalize & Store in outFilter */
    for (i=0; i<dstW; i++)
    {
        int j;
        double error=0;
        double sum=0;
        double scale= one;

        for (j=0; j<filterSize; j++)
        {
            sum+= filter[i*filterSize + j];
        }
        scale/= sum;
        for (j=0; j<*outFilterSize; j++)
        {
            // carry the rounding error into the next tap so the row sum stays close to `one`
            double v= filter[i*filterSize + j]*scale + error;
            int intV= floor(v + 0.5);
            (*outFilter)[i*(*outFilterSize) + j]= intV;
            error = v - intV;
        }
    }

    (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
    for (i=0; i<*outFilterSize; i++)
    {
        // duplicate the last row into the extra row
        int j= dstW*(*outFilterSize);
        (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
    }

    ret= 0;
error:
    // shared exit: both temporaries are either NULL or still owned here
    av_free(filter);
    av_free(filter2);
    return ret;
}
1356 31190492 Arpi
1357 17c613ef Uoti Urpala
#ifdef COMPILE_MMX2
1358 b7dc6f66 Michael Niedermayer
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1359 28bf81c9 Michael Niedermayer
{
1360 221b804f Diego Biurrun
    uint8_t *fragmentA;
1361
    long imm8OfPShufW1A;
1362
    long imm8OfPShufW2A;
1363
    long fragmentLengthA;
1364
    uint8_t *fragmentB;
1365
    long imm8OfPShufW1B;
1366
    long imm8OfPShufW2B;
1367
    long fragmentLengthB;
1368
    int fragmentPos;
1369
1370
    int xpos, i;
1371
1372
    // create an optimized horizontal scaling routine
1373
1374
    //code fragment
1375
1376
    asm volatile(
1377
        "jmp                         9f                 \n\t"
1378
    // Begin
1379
        "0:                                             \n\t"
1380
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1381
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1382
        "movd   1(%%"REG_c", %%"REG_S"), %%mm1          \n\t"
1383
        "punpcklbw                %%mm7, %%mm1          \n\t"
1384
        "punpcklbw                %%mm7, %%mm0          \n\t"
1385
        "pshufw                   $0xFF, %%mm1, %%mm1   \n\t"
1386
        "1:                                             \n\t"
1387
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1388
        "2:                                             \n\t"
1389
        "psubw                    %%mm1, %%mm0          \n\t"
1390
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1391
        "pmullw                   %%mm3, %%mm0          \n\t"
1392
        "psllw                       $7, %%mm1          \n\t"
1393
        "paddw                    %%mm1, %%mm0          \n\t"
1394
1395
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1396
1397
        "add                         $8, %%"REG_a"      \n\t"
1398
    // End
1399
        "9:                                             \n\t"
1400
//        "int $3                                         \n\t"
1401
        "lea                         0b, %0             \n\t"
1402
        "lea                         1b, %1             \n\t"
1403
        "lea                         2b, %2             \n\t"
1404
        "dec                         %1                 \n\t"
1405
        "dec                         %2                 \n\t"
1406
        "sub                         %0, %1             \n\t"
1407
        "sub                         %0, %2             \n\t"
1408
        "lea                         9b, %3             \n\t"
1409
        "sub                         %0, %3             \n\t"
1410
1411
1412
        :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1413
        "=r" (fragmentLengthA)
1414
    );
1415
1416
    asm volatile(
1417
        "jmp                         9f                 \n\t"
1418
    // Begin
1419
        "0:                                             \n\t"
1420
        "movq    (%%"REG_d", %%"REG_a"), %%mm3          \n\t"
1421
        "movd    (%%"REG_c", %%"REG_S"), %%mm0          \n\t"
1422
        "punpcklbw                %%mm7, %%mm0          \n\t"
1423
        "pshufw                   $0xFF, %%mm0, %%mm1   \n\t"
1424
        "1:                                             \n\t"
1425
        "pshufw                   $0xFF, %%mm0, %%mm0   \n\t"
1426
        "2:                                             \n\t"
1427
        "psubw                    %%mm1, %%mm0          \n\t"
1428
        "movl   8(%%"REG_b", %%"REG_a"), %%esi          \n\t"
1429
        "pmullw                   %%mm3, %%mm0          \n\t"
1430
        "psllw                       $7, %%mm1          \n\t"
1431
        "paddw                    %%mm1, %%mm0          \n\t"
1432
1433
        "movq                     %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1434
1435
        "add                         $8, %%"REG_a"      \n\t"
1436
    // End
1437
        "9:                                             \n\t"
1438
//        "int                       $3                   \n\t"
1439
        "lea                         0b, %0             \n\t"
1440
        "lea                         1b, %1             \n\t"
1441
        "lea                         2b, %2             \n\t"
1442
        "dec                         %1                 \n\t"
1443
        "dec                         %2                 \n\t"
1444
        "sub                         %0, %1             \n\t"
1445
        "sub                         %0, %2             \n\t"
1446
        "lea                         9b, %3             \n\t"
1447
        "sub                         %0, %3             \n\t"
1448
1449
1450
        :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1451
        "=r" (fragmentLengthB)
1452
    );
1453
1454
    xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1455
    fragmentPos=0;
1456
1457
    for (i=0; i<dstW/numSplits; i++)
1458
    {
1459
        int xx=xpos>>16;
1460
1461
        if ((i&3) == 0)
1462
        {
1463
            int a=0;
1464
            int b=((xpos+xInc)>>16) - xx;
1465
            int c=((xpos+xInc*2)>>16) - xx;
1466
            int d=((xpos+xInc*3)>>16) - xx;
1467
1468
            filter[i  ] = (( xpos         & 0xFFFF) ^ 0xFFFF)>>9;
1469
            filter[i+1] = (((xpos+xInc  ) & 0xFFFF) ^ 0xFFFF)>>9;
1470
            filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1471
            filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1472
            filterPos[i/2]= xx;
1473
1474
            if (d+1<4)
1475
            {
1476
                int maxShift= 3-(d+1);
1477
                int shift=0;
1478
1479
                memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1480
1481
                funnyCode[fragmentPos + imm8OfPShufW1B]=
1482
                    (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1483
                funnyCode[fragmentPos + imm8OfPShufW2B]=
1484
                    a | (b<<2) | (c<<4) | (d<<6);
1485
1486
                if (i+3>=dstW) shift=maxShift; //avoid overread
1487
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1488
1489
                if (shift && i>=shift)
1490
                {
1491
                    funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1492
                    funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1493
                    filterPos[i/2]-=shift;
1494
                }
1495
1496
                fragmentPos+= fragmentLengthB;
1497
            }
1498
            else
1499
            {
1500
                int maxShift= 3-d;
1501
                int shift=0;
1502
1503
                memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1504
1505
                funnyCode[fragmentPos + imm8OfPShufW1A]=
1506
                funnyCode[fragmentPos + imm8OfPShufW2A]=
1507
                    a | (b<<2) | (c<<4) | (d<<6);
1508
1509
                if (i+4>=dstW) shift=maxShift; //avoid overread
1510
                else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1511
1512
                if (shift && i>=shift)
1513
                {
1514
                    funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1515
                    funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1516
                    filterPos[i/2]-=shift;
1517
                }
1518
1519
                fragmentPos+= fragmentLengthA;
1520
            }
1521
1522
            funnyCode[fragmentPos]= RET;
1523
        }
1524
        xpos+=xInc;
1525
    }
1526
    filterPos[i/2]= xpos>>16; // needed to jump to the next part
1527 28bf81c9 Michael Niedermayer
}
1528 17c613ef Uoti Urpala
#endif /* COMPILE_MMX2 */
1529 28bf81c9 Michael Niedermayer
1530 9b2283cc Stefan Huehner
static void globalInit(void){
1531 31190492 Arpi
    // generating tables:
1532
    int i;
1533 221b804f Diego Biurrun
    for (i=0; i<768; i++){
1534
        int c= av_clip_uint8(i-256);
1535
        clip_table[i]=c;
1536 b18ea156 Michael Niedermayer
    }
1537 516b1f82 Michael Niedermayer
}
1538 c1b0bfb4 Michael Niedermayer
1539 516b1f82 Michael Niedermayer
static SwsFunc getSwsFunc(int flags){
1540 6a4970ab Diego Biurrun
1541 9bde778e Luca Abeni
#if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
1542 3d6a30d9 Diego Biurrun
#if defined(ARCH_X86)
1543 221b804f Diego Biurrun
    // ordered per speed fasterst first
1544
    if (flags & SWS_CPU_CAPS_MMX2)
1545
        return swScale_MMX2;
1546
    else if (flags & SWS_CPU_CAPS_3DNOW)
1547
        return swScale_3DNow;
1548
    else if (flags & SWS_CPU_CAPS_MMX)
1549
        return swScale_MMX;
1550
    else
1551
        return swScale_C;
1552 28bf81c9 Michael Niedermayer
1553
#else
1554 a2faa401 Romain Dolbeau
#ifdef ARCH_POWERPC
1555 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_ALTIVEC)
1556
        return swScale_altivec;
1557
    else
1558
        return swScale_C;
1559 a2faa401 Romain Dolbeau
#endif
1560 221b804f Diego Biurrun
    return swScale_C;
1561 3d6a30d9 Diego Biurrun
#endif /* defined(ARCH_X86) */
1562 28bf81c9 Michael Niedermayer
#else //RUNTIME_CPUDETECT
1563
#ifdef HAVE_MMX2
1564 221b804f Diego Biurrun
    return swScale_MMX2;
1565 28bf81c9 Michael Niedermayer
#elif defined (HAVE_3DNOW)
1566 221b804f Diego Biurrun
    return swScale_3DNow;
1567 28bf81c9 Michael Niedermayer
#elif defined (HAVE_MMX)
1568 221b804f Diego Biurrun
    return swScale_MMX;
1569 a2faa401 Romain Dolbeau
#elif defined (HAVE_ALTIVEC)
1570 221b804f Diego Biurrun
    return swScale_altivec;
1571 28bf81c9 Michael Niedermayer
#else
1572 221b804f Diego Biurrun
    return swScale_C;
1573 28bf81c9 Michael Niedermayer
#endif
1574
#endif //!RUNTIME_CPUDETECT
1575 31190492 Arpi
}
1576 7630f2e0 Michael Niedermayer
1577 d4e24275 Michael Niedermayer
/*
 * Unscaled planar YUV -> NV12/NV21 conversion of one slice.
 * Plane 0 is copied as is; source planes 1 and 2 are byte-interleaved into
 * destination plane 1 (order 1,2 for PIX_FMT_NV12, 2,1 otherwise).
 * Returns the number of processed source lines (srcSliceH).
 */
static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* Copy Y plane */
    if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        /* strides differ (or are negative): copy line by line */
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }
    /* The interleaved chroma lives in plane 1, so both the slice offset and
     * the line step must use the stride of plane 1, not that of the Y plane. */
    dst = dstParam[1] + dstStride[1]*srcSliceY/2;
    if (c->dstFormat == PIX_FMT_NV12)
        interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[1] );
    else
        interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[1] );

    return srcSliceH;
}
1603
1604 d4e24275 Michael Niedermayer
/*
 * Unscaled planar -> packed conversion of one slice through yv12toyuy2().
 * Returns the number of processed source lines (srcSliceH).
 */
static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride = dstStride[0];
    /* first output line of this slice */
    uint8_t *out = dstParam[0] + outStride*srcSliceY;

    yv12toyuy2(src[0], src[1], src[2], out, c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1612
1613 caeaabe7 Alex Beregszaszi
/*
 * Unscaled planar -> packed conversion of one slice through yv12touyvy().
 * Returns the number of processed source lines (srcSliceH).
 */
static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                               int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    const int outStride = dstStride[0];
    /* first output line of this slice */
    uint8_t *out = dstParam[0] + outStride*srcSliceY;

    yv12touyvy(src[0], src[1], src[2], out, c->srcW, srcSliceH,
               srcStride[0], srcStride[1], outStride);

    return srcSliceH;
}
1621
1622 e09d12f4 Michael Niedermayer
/* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1623 d4e24275 Michael Niedermayer
static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1624 221b804f Diego Biurrun
                          int srcSliceH, uint8_t* dst[], int dstStride[]){
1625
    const int srcFormat= c->srcFormat;
1626
    const int dstFormat= c->dstFormat;
1627
    const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1628
    const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1629
    const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1630
    const int dstId= fmt_depth(dstFormat) >> 2;
1631
    void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1632
1633
    /* BGR -> BGR */
1634
    if (  (isBGR(srcFormat) && isBGR(dstFormat))
1635
       || (isRGB(srcFormat) && isRGB(dstFormat))){
1636
        switch(srcId | (dstId<<4)){
1637
        case 0x34: conv= rgb16to15; break;
1638
        case 0x36: conv= rgb24to15; break;
1639
        case 0x38: conv= rgb32to15; break;
1640
        case 0x43: conv= rgb15to16; break;
1641
        case 0x46: conv= rgb24to16; break;
1642
        case 0x48: conv= rgb32to16; break;
1643
        case 0x63: conv= rgb15to24; break;
1644
        case 0x64: conv= rgb16to24; break;
1645
        case 0x68: conv= rgb32to24; break;
1646
        case 0x83: conv= rgb15to32; break;
1647
        case 0x84: conv= rgb16to32; break;
1648
        case 0x86: conv= rgb24to32; break;
1649
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1650
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1651
        }
1652
    }else if (  (isBGR(srcFormat) && isRGB(dstFormat))
1653
             || (isRGB(srcFormat) && isBGR(dstFormat))){
1654
        switch(srcId | (dstId<<4)){
1655
        case 0x33: conv= rgb15tobgr15; break;
1656
        case 0x34: conv= rgb16tobgr15; break;
1657
        case 0x36: conv= rgb24tobgr15; break;
1658
        case 0x38: conv= rgb32tobgr15; break;
1659
        case 0x43: conv= rgb15tobgr16; break;
1660
        case 0x44: conv= rgb16tobgr16; break;
1661
        case 0x46: conv= rgb24tobgr16; break;
1662
        case 0x48: conv= rgb32tobgr16; break;
1663
        case 0x63: conv= rgb15tobgr24; break;
1664
        case 0x64: conv= rgb16tobgr24; break;
1665
        case 0x66: conv= rgb24tobgr24; break;
1666
        case 0x68: conv= rgb32tobgr24; break;
1667
        case 0x83: conv= rgb15tobgr32; break;
1668
        case 0x84: conv= rgb16tobgr32; break;
1669
        case 0x86: conv= rgb24tobgr32; break;
1670
        case 0x88: conv= rgb32tobgr32; break;
1671
        default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1672
                        sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1673
        }
1674
    }else{
1675
        av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1676
               sws_format_name(srcFormat), sws_format_name(dstFormat));
1677
    }
1678
1679 068b0f4f Benoit Fouet
    if(conv)
1680
    {
1681 c4ca31d0 Benoit Fouet
        if (dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1682
            conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1683
        else
1684 221b804f Diego Biurrun
        {
1685 c4ca31d0 Benoit Fouet
            int i;
1686
            uint8_t *srcPtr= src[0];
1687
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1688
1689
            for (i=0; i<srcSliceH; i++)
1690
            {
1691
                conv(srcPtr, dstPtr, c->srcW*srcBpp);
1692
                srcPtr+= srcStride[0];
1693
                dstPtr+= dstStride[0];
1694
            }
1695 221b804f Diego Biurrun
        }
1696
    }
1697
    return srcSliceH;
1698 0d9f3d85 Arpi
}
1699
1700 d4e24275 Michael Niedermayer
/*
 * Unscaled packed 24 bit -> planar conversion of one slice through rgb24toyv12().
 * Returns the number of processed source lines (srcSliceH).
 */
static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                              int srcSliceH, uint8_t* dst[], int dstStride[]){
    /* planes 1 and 2 are addressed with half the luma line number */
    const int halfY = srcSliceY>>1;
    uint8_t *plane0 = dst[0] + srcSliceY*dstStride[0];
    uint8_t *plane1 = dst[1] + halfY    *dstStride[1];
    uint8_t *plane2 = dst[2] + halfY    *dstStride[2];

    rgb24toyv12(src[0], plane0, plane1, plane2,
                c->srcW, srcSliceH,
                dstStride[0], dstStride[1], srcStride[0]);
    return srcSliceH;
}
1712
1713 d4e24275 Michael Niedermayer
/*
 * Unscaled conversion of one slice: plane 0 is copied, planes 1 and 2 are
 * enlarged with planar2x(). If the destination is not PIX_FMT_YUV420P the two
 * chroma planes are swapped on output.
 * Returns the number of processed source lines (srcSliceH).
 */
static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[]){
    int i;
    /* Chroma geometry of THIS slice. For a slice covering the whole picture
     * chrSliceH equals c->chrSrcH and chrDstY is 0, i.e. the previous
     * behaviour; for partial slices only the chroma lines that belong to the
     * slice are read, and they are written at the slice position. */
    const int chrSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
    const int chrDstY  = srcSliceY >> c->chrDstVSubSample;
    /* destination plane index for source plane 1 / source plane 2 */
    const int dstA= (c->dstFormat==PIX_FMT_YUV420P) ? 1 : 2;
    const int dstB= 3 - dstA;

    /* copy Y */
    if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
        memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
    else{
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;

        for (i=0; i<srcSliceH; i++)
        {
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    planar2x(src[1], dst[dstA] + dstStride[dstA]*chrDstY, c->chrSrcW, chrSliceH, srcStride[1], dstStride[dstA]);
    planar2x(src[2], dst[dstB] + dstStride[dstB]*chrDstY, c->chrSrcW, chrSliceH, srcStride[2], dstStride[dstB]);
    return srcSliceH;
}
1741
1742 b6654a54 Michael Niedermayer
/* unscaled copy like stuff (assumes nearly identical formats) */
1743 3e499f53 Michael Niedermayer
static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1744 221b804f Diego Biurrun
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
1745
1746
    if (isPacked(c->srcFormat))
1747
    {
1748
        if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1749
            memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1750
        else
1751
        {
1752
            int i;
1753
            uint8_t *srcPtr= src[0];
1754
            uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1755
            int length=0;
1756
1757
            /* universal length finder */
1758
            while(length+c->srcW <= FFABS(dstStride[0])
1759
               && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1760
            ASSERT(length!=0);
1761
1762
            for (i=0; i<srcSliceH; i++)
1763
            {
1764
                memcpy(dstPtr, srcPtr, length);
1765
                srcPtr+= srcStride[0];
1766
                dstPtr+= dstStride[0];
1767
            }
1768
        }
1769
    }
1770
    else
1771
    { /* Planar YUV or gray */
1772
        int plane;
1773
        for (plane=0; plane<3; plane++)
1774
        {
1775
            int length= plane==0 ? c->srcW  : -((-c->srcW  )>>c->chrDstHSubSample);
1776
            int y=      plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1777
            int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1778
1779
            if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1780
            {
1781
                if (!isGray(c->dstFormat))
1782
                    memset(dst[plane], 128, dstStride[plane]*height);
1783
            }
1784
            else
1785
            {
1786
                if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1787
                    memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1788
                else
1789
                {
1790
                    int i;
1791
                    uint8_t *srcPtr= src[plane];
1792
                    uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1793
                    for (i=0; i<height; i++)
1794
                    {
1795
                        memcpy(dstPtr, srcPtr, length);
1796
                        srcPtr+= srcStride[plane];
1797
                        dstPtr+= dstStride[plane];
1798
                    }
1799
                }
1800
            }
1801
        }
1802
    }
1803
    return srcSliceH;
1804 37079906 Michael Niedermayer
}
1805 28bf81c9 Michael Niedermayer
1806 4884b9e5 Kostya Shishkov
/*
 * Unscaled 16 bit gray -> 8 bit conversion of one slice: one byte of every
 * 2-byte sample is kept (the byte at offset 1 for PIX_FMT_GRAY16LE, the byte
 * at offset 0 otherwise). If the destination is not gray, planes 1 and 2 of
 * the slice are filled with 128.
 * Returns the number of processed source lines (srcSliceH).
 */
static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint8_t *srcPtr= src[0];
    uint8_t *dstPtr= dst[0] + dstStride[0]*y;

    if (!isGray(c->dstFormat)){
        /* Chroma geometry of this slice, rounded towards +inf.
         * The fill is done line by line starting at the slice position so
         * that later slices and negative strides are handled correctly. */
        const int chrLength= -((-length)>>c->chrDstHSubSample);
        const int chrY=      -((-y     )>>c->chrDstVSubSample);
        const int chrHeight= -((-height)>>c->chrDstVSubSample);
        int plane;
        for (plane=1; plane<3; plane++)
        {
            uint8_t *chrPtr= dst[plane] + dstStride[plane]*chrY;
            for (i=0; i<chrHeight; i++)
            {
                memset(chrPtr, 128, chrLength);
                chrPtr+= dstStride[plane];
            }
        }
    }
    if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
        srcPtr+= srcStride[0];
        dstPtr+= dstStride[0];
    }
    return srcSliceH;
}
1830
1831
/*
 * Unscaled 8 bit -> 16 bit gray conversion of one slice: every source byte of
 * plane 0 is written twice, to both bytes of the 2-byte output sample.
 * Returns the number of processed source lines (srcSliceH).
 */
static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                        int srcSliceH, uint8_t* dst[], int dstStride[]){

    const int width= c->srcW;
    uint8_t *in=  src[0];
    uint8_t *out= dst[0] + dstStride[0]*srcSliceY;
    int row, col;

    for (row=0; row<srcSliceH; row++, in+= srcStride[0], out+= dstStride[0])
    {
        for (col=0; col<width; col++)
        {
            out[2*col    ] = in[col];
            out[2*col + 1] = in[col];
        }
    }
    return srcSliceH;
}
1852
1853
/*
 * Unscaled 16 bit gray endianness conversion of one slice: every 16 bit
 * sample of plane 0 is byte swapped with bswap_16().
 * Returns the number of processed source lines (srcSliceH).
 */
static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                      int srcSliceH, uint8_t* dst[], int dstStride[]){

    int length= c->srcW;
    int y=      srcSliceY;
    int height= srcSliceH;
    int i, j;
    uint16_t *srcPtr= (uint16_t*)src[0];
    /* dst[0] is a byte pointer and the stride is in bytes, so the slice
     * offset is the full dstStride[0]*y; only after the conversion to a
     * 16 bit pointer do the steps have to be halved. */
    uint16_t *dstPtr= (uint16_t*)(dst[0] + dstStride[0]*y);
    for (i=0; i<height; i++)
    {
        for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
        srcPtr+= srcStride[0]/2;
        dstPtr+= dstStride[0]/2;
    }
    return srcSliceH;
}
1870
1871
1872 c7a810cc Michael Niedermayer
static void getSubSampleFactors(int *h, int *v, int format){
1873 221b804f Diego Biurrun
    switch(format){
1874
    case PIX_FMT_UYVY422:
1875
    case PIX_FMT_YUYV422:
1876
        *h=1;
1877
        *v=0;
1878
        break;
1879
    case PIX_FMT_YUV420P:
1880
    case PIX_FMT_GRAY16BE:
1881
    case PIX_FMT_GRAY16LE:
1882
    case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1883
    case PIX_FMT_NV12:
1884
    case PIX_FMT_NV21:
1885
        *h=1;
1886
        *v=1;
1887
        break;
1888 9ba7fe6d Andreas Öman
    case PIX_FMT_YUV440P:
1889
        *h=0;
1890
        *v=1;
1891
        break;
1892 221b804f Diego Biurrun
    case PIX_FMT_YUV410P:
1893
        *h=2;
1894
        *v=2;
1895
        break;
1896
    case PIX_FMT_YUV444P:
1897
        *h=0;
1898
        *v=0;
1899
        break;
1900
    case PIX_FMT_YUV422P:
1901
        *h=1;
1902
        *v=0;
1903
        break;
1904
    case PIX_FMT_YUV411P:
1905
        *h=2;
1906
        *v=0;
1907
        break;
1908
    default:
1909
        *h=0;
1910
        *v=0;
1911
        break;
1912
    }
1913 c7a810cc Michael Niedermayer
}
1914
1915 5427e242 Michael Niedermayer
/*
 * Convert a 16.16 fixed point value to a rounded, saturated 16 bit integer.
 * The result is the bit pattern of a signed 16 bit value: inputs that round
 * below -0x7FFF give 0x8000, inputs that round above 0x7FFF give 0x7FFF.
 */
static uint16_t roundToInt16(int64_t f){
    /* Keep the shifted value in 64 bits: narrowing it to int before the
     * range check would let large magnitudes wrap around and escape the
     * saturation below. */
    const int64_t r= (f + (1<<15))>>16;
         if (r<-0x7FFF) return 0x8000;
    else if (r> 0x7FFF) return 0x7FFF;
    else                return r;
}
1921
1922
/**
1923 5427e242 Michael Niedermayer
 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1924 86bdf3fd Diego Biurrun
 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
1925 5427e242 Michael Niedermayer
 * @return -1 if not supported
1926 0481412a Michael Niedermayer
 */
1927 5427e242 Michael Niedermayer
int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1928 221b804f Diego Biurrun
    int64_t crv =  inv_table[0];
1929
    int64_t cbu =  inv_table[1];
1930
    int64_t cgu = -inv_table[2];
1931
    int64_t cgv = -inv_table[3];
1932
    int64_t cy  = 1<<16;
1933
    int64_t oy  = 0;
1934
1935
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1936
    memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1937
    memcpy(c->dstColorspaceTable,     table, sizeof(int)*4);
1938
1939
    c->brightness= brightness;
1940
    c->contrast  = contrast;
1941
    c->saturation= saturation;
1942
    c->srcRange  = srcRange;
1943
    c->dstRange  = dstRange;
1944
1945
    c->uOffset=   0x0400040004000400LL;
1946
    c->vOffset=   0x0400040004000400LL;
1947
1948
    if (!srcRange){
1949
        cy= (cy*255) / 219;
1950
        oy= 16<<16;
1951
    }else{
1952
        crv= (crv*224) / 255;
1953
        cbu= (cbu*224) / 255;
1954
        cgu= (cgu*224) / 255;
1955
        cgv= (cgv*224) / 255;
1956
    }
1957 0481412a Michael Niedermayer
1958 221b804f Diego Biurrun
    cy = (cy *contrast             )>>16;
1959
    crv= (crv*contrast * saturation)>>32;
1960
    cbu= (cbu*contrast * saturation)>>32;
1961
    cgu= (cgu*contrast * saturation)>>32;
1962
    cgv= (cgv*contrast * saturation)>>32;
1963 0481412a Michael Niedermayer
1964 221b804f Diego Biurrun
    oy -= 256*brightness;
1965 0481412a Michael Niedermayer
1966 221b804f Diego Biurrun
    c->yCoeff=    roundToInt16(cy *8192) * 0x0001000100010001ULL;
1967
    c->vrCoeff=   roundToInt16(crv*8192) * 0x0001000100010001ULL;
1968
    c->ubCoeff=   roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1969
    c->vgCoeff=   roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1970
    c->ugCoeff=   roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1971
    c->yOffset=   roundToInt16(oy *   8) * 0x0001000100010001ULL;
1972 5427e242 Michael Niedermayer
1973 221b804f Diego Biurrun
    yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1974
    //FIXME factorize
1975 a31de956 Michael Niedermayer
1976 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
1977 221b804f Diego Biurrun
    if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1978
        yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1979 6a4970ab Diego Biurrun
#endif
1980 221b804f Diego Biurrun
    return 0;
1981 5427e242 Michael Niedermayer
}
1982
1983
/**
1984
 * @return -1 if not supported
1985
 */
1986
int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1987 221b804f Diego Biurrun
    if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1988 5427e242 Michael Niedermayer
1989 221b804f Diego Biurrun
    *inv_table = c->srcColorspaceTable;
1990
    *table     = c->dstColorspaceTable;
1991
    *srcRange  = c->srcRange;
1992
    *dstRange  = c->dstRange;
1993
    *brightness= c->brightness;
1994
    *contrast  = c->contrast;
1995
    *saturation= c->saturation;
1996 6a4970ab Diego Biurrun
1997 221b804f Diego Biurrun
    return 0;
1998 0481412a Michael Niedermayer
}
1999
2000 44cdb423 Luca Abeni
static int handle_jpeg(int *format)
2001
{
2002 221b804f Diego Biurrun
    switch (*format) {
2003
        case PIX_FMT_YUVJ420P:
2004
            *format = PIX_FMT_YUV420P;
2005
            return 1;
2006
        case PIX_FMT_YUVJ422P:
2007
            *format = PIX_FMT_YUV422P;
2008
            return 1;
2009
        case PIX_FMT_YUVJ444P:
2010
            *format = PIX_FMT_YUV444P;
2011
            return 1;
2012 9ba7fe6d Andreas Öman
        case PIX_FMT_YUVJ440P:
2013
            *format = PIX_FMT_YUV440P;
2014
            return 1;
2015 221b804f Diego Biurrun
        default:
2016
            return 0;
2017
    }
2018 44cdb423 Luca Abeni
}
2019
2020 e9e12f0e Luca Abeni
SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
2021 221b804f Diego Biurrun
                           SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
2022
2023
    SwsContext *c;
2024
    int i;
2025
    int usesVFilter, usesHFilter;
2026
    int unscaled, needsDither;
2027
    int srcRange, dstRange;
2028
    SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
2029 3d6a30d9 Diego Biurrun
#if defined(ARCH_X86)
2030 221b804f Diego Biurrun
    if (flags & SWS_CPU_CAPS_MMX)
2031
        asm volatile("emms\n\t"::: "memory");
2032 5cebb24b Michael Niedermayer
#endif
2033 516b1f82 Michael Niedermayer
2034 9bde778e Luca Abeni
#if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
2035 d3f3eea9 Marc Hoffman
    flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC|SWS_CPU_CAPS_BFIN);
2036 516b1f82 Michael Niedermayer
#ifdef HAVE_MMX2
2037 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
2038 516b1f82 Michael Niedermayer
#elif defined (HAVE_3DNOW)
2039 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
2040 516b1f82 Michael Niedermayer
#elif defined (HAVE_MMX)
2041 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_MMX;
2042 a2faa401 Romain Dolbeau
#elif defined (HAVE_ALTIVEC)
2043 221b804f Diego Biurrun
    flags |= SWS_CPU_CAPS_ALTIVEC;
2044 d3f3eea9 Marc Hoffman
#elif defined (ARCH_BFIN)
2045
    flags |= SWS_CPU_CAPS_BFIN;
2046 516b1f82 Michael Niedermayer
#endif
2047 69796008 Diego Biurrun
#endif /* RUNTIME_CPUDETECT */
2048 221b804f Diego Biurrun
    if (clip_table[512] != 255) globalInit();
2049
    if (rgb15to16 == NULL) sws_rgb2rgb_init(flags);
2050
2051
    unscaled = (srcW == dstW && srcH == dstH);
2052
    needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
2053
        && (fmt_depth(dstFormat))<24
2054
        && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
2055
2056
    srcRange = handle_jpeg(&srcFormat);
2057
    dstRange = handle_jpeg(&dstFormat);
2058
2059
    if (!isSupportedIn(srcFormat))
2060
    {
2061
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
2062
        return NULL;
2063
    }
2064
    if (!isSupportedOut(dstFormat))
2065
    {
2066
        av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
2067
        return NULL;
2068
    }
2069
2070
    /* sanity check */
2071
    if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
2072
    {
2073
        av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2074
               srcW, srcH, dstW, dstH);
2075
        return NULL;
2076
    }
2077
2078
    if (!dstFilter) dstFilter= &dummyFilter;
2079
    if (!srcFilter) srcFilter= &dummyFilter;
2080
2081
    c= av_mallocz(sizeof(SwsContext));
2082
2083
    c->av_class = &sws_context_class;
2084
    c->srcW= srcW;
2085
    c->srcH= srcH;
2086
    c->dstW= dstW;
2087
    c->dstH= dstH;
2088
    c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2089
    c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2090
    c->flags= flags;
2091
    c->dstFormat= dstFormat;
2092
    c->srcFormat= srcFormat;
2093
    c->vRounder= 4* 0x0001000100010001ULL;
2094
2095
    usesHFilter= usesVFilter= 0;
2096
    if (dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
2097
    if (dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
2098
    if (dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
2099
    if (dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
2100
    if (srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
2101
    if (srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
2102
    if (srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
2103
    if (srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
2104
2105
    getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2106
    getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2107
2108
    // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2109
    if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2110
2111
    // drop some chroma lines if the user wants it
2112
    c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2113
    c->chrSrcVSubSample+= c->vChrDrop;
2114
2115
    // drop every 2. pixel for chroma calculation unless user wants full chroma
2116
    if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2117
      && srcFormat!=PIX_FMT_RGB8      && srcFormat!=PIX_FMT_BGR8
2118
      && srcFormat!=PIX_FMT_RGB4      && srcFormat!=PIX_FMT_BGR4
2119
      && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE)
2120
        c->chrSrcHSubSample=1;
2121
2122
    if (param){
2123
        c->param[0] = param[0];
2124
        c->param[1] = param[1];
2125
    }else{
2126
        c->param[0] =
2127
        c->param[1] = SWS_PARAM_DEFAULT;
2128
    }
2129
2130
    c->chrIntHSubSample= c->chrDstHSubSample;
2131
    c->chrIntVSubSample= c->chrSrcVSubSample;
2132
2133
    // Note the -((-x)>>y) is so that we always round toward +inf.
2134
    c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2135
    c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2136
    c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2137
    c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2138
2139
    sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2140
2141
    /* unscaled special Cases */
2142
    if (unscaled && !usesHFilter && !usesVFilter)
2143
    {
2144
        /* yv12_to_nv12 */
2145
        if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2146
        {
2147
            c->swScale= PlanarToNV12Wrapper;
2148
        }
2149 9bde778e Luca Abeni
#ifdef CONFIG_GPL
2150 221b804f Diego Biurrun
        /* yuv2bgr */
2151
        if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2152
        {
2153
            c->swScale= yuv2rgb_get_func_ptr(c);
2154
        }
2155 9bde778e Luca Abeni
#endif
2156 6a4970ab Diego Biurrun
2157 221b804f Diego Biurrun
        if ( srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P )
2158
        {
2159
            c->swScale= yvu9toyv12Wrapper;
2160
        }
2161
2162
        /* bgr24toYV12 */
2163
        if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2164
            c->swScale= bgr24toyv12Wrapper;
2165
2166
        /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2167
        if (  (isBGR(srcFormat) || isRGB(srcFormat))
2168
           && (isBGR(dstFormat) || isRGB(dstFormat))
2169
           && srcFormat != PIX_FMT_BGR8      && dstFormat != PIX_FMT_BGR8
2170
           && srcFormat != PIX_FMT_RGB8      && dstFormat != PIX_FMT_RGB8
2171
           && srcFormat != PIX_FMT_BGR4      && dstFormat != PIX_FMT_BGR4
2172
           && srcFormat != PIX_FMT_RGB4      && dstFormat != PIX_FMT_RGB4
2173
           && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2174
           && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2175
           && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2176
           && !needsDither)
2177
             c->swScale= rgb2rgbWrapper;
2178
2179
        /* LQ converters if -sws 0 or -sws 4*/
2180
        if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2181
            /* rgb/bgr -> rgb/bgr (dither needed forms) */
2182
            if ( (isBGR(srcFormat) || isRGB(srcFormat))
2183
              && (isBGR(dstFormat) || isRGB(dstFormat))
2184
              && needsDither)
2185
                c->swScale= rgb2rgbWrapper;
2186
2187
            /* yv12_to_yuy2 */
2188
            if (srcFormat == PIX_FMT_YUV420P &&
2189
                (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422))
2190
            {
2191
                if (dstFormat == PIX_FMT_YUYV422)
2192
                    c->swScale= PlanarToYuy2Wrapper;
2193
                else
2194
                    c->swScale= PlanarToUyvyWrapper;
2195
            }
2196
        }
2197 ec22603f Michael Niedermayer
2198 6634d0ef Nicolas Plourde
#ifdef COMPILE_ALTIVEC
2199 221b804f Diego Biurrun
        if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2200
            ((srcFormat == PIX_FMT_YUV420P &&
2201
             (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) {
2202
          // unscaled YV12 -> packed YUV, we want speed
2203
          if (dstFormat == PIX_FMT_YUYV422)
2204
              c->swScale= yv12toyuy2_unscaled_altivec;
2205
          else
2206
              c->swScale= yv12touyvy_unscaled_altivec;
2207
        }
2208 b71cf33c Romain Dolbeau
#endif
2209
2210 221b804f Diego Biurrun
        /* simple copy */
2211
        if (  srcFormat == dstFormat
2212
            || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2213
            || (isPlanarYUV(dstFormat) && isGray(srcFormat)) )
2214
        {
2215
            c->swScale= simpleCopy;
2216
        }
2217
2218
        /* gray16{le,be} conversions */
2219
        if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2220
        {
2221
            c->swScale= gray16togray;
2222
        }
2223
        if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2224
        {
2225
            c->swScale= graytogray16;
2226
        }
2227
        if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2228
        {
2229
            c->swScale= gray16swap;
2230
        }
2231
2232 1ebbfe15 Marc Hoffman
#ifdef ARCH_BFIN
2233
        if (flags & SWS_CPU_CAPS_BFIN)
2234
            ff_bfin_get_unscaled_swscale (c);
2235
#endif
2236
2237 221b804f Diego Biurrun
        if (c->swScale){
2238
            if (flags&SWS_PRINT_INFO)
2239
                av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n",
2240
                                sws_format_name(srcFormat), sws_format_name(dstFormat));
2241
            return c;
2242
        }
2243
    }
2244
2245
    if (flags & SWS_CPU_CAPS_MMX2)
2246
    {
2247
        c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2248
        if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2249
        {
2250
            if (flags&SWS_PRINT_INFO)
2251
                av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2252
        }
2253
        if (usesHFilter) c->canMMX2BeUsed=0;
2254
    }
2255
    else
2256
        c->canMMX2BeUsed=0;
2257
2258
    c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2259
    c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2260
2261
    // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2262
    // but only for the FAST_BILINEAR mode otherwise do correct scaling
2263
    // n-2 is the last chrominance sample available
2264 86bdf3fd Diego Biurrun
    // this is not perfect, but no one should notice the difference, the more correct variant
2265 221b804f Diego Biurrun
    // would be like the vertical one, but that would require some special code for the
2266
    // first and last pixel
2267
    if (flags&SWS_FAST_BILINEAR)
2268
    {
2269
        if (c->canMMX2BeUsed)
2270
        {
2271
            c->lumXInc+= 20;
2272
            c->chrXInc+= 20;
2273
        }
2274
        //we don't use the x86asm scaler if mmx is available
2275
        else if (flags & SWS_CPU_CAPS_MMX)
2276
        {
2277
            c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2278
            c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2279
        }
2280
    }
2281
2282
    /* precalculate horizontal scaler filter coefficients */
2283
    {
2284
        const int filterAlign=
2285
            (flags & SWS_CPU_CAPS_MMX) ? 4 :
2286
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2287
            1;
2288
2289
        initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2290
                   srcW      ,       dstW, filterAlign, 1<<14,
2291
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2292
                   srcFilter->lumH, dstFilter->lumH, c->param);
2293
        initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2294
                   c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2295
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2296
                   srcFilter->chrH, dstFilter->chrH, c->param);
2297 28bf81c9 Michael Niedermayer
2298 dbdae6ec Diego Biurrun
#define MAX_FUNNY_CODE_SIZE 10000
2299 17c613ef Uoti Urpala
#if defined(COMPILE_MMX2)
2300 77a416e8 Gabucino
// can't downscale !!!
2301 221b804f Diego Biurrun
        if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2302
        {
2303 113ef149 Reimar Döffinger
#ifdef MAP_ANONYMOUS
2304 221b804f Diego Biurrun
            c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2305
            c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2306 38d5c282 Aurelien Jacobs
#else
2307 221b804f Diego Biurrun
            c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2308
            c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2309 38d5c282 Aurelien Jacobs
#endif
2310
2311 221b804f Diego Biurrun
            c->lumMmx2Filter   = av_malloc((dstW        /8+8)*sizeof(int16_t));
2312
            c->chrMmx2Filter   = av_malloc((c->chrDstW  /4+8)*sizeof(int16_t));
2313
            c->lumMmx2FilterPos= av_malloc((dstW      /2/8+8)*sizeof(int32_t));
2314
            c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2315 b7dc6f66 Michael Niedermayer
2316 221b804f Diego Biurrun
            initMMX2HScaler(      dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2317
            initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2318
        }
2319 17c613ef Uoti Urpala
#endif /* defined(COMPILE_MMX2) */
2320 221b804f Diego Biurrun
    } // Init Horizontal stuff
2321 28bf81c9 Michael Niedermayer
2322
2323
2324 221b804f Diego Biurrun
    /* precalculate vertical scaler filter coefficients */
2325
    {
2326
        const int filterAlign=
2327
            (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2328
            (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2329
            1;
2330 8c266f0c Romain Dolbeau
2331 221b804f Diego Biurrun
        initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2332
                   srcH      ,        dstH, filterAlign, (1<<12)-4,
2333
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC)  : flags,
2334
                   srcFilter->lumV, dstFilter->lumV, c->param);
2335
        initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2336
                   c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
2337
                   (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2338
                   srcFilter->chrV, dstFilter->chrV, c->param);
2339 d33d485e Alan Curry
2340
#ifdef HAVE_ALTIVEC
2341 221b804f Diego Biurrun
        c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2342
        c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2343
2344
        for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2345
            int j;
2346
            short *p = (short *)&c->vYCoeffsBank[i];
2347
            for (j=0;j<8;j++)
2348
                p[j] = c->vLumFilter[i];
2349
        }
2350
2351
        for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2352
            int j;
2353
            short *p = (short *)&c->vCCoeffsBank[i];
2354
            for (j=0;j<8;j++)
2355
                p[j] = c->vChrFilter[i];
2356
        }
2357 d33d485e Alan Curry
#endif
2358 221b804f Diego Biurrun
    }
2359
2360
    // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2361
    c->vLumBufSize= c->vLumFilterSize;
2362
    c->vChrBufSize= c->vChrFilterSize;
2363
    for (i=0; i<dstH; i++)
2364
    {
2365
        int chrI= i*c->chrDstH / dstH;
2366
        int nextSlice= FFMAX(c->vLumFilterPos[i   ] + c->vLumFilterSize - 1,
2367
                           ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2368
2369
        nextSlice>>= c->chrSrcVSubSample;
2370
        nextSlice<<= c->chrSrcVSubSample;
2371
        if (c->vLumFilterPos[i   ] + c->vLumBufSize < nextSlice)
2372
            c->vLumBufSize= nextSlice - c->vLumFilterPos[i   ];
2373
        if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2374
            c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2375
    }
2376
2377
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
    // reserve statically sized buffers large enough for every possible case)
2378
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2379
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2380
    //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2381
    /* align at 16 bytes for AltiVec */
2382
    for (i=0; i<c->vLumBufSize; i++)
2383
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000);
2384
    for (i=0; i<c->vChrBufSize; i++)
2385
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);
2386
2387
    //try to avoid drawing green stuff between the right end and the stride end
2388
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2389
2390
    ASSERT(c->chrDstH <= dstH)
2391
2392
    if (flags&SWS_PRINT_INFO)
2393
    {
2394 28bf81c9 Michael Niedermayer
#ifdef DITHER1XBPP
2395 221b804f Diego Biurrun
        char *dither= " dithered";
2396 5521b193 Michael Niedermayer
#else
2397 221b804f Diego Biurrun
        char *dither= "";
2398 28bf81c9 Michael Niedermayer
#endif
2399 221b804f Diego Biurrun
        if (flags&SWS_FAST_BILINEAR)
2400
            av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, ");
2401
        else if (flags&SWS_BILINEAR)
2402
            av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, ");
2403
        else if (flags&SWS_BICUBIC)
2404
            av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, ");
2405
        else if (flags&SWS_X)
2406
            av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, ");
2407
        else if (flags&SWS_POINT)
2408
            av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, ");
2409
        else if (flags&SWS_AREA)
2410
            av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, ");
2411
        else if (flags&SWS_BICUBLIN)
2412
            av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2413
        else if (flags&SWS_GAUSS)
2414
            av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, ");
2415
        else if (flags&SWS_SINC)
2416
            av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, ");
2417
        else if (flags&SWS_LANCZOS)
2418
            av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, ");
2419
        else if (flags&SWS_SPLINE)
2420
            av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, ");
2421
        else
2422
            av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! ");
2423
2424
        if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2425
            av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2426
                   sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2427
        else
2428
            av_log(c, AV_LOG_INFO, "from %s to %s ",
2429
                   sws_format_name(srcFormat), sws_format_name(dstFormat));
2430
2431
        if (flags & SWS_CPU_CAPS_MMX2)
2432
            av_log(c, AV_LOG_INFO, "using MMX2\n");
2433
        else if (flags & SWS_CPU_CAPS_3DNOW)
2434
            av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2435
        else if (flags & SWS_CPU_CAPS_MMX)
2436
            av_log(c, AV_LOG_INFO, "using MMX\n");
2437
        else if (flags & SWS_CPU_CAPS_ALTIVEC)
2438
            av_log(c, AV_LOG_INFO, "using AltiVec\n");
2439
        else
2440
            av_log(c, AV_LOG_INFO, "using C\n");
2441
    }
2442
2443
    if (flags & SWS_PRINT_INFO)
2444
    {
2445
        if (flags & SWS_CPU_CAPS_MMX)
2446
        {
2447
            if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2448
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2449
            else
2450
            {
2451
                if (c->hLumFilterSize==4)
2452
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2453
                else if (c->hLumFilterSize==8)
2454
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2455
                else
2456
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2457
2458
                if (c->hChrFilterSize==4)
2459
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2460
                else if (c->hChrFilterSize==8)
2461
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2462
                else
2463
                    av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2464
            }
2465
        }
2466
        else
2467
        {
2468 3d6a30d9 Diego Biurrun
#if defined(ARCH_X86)
2469 221b804f Diego Biurrun
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n");
2470 28bf81c9 Michael Niedermayer
#else
2471 221b804f Diego Biurrun
            if (flags & SWS_FAST_BILINEAR)
2472
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2473
            else
2474
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n");
2475 28bf81c9 Michael Niedermayer
#endif
2476 221b804f Diego Biurrun
        }
2477
        if (isPlanarYUV(dstFormat))
2478
        {
2479
            if (c->vLumFilterSize==1)
2480
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2481
            else
2482
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2483
        }
2484
        else
2485
        {
2486
            if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2487
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2488
                       "SwScaler:       2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2489
            else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2490
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2491
            else
2492
                av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2493
        }
2494
2495
        if (dstFormat==PIX_FMT_BGR24)
2496
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n",
2497
                   (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2498
        else if (dstFormat==PIX_FMT_RGB32)
2499
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2500
        else if (dstFormat==PIX_FMT_BGR565)
2501
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2502
        else if (dstFormat==PIX_FMT_BGR555)
2503
            av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2504
2505
        av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2506
    }
2507
    if (flags & SWS_PRINT_INFO)
2508
    {
2509
        av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2510
               c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2511
        av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2512
               c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2513
    }
2514
2515
    c->swScale= getSwsFunc(flags);
2516
    return c;
2517 28bf81c9 Michael Niedermayer
}
2518
2519
/**
2520 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext.
2521 fccb9b2b Michael Niedermayer
 * assumes planar YUV to be in YUV order instead of YVU
2522
 */
2523 703b56fb Luca Abeni
int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2524 221b804f Diego Biurrun
              int srcSliceH, uint8_t* dst[], int dstStride[]){
2525
    int i;
2526
    uint8_t* src2[4]= {src[0], src[1], src[2]};
2527
    uint32_t pal[256];
2528
    if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2529
        av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n");
2530
        return 0;
2531
    }
2532
    if (c->sliceDir == 0) {
2533
        if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2534
    }
2535
2536
    if (c->srcFormat == PIX_FMT_PAL8){
2537
        for (i=0; i<256; i++){
2538
            int p= ((uint32_t*)(src[1]))[i];
2539
            int r= (p>>16)&0xFF;
2540
            int g= (p>> 8)&0xFF;
2541
            int b=  p     &0xFF;
2542
            int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 );
2543
            int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128);
2544
            int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128);
2545
            pal[i]= y + (u<<8) + (v<<16);
2546 21c08a3f Michael Niedermayer
        }
2547 221b804f Diego Biurrun
        src2[1]= pal;
2548
    }
2549 21c08a3f Michael Niedermayer
2550 221b804f Diego Biurrun
    // copy strides, so they can safely be modified
2551
    if (c->sliceDir == 1) {
2552
        // slices go from top to bottom
2553
        int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2554
        int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2555
        return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2556
    } else {
2557
        // slices go from bottom to top => we flip the image internally
2558
        uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2559
                           dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2560
                           dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2561
        int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2562
        int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2563
2564
        src2[0] += (srcSliceH-1)*srcStride[0];
2565
        if (c->srcFormat != PIX_FMT_PAL8)
2566
            src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2567
        src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2568
2569
        return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2570
    }
2571 fccb9b2b Michael Niedermayer
}
2572
2573
/**
2574 77a416e8 Gabucino
 * swscale warper, so we don't need to export the SwsContext
2575 d4e24275 Michael Niedermayer
 */
2576 703b56fb Luca Abeni
int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2577 221b804f Diego Biurrun
                      int srcSliceH, uint8_t* dst[], int dstStride[]){
2578
    return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2579 d4e24275 Michael Niedermayer
}
2580
2581 6a4970ab Diego Biurrun
SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2582 221b804f Diego Biurrun
                                float lumaSharpen, float chromaSharpen,
2583
                                float chromaHShift, float chromaVShift,
2584
                                int verbose)
2585 e21206a8 Michael Niedermayer
{
2586 221b804f Diego Biurrun
    SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2587
2588
    if (lumaGBlur!=0.0){
2589
        filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2590
        filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2591
    }else{
2592
        filter->lumH= sws_getIdentityVec();
2593
        filter->lumV= sws_getIdentityVec();
2594
    }
2595
2596
    if (chromaGBlur!=0.0){
2597
        filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2598
        filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2599
    }else{
2600
        filter->chrH= sws_getIdentityVec();
2601
        filter->chrV= sws_getIdentityVec();
2602
    }
2603
2604
    if (chromaSharpen!=0.0){
2605
        SwsVector *id= sws_getIdentityVec();
2606
        sws_scaleVec(filter->chrH, -chromaSharpen);
2607
        sws_scaleVec(filter->chrV, -chromaSharpen);
2608
        sws_addVec(filter->chrH, id);
2609
        sws_addVec(filter->chrV, id);
2610
        sws_freeVec(id);
2611
    }
2612
2613
    if (lumaSharpen!=0.0){
2614
        SwsVector *id= sws_getIdentityVec();
2615
        sws_scaleVec(filter->lumH, -lumaSharpen);
2616
        sws_scaleVec(filter->lumV, -lumaSharpen);
2617
        sws_addVec(filter->lumH, id);
2618
        sws_addVec(filter->lumV, id);
2619
        sws_freeVec(id);
2620
    }
2621
2622
    if (chromaHShift != 0.0)
2623
        sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2624
2625
    if (chromaVShift != 0.0)
2626
        sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2627
2628
    sws_normalizeVec(filter->chrH, 1.0);
2629
    sws_normalizeVec(filter->chrV, 1.0);
2630
    sws_normalizeVec(filter->lumH, 1.0);
2631
    sws_normalizeVec(filter->lumV, 1.0);
2632
2633
    if (verbose) sws_printVec(filter->chrH);
2634
    if (verbose) sws_printVec(filter->lumH);
2635
2636
    return filter;
2637 e21206a8 Michael Niedermayer
}
2638
2639 d4e24275 Michael Niedermayer
/**
2640 28bf81c9 Michael Niedermayer
 * returns a normalized gaussian curve used to filter stuff
2641
 * quality=3 is high quality, lowwer is lowwer quality
2642
 */
2643 d4e24275 Michael Niedermayer
SwsVector *sws_getGaussianVec(double variance, double quality){
2644 221b804f Diego Biurrun
    const int length= (int)(variance*quality + 0.5) | 1;
2645
    int i;
2646
    double *coeff= av_malloc(length*sizeof(double));
2647
    double middle= (length-1)*0.5;
2648
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2649 c7f822d9 Michael Niedermayer
2650 221b804f Diego Biurrun
    vec->coeff= coeff;
2651
    vec->length= length;
2652 28bf81c9 Michael Niedermayer
2653 221b804f Diego Biurrun
    for (i=0; i<length; i++)
2654
    {
2655
        double dist= i-middle;
2656
        coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2657
    }
2658 28bf81c9 Michael Niedermayer
2659 221b804f Diego Biurrun
    sws_normalizeVec(vec, 1.0);
2660 c7f822d9 Michael Niedermayer
2661 221b804f Diego Biurrun
    return vec;
2662 28bf81c9 Michael Niedermayer
}
2663
2664 d4e24275 Michael Niedermayer
SwsVector *sws_getConstVec(double c, int length){
2665 221b804f Diego Biurrun
    int i;
2666
    double *coeff= av_malloc(length*sizeof(double));
2667
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2668 5521b193 Michael Niedermayer
2669 221b804f Diego Biurrun
    vec->coeff= coeff;
2670
    vec->length= length;
2671 5521b193 Michael Niedermayer
2672 221b804f Diego Biurrun
    for (i=0; i<length; i++)
2673
        coeff[i]= c;
2674 5521b193 Michael Niedermayer
2675 221b804f Diego Biurrun
    return vec;
2676 5521b193 Michael Niedermayer
}
2677
2678
2679 d4e24275 Michael Niedermayer
/**
 * Returns a one-element vector whose single coefficient is 1.0,
 * as produced by sws_getConstVec(1.0, 1).
 */
SwsVector *sws_getIdentityVec(void){
    SwsVector *identity= sws_getConstVec(1.0, 1);

    return identity;
}
2682
2683 2e728364 Michael Niedermayer
/**
 * Returns the sum of all coefficients of a, accumulated from the first
 * coefficient to the last.
 */
double sws_dcVec(SwsVector *a){
    double total=0;
    int idx=0;

    while (idx < a->length){
        total+= a->coeff[idx];
        idx++;
    }

    return total;
}
2692
2693 d4e24275 Michael Niedermayer
/**
 * Multiplies every coefficient of a by scalar, in place.
 */
void sws_scaleVec(SwsVector *a, double scalar){
    int idx=0;

    while (idx < a->length){
        a->coeff[idx]*= scalar;
        idx++;
    }
}
2699
2700 2e728364 Michael Niedermayer
/**
 * Rescales a in place by height divided by its current coefficient sum,
 * so that the coefficients sum to height afterwards.
 */
void sws_normalizeVec(SwsVector *a, double height){
    const double dc= sws_dcVec(a);
    const double gain= height/dc;

    sws_scaleVec(a, gain);
}
2703
2704 d4e24275 Michael Niedermayer
/**
 * Returns a new vector holding the convolution of a and b.
 * The result has a->length + b->length - 1 coefficients; neither input is
 * modified.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
    int length= a->length + b->length - 1;
    double *coeff= av_malloc(length*sizeof(double));
    int i, j;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++)
    {
        for (j=0; j<b->length; j++)
        {
            coeff[i+j]+= a->coeff[i]*b->coeff[j];
        }
    }

    return vec;
}
2725
2726 d4e24275 Michael Niedermayer
/**
 * Returns a new vector holding the element-wise sum of a and b.
 * The result is as long as the longer input, and each input is centered
 * within it before being added; neither input is modified.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];

    return vec;
}
2742 c7f822d9 Michael Niedermayer
2743 d4e24275 Michael Niedermayer
/**
 * Returns a new vector holding the element-wise difference a - b.
 * The result is as long as the longer input, and each input is centered
 * within it before being combined; neither input is modified.
 *
 * @return the new vector, or NULL if an allocation failed
 */
static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
    int length= FFMAX(a->length, b->length);
    double *coeff= av_malloc(length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= length;

    for (i=0; i<length; i++) coeff[i]= 0.0;

    for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
    for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];

    return vec;
}
2759
2760
/* shift left / or right if "shift" is negative */
2761 d4e24275 Michael Niedermayer
static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2762 221b804f Diego Biurrun
    int length= a->length + FFABS(shift)*2;
2763
    double *coeff= av_malloc(length*sizeof(double));
2764
    int i;
2765
    SwsVector *vec= av_malloc(sizeof(SwsVector));
2766 c7f822d9 Michael Niedermayer
2767 221b804f Diego Biurrun
    vec->coeff= coeff;
2768
    vec->length= length;
2769 c7f822d9 Michael Niedermayer
2770 221b804f Diego Biurrun
    for (i=0; i<length; i++) coeff[i]= 0.0;
2771 c7f822d9 Michael Niedermayer
2772 221b804f Diego Biurrun
    for (i=0; i<a->length; i++)
2773
    {
2774
        coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2775
    }
2776 c7f822d9 Michael Niedermayer
2777 221b804f Diego Biurrun
    return vec;
2778 c7f822d9 Michael Niedermayer
}
2779
2780 d4e24275 Michael Niedermayer
/**
 * Replaces the contents of a with its shifted version, as built by
 * sws_getShiftedVec(a, shift). The previous coefficient array is freed.
 */
void sws_shiftVec(SwsVector *a, int shift){
    SwsVector *tmp= sws_getShiftedVec(a, shift);
    double *stale= a->coeff;

    /* take over the new coefficient array, then drop the old array and the temporary shell */
    a->length= tmp->length;
    a->coeff = tmp->coeff;
    av_free(stale);
    av_free(tmp);
}
2787
2788 d4e24275 Michael Niedermayer
/**
 * Replaces the contents of a with the sum of a and b, as built by
 * sws_sumVec(a, b). The previous coefficient array of a is freed; b is left
 * untouched.
 */
void sws_addVec(SwsVector *a, SwsVector *b){
    SwsVector *tmp= sws_sumVec(a, b);
    double *stale= a->coeff;

    /* take over the new coefficient array, then drop the old array and the temporary shell */
    a->length= tmp->length;
    a->coeff = tmp->coeff;
    av_free(stale);
    av_free(tmp);
}
2795
2796 d4e24275 Michael Niedermayer
/**
 * Replaces the contents of a with the difference a - b, as built by
 * sws_diffVec(a, b). The previous coefficient array of a is freed; b is left
 * untouched.
 */
void sws_subVec(SwsVector *a, SwsVector *b){
    SwsVector *tmp= sws_diffVec(a, b);
    double *stale= a->coeff;

    /* take over the new coefficient array, then drop the old array and the temporary shell */
    a->length= tmp->length;
    a->coeff = tmp->coeff;
    av_free(stale);
    av_free(tmp);
}
2803
2804 d4e24275 Michael Niedermayer
/**
 * Replaces the contents of a with the convolution of a and b, as built by
 * sws_getConvVec(a, b). The previous coefficient array of a is freed; b is
 * left untouched.
 */
void sws_convVec(SwsVector *a, SwsVector *b){
    SwsVector *tmp= sws_getConvVec(a, b);
    double *stale= a->coeff;

    /* take over the new coefficient array, then drop the old array and the temporary shell */
    a->length= tmp->length;
    a->coeff = tmp->coeff;
    av_free(stale);
    av_free(tmp);
}
2811
2812 d4e24275 Michael Niedermayer
/**
 * Returns a newly allocated copy of a, with its own coefficient array.
 *
 * @return the copy, or NULL if an allocation failed
 */
SwsVector *sws_cloneVec(SwsVector *a){
    double *coeff= av_malloc(a->length*sizeof(double));
    int i;
    SwsVector *vec= av_malloc(sizeof(SwsVector));

    if (!coeff || !vec){
        av_free(coeff);
        av_free(vec);
        return NULL;
    }

    vec->coeff= coeff;
    vec->length= a->length;

    for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];

    return vec;
}
2824
2825 d4e24275 Michael Niedermayer
/**
 * Logs the vector at debug level, one coefficient per line: the value,
 * followed by up to 60 spaces proportional to the coefficient's position
 * between the smallest and largest value, followed by a '|' marker.
 * Both extremes start at 0, so 0 always lies inside the plotted range.
 */
void sws_printVec(SwsVector *a){
    int i;
    double max=0;
    double min=0;
    double range;

    for (i=0; i<a->length; i++)
        if (a->coeff[i]>max) max= a->coeff[i];

    for (i=0; i<a->length; i++)
        if (a->coeff[i]<min) min= a->coeff[i];

    range= max - min;

    for (i=0; i<a->length; i++)
    {
        /* an all-zero vector has range 0; avoid 0/0 and the undefined
         * conversion of the resulting NaN to int */
        int x= range > 0.0 ? (int)((a->coeff[i]-min)*60.0/range +0.5) : 0;
        av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
        for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
        av_log(NULL, AV_LOG_DEBUG, "|\n");
    }
}
2847
2848 d4e24275 Michael Niedermayer
/**
 * Frees a vector and its coefficient array. Passing NULL is a no-op.
 */
void sws_freeVec(SwsVector *a){
    if (a){
        av_free(a->coeff);
        /* reset the members before releasing the struct itself */
        a->coeff = NULL;
        a->length= 0;
        av_free(a);
    }
}
2855
2856 e21206a8 Michael Niedermayer
/**
 * Frees a filter together with its four vectors (lumH, lumV, chrH, chrV).
 * Passing NULL is a no-op.
 */
void sws_freeFilter(SwsFilter *filter){
    if (filter){
        /* sws_freeVec() ignores NULL, so unset members need no extra guard */
        sws_freeVec(filter->lumH);
        sws_freeVec(filter->lumV);
        sws_freeVec(filter->chrH);
        sws_freeVec(filter->chrV);
        av_free(filter);
    }
}
2865