/* Source: ffmpeg / libavcodec / dsputil.h @ 983e3246 */
/*
|
---|---|
2 |
* DSP utils
|
3 |
* Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
|
4 |
*
|
5 |
* This library is free software; you can redistribute it and/or
|
6 |
* modify it under the terms of the GNU Lesser General Public
|
7 |
* License as published by the Free Software Foundation; either
|
8 |
* version 2 of the License, or (at your option) any later version.
|
9 |
*
|
10 |
* This library is distributed in the hope that it will be useful,
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13 |
* Lesser General Public License for more details.
|
14 |
*
|
15 |
* You should have received a copy of the GNU Lesser General Public
|
16 |
* License along with this library; if not, write to the Free Software
|
17 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18 |
*/
|
19 |
|
20 |
/**
|
21 |
* @file dsputil.h
|
22 |
* DSP utils.
|
23 |
*/
|
24 |
|
25 |
#ifndef DSPUTIL_H
|
26 |
#define DSPUTIL_H
|
27 |
|
28 |
#include "common.h" |
29 |
#include "avcodec.h" |
30 |
|
31 |
|
32 |
//#define DEBUG
|
33 |
/* dct code */

/* Element type used for DCT coefficient blocks throughout this header. */
typedef short DCTELEM;
//typedef int DCTELEM;

/*
 * DCT routines, declared here and defined elsewhere.  Each one takes a
 * pointer to DCTELEM data and returns nothing.
 * NOTE(review): going by the names, fdct_* are forward transforms and
 * j_rev_dct is an inverse transform -- confirm against the implementations.
 */
void fdct_ifast(DCTELEM *data);
void ff_jpeg_fdct_islow(DCTELEM *data);

void j_rev_dct(DCTELEM *data);

void ff_fdct_mmx(DCTELEM *block);
|
43 |
|
44 |
/* encoding scans: three read-only 64-entry tables, defined elsewhere */
extern const uint8_t ff_alternate_horizontal_scan[64];
extern const uint8_t ff_alternate_vertical_scan[64];
extern const uint8_t ff_zigzag_direct[64];
48 |
|
49 |
/* pixel operations */

/* Used below to size cropTbl: 256 entries plus 2 * MAX_NEG_CROP extra ones. */
#define MAX_NEG_CROP 384

/* temporary */
extern uint32_t squareTbl[512];
/* NOTE(review): presumably MAX_NEG_CROP guard entries on each side of the
   256 regular ones -- confirm against the code that fills the table. */
extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
55 |
|
56 |
|
57 |
/* minimum alignment rules ;)
If you notice errors in the alignment annotations, need more alignment for some asm code on some CPU,
or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ...

!warning: these alignments might not match reality (an attribute((align)) may be missing somewhere).
I (michael) didn't check them; these are just the alignments which I think could be reached easily ...

!future video codecs might need functions with less strict alignment
*/
|
66 |
|
67 |
/*
|
68 |
void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
|
69 |
void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
|
70 |
void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
|
71 |
void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
|
72 |
void clear_blocks_c(DCTELEM *blocks);
|
73 |
*/
|
74 |
|
75 |
/* add and put pixel (decoding) */

/*
 * Pixel block operation taking a destination block, source pixels, a line
 * size and a height h.
 * blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
 */
typedef void (*op_pixels_func)(uint8_t *block /*align width (8 or 16)*/,
                               const uint8_t *pixels /*align 1*/,
                               int line_size, int h);

/* Quarter-pel motion compensation function: dst, src and a stride. */
typedef void (*qpel_mc_func)(uint8_t *dst /*align width (8 or 16)*/,
                             uint8_t *src /*align 1*/,
                             int stride);
79 |
|
80 |
/*
 * DEF_OLD_QPEL(name) declares three functions sharing the qpel_mc_func
 * parameter list: ff_put_<name>, ff_put_no_rnd_<name> and ff_avg_<name>.
 * The expansion already ends in ';', so uses take no trailing semicolon.
 */
#define DEF_OLD_QPEL(name)\
void ff_put_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
void ff_avg_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);

/* 16-wide variants */
DEF_OLD_QPEL(qpel16_mc11_old_c)
DEF_OLD_QPEL(qpel16_mc31_old_c)
DEF_OLD_QPEL(qpel16_mc12_old_c)
DEF_OLD_QPEL(qpel16_mc32_old_c)
DEF_OLD_QPEL(qpel16_mc13_old_c)
DEF_OLD_QPEL(qpel16_mc33_old_c)
/* 8-wide variants */
DEF_OLD_QPEL(qpel8_mc11_old_c)
DEF_OLD_QPEL(qpel8_mc31_old_c)
DEF_OLD_QPEL(qpel8_mc12_old_c)
DEF_OLD_QPEL(qpel8_mc32_old_c)
DEF_OLD_QPEL(qpel8_mc13_old_c)
DEF_OLD_QPEL(qpel8_mc33_old_c)
97 |
|
98 |
/*
 * CALL_2X_PIXELS(a, b, n) defines a static function a() with the
 * op_pixels_func parameter list.  a() calls b() twice with the same
 * line_size and h: first on block/pixels as given, then with both pointers
 * advanced by n.  n is parenthesized so that an expression argument
 * (e.g. "w/2") is added as a whole.
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block      , pixels      , line_size, h);\
    b(block + (n), pixels + (n), line_size, h);\
}
103 |
|
104 |
/* motion estimation */

/* Compares two pixel blocks given a line size and returns an int. */
typedef int (*op_pixels_abs_func)(uint8_t *blk1 /*align width (8 or 16)*/,
                                  uint8_t *blk2 /*align 1*/,
                                  int line_size) /* __attribute__ ((const))*/;

/* Same shape as op_pixels_abs_func plus a leading opaque context pointer
   (annotated as an MpegEncContext in the original declaration). */
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s,
                           uint8_t *blk1 /*align width (8 or 16)*/,
                           uint8_t *blk2 /*align 1*/,
                           int line_size) /* __attribute__ ((const))*/;
109 |
|
110 |
/**
|
111 |
* DSPContext.
|
112 |
*/
|
113 |
typedef struct DSPContext { |
114 |
/* pixel ops : interface with DCT */
|
115 |
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); |
116 |
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); |
117 |
void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
118 |
void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size); |
119 |
void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); |
120 |
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy, |
121 |
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); |
122 |
void (*clear_blocks)(DCTELEM *blocks/*align 16*/); |
123 |
int (*pix_sum)(uint8_t * pix, int line_size); |
124 |
int (*pix_norm1)(uint8_t * pix, int line_size); |
125 |
me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */ |
126 |
me_cmp_func sse[2];
|
127 |
me_cmp_func hadamard8_diff[2];
|
128 |
me_cmp_func dct_sad[2];
|
129 |
me_cmp_func quant_psnr[2];
|
130 |
me_cmp_func bit[2];
|
131 |
me_cmp_func rd[2];
|
132 |
int (*hadamard8_abs )(uint8_t *src, int stride, int mean); |
133 |
|
134 |
me_cmp_func me_pre_cmp[11];
|
135 |
me_cmp_func me_cmp[11];
|
136 |
me_cmp_func me_sub_cmp[11];
|
137 |
me_cmp_func mb_cmp[11];
|
138 |
|
139 |
/* maybe create an array for 16/8 functions */
|
140 |
op_pixels_func put_pixels_tab[2][4]; |
141 |
op_pixels_func avg_pixels_tab[2][4]; |
142 |
op_pixels_func put_no_rnd_pixels_tab[2][4]; |
143 |
op_pixels_func avg_no_rnd_pixels_tab[2][4]; |
144 |
qpel_mc_func put_qpel_pixels_tab[2][16]; |
145 |
qpel_mc_func avg_qpel_pixels_tab[2][16]; |
146 |
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; |
147 |
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; |
148 |
qpel_mc_func put_mspel_pixels_tab[8];
|
149 |
|
150 |
op_pixels_abs_func pix_abs16x16; |
151 |
op_pixels_abs_func pix_abs16x16_x2; |
152 |
op_pixels_abs_func pix_abs16x16_y2; |
153 |
op_pixels_abs_func pix_abs16x16_xy2; |
154 |
op_pixels_abs_func pix_abs8x8; |
155 |
op_pixels_abs_func pix_abs8x8_x2; |
156 |
op_pixels_abs_func pix_abs8x8_y2; |
157 |
op_pixels_abs_func pix_abs8x8_xy2; |
158 |
|
159 |
/* huffyuv specific */
|
160 |
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); |
161 |
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); |
162 |
|
163 |
/* (I)DCT */
|
164 |
void (*fdct)(DCTELEM *block/* align 16*/); |
165 |
|
166 |
/**
|
167 |
* block -> idct -> clip to unsigned 8 bit -> dest.
|
168 |
* (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
|
169 |
* @param line_size size in pixels of a horizotal line of dest
|
170 |
*/
|
171 |
void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
172 |
|
173 |
/**
|
174 |
* block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
|
175 |
* @param line_size size in pixels of a horizotal line of dest
|
176 |
*/
|
177 |
void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); |
178 |
|
179 |
/**
|
180 |
* idct input permutation.
|
181 |
* an example to avoid confusion:
|
182 |
* - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
|
183 |
* - (x -> referece dct -> reference idct -> x)
|
184 |
* - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
|
185 |
* - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
|
186 |
*/
|
187 |
uint8_t idct_permutation[64];
|
188 |
int idct_permutation_type;
|
189 |
#define FF_NO_IDCT_PERM 1 |
190 |
#define FF_LIBMPEG2_IDCT_PERM 2 |
191 |
#define FF_SIMPLE_IDCT_PERM 3 |
192 |
#define FF_TRANSPOSE_IDCT_PERM 4 |
193 |
|
194 |
} DSPContext; |
195 |
|
196 |
/* Declared here, defined elsewhere: takes the DSPContext to operate on and
   the codec context.
   NOTE(review): by its name this presumably fills in the function pointers
   of *p -- the implementation is not visible from this header; confirm. */
void dsputil_init(DSPContext* p, AVCodecContext *avctx);
|
197 |
|
198 |
/**
|
199 |
* permute block according to permuatation.
|
200 |
* @param last last non zero element in scantable order
|
201 |
*/
|
202 |
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last); |
203 |
|
204 |
/**
 * Empty mmx state.
 * This must be called between any dsp function and float/double code,
 * for example: sin(); dsp->idct_put(); emms_c(); cos()
 *
 * This default definition expands to nothing; the HAVE_MMX branch further
 * down #undefs it and supplies a real implementation.
 */
#define emms_c()
|
210 |
|
211 |
/* Should be defined by architectures supporting
   one or more MultiMedia extensions.
   NOTE(review): the int result is presumably a mask of MM_* flags --
   confirm against the per-architecture implementations. */
int mm_support(void);
214 |
|
215 |
/*
 * Per-architecture section.  Exactly one branch is compiled; every branch
 * defines __align8 and declares that architecture's dsputil_init_* hook.
 * Note that despite its name __align8 is not always 8 bytes: see the
 * individual branches.
 */
#if defined(HAVE_MMX)

/* replace the empty default emms_c() with the real one below */
#undef emms_c

/* bits tested against mm_flags */
#define MM_MMX    0x0001 /* standard MMX */
#define MM_3DNOW  0x0004 /* AMD 3DNOW */
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_SSE    0x0008 /* SSE functions */
#define MM_SSE2   0x0010 /* PIV SSE2 functions */

extern int mm_flags;

void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);

/* Issue the emms instruction; the "memory" clobber keeps the compiler from
   moving memory accesses across it. */
static inline void emms(void)
{
    __asm __volatile ("emms;":::"memory");
}

/*
 * MMX emms_c(): runs emms() only when mm_flags has MM_MMX set.
 * Wrapped in do { } while (0) so that "emms_c();" is a single statement and
 * stays valid as the body of an if that is followed by else.
 */
#define emms_c() \
do {\
    if (mm_flags & MM_MMX)\
        emms();\
} while (0)

#define __align8 __attribute__ ((aligned (8)))

void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_ARMV4L)

/* This is to use 4 bytes read to the IDCT pointers for some 'zero'
   line optimizations */
#define __align8 __attribute__ ((aligned (4)))

void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);

#elif defined(HAVE_MLIB)

/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))

void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_ALPHA)

#define __align8 __attribute__ ((aligned (8)))

void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);

#elif defined(ARCH_POWERPC)

#define MM_ALTIVEC    0x0001 /* standard AltiVec */

extern int mm_flags;

#if defined(HAVE_ALTIVEC) && !defined(CONFIG_DARWIN)
#include <altivec.h>
#endif

#define __align8 __attribute__ ((aligned (16)))

void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);

#elif defined(HAVE_MMI)

#define __align8 __attribute__ ((aligned (16)))

void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);

#else

/* no architecture-specific code: no extra alignment requested */
#define __align8

#endif
|
293 |
|
294 |
/*
 * LD32/LD64 read and ST32 writes a 32/64-bit value through a pointer of any
 * type.  ST32 is fully parenthesized so that it can be used as an operand
 * inside a larger expression.
 */
#ifdef __GNUC__

/* Packed one-member structs: going through ->l tells GCC not to assume
   natural alignment for the access. */
struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));

#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)

#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))

#else /* __GNUC__ */

/* Plain pointer casts; no alignment handling in this branch. */
#define LD32(a) (*((uint32_t*)(a)))
#define LD64(a) (*((uint64_t*)(a)))

#define ST32(a, b) (*((uint32_t*)(a)) = (b))

#endif /* !__GNUC__ */
312 |
|
313 |
/* PSNR */
|
314 |
void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], |
315 |
int orig_linesize[3], int coded_linesize, |
316 |
AVCodecContext *avctx); |
317 |
|
318 |
/* FFT computation */

/* NOTE: soon integer code will be added, so you must use the
   FFTSample type */
typedef float FFTSample;

/* One complex sample: real and imaginary part. */
typedef struct FFTComplex {
    FFTSample re, im;
} FFTComplex;

/* State of one FFT instance; fft_calc selects the implementation that
   the inline fft_calc() below dispatches to. */
typedef struct FFTContext {
    int nbits;
    int inverse;
    uint16_t *revtab;
    FFTComplex *exptab;
    FFTComplex *exptab1; /* only used by SSE code */
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
} FFTContext;

int fft_init(FFTContext *s, int nbits, int inverse);
void fft_permute(FFTContext *s, FFTComplex *z);
void fft_calc_c(FFTContext *s, FFTComplex *z);
void fft_calc_sse(FFTContext *s, FFTComplex *z);
void fft_calc_altivec(FFTContext *s, FFTComplex *z);

/* Run the FFT on z through the implementation stored in s->fft_calc.
   s->fft_calc must have been set; it is called unconditionally. */
static inline void fft_calc(FFTContext *s, FFTComplex *z)
{
    s->fft_calc(s, z);
}
void fft_end(FFTContext *s);
|
348 |
|
349 |
/* MDCT computation */
|
350 |
|
351 |
typedef struct MDCTContext { |
352 |
int n; /* size of MDCT (i.e. number of input data * 2) */ |
353 |
int nbits; /* n = 2^nbits */ |
354 |
/* pre/post rotation tables */
|
355 |
FFTSample *tcos; |
356 |
FFTSample *tsin; |
357 |
FFTContext fft; |
358 |
} MDCTContext; |
359 |
|
360 |
int ff_mdct_init(MDCTContext *s, int nbits, int inverse); |
361 |
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
|
362 |
const FFTSample *input, FFTSample *tmp);
|
363 |
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
|
364 |
const FFTSample *input, FFTSample *tmp);
|
365 |
void ff_mdct_end(MDCTContext *s);
|
366 |
|
367 |
/*
 * WARPER88_1616(name8, name16) defines a static function name16() with the
 * me_cmp_func parameter list.  It returns the sum of four name8() calls made
 * at pixel offsets (0,0), (8,0), (0,8) and (8,8) from dst/src, using stride
 * to step between rows.
 */
#define WARPER88_1616(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
    return name8(s, dst           , src           , stride)\
          +name8(s, dst+8         , src+8         , stride)\
          +name8(s, dst  +8*stride, src  +8*stride, stride)\
          +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
}
374 |
|
375 |
#ifndef HAVE_LRINTF
|
376 |
/* XXX: add ISOC specific test to avoid specific BSD testing. */
|
377 |
/* better than nothing implementation. */
|
378 |
/* btw, rintf() is existing on fbsd too -- alex */
|
379 |
static inline long int lrintf(float x) |
380 |
{ |
381 |
#ifdef CONFIG_WIN32
|
382 |
/* XXX: incorrect, but make it compile */
|
383 |
return (int)(x); |
384 |
#else
|
385 |
return (int)(rint(x)); |
386 |
#endif
|
387 |
} |
388 |
#endif
|
389 |
|
390 |
#endif
|