ffmpeg / libavcodec / liba52 / imdct.c @ 5509bffa
History  View  Annotate  Download (11.2 KB)
1 
/*


2 
* imdct.c

3 
* Copyright (C) 20002003 Michel Lespinasse <walken@zoy.org>

4 
* Copyright (C) 19992000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>

5 
*

6 
* The ifft algorithms in this file have been largely inspired by Dan

7 
* Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html

8 
*

9 
* This file is part of a52dec, a free ATSC A52 stream decoder.

10 
* See http://liba52.sourceforge.net/ for updates.

11 
*

12 
* a52dec is free software; you can redistribute it and/or modify

13 
* it under the terms of the GNU General Public License as published by

14 
* the Free Software Foundation; either version 2 of the License, or

15 
* (at your option) any later version.

16 
*

17 
* a52dec is distributed in the hope that it will be useful,

18 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

19 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

20 
* GNU General Public License for more details.

21 
*

22 
* You should have received a copy of the GNU General Public License

23 
* along with this program; if not, write to the Free Software

24 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

25 
*/

26  
27 
#include "a52.h" 
28 
#include "a52_internal.h" 
29 
#include "mm_accel.h" 
30  
31 
typedef struct complex_s { 
32 
sample_t real; 
33 
sample_t imag; 
34 
} complex_t; 
35  
36 
static uint8_t fftorder[] = {

37 
0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, 
38 
8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, 
39 
4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180, 
40 
252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172, 
41 
2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178, 
42 
10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90, 
43 
254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174, 
44 
6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86 
45 
}; 
46  
47 
/* Root values for IFFT */

48 
static sample_t roots16[3]; 
49 
static sample_t roots32[7]; 
50 
static sample_t roots64[15]; 
51 
static sample_t roots128[31]; 
52  
53 
/* Twiddle factors for IMDCT */

54 
static complex_t pre1[128]; 
55 
static complex_t post1[64]; 
56 
static complex_t pre2[64]; 
57 
static complex_t post2[32]; 
58  
59 
static sample_t a52_imdct_window[256]; 
60  
61 
static void (* ifft128) (complex_t * buf); 
62 
static void (* ifft64) (complex_t * buf); 
63  
64 
static inline void ifft2 (complex_t * buf) 
65 
{ 
66 
sample_t r, i; 
67  
68 
r = buf[0].real;

69 
i = buf[0].imag;

70 
buf[0].real += buf[1].real; 
71 
buf[0].imag += buf[1].imag; 
72 
buf[1].real = r  buf[1].real; 
73 
buf[1].imag = i  buf[1].imag; 
74 
} 
75  
76 
static inline void ifft4 (complex_t * buf) 
77 
{ 
78 
sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 
79  
80 
tmp1 = buf[0].real + buf[1].real; 
81 
tmp2 = buf[3].real + buf[2].real; 
82 
tmp3 = buf[0].imag + buf[1].imag; 
83 
tmp4 = buf[2].imag + buf[3].imag; 
84 
tmp5 = buf[0].real  buf[1].real; 
85 
tmp6 = buf[0].imag  buf[1].imag; 
86 
tmp7 = buf[2].imag  buf[3].imag; 
87 
tmp8 = buf[3].real  buf[2].real; 
88  
89 
buf[0].real = tmp1 + tmp2;

90 
buf[0].imag = tmp3 + tmp4;

91 
buf[2].real = tmp1  tmp2;

92 
buf[2].imag = tmp3  tmp4;

93 
buf[1].real = tmp5 + tmp7;

94 
buf[1].imag = tmp6 + tmp8;

95 
buf[3].real = tmp5  tmp7;

96 
buf[3].imag = tmp6  tmp8;

97 
} 
98  
99 
/* basic radix2 ifft butterfly */

100  
101 
#define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \ 
102 
t0 = MUL (W1, d1) + MUL (W0, d0); \ 
103 
t1 = MUL (W0, d1)  MUL (W1, d0); \ 
104 
} while (0) 
105  
106 
/* radix2 ifft butterfly with bias */

107  
108 
#define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \ 
109 
t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \ 
110 
t1 = BIAS (MUL (d1, W0)  MUL (d0, W1)); \ 
111 
} while (0) 
112  
113 
/* the basic splitradix ifft butterfly */

114  
115 
#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \ 
116 
BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \ 
117 
BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \ 
118 
tmp1 = tmp5 + tmp7; \ 
119 
tmp2 = tmp6 + tmp8; \ 
120 
tmp3 = tmp6  tmp8; \ 
121 
tmp4 = tmp7  tmp5; \ 
122 
a2.real = a0.real  tmp1; \ 
123 
a2.imag = a0.imag  tmp2; \ 
124 
a3.real = a1.real  tmp3; \ 
125 
a3.imag = a1.imag  tmp4; \ 
126 
a0.real += tmp1; \ 
127 
a0.imag += tmp2; \ 
128 
a1.real += tmp3; \ 
129 
a1.imag += tmp4; \ 
130 
} while (0) 
131  
132 
/* splitradix ifft butterfly, specialized for wr=1 wi=0 */

133  
134 
#define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \ 
135 
tmp1 = a2.real + a3.real; \ 
136 
tmp2 = a2.imag + a3.imag; \ 
137 
tmp3 = a2.imag  a3.imag; \ 
138 
tmp4 = a3.real  a2.real; \ 
139 
a2.real = a0.real  tmp1; \ 
140 
a2.imag = a0.imag  tmp2; \ 
141 
a3.real = a1.real  tmp3; \ 
142 
a3.imag = a1.imag  tmp4; \ 
143 
a0.real += tmp1; \ 
144 
a0.imag += tmp2; \ 
145 
a1.real += tmp3; \ 
146 
a1.imag += tmp4; \ 
147 
} while (0) 
148  
149 
/* splitradix ifft butterfly, specialized for wr=wi */

150  
151 
#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \ 
152 
tmp5 = MUL (a2.real + a2.imag, w); \ 
153 
tmp6 = MUL (a2.imag  a2.real, w); \ 
154 
tmp7 = MUL (a3.real  a3.imag, w); \ 
155 
tmp8 = MUL (a3.imag + a3.real, w); \ 
156 
tmp1 = tmp5 + tmp7; \ 
157 
tmp2 = tmp6 + tmp8; \ 
158 
tmp3 = tmp6  tmp8; \ 
159 
tmp4 = tmp7  tmp5; \ 
160 
a2.real = a0.real  tmp1; \ 
161 
a2.imag = a0.imag  tmp2; \ 
162 
a3.real = a1.real  tmp3; \ 
163 
a3.imag = a1.imag  tmp4; \ 
164 
a0.real += tmp1; \ 
165 
a0.imag += tmp2; \ 
166 
a1.real += tmp3; \ 
167 
a1.imag += tmp4; \ 
168 
} while (0) 
169  
170 
static inline void ifft8 (complex_t * buf) 
171 
{ 
172 
sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 
173  
174 
ifft4 (buf); 
175 
ifft2 (buf + 4);

176 
ifft2 (buf + 6);

177 
BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); 
178 
BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); 
179 
} 
180  
181 
static void ifft_pass (complex_t * buf, sample_t * weight, int n) 
182 
{ 
183 
complex_t * buf1; 
184 
complex_t * buf2; 
185 
complex_t * buf3; 
186 
sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; 
187 
int i;

188  
189 
buf++; 
190 
buf1 = buf + n; 
191 
buf2 = buf + 2 * n;

192 
buf3 = buf + 3 * n;

193  
194 
BUTTERFLY_ZERO (buf[1], buf1[1], buf2[1], buf3[1]); 
195  
196 
i = n  1;

197  
198 
do {

199 
BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], 
200 
weight[0], weight[2*in]); 
201 
buf++; 
202 
buf1++; 
203 
buf2++; 
204 
buf3++; 
205 
weight++; 
206 
} while (i);

207 
} 
208  
209 
static void ifft16 (complex_t * buf) 
210 
{ 
211 
ifft8 (buf); 
212 
ifft4 (buf + 8);

213 
ifft4 (buf + 12);

214 
ifft_pass (buf, roots16, 4);

215 
} 
216  
217 
static void ifft32 (complex_t * buf) 
218 
{ 
219 
ifft16 (buf); 
220 
ifft8 (buf + 16);

221 
ifft8 (buf + 24);

222 
ifft_pass (buf, roots32, 8);

223 
} 
224  
225 
static void ifft64_c (complex_t * buf) 
226 
{ 
227 
ifft32 (buf); 
228 
ifft16 (buf + 32);

229 
ifft16 (buf + 48);

230 
ifft_pass (buf, roots64, 16);

231 
} 
232  
233 
static void ifft128_c (complex_t * buf) 
234 
{ 
235 
ifft32 (buf); 
236 
ifft16 (buf + 32);

237 
ifft16 (buf + 48);

238 
ifft_pass (buf, roots64, 16);

239  
240 
ifft32 (buf + 64);

241 
ifft32 (buf + 96);

242 
ifft_pass (buf, roots128, 32);

243 
} 
244  
245 
void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias)

246 
{ 
247 
int i, k;

248 
sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; 
249 
const sample_t * window = a52_imdct_window;

250 
complex_t buf[128];

251  
252 
for (i = 0; i < 128; i++) { 
253 
k = fftorder[i]; 
254 
t_r = pre1[i].real; 
255 
t_i = pre1[i].imag; 
256 
BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255k]);

257 
} 
258  
259 
ifft128 (buf); 
260  
261 
/* Post IFFT complex multiply plus IFFT complex conjugate*/

262 
/* Window and convert to real valued signal */

263 
for (i = 0; i < 64; i++) { 
264 
/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */

265 
t_r = post1[i].real; 
266 
t_i = post1[i].imag; 
267 
BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real); 
268 
BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127i].imag, buf[127i].real); 
269  
270 
w_1 = window[2*i];

271 
w_2 = window[2552*i]; 
272 
BUTTERFLY_B (data[2552*i], data[2*i], w_2, w_1, a_r, delay[2*i]); 
273 
delay[2*i] = a_i;

274  
275 
w_1 = window[2*i+1]; 
276 
w_2 = window[2542*i]; 
277 
BUTTERFLY_B (data[2*i+1], data[2542*i], w_1, w_2, b_r, delay[2*i+1]); 
278 
delay[2*i+1] = b_i; 
279 
} 
280 
} 
281  
282 
void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias)

283 
{ 
284 
int i, k;

285 
sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; 
286 
const sample_t * window = a52_imdct_window;

287 
complex_t buf1[64], buf2[64]; 
288  
289 
/* Pre IFFT complex multiply plus IFFT cmplx conjugate */

290 
for (i = 0; i < 64; i++) { 
291 
k = fftorder[i]; 
292 
t_r = pre2[i].real; 
293 
t_i = pre2[i].imag; 
294 
BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254k]);

295 
BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255k]); 
296 
} 
297  
298 
ifft64 (buf1); 
299 
ifft64 (buf2); 
300  
301 
/* Post IFFT complex multiply */

302 
/* Window and convert to real valued signal */

303 
for (i = 0; i < 32; i++) { 
304 
/* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */

305 
t_r = post2[i].real; 
306 
t_i = post2[i].imag; 
307 
BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real); 
308 
BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63i].imag, buf1[63i].real); 
309 
BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real); 
310 
BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63i].imag, buf2[63i].real); 
311  
312 
w_1 = window[2*i];

313 
w_2 = window[2552*i]; 
314 
BUTTERFLY_B (data[2552*i], data[2*i], w_2, w_1, a_r, delay[2*i]); 
315 
delay[2*i] = c_i;

316  
317 
w_1 = window[128+2*i]; 
318 
w_2 = window[1272*i]; 
319 
BUTTERFLY_B (data[128+2*i], data[1272*i], w_1, w_2, a_i, delay[1272*i]); 
320 
delay[1272*i] = c_r; 
321  
322 
w_1 = window[2*i+1]; 
323 
w_2 = window[2542*i]; 
324 
BUTTERFLY_B (data[2542*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]); 
325 
delay[2*i+1] = d_r; 
326  
327 
w_1 = window[129+2*i]; 
328 
w_2 = window[1262*i]; 
329 
BUTTERFLY_B (data[129+2*i], data[1262*i], w_1, w_2, b_r, delay[1262*i]); 
330 
delay[1262*i] = d_i; 
331 
} 
332 
} 
333  
334 
static double besselI0 (double x) 
335 
{ 
336 
double bessel = 1; 
337 
int i = 100; 
338  
339 
do

340 
bessel = bessel * x / (i * i) + 1;

341 
while (i);

342 
return bessel;

343 
} 
344  
345 
void a52_imdct_init (uint32_t mm_accel)

346 
{ 
347 
int i, k;

348 
double sum;

349 
double local_imdct_window[256]; 
350  
351 
/* compute imdct window  kaiserbessel derived window, alpha = 5.0 */

352 
sum = 0;

353 
for (i = 0; i < 256; i++) { 
354 
sum += besselI0 (i * (256  i) * (5 * M_PI / 256) * (5 * M_PI / 256)); 
355 
local_imdct_window[i] = sum; 
356 
} 
357 
sum++; 
358 
for (i = 0; i < 256; i++) 
359 
a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum)); 
360  
361 
for (i = 0; i < 3; i++) 
362 
roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1))); 
363  
364 
for (i = 0; i < 7; i++) 
365 
roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1))); 
366  
367 
for (i = 0; i < 15; i++) 
368 
roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1))); 
369  
370 
for (i = 0; i < 31; i++) 
371 
roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1))); 
372  
373 
for (i = 0; i < 64; i++) { 
374 
k = fftorder[i] / 2 + 64; 
375 
pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k  0.25))); 
376 
pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k  0.25))); 
377 
} 
378  
379 
for (i = 64; i < 128; i++) { 
380 
k = fftorder[i] / 2 + 64; 
381 
pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k  0.25))); 
382 
pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k  0.25))); 
383 
} 
384  
385 
for (i = 0; i < 64; i++) { 
386 
post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5))); 
387 
post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5))); 
388 
} 
389  
390 
for (i = 0; i < 64; i++) { 
391 
k = fftorder[i] / 4;

392 
pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k  0.25))); 
393 
pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k  0.25))); 
394 
} 
395  
396 
for (i = 0; i < 32; i++) { 
397 
post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5))); 
398 
post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5))); 
399 
} 
400  
401 
#ifdef LIBA52_DJBFFT

402 
if (mm_accel & MM_ACCEL_DJBFFT) {

403 
ifft128 = (void (*) (complex_t *)) fftc4_un128;

404 
ifft64 = (void (*) (complex_t *)) fftc4_un64;

405 
} else

406 
#endif

407 
{ 
408 
ifft128 = ifft128_c; 
409 
ifft64 = ifft64_c; 
410 
} 
411 
} 