ffmpeg / libavcodec / fft.c @ 2912e87a
History  View  Annotate  Download (7.62 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2008 Loren Merritt

4 
* Copyright (c) 2002 Fabrice Bellard

5 
* Partly based on libdjbfft by D. J. Bernstein

6 
*

7 
* This file is part of Libav.

8 
*

9 
* Libav is free software; you can redistribute it and/or

10 
* modify it under the terms of the GNU Lesser General Public

11 
* License as published by the Free Software Foundation; either

12 
* version 2.1 of the License, or (at your option) any later version.

13 
*

14 
* Libav is distributed in the hope that it will be useful,

15 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

16 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

17 
* Lesser General Public License for more details.

18 
*

19 
* You should have received a copy of the GNU Lesser General Public

20 
* License along with Libav; if not, write to the Free Software

21 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

22 
*/

23  
24 
/**

25 
* @file

26 
* FFT/IFFT transforms.

27 
*/

28  
29 
#include <stdlib.h> 
30 
#include <string.h> 
31 
#include "libavutil/mathematics.h" 
32 
#include "fft.h" 
33  
34 
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */

35 
#if !CONFIG_HARDCODED_TABLES

36 
COSTABLE(16);

37 
COSTABLE(32);

38 
COSTABLE(64);

39 
COSTABLE(128);

40 
COSTABLE(256);

41 
COSTABLE(512);

42 
COSTABLE(1024);

43 
COSTABLE(2048);

44 
COSTABLE(4096);

45 
COSTABLE(8192);

46 
COSTABLE(16384);

47 
COSTABLE(32768);

48 
COSTABLE(65536);

49 
#endif

50 
COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {

51 
NULL, NULL, NULL, NULL, 
52 
ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024, 
53 
ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536, 
54 
}; 
55  
56 
static void ff_fft_permute_c(FFTContext *s, FFTComplex *z); 
57 
static void ff_fft_calc_c(FFTContext *s, FFTComplex *z); 
58  
59 
static int split_radix_permutation(int i, int n, int inverse) 
60 
{ 
61 
int m;

62 
if(n <= 2) return i&1; 
63 
m = n >> 1;

64 
if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; 
65 
m >>= 1;

66 
if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; 
67 
else return split_radix_permutation(i, m, inverse)*4  1; 
68 
} 
69  
70 
av_cold void ff_init_ff_cos_tabs(int index) 
71 
{ 
72 
#if !CONFIG_HARDCODED_TABLES

73 
int i;

74 
int m = 1<<index; 
75 
double freq = 2*M_PI/m; 
76 
FFTSample *tab = ff_cos_tabs[index]; 
77 
for(i=0; i<=m/4; i++) 
78 
tab[i] = cos(i*freq); 
79 
for(i=1; i<m/4; i++) 
80 
tab[m/2i] = tab[i];

81 
#endif

82 
} 
83  
84 
av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) 
85 
{ 
86 
int i, j, n;

87  
88 
if (nbits < 2  nbits > 16) 
89 
goto fail;

90 
s>nbits = nbits; 
91 
n = 1 << nbits;

92  
93 
s>revtab = av_malloc(n * sizeof(uint16_t));

94 
if (!s>revtab)

95 
goto fail;

96 
s>tmp_buf = av_malloc(n * sizeof(FFTComplex));

97 
if (!s>tmp_buf)

98 
goto fail;

99 
s>inverse = inverse; 
100 
s>fft_permutation = FF_FFT_PERM_DEFAULT; 
101  
102 
s>fft_permute = ff_fft_permute_c; 
103 
s>fft_calc = ff_fft_calc_c; 
104 
#if CONFIG_MDCT

105 
s>imdct_calc = ff_imdct_calc_c; 
106 
s>imdct_half = ff_imdct_half_c; 
107 
s>mdct_calc = ff_mdct_calc_c; 
108 
#endif

109  
110 
if (ARCH_ARM) ff_fft_init_arm(s);

111 
if (HAVE_ALTIVEC) ff_fft_init_altivec(s);

112 
if (HAVE_MMX) ff_fft_init_mmx(s);

113  
114 
for(j=4; j<=nbits; j++) { 
115 
ff_init_ff_cos_tabs(j); 
116 
} 
117 
for(i=0; i<n; i++) { 
118 
int j = i;

119 
if (s>fft_permutation == FF_FFT_PERM_SWAP_LSBS)

120 
j = (j&~3)  ((j>>1)&1)  ((j<<1)&2); 
121 
s>revtab[split_radix_permutation(i, n, s>inverse) & (n1)] = j;

122 
} 
123  
124 
return 0; 
125 
fail:

126 
av_freep(&s>revtab); 
127 
av_freep(&s>tmp_buf); 
128 
return 1; 
129 
} 
130  
131 
static void ff_fft_permute_c(FFTContext *s, FFTComplex *z) 
132 
{ 
133 
int j, np;

134 
const uint16_t *revtab = s>revtab;

135 
np = 1 << s>nbits;

136 
/* TODO: handle splitradix permute in a more optimal way, probably inplace */

137 
for(j=0;j<np;j++) s>tmp_buf[revtab[j]] = z[j]; 
138 
memcpy(z, s>tmp_buf, np * sizeof(FFTComplex));

139 
} 
140  
141 
av_cold void ff_fft_end(FFTContext *s)

142 
{ 
143 
av_freep(&s>revtab); 
144 
av_freep(&s>tmp_buf); 
145 
} 
146  
147 
#define sqrthalf (float)M_SQRT1_2 
148  
149 
#define BF(x,y,a,b) {\

150 
x = a  b;\ 
151 
y = a + b;\ 
152 
} 
153  
154 
#define BUTTERFLIES(a0,a1,a2,a3) {\

155 
BF(t3, t5, t5, t1);\ 
156 
BF(a2.re, a0.re, a0.re, t5);\ 
157 
BF(a3.im, a1.im, a1.im, t3);\ 
158 
BF(t4, t6, t2, t6);\ 
159 
BF(a3.re, a1.re, a1.re, t4);\ 
160 
BF(a2.im, a0.im, a0.im, t6);\ 
161 
} 
162  
163 
// force loading all the inputs before storing any.

164 
// this is slightly slower for small data, but avoids store>load aliasing

165 
// for addresses separated by large powers of 2.

166 
#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\

167 
FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ 
168 
BF(t3, t5, t5, t1);\ 
169 
BF(a2.re, a0.re, r0, t5);\ 
170 
BF(a3.im, a1.im, i1, t3);\ 
171 
BF(t4, t6, t2, t6);\ 
172 
BF(a3.re, a1.re, r1, t4);\ 
173 
BF(a2.im, a0.im, i0, t6);\ 
174 
} 
175  
176 
#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\

177 
t1 = a2.re * wre + a2.im * wim;\ 
178 
t2 = a2.im * wre  a2.re * wim;\ 
179 
t5 = a3.re * wre  a3.im * wim;\ 
180 
t6 = a3.im * wre + a3.re * wim;\ 
181 
BUTTERFLIES(a0,a1,a2,a3)\ 
182 
} 
183  
184 
#define TRANSFORM_ZERO(a0,a1,a2,a3) {\

185 
t1 = a2.re;\ 
186 
t2 = a2.im;\ 
187 
t5 = a3.re;\ 
188 
t6 = a3.im;\ 
189 
BUTTERFLIES(a0,a1,a2,a3)\ 
190 
} 
191  
192 
/* z[0...8n1], w[1...2n1] */

193 
#define PASS(name)\

194 
static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ 
195 
{\ 
196 
FFTSample t1, t2, t3, t4, t5, t6;\ 
197 
int o1 = 2*n;\ 
198 
int o2 = 4*n;\ 
199 
int o3 = 6*n;\ 
200 
const FFTSample *wim = wre+o1;\

201 
n;\ 
202 
\ 
203 
TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\

204 
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[1]);\ 
205 
do {\

206 
z += 2;\

207 
wre += 2;\

208 
wim = 2;\

209 
TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ 
210 
TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[1]);\ 
211 
} while(n);\

212 
} 
213  
214 
PASS(pass) 
215 
#undef BUTTERFLIES

216 
#define BUTTERFLIES BUTTERFLIES_BIG

217 
PASS(pass_big) 
218  
219 
#define DECL_FFT(n,n2,n4)\

220 
static void fft##n(FFTComplex *z)\ 
221 
{\ 
222 
fft##n2(z);\ 
223 
fft##n4(z+n4*2);\ 
224 
fft##n4(z+n4*3);\ 
225 
pass(z,ff_cos_##n,n4/2);\ 
226 
} 
227  
228 
static void fft4(FFTComplex *z) 
229 
{ 
230 
FFTSample t1, t2, t3, t4, t5, t6, t7, t8; 
231  
232 
BF(t3, t1, z[0].re, z[1].re); 
233 
BF(t8, t6, z[3].re, z[2].re); 
234 
BF(z[2].re, z[0].re, t1, t6); 
235 
BF(t4, t2, z[0].im, z[1].im); 
236 
BF(t7, t5, z[2].im, z[3].im); 
237 
BF(z[3].im, z[1].im, t4, t8); 
238 
BF(z[3].re, z[1].re, t3, t7); 
239 
BF(z[2].im, z[0].im, t2, t5); 
240 
} 
241  
242 
static void fft8(FFTComplex *z) 
243 
{ 
244 
FFTSample t1, t2, t3, t4, t5, t6, t7, t8; 
245  
246 
fft4(z); 
247  
248 
BF(t1, z[5].re, z[4].re, z[5].re); 
249 
BF(t2, z[5].im, z[4].im, z[5].im); 
250 
BF(t3, z[7].re, z[6].re, z[7].re); 
251 
BF(t4, z[7].im, z[6].im, z[7].im); 
252 
BF(t8, t1, t3, t1); 
253 
BF(t7, t2, t2, t4); 
254 
BF(z[4].re, z[0].re, z[0].re, t1); 
255 
BF(z[4].im, z[0].im, z[0].im, t2); 
256 
BF(z[6].re, z[2].re, z[2].re, t7); 
257 
BF(z[6].im, z[2].im, z[2].im, t8); 
258  
259 
TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); 
260 
} 
261  
262 
#if !CONFIG_SMALL

263 
static void fft16(FFTComplex *z) 
264 
{ 
265 
FFTSample t1, t2, t3, t4, t5, t6; 
266  
267 
fft8(z); 
268 
fft4(z+8);

269 
fft4(z+12);

270  
271 
TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); 
272 
TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); 
273 
TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); 
274 
TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); 
275 
} 
276 
#else

277 
DECL_FFT(16,8,4) 
278 
#endif

279 
DECL_FFT(32,16,8) 
280 
DECL_FFT(64,32,16) 
281 
DECL_FFT(128,64,32) 
282 
DECL_FFT(256,128,64) 
283 
DECL_FFT(512,256,128) 
284 
#if !CONFIG_SMALL

285 
#define pass pass_big

286 
#endif

287 
DECL_FFT(1024,512,256) 
288 
DECL_FFT(2048,1024,512) 
289 
DECL_FFT(4096,2048,1024) 
290 
DECL_FFT(8192,4096,2048) 
291 
DECL_FFT(16384,8192,4096) 
292 
DECL_FFT(32768,16384,8192) 
293 
DECL_FFT(65536,32768,16384) 
294  
295 
static void (* const fft_dispatch[])(FFTComplex*) = { 
296 
fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, 
297 
fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, 
298 
}; 
299  
300 
static void ff_fft_calc_c(FFTContext *s, FFTComplex *z) 
301 
{ 
302 
fft_dispatch[s>nbits2](z);

303 
} 
304 