ffmpeg / libavcodec / fft.c @ 94d85eaf
History  View  Annotate  Download (6.73 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2002 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

18 
*/

19  
20 
/**

21 
* @file fft.c

22 
* FFT/IFFT transforms.

23 
*/

24  
25 
#include "dsputil.h" 
26  
27 
/**

28 
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is

29 
* done

30 
*/

31 
int ff_fft_init(FFTContext *s, int nbits, int inverse) 
32 
{ 
33 
int i, j, m, n;

34 
float alpha, c1, s1, s2;

35  
36 
s>nbits = nbits; 
37 
n = 1 << nbits;

38  
39 
s>exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 
40 
if (!s>exptab)

41 
goto fail;

42 
s>revtab = av_malloc(n * sizeof(uint16_t));

43 
if (!s>revtab)

44 
goto fail;

45 
s>inverse = inverse; 
46  
47 
s2 = inverse ? 1.0 : 1.0; 
48  
49 
for(i=0;i<(n/2);i++) { 
50 
alpha = 2 * M_PI * (float)i / (float)n; 
51 
c1 = cos(alpha); 
52 
s1 = sin(alpha) * s2; 
53 
s>exptab[i].re = c1; 
54 
s>exptab[i].im = s1; 
55 
} 
56 
s>fft_calc = ff_fft_calc_c; 
57 
s>exptab1 = NULL;

58  
59 
/* compute constant table for HAVE_SSE version */

60 
#if (defined(HAVE_MMX) && (defined(HAVE_BUILTIN_VECTOR)  defined(HAVE_MM3DNOW)))  defined(HAVE_ALTIVEC)

61 
{ 
62 
int has_vectors = 0; 
63  
64 
#if defined(HAVE_MMX)

65 
has_vectors = mm_support() & (MM_3DNOW  MM_3DNOWEXT  MM_SSE  MM_SSE2); 
66 
#endif

67 
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)

68 
has_vectors = mm_support() & MM_ALTIVEC; 
69 
#endif

70 
if (has_vectors) {

71 
int np, nblocks, np2, l;

72 
FFTComplex *q; 
73  
74 
np = 1 << nbits;

75 
nblocks = np >> 3;

76 
np2 = np >> 1;

77 
s>exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 
78 
if (!s>exptab1)

79 
goto fail;

80 
q = s>exptab1; 
81 
do {

82 
for(l = 0; l < np2; l += 2 * nblocks) { 
83 
*q++ = s>exptab[l]; 
84 
*q++ = s>exptab[l + nblocks]; 
85  
86 
q>re = s>exptab[l].im; 
87 
q>im = s>exptab[l].re; 
88 
q++; 
89 
q>re = s>exptab[l + nblocks].im; 
90 
q>im = s>exptab[l + nblocks].re; 
91 
q++; 
92 
} 
93 
nblocks = nblocks >> 1;

94 
} while (nblocks != 0); 
95 
av_freep(&s>exptab); 
96 
#if defined(HAVE_MMX)

97 
#ifdef HAVE_MM3DNOW

98 
if (has_vectors & MM_3DNOWEXT)

99 
/* 3DNowEx for Athlon(XP) */

100 
s>fft_calc = ff_fft_calc_3dn2; 
101 
else if (has_vectors & MM_3DNOW) 
102 
/* 3DNow! for K62/3 */

103 
s>fft_calc = ff_fft_calc_3dn; 
104 
#endif

105 
#ifdef HAVE_BUILTIN_VECTOR

106 
if (has_vectors & MM_SSE2)

107 
/* SSE for P4/K8 */

108 
s>fft_calc = ff_fft_calc_sse; 
109 
else if ((has_vectors & MM_SSE) && 
110 
s>fft_calc == ff_fft_calc_c) 
111 
/* SSE for P3 */

112 
s>fft_calc = ff_fft_calc_sse; 
113 
#endif

114 
#else /* HAVE_MMX */ 
115 
s>fft_calc = ff_fft_calc_altivec; 
116 
#endif

117 
} 
118 
} 
119 
#endif

120  
121 
/* compute bit reverse table */

122  
123 
for(i=0;i<n;i++) { 
124 
m=0;

125 
for(j=0;j<nbits;j++) { 
126 
m = ((i >> j) & 1) << (nbitsj1); 
127 
} 
128 
s>revtab[i]=m; 
129 
} 
130 
return 0; 
131 
fail:

132 
av_freep(&s>revtab); 
133 
av_freep(&s>exptab); 
134 
av_freep(&s>exptab1); 
135 
return 1; 
136 
} 
137  
138 
/* butter fly op */

139 
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \

140 
{\ 
141 
FFTSample ax, ay, bx, by;\ 
142 
bx=pre1;\ 
143 
by=pim1;\ 
144 
ax=qre1;\ 
145 
ay=qim1;\ 
146 
pre = (bx + ax);\ 
147 
pim = (by + ay);\ 
148 
qre = (bx  ax);\ 
149 
qim = (by  ay);\ 
150 
} 
151  
152 
#define MUL16(a,b) ((a) * (b))

153  
154 
#define CMUL(pre, pim, are, aim, bre, bim) \

155 
{\ 
156 
pre = (MUL16(are, bre)  MUL16(aim, bim));\ 
157 
pim = (MUL16(are, bim) + MUL16(bre, aim));\ 
158 
} 
159  
160 
/**

161 
* Do a complex FFT with the parameters defined in ff_fft_init(). The

162 
* input data must be permuted before with s>revtab table. No

163 
* 1.0/sqrt(n) normalization is done.

164 
*/

165 
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)

166 
{ 
167 
int ln = s>nbits;

168 
int j, np, np2;

169 
int nblocks, nloops;

170 
register FFTComplex *p, *q;

171 
FFTComplex *exptab = s>exptab; 
172 
int l;

173 
FFTSample tmp_re, tmp_im; 
174  
175 
np = 1 << ln;

176  
177 
/* pass 0 */

178  
179 
p=&z[0];

180 
j=(np >> 1);

181 
do {

182 
BF(p[0].re, p[0].im, p[1].re, p[1].im, 
183 
p[0].re, p[0].im, p[1].re, p[1].im); 
184 
p+=2;

185 
} while (j != 0); 
186  
187 
/* pass 1 */

188  
189  
190 
p=&z[0];

191 
j=np >> 2;

192 
if (s>inverse) {

193 
do {

194 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
195 
p[0].re, p[0].im, p[2].re, p[2].im); 
196 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
197 
p[1].re, p[1].im, p[3].im, p[3].re); 
198 
p+=4;

199 
} while (j != 0); 
200 
} else {

201 
do {

202 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
203 
p[0].re, p[0].im, p[2].re, p[2].im); 
204 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
205 
p[1].re, p[1].im, p[3].im, p[3].re); 
206 
p+=4;

207 
} while (j != 0); 
208 
} 
209 
/* pass 2 .. ln1 */

210  
211 
nblocks = np >> 3;

212 
nloops = 1 << 2; 
213 
np2 = np >> 1;

214 
do {

215 
p = z; 
216 
q = z + nloops; 
217 
for (j = 0; j < nblocks; ++j) { 
218 
BF(p>re, p>im, q>re, q>im, 
219 
p>re, p>im, q>re, q>im); 
220  
221 
p++; 
222 
q++; 
223 
for(l = nblocks; l < np2; l += nblocks) {

224 
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q>re, q>im); 
225 
BF(p>re, p>im, q>re, q>im, 
226 
p>re, p>im, tmp_re, tmp_im); 
227 
p++; 
228 
q++; 
229 
} 
230  
231 
p += nloops; 
232 
q += nloops; 
233 
} 
234 
nblocks = nblocks >> 1;

235 
nloops = nloops << 1;

236 
} while (nblocks != 0); 
237 
} 
238  
239 
/**

240 
* Do the permutation needed BEFORE calling ff_fft_calc()

241 
*/

242 
void ff_fft_permute(FFTContext *s, FFTComplex *z)

243 
{ 
244 
int j, k, np;

245 
FFTComplex tmp; 
246 
const uint16_t *revtab = s>revtab;

247  
248 
/* reverse */

249 
np = 1 << s>nbits;

250 
for(j=0;j<np;j++) { 
251 
k = revtab[j]; 
252 
if (k < j) {

253 
tmp = z[k]; 
254 
z[k] = z[j]; 
255 
z[j] = tmp; 
256 
} 
257 
} 
258 
} 
259  
260 
void ff_fft_end(FFTContext *s)

261 
{ 
262 
av_freep(&s>revtab); 
263 
av_freep(&s>exptab); 
264 
av_freep(&s>exptab1); 
265 
} 
266 