ffmpeg / libavcodec / fft.c @ bcfa3e58
History  View  Annotate  Download (6.85 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2002 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

18 
*/

19  
20 
/**

21 
* @file fft.c

22 
* FFT/IFFT transforms.

23 
*/

24  
25 
#include "dsputil.h" 
26  
27 
/**

28 
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is

29 
* done

30 
*/

31 
int ff_fft_init(FFTContext *s, int nbits, int inverse) 
32 
{ 
33 
int i, j, m, n;

34 
float alpha, c1, s1, s2;

35  
36 
s>nbits = nbits; 
37 
n = 1 << nbits;

38  
39 
s>exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 
40 
if (!s>exptab)

41 
goto fail;

42 
s>revtab = av_malloc(n * sizeof(uint16_t));

43 
if (!s>revtab)

44 
goto fail;

45 
s>inverse = inverse; 
46  
47 
s2 = inverse ? 1.0 : 1.0; 
48  
49 
for(i=0;i<(n/2);i++) { 
50 
alpha = 2 * M_PI * (float)i / (float)n; 
51 
c1 = cos(alpha); 
52 
s1 = sin(alpha) * s2; 
53 
s>exptab[i].re = c1; 
54 
s>exptab[i].im = s1; 
55 
} 
56 
s>fft_calc = ff_fft_calc_c; 
57 
s>imdct_calc = ff_imdct_calc; 
58 
s>exptab1 = NULL;

59  
60 
/* compute constant table for HAVE_SSE version */

61 
#if (defined(HAVE_MMX) && (defined(HAVE_BUILTIN_VECTOR)  defined(HAVE_MM3DNOW)))  defined(HAVE_ALTIVEC)

62 
{ 
63 
int has_vectors = 0; 
64  
65 
#if defined(HAVE_MMX)

66 
has_vectors = mm_support() & (MM_3DNOW  MM_3DNOWEXT  MM_SSE  MM_SSE2); 
67 
#endif

68 
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)

69 
has_vectors = mm_support() & MM_ALTIVEC; 
70 
#endif

71 
if (has_vectors) {

72 
int np, nblocks, np2, l;

73 
FFTComplex *q; 
74  
75 
np = 1 << nbits;

76 
nblocks = np >> 3;

77 
np2 = np >> 1;

78 
s>exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 
79 
if (!s>exptab1)

80 
goto fail;

81 
q = s>exptab1; 
82 
do {

83 
for(l = 0; l < np2; l += 2 * nblocks) { 
84 
*q++ = s>exptab[l]; 
85 
*q++ = s>exptab[l + nblocks]; 
86  
87 
q>re = s>exptab[l].im; 
88 
q>im = s>exptab[l].re; 
89 
q++; 
90 
q>re = s>exptab[l + nblocks].im; 
91 
q>im = s>exptab[l + nblocks].re; 
92 
q++; 
93 
} 
94 
nblocks = nblocks >> 1;

95 
} while (nblocks != 0); 
96 
av_freep(&s>exptab); 
97 
#if defined(HAVE_MMX)

98 
if (has_vectors & MM_3DNOWEXT)

99 
s>imdct_calc = ff_imdct_calc_3dn2; 
100 
#ifdef HAVE_MM3DNOW

101 
if (has_vectors & MM_3DNOWEXT)

102 
/* 3DNowEx for Athlon(XP) */

103 
s>fft_calc = ff_fft_calc_3dn2; 
104 
else if (has_vectors & MM_3DNOW) 
105 
/* 3DNow! for K62/3 */

106 
s>fft_calc = ff_fft_calc_3dn; 
107 
#endif

108 
#ifdef HAVE_BUILTIN_VECTOR

109 
if (has_vectors & MM_SSE2)

110 
/* SSE for P4/K8 */

111 
s>fft_calc = ff_fft_calc_sse; 
112 
else if ((has_vectors & MM_SSE) && 
113 
s>fft_calc == ff_fft_calc_c) 
114 
/* SSE for P3 */

115 
s>fft_calc = ff_fft_calc_sse; 
116 
#endif

117 
#else /* HAVE_MMX */ 
118 
s>fft_calc = ff_fft_calc_altivec; 
119 
#endif

120 
} 
121 
} 
122 
#endif

123  
124 
/* compute bit reverse table */

125  
126 
for(i=0;i<n;i++) { 
127 
m=0;

128 
for(j=0;j<nbits;j++) { 
129 
m = ((i >> j) & 1) << (nbitsj1); 
130 
} 
131 
s>revtab[i]=m; 
132 
} 
133 
return 0; 
134 
fail:

135 
av_freep(&s>revtab); 
136 
av_freep(&s>exptab); 
137 
av_freep(&s>exptab1); 
138 
return 1; 
139 
} 
140  
141 
/* butter fly op */

142 
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \

143 
{\ 
144 
FFTSample ax, ay, bx, by;\ 
145 
bx=pre1;\ 
146 
by=pim1;\ 
147 
ax=qre1;\ 
148 
ay=qim1;\ 
149 
pre = (bx + ax);\ 
150 
pim = (by + ay);\ 
151 
qre = (bx  ax);\ 
152 
qim = (by  ay);\ 
153 
} 
154  
155 
#define MUL16(a,b) ((a) * (b))

156  
157 
#define CMUL(pre, pim, are, aim, bre, bim) \

158 
{\ 
159 
pre = (MUL16(are, bre)  MUL16(aim, bim));\ 
160 
pim = (MUL16(are, bim) + MUL16(bre, aim));\ 
161 
} 
162  
163 
/**

164 
* Do a complex FFT with the parameters defined in ff_fft_init(). The

165 
* input data must be permuted before with s>revtab table. No

166 
* 1.0/sqrt(n) normalization is done.

167 
*/

168 
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)

169 
{ 
170 
int ln = s>nbits;

171 
int j, np, np2;

172 
int nblocks, nloops;

173 
register FFTComplex *p, *q;

174 
FFTComplex *exptab = s>exptab; 
175 
int l;

176 
FFTSample tmp_re, tmp_im; 
177  
178 
np = 1 << ln;

179  
180 
/* pass 0 */

181  
182 
p=&z[0];

183 
j=(np >> 1);

184 
do {

185 
BF(p[0].re, p[0].im, p[1].re, p[1].im, 
186 
p[0].re, p[0].im, p[1].re, p[1].im); 
187 
p+=2;

188 
} while (j != 0); 
189  
190 
/* pass 1 */

191  
192  
193 
p=&z[0];

194 
j=np >> 2;

195 
if (s>inverse) {

196 
do {

197 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
198 
p[0].re, p[0].im, p[2].re, p[2].im); 
199 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
200 
p[1].re, p[1].im, p[3].im, p[3].re); 
201 
p+=4;

202 
} while (j != 0); 
203 
} else {

204 
do {

205 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
206 
p[0].re, p[0].im, p[2].re, p[2].im); 
207 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
208 
p[1].re, p[1].im, p[3].im, p[3].re); 
209 
p+=4;

210 
} while (j != 0); 
211 
} 
212 
/* pass 2 .. ln1 */

213  
214 
nblocks = np >> 3;

215 
nloops = 1 << 2; 
216 
np2 = np >> 1;

217 
do {

218 
p = z; 
219 
q = z + nloops; 
220 
for (j = 0; j < nblocks; ++j) { 
221 
BF(p>re, p>im, q>re, q>im, 
222 
p>re, p>im, q>re, q>im); 
223  
224 
p++; 
225 
q++; 
226 
for(l = nblocks; l < np2; l += nblocks) {

227 
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q>re, q>im); 
228 
BF(p>re, p>im, q>re, q>im, 
229 
p>re, p>im, tmp_re, tmp_im); 
230 
p++; 
231 
q++; 
232 
} 
233  
234 
p += nloops; 
235 
q += nloops; 
236 
} 
237 
nblocks = nblocks >> 1;

238 
nloops = nloops << 1;

239 
} while (nblocks != 0); 
240 
} 
241  
242 
/**

243 
* Do the permutation needed BEFORE calling ff_fft_calc()

244 
*/

245 
void ff_fft_permute(FFTContext *s, FFTComplex *z)

246 
{ 
247 
int j, k, np;

248 
FFTComplex tmp; 
249 
const uint16_t *revtab = s>revtab;

250  
251 
/* reverse */

252 
np = 1 << s>nbits;

253 
for(j=0;j<np;j++) { 
254 
k = revtab[j]; 
255 
if (k < j) {

256 
tmp = z[k]; 
257 
z[k] = z[j]; 
258 
z[j] = tmp; 
259 
} 
260 
} 
261 
} 
262  
263 
void ff_fft_end(FFTContext *s)

264 
{ 
265 
av_freep(&s>revtab); 
266 
av_freep(&s>exptab); 
267 
av_freep(&s>exptab1); 
268 
} 
269 