ffmpeg / libavcodec / fft.c @ 6636b7e8
History  View  Annotate  Download (6.81 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2002 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

18 
*/

19  
20 
/**

21 
* @file fft.c

22 
* FFT/IFFT transforms.

23 
*/

24  
25 
#include "dsputil.h" 
26  
27 
/**

28 
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is

29 
* done

30 
*/

31 
int ff_fft_init(FFTContext *s, int nbits, int inverse) 
32 
{ 
33 
int i, j, m, n;

34 
float alpha, c1, s1, s2;

35  
36 
s>nbits = nbits; 
37 
n = 1 << nbits;

38  
39 
s>exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 
40 
if (!s>exptab)

41 
goto fail;

42 
s>revtab = av_malloc(n * sizeof(uint16_t));

43 
if (!s>revtab)

44 
goto fail;

45 
s>inverse = inverse; 
46  
47 
s2 = inverse ? 1.0 : 1.0; 
48  
49 
for(i=0;i<(n/2);i++) { 
50 
alpha = 2 * M_PI * (float)i / (float)n; 
51 
c1 = cos(alpha); 
52 
s1 = sin(alpha) * s2; 
53 
s>exptab[i].re = c1; 
54 
s>exptab[i].im = s1; 
55 
} 
56 
s>fft_calc = ff_fft_calc_c; 
57 
s>exptab1 = NULL;

58  
59 
/* compute constant table for HAVE_SSE version */

60 
#if (defined(HAVE_MMX) && (defined(HAVE_BUILTIN_VECTOR)  defined(HAVE_MM3DNOW)))  defined(HAVE_ALTIVEC)

61 
{ 
62 
int has_vectors = 0; 
63  
64 
#if defined(HAVE_MMX)

65 
#ifdef HAVE_MM3DNOW

66 
has_vectors = mm_support() & (MM_3DNOW  MM_3DNOWEXT  MM_SSE  MM_SSE2); 
67 
#else

68 
has_vectors = mm_support() & (MM_SSE  MM_SSE2); 
69 
#endif

70 
#endif

71 
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)

72 
has_vectors = mm_support() & MM_ALTIVEC; 
73 
#endif

74 
if (has_vectors) {

75 
int np, nblocks, np2, l;

76 
FFTComplex *q; 
77  
78 
np = 1 << nbits;

79 
nblocks = np >> 3;

80 
np2 = np >> 1;

81 
s>exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 
82 
if (!s>exptab1)

83 
goto fail;

84 
q = s>exptab1; 
85 
do {

86 
for(l = 0; l < np2; l += 2 * nblocks) { 
87 
*q++ = s>exptab[l]; 
88 
*q++ = s>exptab[l + nblocks]; 
89  
90 
q>re = s>exptab[l].im; 
91 
q>im = s>exptab[l].re; 
92 
q++; 
93 
q>re = s>exptab[l + nblocks].im; 
94 
q>im = s>exptab[l + nblocks].re; 
95 
q++; 
96 
} 
97 
nblocks = nblocks >> 1;

98 
} while (nblocks != 0); 
99 
av_freep(&s>exptab); 
100 
#if defined(HAVE_MMX)

101 
#ifdef HAVE_MM3DNOW

102 
if (has_vectors & MM_3DNOWEXT)

103 
/* 3DNowEx for Athlon(XP) */

104 
s>fft_calc = ff_fft_calc_3dn2; 
105 
else if (has_vectors & MM_3DNOW) 
106 
/* 3DNow! for K62/3 */

107 
s>fft_calc = ff_fft_calc_3dn; 
108 
#endif

109 
#ifdef HAVE_BUILTIN_VECTOR

110 
if (has_vectors & MM_SSE2)

111 
/* SSE for P4/K8 */

112 
s>fft_calc = ff_fft_calc_sse; 
113 
else if ((has_vectors & MM_SSE) && 
114 
s>fft_calc == ff_fft_calc_c) 
115 
/* SSE for P3 */

116 
s>fft_calc = ff_fft_calc_sse; 
117 
#endif

118 
#else /* HAVE_MMX */ 
119 
s>fft_calc = ff_fft_calc_altivec; 
120 
#endif

121 
} 
122 
} 
123 
#endif

124  
125 
/* compute bit reverse table */

126  
127 
for(i=0;i<n;i++) { 
128 
m=0;

129 
for(j=0;j<nbits;j++) { 
130 
m = ((i >> j) & 1) << (nbitsj1); 
131 
} 
132 
s>revtab[i]=m; 
133 
} 
134 
return 0; 
135 
fail:

136 
av_freep(&s>revtab); 
137 
av_freep(&s>exptab); 
138 
av_freep(&s>exptab1); 
139 
return 1; 
140 
} 
141  
142 
/* butter fly op */

143 
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \

144 
{\ 
145 
FFTSample ax, ay, bx, by;\ 
146 
bx=pre1;\ 
147 
by=pim1;\ 
148 
ax=qre1;\ 
149 
ay=qim1;\ 
150 
pre = (bx + ax);\ 
151 
pim = (by + ay);\ 
152 
qre = (bx  ax);\ 
153 
qim = (by  ay);\ 
154 
} 
155  
156 
#define MUL16(a,b) ((a) * (b))

157  
158 
#define CMUL(pre, pim, are, aim, bre, bim) \

159 
{\ 
160 
pre = (MUL16(are, bre)  MUL16(aim, bim));\ 
161 
pim = (MUL16(are, bim) + MUL16(bre, aim));\ 
162 
} 
163  
164 
/**

165 
* Do a complex FFT with the parameters defined in ff_fft_init(). The

166 
* input data must be permuted before with s>revtab table. No

167 
* 1.0/sqrt(n) normalization is done.

168 
*/

169 
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)

170 
{ 
171 
int ln = s>nbits;

172 
int j, np, np2;

173 
int nblocks, nloops;

174 
register FFTComplex *p, *q;

175 
FFTComplex *exptab = s>exptab; 
176 
int l;

177 
FFTSample tmp_re, tmp_im; 
178  
179 
np = 1 << ln;

180  
181 
/* pass 0 */

182  
183 
p=&z[0];

184 
j=(np >> 1);

185 
do {

186 
BF(p[0].re, p[0].im, p[1].re, p[1].im, 
187 
p[0].re, p[0].im, p[1].re, p[1].im); 
188 
p+=2;

189 
} while (j != 0); 
190  
191 
/* pass 1 */

192  
193  
194 
p=&z[0];

195 
j=np >> 2;

196 
if (s>inverse) {

197 
do {

198 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
199 
p[0].re, p[0].im, p[2].re, p[2].im); 
200 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
201 
p[1].re, p[1].im, p[3].im, p[3].re); 
202 
p+=4;

203 
} while (j != 0); 
204 
} else {

205 
do {

206 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
207 
p[0].re, p[0].im, p[2].re, p[2].im); 
208 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
209 
p[1].re, p[1].im, p[3].im, p[3].re); 
210 
p+=4;

211 
} while (j != 0); 
212 
} 
213 
/* pass 2 .. ln1 */

214  
215 
nblocks = np >> 3;

216 
nloops = 1 << 2; 
217 
np2 = np >> 1;

218 
do {

219 
p = z; 
220 
q = z + nloops; 
221 
for (j = 0; j < nblocks; ++j) { 
222 
BF(p>re, p>im, q>re, q>im, 
223 
p>re, p>im, q>re, q>im); 
224  
225 
p++; 
226 
q++; 
227 
for(l = nblocks; l < np2; l += nblocks) {

228 
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q>re, q>im); 
229 
BF(p>re, p>im, q>re, q>im, 
230 
p>re, p>im, tmp_re, tmp_im); 
231 
p++; 
232 
q++; 
233 
} 
234  
235 
p += nloops; 
236 
q += nloops; 
237 
} 
238 
nblocks = nblocks >> 1;

239 
nloops = nloops << 1;

240 
} while (nblocks != 0); 
241 
} 
242  
243 
/**

244 
* Do the permutation needed BEFORE calling ff_fft_calc()

245 
*/

246 
void ff_fft_permute(FFTContext *s, FFTComplex *z)

247 
{ 
248 
int j, k, np;

249 
FFTComplex tmp; 
250 
const uint16_t *revtab = s>revtab;

251  
252 
/* reverse */

253 
np = 1 << s>nbits;

254 
for(j=0;j<np;j++) { 
255 
k = revtab[j]; 
256 
if (k < j) {

257 
tmp = z[k]; 
258 
z[k] = z[j]; 
259 
z[j] = tmp; 
260 
} 
261 
} 
262 
} 
263  
264 
void ff_fft_end(FFTContext *s)

265 
{ 
266 
av_freep(&s>revtab); 
267 
av_freep(&s>exptab); 
268 
av_freep(&s>exptab1); 
269 
} 
270 