ffmpeg / libavcodec / fft.c @ 983e3246
History  View  Annotate  Download (6.1 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2002 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 021111307 USA

18 
*/

19  
20 
/**

21 
* @file fft.c

22 
* FFT/IFFT transforms.

23 
*/

24  
25 
#include "dsputil.h" 
26  
27 
/**

28 
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is

29 
* done

30 
*/

31 
int fft_init(FFTContext *s, int nbits, int inverse) 
32 
{ 
33 
int i, j, m, n;

34 
float alpha, c1, s1, s2;

35 

36 
s>nbits = nbits; 
37 
n = 1 << nbits;

38  
39 
s>exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 
40 
if (!s>exptab)

41 
goto fail;

42 
s>revtab = av_malloc(n * sizeof(uint16_t));

43 
if (!s>revtab)

44 
goto fail;

45 
s>inverse = inverse; 
46  
47 
s2 = inverse ? 1.0 : 1.0; 
48 

49 
for(i=0;i<(n/2);i++) { 
50 
alpha = 2 * M_PI * (float)i / (float)n; 
51 
c1 = cos(alpha); 
52 
s1 = sin(alpha) * s2; 
53 
s>exptab[i].re = c1; 
54 
s>exptab[i].im = s1; 
55 
} 
56 
s>fft_calc = fft_calc_c; 
57 
s>exptab1 = NULL;

58  
59 
/* compute constant table for HAVE_SSE version */

60 
#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR))  defined(HAVE_ALTIVEC)

61 
{ 
62 
int has_vectors = 0; 
63  
64 
#if defined(HAVE_MMX)

65 
has_vectors = mm_support() & MM_SSE; 
66 
#endif

67 
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)

68 
has_vectors = mm_support() & MM_ALTIVEC; 
69 
#endif

70 
if (has_vectors) {

71 
int np, nblocks, np2, l;

72 
FFTComplex *q; 
73 

74 
np = 1 << nbits;

75 
nblocks = np >> 3;

76 
np2 = np >> 1;

77 
s>exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 
78 
if (!s>exptab1)

79 
goto fail;

80 
q = s>exptab1; 
81 
do {

82 
for(l = 0; l < np2; l += 2 * nblocks) { 
83 
*q++ = s>exptab[l]; 
84 
*q++ = s>exptab[l + nblocks]; 
85  
86 
q>re = s>exptab[l].im; 
87 
q>im = s>exptab[l].re; 
88 
q++; 
89 
q>re = s>exptab[l + nblocks].im; 
90 
q>im = s>exptab[l + nblocks].re; 
91 
q++; 
92 
} 
93 
nblocks = nblocks >> 1;

94 
} while (nblocks != 0); 
95 
av_freep(&s>exptab); 
96 
#if defined(HAVE_MMX)

97 
s>fft_calc = fft_calc_sse; 
98 
#else

99 
s>fft_calc = fft_calc_altivec; 
100 
#endif

101 
} 
102 
} 
103 
#endif

104  
105 
/* compute bit reverse table */

106  
107 
for(i=0;i<n;i++) { 
108 
m=0;

109 
for(j=0;j<nbits;j++) { 
110 
m = ((i >> j) & 1) << (nbitsj1); 
111 
} 
112 
s>revtab[i]=m; 
113 
} 
114 
return 0; 
115 
fail:

116 
av_freep(&s>revtab); 
117 
av_freep(&s>exptab); 
118 
av_freep(&s>exptab1); 
119 
return 1; 
120 
} 
121  
122 
/* butter fly op */

123 
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \

124 
{\ 
125 
FFTSample ax, ay, bx, by;\ 
126 
bx=pre1;\ 
127 
by=pim1;\ 
128 
ax=qre1;\ 
129 
ay=qim1;\ 
130 
pre = (bx + ax);\ 
131 
pim = (by + ay);\ 
132 
qre = (bx  ax);\ 
133 
qim = (by  ay);\ 
134 
} 
135  
136 
#define MUL16(a,b) ((a) * (b))

137  
138 
#define CMUL(pre, pim, are, aim, bre, bim) \

139 
{\ 
140 
pre = (MUL16(are, bre)  MUL16(aim, bim));\ 
141 
pim = (MUL16(are, bim) + MUL16(bre, aim));\ 
142 
} 
143  
144 
/**

145 
* Do a complex FFT with the parameters defined in fft_init(). The

146 
* input data must be permuted before with s>revtab table. No

147 
* 1.0/sqrt(n) normalization is done.

148 
*/

149 
void fft_calc_c(FFTContext *s, FFTComplex *z)

150 
{ 
151 
int ln = s>nbits;

152 
int j, np, np2;

153 
int nblocks, nloops;

154 
register FFTComplex *p, *q;

155 
FFTComplex *exptab = s>exptab; 
156 
int l;

157 
FFTSample tmp_re, tmp_im; 
158  
159 
np = 1 << ln;

160  
161 
/* pass 0 */

162  
163 
p=&z[0];

164 
j=(np >> 1);

165 
do {

166 
BF(p[0].re, p[0].im, p[1].re, p[1].im, 
167 
p[0].re, p[0].im, p[1].re, p[1].im); 
168 
p+=2;

169 
} while (j != 0); 
170  
171 
/* pass 1 */

172  
173 

174 
p=&z[0];

175 
j=np >> 2;

176 
if (s>inverse) {

177 
do {

178 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
179 
p[0].re, p[0].im, p[2].re, p[2].im); 
180 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
181 
p[1].re, p[1].im, p[3].im, p[3].re); 
182 
p+=4;

183 
} while (j != 0); 
184 
} else {

185 
do {

186 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
187 
p[0].re, p[0].im, p[2].re, p[2].im); 
188 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
189 
p[1].re, p[1].im, p[3].im, p[3].re); 
190 
p+=4;

191 
} while (j != 0); 
192 
} 
193 
/* pass 2 .. ln1 */

194  
195 
nblocks = np >> 3;

196 
nloops = 1 << 2; 
197 
np2 = np >> 1;

198 
do {

199 
p = z; 
200 
q = z + nloops; 
201 
for (j = 0; j < nblocks; ++j) { 
202 
BF(p>re, p>im, q>re, q>im, 
203 
p>re, p>im, q>re, q>im); 
204 

205 
p++; 
206 
q++; 
207 
for(l = nblocks; l < np2; l += nblocks) {

208 
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q>re, q>im); 
209 
BF(p>re, p>im, q>re, q>im, 
210 
p>re, p>im, tmp_re, tmp_im); 
211 
p++; 
212 
q++; 
213 
} 
214  
215 
p += nloops; 
216 
q += nloops; 
217 
} 
218 
nblocks = nblocks >> 1;

219 
nloops = nloops << 1;

220 
} while (nblocks != 0); 
221 
} 
222  
223 
/**

224 
* Do the permutation needed BEFORE calling fft_calc()

225 
*/

226 
void fft_permute(FFTContext *s, FFTComplex *z)

227 
{ 
228 
int j, k, np;

229 
FFTComplex tmp; 
230 
const uint16_t *revtab = s>revtab;

231 

232 
/* reverse */

233 
np = 1 << s>nbits;

234 
for(j=0;j<np;j++) { 
235 
k = revtab[j]; 
236 
if (k < j) {

237 
tmp = z[k]; 
238 
z[k] = z[j]; 
239 
z[j] = tmp; 
240 
} 
241 
} 
242 
} 
243  
244 
void fft_end(FFTContext *s)

245 
{ 
246 
av_freep(&s>revtab); 
247 
av_freep(&s>exptab); 
248 
av_freep(&s>exptab1); 
249 
} 
250 