ffmpeg / libavcodec / fft.c @ ea937d01
History  View  Annotate  Download (6.05 KB)
1 
/*


2 
* FFT/IFFT transforms

3 
* Copyright (c) 2002 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 021111307 USA

18 
*/

19 
#include "dsputil.h" 
20  
21 
/**

22 
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is

23 
* done

24 
*/

25 
int fft_init(FFTContext *s, int nbits, int inverse) 
26 
{ 
27 
int i, j, m, n;

28 
float alpha, c1, s1, s2;

29 

30 
s>nbits = nbits; 
31 
n = 1 << nbits;

32  
33 
s>exptab = av_malloc((n / 2) * sizeof(FFTComplex)); 
34 
if (!s>exptab)

35 
goto fail;

36 
s>revtab = av_malloc(n * sizeof(uint16_t));

37 
if (!s>revtab)

38 
goto fail;

39 
s>inverse = inverse; 
40  
41 
s2 = inverse ? 1.0 : 1.0; 
42 

43 
for(i=0;i<(n/2);i++) { 
44 
alpha = 2 * M_PI * (float)i / (float)n; 
45 
c1 = cos(alpha); 
46 
s1 = sin(alpha) * s2; 
47 
s>exptab[i].re = c1; 
48 
s>exptab[i].im = s1; 
49 
} 
50 
s>fft_calc = fft_calc_c; 
51 
s>exptab1 = NULL;

52  
53 
/* compute constant table for HAVE_SSE version */

54 
#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR))  defined(HAVE_ALTIVEC)

55 
{ 
56 
int has_vectors = 0; 
57  
58 
#if defined(HAVE_MMX)

59 
has_vectors = mm_support() & MM_SSE; 
60 
#endif

61 
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)

62 
has_vectors = mm_support() & MM_ALTIVEC; 
63 
#endif

64 
if (has_vectors) {

65 
int np, nblocks, np2, l;

66 
FFTComplex *q; 
67 

68 
np = 1 << nbits;

69 
nblocks = np >> 3;

70 
np2 = np >> 1;

71 
s>exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); 
72 
if (!s>exptab1)

73 
goto fail;

74 
q = s>exptab1; 
75 
do {

76 
for(l = 0; l < np2; l += 2 * nblocks) { 
77 
*q++ = s>exptab[l]; 
78 
*q++ = s>exptab[l + nblocks]; 
79  
80 
q>re = s>exptab[l].im; 
81 
q>im = s>exptab[l].re; 
82 
q++; 
83 
q>re = s>exptab[l + nblocks].im; 
84 
q>im = s>exptab[l + nblocks].re; 
85 
q++; 
86 
} 
87 
nblocks = nblocks >> 1;

88 
} while (nblocks != 0); 
89 
av_freep(&s>exptab); 
90 
#if defined(HAVE_MMX)

91 
s>fft_calc = fft_calc_sse; 
92 
#else

93 
s>fft_calc = fft_calc_altivec; 
94 
#endif

95 
} 
96 
} 
97 
#endif

98  
99 
/* compute bit reverse table */

100  
101 
for(i=0;i<n;i++) { 
102 
m=0;

103 
for(j=0;j<nbits;j++) { 
104 
m = ((i >> j) & 1) << (nbitsj1); 
105 
} 
106 
s>revtab[i]=m; 
107 
} 
108 
return 0; 
109 
fail:

110 
av_freep(&s>revtab); 
111 
av_freep(&s>exptab); 
112 
av_freep(&s>exptab1); 
113 
return 1; 
114 
} 
115  
116 
/* butter fly op */

117 
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \

118 
{\ 
119 
FFTSample ax, ay, bx, by;\ 
120 
bx=pre1;\ 
121 
by=pim1;\ 
122 
ax=qre1;\ 
123 
ay=qim1;\ 
124 
pre = (bx + ax);\ 
125 
pim = (by + ay);\ 
126 
qre = (bx  ax);\ 
127 
qim = (by  ay);\ 
128 
} 
129  
130 
#define MUL16(a,b) ((a) * (b))

131  
132 
#define CMUL(pre, pim, are, aim, bre, bim) \

133 
{\ 
134 
pre = (MUL16(are, bre)  MUL16(aim, bim));\ 
135 
pim = (MUL16(are, bim) + MUL16(bre, aim));\ 
136 
} 
137  
138 
/**

139 
* Do a complex FFT with the parameters defined in fft_init(). The

140 
* input data must be permuted before with s>revtab table. No

141 
* 1.0/sqrt(n) normalization is done.

142 
*/

143 
void fft_calc_c(FFTContext *s, FFTComplex *z)

144 
{ 
145 
int ln = s>nbits;

146 
int j, np, np2;

147 
int nblocks, nloops;

148 
register FFTComplex *p, *q;

149 
FFTComplex *exptab = s>exptab; 
150 
int l;

151 
FFTSample tmp_re, tmp_im; 
152  
153 
np = 1 << ln;

154  
155 
/* pass 0 */

156  
157 
p=&z[0];

158 
j=(np >> 1);

159 
do {

160 
BF(p[0].re, p[0].im, p[1].re, p[1].im, 
161 
p[0].re, p[0].im, p[1].re, p[1].im); 
162 
p+=2;

163 
} while (j != 0); 
164  
165 
/* pass 1 */

166  
167 

168 
p=&z[0];

169 
j=np >> 2;

170 
if (s>inverse) {

171 
do {

172 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
173 
p[0].re, p[0].im, p[2].re, p[2].im); 
174 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
175 
p[1].re, p[1].im, p[3].im, p[3].re); 
176 
p+=4;

177 
} while (j != 0); 
178 
} else {

179 
do {

180 
BF(p[0].re, p[0].im, p[2].re, p[2].im, 
181 
p[0].re, p[0].im, p[2].re, p[2].im); 
182 
BF(p[1].re, p[1].im, p[3].re, p[3].im, 
183 
p[1].re, p[1].im, p[3].im, p[3].re); 
184 
p+=4;

185 
} while (j != 0); 
186 
} 
187 
/* pass 2 .. ln1 */

188  
189 
nblocks = np >> 3;

190 
nloops = 1 << 2; 
191 
np2 = np >> 1;

192 
do {

193 
p = z; 
194 
q = z + nloops; 
195 
for (j = 0; j < nblocks; ++j) { 
196 
BF(p>re, p>im, q>re, q>im, 
197 
p>re, p>im, q>re, q>im); 
198 

199 
p++; 
200 
q++; 
201 
for(l = nblocks; l < np2; l += nblocks) {

202 
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q>re, q>im); 
203 
BF(p>re, p>im, q>re, q>im, 
204 
p>re, p>im, tmp_re, tmp_im); 
205 
p++; 
206 
q++; 
207 
} 
208  
209 
p += nloops; 
210 
q += nloops; 
211 
} 
212 
nblocks = nblocks >> 1;

213 
nloops = nloops << 1;

214 
} while (nblocks != 0); 
215 
} 
216  
217 
/**

218 
* Do the permutation needed BEFORE calling fft_calc()

219 
*/

220 
void fft_permute(FFTContext *s, FFTComplex *z)

221 
{ 
222 
int j, k, np;

223 
FFTComplex tmp; 
224 
const uint16_t *revtab = s>revtab;

225 

226 
/* reverse */

227 
np = 1 << s>nbits;

228 
for(j=0;j<np;j++) { 
229 
k = revtab[j]; 
230 
if (k < j) {

231 
tmp = z[k]; 
232 
z[k] = z[j]; 
233 
z[j] = tmp; 
234 
} 
235 
} 
236 
} 
237  
238 
void fft_end(FFTContext *s)

239 
{ 
240 
av_freep(&s>revtab); 
241 
av_freep(&s>exptab); 
242 
av_freep(&s>exptab1); 
243 
} 
244 