ffmpeg / libavcodec / fft.c @ 5509bffa
History | View | Annotate | Download (6.08 KB)
1 |
/*
|
---|---|
2 |
* FFT/IFFT transforms
|
3 |
* Copyright (c) 2002 Fabrice Bellard.
|
4 |
*
|
5 |
* This library is free software; you can redistribute it and/or
|
6 |
* modify it under the terms of the GNU Lesser General Public
|
7 |
* License as published by the Free Software Foundation; either
|
8 |
* version 2 of the License, or (at your option) any later version.
|
9 |
*
|
10 |
* This library is distributed in the hope that it will be useful,
|
11 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13 |
* Lesser General Public License for more details.
|
14 |
*
|
15 |
* You should have received a copy of the GNU Lesser General Public
|
16 |
* License along with this library; if not, write to the Free Software
|
17 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
18 |
*/
|
19 |
|
20 |
/**
|
21 |
* @file fft.c
|
22 |
* FFT/IFFT transforms.
|
23 |
*/
|
24 |
|
25 |
#include "dsputil.h" |
26 |
|
27 |
/**
|
28 |
* The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
|
29 |
* done
|
30 |
*/
|
31 |
int ff_fft_init(FFTContext *s, int nbits, int inverse) |
32 |
{ |
33 |
int i, j, m, n;
|
34 |
float alpha, c1, s1, s2;
|
35 |
|
36 |
s->nbits = nbits; |
37 |
n = 1 << nbits;
|
38 |
|
39 |
s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); |
40 |
if (!s->exptab)
|
41 |
goto fail;
|
42 |
s->revtab = av_malloc(n * sizeof(uint16_t));
|
43 |
if (!s->revtab)
|
44 |
goto fail;
|
45 |
s->inverse = inverse; |
46 |
|
47 |
s2 = inverse ? 1.0 : -1.0; |
48 |
|
49 |
for(i=0;i<(n/2);i++) { |
50 |
alpha = 2 * M_PI * (float)i / (float)n; |
51 |
c1 = cos(alpha); |
52 |
s1 = sin(alpha) * s2; |
53 |
s->exptab[i].re = c1; |
54 |
s->exptab[i].im = s1; |
55 |
} |
56 |
s->fft_calc = ff_fft_calc_c; |
57 |
s->exptab1 = NULL;
|
58 |
|
59 |
/* compute constant table for HAVE_SSE version */
|
60 |
#if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC)
|
61 |
{ |
62 |
int has_vectors = 0; |
63 |
|
64 |
#if defined(HAVE_MMX)
|
65 |
has_vectors = mm_support() & MM_SSE; |
66 |
#endif
|
67 |
#if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE)
|
68 |
has_vectors = mm_support() & MM_ALTIVEC; |
69 |
#endif
|
70 |
if (has_vectors) {
|
71 |
int np, nblocks, np2, l;
|
72 |
FFTComplex *q; |
73 |
|
74 |
np = 1 << nbits;
|
75 |
nblocks = np >> 3;
|
76 |
np2 = np >> 1;
|
77 |
s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); |
78 |
if (!s->exptab1)
|
79 |
goto fail;
|
80 |
q = s->exptab1; |
81 |
do {
|
82 |
for(l = 0; l < np2; l += 2 * nblocks) { |
83 |
*q++ = s->exptab[l]; |
84 |
*q++ = s->exptab[l + nblocks]; |
85 |
|
86 |
q->re = -s->exptab[l].im; |
87 |
q->im = s->exptab[l].re; |
88 |
q++; |
89 |
q->re = -s->exptab[l + nblocks].im; |
90 |
q->im = s->exptab[l + nblocks].re; |
91 |
q++; |
92 |
} |
93 |
nblocks = nblocks >> 1;
|
94 |
} while (nblocks != 0); |
95 |
av_freep(&s->exptab); |
96 |
#if defined(HAVE_MMX)
|
97 |
s->fft_calc = ff_fft_calc_sse; |
98 |
#else
|
99 |
s->fft_calc = ff_fft_calc_altivec; |
100 |
#endif
|
101 |
} |
102 |
} |
103 |
#endif
|
104 |
|
105 |
/* compute bit reverse table */
|
106 |
|
107 |
for(i=0;i<n;i++) { |
108 |
m=0;
|
109 |
for(j=0;j<nbits;j++) { |
110 |
m |= ((i >> j) & 1) << (nbits-j-1); |
111 |
} |
112 |
s->revtab[i]=m; |
113 |
} |
114 |
return 0; |
115 |
fail:
|
116 |
av_freep(&s->revtab); |
117 |
av_freep(&s->exptab); |
118 |
av_freep(&s->exptab1); |
119 |
return -1; |
120 |
} |
121 |
|
122 |
/* butter fly op */
|
123 |
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
|
124 |
{\ |
125 |
FFTSample ax, ay, bx, by;\ |
126 |
bx=pre1;\ |
127 |
by=pim1;\ |
128 |
ax=qre1;\ |
129 |
ay=qim1;\ |
130 |
pre = (bx + ax);\ |
131 |
pim = (by + ay);\ |
132 |
qre = (bx - ax);\ |
133 |
qim = (by - ay);\ |
134 |
} |
135 |
|
136 |
#define MUL16(a,b) ((a) * (b))
|
137 |
|
138 |
#define CMUL(pre, pim, are, aim, bre, bim) \
|
139 |
{\ |
140 |
pre = (MUL16(are, bre) - MUL16(aim, bim));\ |
141 |
pim = (MUL16(are, bim) + MUL16(bre, aim));\ |
142 |
} |
143 |
|
144 |
/**
|
145 |
* Do a complex FFT with the parameters defined in ff_fft_init(). The
|
146 |
* input data must be permuted before with s->revtab table. No
|
147 |
* 1.0/sqrt(n) normalization is done.
|
148 |
*/
|
149 |
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
|
150 |
{ |
151 |
int ln = s->nbits;
|
152 |
int j, np, np2;
|
153 |
int nblocks, nloops;
|
154 |
register FFTComplex *p, *q;
|
155 |
FFTComplex *exptab = s->exptab; |
156 |
int l;
|
157 |
FFTSample tmp_re, tmp_im; |
158 |
|
159 |
np = 1 << ln;
|
160 |
|
161 |
/* pass 0 */
|
162 |
|
163 |
p=&z[0];
|
164 |
j=(np >> 1);
|
165 |
do {
|
166 |
BF(p[0].re, p[0].im, p[1].re, p[1].im, |
167 |
p[0].re, p[0].im, p[1].re, p[1].im); |
168 |
p+=2;
|
169 |
} while (--j != 0); |
170 |
|
171 |
/* pass 1 */
|
172 |
|
173 |
|
174 |
p=&z[0];
|
175 |
j=np >> 2;
|
176 |
if (s->inverse) {
|
177 |
do {
|
178 |
BF(p[0].re, p[0].im, p[2].re, p[2].im, |
179 |
p[0].re, p[0].im, p[2].re, p[2].im); |
180 |
BF(p[1].re, p[1].im, p[3].re, p[3].im, |
181 |
p[1].re, p[1].im, -p[3].im, p[3].re); |
182 |
p+=4;
|
183 |
} while (--j != 0); |
184 |
} else {
|
185 |
do {
|
186 |
BF(p[0].re, p[0].im, p[2].re, p[2].im, |
187 |
p[0].re, p[0].im, p[2].re, p[2].im); |
188 |
BF(p[1].re, p[1].im, p[3].re, p[3].im, |
189 |
p[1].re, p[1].im, p[3].im, -p[3].re); |
190 |
p+=4;
|
191 |
} while (--j != 0); |
192 |
} |
193 |
/* pass 2 .. ln-1 */
|
194 |
|
195 |
nblocks = np >> 3;
|
196 |
nloops = 1 << 2; |
197 |
np2 = np >> 1;
|
198 |
do {
|
199 |
p = z; |
200 |
q = z + nloops; |
201 |
for (j = 0; j < nblocks; ++j) { |
202 |
BF(p->re, p->im, q->re, q->im, |
203 |
p->re, p->im, q->re, q->im); |
204 |
|
205 |
p++; |
206 |
q++; |
207 |
for(l = nblocks; l < np2; l += nblocks) {
|
208 |
CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); |
209 |
BF(p->re, p->im, q->re, q->im, |
210 |
p->re, p->im, tmp_re, tmp_im); |
211 |
p++; |
212 |
q++; |
213 |
} |
214 |
|
215 |
p += nloops; |
216 |
q += nloops; |
217 |
} |
218 |
nblocks = nblocks >> 1;
|
219 |
nloops = nloops << 1;
|
220 |
} while (nblocks != 0); |
221 |
} |
222 |
|
223 |
/**
|
224 |
* Do the permutation needed BEFORE calling ff_fft_calc()
|
225 |
*/
|
226 |
void ff_fft_permute(FFTContext *s, FFTComplex *z)
|
227 |
{ |
228 |
int j, k, np;
|
229 |
FFTComplex tmp; |
230 |
const uint16_t *revtab = s->revtab;
|
231 |
|
232 |
/* reverse */
|
233 |
np = 1 << s->nbits;
|
234 |
for(j=0;j<np;j++) { |
235 |
k = revtab[j]; |
236 |
if (k < j) {
|
237 |
tmp = z[k]; |
238 |
z[k] = z[j]; |
239 |
z[j] = tmp; |
240 |
} |
241 |
} |
242 |
} |
243 |
|
244 |
void ff_fft_end(FFTContext *s)
|
245 |
{ |
246 |
av_freep(&s->revtab); |
247 |
av_freep(&s->exptab); |
248 |
av_freep(&s->exptab1); |
249 |
} |
250 |
|