Statistics
| Branch: | Revision:

ffmpeg / libavcodec / fft.c @ 844ea46a

History | View | Annotate | Download (6.45 KB)

1
/*
2
 * FFT/IFFT transforms
3
 * Copyright (c) 2002 Fabrice Bellard.
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file fft.c
24
 * FFT/IFFT transforms.
25
 */
26

    
27
#include "dsputil.h"
28

    
29
/**
30
 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
31
 * done
32
 */
33
int ff_fft_init(FFTContext *s, int nbits, int inverse)
34
{
35
    int i, j, m, n;
36
    float alpha, c1, s1, s2;
37
    int shuffle = 0;
38
    int av_unused has_vectors;
39

    
40
    s->nbits = nbits;
41
    n = 1 << nbits;
42

    
43
    s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
44
    if (!s->exptab)
45
        goto fail;
46
    s->revtab = av_malloc(n * sizeof(uint16_t));
47
    if (!s->revtab)
48
        goto fail;
49
    s->inverse = inverse;
50

    
51
    s2 = inverse ? 1.0 : -1.0;
52

    
53
    for(i=0;i<(n/2);i++) {
54
        alpha = 2 * M_PI * (float)i / (float)n;
55
        c1 = cos(alpha);
56
        s1 = sin(alpha) * s2;
57
        s->exptab[i].re = c1;
58
        s->exptab[i].im = s1;
59
    }
60
    s->fft_calc = ff_fft_calc_c;
61
    s->imdct_calc = ff_imdct_calc;
62
    s->exptab1 = NULL;
63

    
64
#ifdef HAVE_MMX
65
    has_vectors = mm_support();
66
    shuffle = 1;
67
    if (has_vectors & MM_3DNOWEXT) {
68
        /* 3DNowEx for K7/K8 */
69
        s->imdct_calc = ff_imdct_calc_3dn2;
70
        s->fft_calc = ff_fft_calc_3dn2;
71
    } else if (has_vectors & MM_3DNOW) {
72
        /* 3DNow! for K6-2/3 */
73
        s->fft_calc = ff_fft_calc_3dn;
74
    } else if (has_vectors & MM_SSE) {
75
        /* SSE for P3/P4 */
76
        s->imdct_calc = ff_imdct_calc_sse;
77
        s->fft_calc = ff_fft_calc_sse;
78
    } else {
79
        shuffle = 0;
80
    }
81
#elif defined HAVE_ALTIVEC && !defined ALTIVEC_USE_REFERENCE_C_CODE
82
    has_vectors = mm_support();
83
    if (has_vectors & MM_ALTIVEC) {
84
        s->fft_calc = ff_fft_calc_altivec;
85
        shuffle = 1;
86
    }
87
#endif
88

    
89
    /* compute constant table for HAVE_SSE version */
90
        if (shuffle) {
91
            int np, nblocks, np2, l;
92
            FFTComplex *q;
93

    
94
            np = 1 << nbits;
95
            nblocks = np >> 3;
96
            np2 = np >> 1;
97
            s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
98
            if (!s->exptab1)
99
                goto fail;
100
            q = s->exptab1;
101
            do {
102
                for(l = 0; l < np2; l += 2 * nblocks) {
103
                    *q++ = s->exptab[l];
104
                    *q++ = s->exptab[l + nblocks];
105

    
106
                    q->re = -s->exptab[l].im;
107
                    q->im = s->exptab[l].re;
108
                    q++;
109
                    q->re = -s->exptab[l + nblocks].im;
110
                    q->im = s->exptab[l + nblocks].re;
111
                    q++;
112
                }
113
                nblocks = nblocks >> 1;
114
            } while (nblocks != 0);
115
            av_freep(&s->exptab);
116
        }
117

    
118
    /* compute bit reverse table */
119

    
120
    for(i=0;i<n;i++) {
121
        m=0;
122
        for(j=0;j<nbits;j++) {
123
            m |= ((i >> j) & 1) << (nbits-j-1);
124
        }
125
        s->revtab[i]=m;
126
    }
127
    return 0;
128
 fail:
129
    av_freep(&s->revtab);
130
    av_freep(&s->exptab);
131
    av_freep(&s->exptab1);
132
    return -1;
133
}
134

    
135
/* butter fly op */
136
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
137
{\
138
  FFTSample ax, ay, bx, by;\
139
  bx=pre1;\
140
  by=pim1;\
141
  ax=qre1;\
142
  ay=qim1;\
143
  pre = (bx + ax);\
144
  pim = (by + ay);\
145
  qre = (bx - ax);\
146
  qim = (by - ay);\
147
}
148

    
149
#define MUL16(a,b) ((a) * (b))
150

    
151
#define CMUL(pre, pim, are, aim, bre, bim) \
152
{\
153
   pre = (MUL16(are, bre) - MUL16(aim, bim));\
154
   pim = (MUL16(are, bim) + MUL16(bre, aim));\
155
}
156

    
157
/**
158
 * Do a complex FFT with the parameters defined in ff_fft_init(). The
159
 * input data must be permuted before with s->revtab table. No
160
 * 1.0/sqrt(n) normalization is done.
161
 */
162
void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
163
{
164
    int ln = s->nbits;
165
    int j, np, np2;
166
    int nblocks, nloops;
167
    register FFTComplex *p, *q;
168
    FFTComplex *exptab = s->exptab;
169
    int l;
170
    FFTSample tmp_re, tmp_im;
171

    
172
    np = 1 << ln;
173

    
174
    /* pass 0 */
175

    
176
    p=&z[0];
177
    j=(np >> 1);
178
    do {
179
        BF(p[0].re, p[0].im, p[1].re, p[1].im,
180
           p[0].re, p[0].im, p[1].re, p[1].im);
181
        p+=2;
182
    } while (--j != 0);
183

    
184
    /* pass 1 */
185

    
186

    
187
    p=&z[0];
188
    j=np >> 2;
189
    if (s->inverse) {
190
        do {
191
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
192
               p[0].re, p[0].im, p[2].re, p[2].im);
193
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
194
               p[1].re, p[1].im, -p[3].im, p[3].re);
195
            p+=4;
196
        } while (--j != 0);
197
    } else {
198
        do {
199
            BF(p[0].re, p[0].im, p[2].re, p[2].im,
200
               p[0].re, p[0].im, p[2].re, p[2].im);
201
            BF(p[1].re, p[1].im, p[3].re, p[3].im,
202
               p[1].re, p[1].im, p[3].im, -p[3].re);
203
            p+=4;
204
        } while (--j != 0);
205
    }
206
    /* pass 2 .. ln-1 */
207

    
208
    nblocks = np >> 3;
209
    nloops = 1 << 2;
210
    np2 = np >> 1;
211
    do {
212
        p = z;
213
        q = z + nloops;
214
        for (j = 0; j < nblocks; ++j) {
215
            BF(p->re, p->im, q->re, q->im,
216
               p->re, p->im, q->re, q->im);
217

    
218
            p++;
219
            q++;
220
            for(l = nblocks; l < np2; l += nblocks) {
221
                CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
222
                BF(p->re, p->im, q->re, q->im,
223
                   p->re, p->im, tmp_re, tmp_im);
224
                p++;
225
                q++;
226
            }
227

    
228
            p += nloops;
229
            q += nloops;
230
        }
231
        nblocks = nblocks >> 1;
232
        nloops = nloops << 1;
233
    } while (nblocks != 0);
234
}
235

    
236
/**
237
 * Do the permutation needed BEFORE calling ff_fft_calc()
238
 */
239
void ff_fft_permute(FFTContext *s, FFTComplex *z)
240
{
241
    int j, k, np;
242
    FFTComplex tmp;
243
    const uint16_t *revtab = s->revtab;
244

    
245
    /* reverse */
246
    np = 1 << s->nbits;
247
    for(j=0;j<np;j++) {
248
        k = revtab[j];
249
        if (k < j) {
250
            tmp = z[k];
251
            z[k] = z[j];
252
            z[j] = tmp;
253
        }
254
    }
255
}
256

    
257
void ff_fft_end(FFTContext *s)
258
{
259
    av_freep(&s->revtab);
260
    av_freep(&s->exptab);
261
    av_freep(&s->exptab1);
262
}
263