Statistics
| Branch: | Revision:

ffmpeg / libavcodec / mpegaudiodec.c @ daf4cd9a

History | View | Annotate | Download (87.5 KB)

1
/*
2
 * MPEG Audio decoder
3
 * Copyright (c) 2001, 2002 Fabrice Bellard.
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
 */
19

    
20
/**
21
 * @file mpegaudiodec.c
22
 * MPEG Audio decoder.
23
 */
24

    
25
//#define DEBUG
26
#include "avcodec.h"
27
#include "bitstream.h"
28
#include "dsputil.h"
29

    
30
/*
31
 * TODO:
32
 *  - in low precision mode, use more 16 bit multiplies in synth filter
33
 *  - test lsf / mpeg25 extensively.
34
 */
35

    
36
/* define USE_HIGHPRECISION to have a bit exact (but slower) mpeg
37
   audio decoder */
38
#ifdef CONFIG_MPEGAUDIO_HP
39
#   define USE_HIGHPRECISION
40
#endif
41

    
42
#include "mpegaudio.h"
43

    
44
#define FRAC_ONE    (1 << FRAC_BITS)
45

    
46
#ifdef ARCH_X86
47
#   define MULL(ra, rb) \
48
        ({ int rt, dummy; asm (\
49
            "imull %3               \n\t"\
50
            "shrdl %4, %%edx, %%eax \n\t"\
51
            : "=a"(rt), "=d"(dummy)\
52
            : "a" (ra), "rm" (rb), "i"(FRAC_BITS));\
53
         rt; })
54
#   define MUL64(ra, rb) \
55
        ({ int64_t rt; asm ("imull %2\n\t" : "=A"(rt) : "a" (ra), "g" (rb)); rt; })
56
#   define MULH(ra, rb) \
57
        ({ int rt, dummy; asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" (ra), "rm" (rb)); rt; })
58
#elif defined(ARCH_ARMV4L)
59
#   define MULL(a, b) \
60
        ({  int lo, hi;\
61
            asm("smull %0, %1, %2, %3     \n\t"\
62
                "mov   %0, %0,     lsr %4\n\t"\
63
                "add   %1, %0, %1, lsl %5\n\t"\
64
            : "=&r"(lo), "=&r"(hi)\
65
            : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));\
66
         hi; })
67
#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
68
#   define MULH(a, b) ({ int lo, hi; asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a)); hi; })
69
#else
70
#   define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
71
#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
72
//#define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32) //gcc 3.4 creates an incredibly bloated mess out of this
73
static always_inline int MULH(int a, int b){
74
    return ((int64_t)(a) * (int64_t)(b))>>32;
75
}
76
#endif
77
/* convert a floating point constant to FRAC_BITS fixed point (truncating) */
#define FIX(a)   ((int)((a) * FRAC_ONE))
/* same with rounding. WARNING: only correct for positive numbers */
#define FIXR(a)   ((int)((a) * FRAC_ONE + 0.5))
/* drop FRAC_BITS fractional bits from a fixed point value, rounding */
#define FRAC_RND(a) (((a) + (FRAC_ONE/2)) >> FRAC_BITS)

/* convert a floating point constant to 32 fractional bits, with rounding;
   the result is meant to be used as the second operand of MULH() */
#define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
83

    
84
/****************/
85

    
86
#define HEADER_SIZE 4
87
#define BACKSTEP_SIZE 512
88

    
89
struct GranuleDef;
90

    
91
typedef struct MPADecodeContext {
92
    uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE];        /* input buffer */
93
    int inbuf_index;
94
    uint8_t *inbuf_ptr, *inbuf;
95
    int frame_size;
96
    int free_format_frame_size; /* frame size in case of free format
97
                                   (zero if currently unknown) */
98
    /* next header (used in free format parsing) */
99
    uint32_t free_format_next_header;
100
    int error_protection;
101
    int layer;
102
    int sample_rate;
103
    int sample_rate_index; /* between 0 and 8 */
104
    int bit_rate;
105
    int old_frame_size;
106
    GetBitContext gb;
107
    int nb_channels;
108
    int mode;
109
    int mode_ext;
110
    int lsf;
111
    MPA_INT synth_buf[MPA_MAX_CHANNELS][512 * 2] __attribute__((aligned(16)));
112
    int synth_buf_offset[MPA_MAX_CHANNELS];
113
    int32_t sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT] __attribute__((aligned(16)));
114
    int32_t mdct_buf[MPA_MAX_CHANNELS][SBLIMIT * 18]; /* previous samples, for layer 3 MDCT */
115
#ifdef DEBUG
116
    int frame_count;
117
#endif
118
    void (*compute_antialias)(struct MPADecodeContext *s, struct GranuleDef *g);
119
    int adu_mode; ///< 0 for standard mp3, 1 for adu formatted mp3
120
    unsigned int dither_state;
121
} MPADecodeContext;
122

    
123
/**
124
 * Context for MP3On4 decoder
125
 */
126
typedef struct MP3On4DecodeContext {
127
    int frames;   ///< number of mp3 frames per block (number of mp3 decoder instances)
128
    int chan_cfg; ///< channel config number
129
    MPADecodeContext *mp3decctx[5]; ///< MPADecodeContext for every decoder instance
130
} MP3On4DecodeContext;
131

    
132
/* layer 3 "granule" */
133
typedef struct GranuleDef {
134
    uint8_t scfsi;
135
    int part2_3_length;
136
    int big_values;
137
    int global_gain;
138
    int scalefac_compress;
139
    uint8_t block_type;
140
    uint8_t switch_point;
141
    int table_select[3];
142
    int subblock_gain[3];
143
    uint8_t scalefac_scale;
144
    uint8_t count1table_select;
145
    int region_size[3]; /* number of huffman codes in each region */
146
    int preflag;
147
    int short_start, long_end; /* long/short band indexes */
148
    uint8_t scale_factors[40];
149
    int32_t sb_hybrid[SBLIMIT * 18]; /* 576 samples */
150
} GranuleDef;
151

    
152
/* bit flags for the mode_ext field */
#define MODE_EXT_MS_STEREO 2
#define MODE_EXT_I_STEREO  1
154

    
155
/* layer 3 huffman tables */
156
typedef struct HuffTable {
    int xsize;              /* table is xsize * xsize entries, stored row by row */
    const uint8_t *bits;    /* code length of each entry */
    const uint16_t *codes;  /* code value of each entry */
} HuffTable;
161

    
162
#include "mpegaudiodectab.h"
163

    
164
static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
165
static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
166

    
167
/* vlc structure for decoding layer 3 huffman tables */
168
static VLC huff_vlc[16];
169
static VLC huff_quad_vlc[2];
170
/* computed from band_size_long */
171
static uint16_t band_index_long[9][23];
172
/* XXX: free when all decoders are closed */
173
/* entries in table_4_3_exp / table_4_3_value: 4 per value, one for each
   (exponent & 3); fully parenthesized so it is safe inside any expression */
#define TABLE_4_3_SIZE ((8191 + 16)*4)
174
/* n^(4/3) in exponent / mantissa form, allocated and filled by decode_init() */
static int8_t  *table_4_3_exp;
static uint32_t *table_4_3_value;
/* intensity stereo coef table */
static int32_t is_table[2][16];
static int32_t is_table_lsf[2][2][16];
/* antialias coefficients: fixed point (FIXHR) and float versions */
static int32_t csa_table[8][4];
static float csa_table_float[8][4];
/* mdct windows; entries 4..7 are 0..3 with every odd coefficient negated */
static int32_t mdct_win[8][36];

/* lower 2 bits: modulo 3, higher bits: shift */
static uint16_t scale_factor_modshift[64];
/* [i][j]:  2 * 2^(-j/3) * FRAC_ONE * 2^(i+2) / (2^(i+2) - 1) */
static int32_t scale_factor_mult[15][3];
187
/* mult table for layer 2 group quantization */
188

    
189
#define SCALE_GEN(v) \
190
{ FIXR(1.0 * (v)), FIXR(0.7937005259 * (v)), FIXR(0.6299605249 * (v)) }
191

    
192
static const int32_t scale_factor_mult2[3][3] = {
193
    SCALE_GEN(4.0 / 3.0), /* 3 steps */
194
    SCALE_GEN(4.0 / 5.0), /* 5 steps */
195
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
196
};
197

    
198
void ff_mpa_synth_init(MPA_INT *window);
/* synthesis window, filled by ff_mpa_synth_init() from decode_init() */
static MPA_INT window[512] __attribute__((aligned(16)));
200

    
201
/* layer 1 unscaling */
202
/* n = number of bits of the mantissa minus 1 */
203
static inline int l1_unscale(int n, int mant, int scale_factor)
204
{
205
    int shift, mod;
206
    int64_t val;
207

    
208
    shift = scale_factor_modshift[scale_factor];
209
    mod = shift & 3;
210
    shift >>= 2;
211
    val = MUL64(mant + (-1 << n) + 1, scale_factor_mult[n-1][mod]);
212
    shift += n;
213
    /* NOTE: at this point, 1 <= shift >= 21 + 15 */
214
    return (int)((val + (1LL << (shift - 1))) >> shift);
215
}
216

    
217
static inline int l2_unscale_group(int steps, int mant, int scale_factor)
218
{
219
    int shift, mod, val;
220

    
221
    shift = scale_factor_modshift[scale_factor];
222
    mod = shift & 3;
223
    shift >>= 2;
224

    
225
    val = (mant - (steps >> 1)) * scale_factor_mult2[steps >> 2][mod];
226
    /* NOTE: at this point, 0 <= shift <= 21 */
227
    if (shift > 0)
228
        val = (val + (1 << (shift - 1))) >> shift;
229
    return val;
230
}
231

    
232
/* compute value^(4/3) * 2^(exponent/4). It normalized to FRAC_BITS */
233
static inline int l3_unscale(int value, int exponent)
234
{
235
    unsigned int m;
236
    int e;
237

    
238
    e = table_4_3_exp  [4*value + (exponent&3)];
239
    m = table_4_3_value[4*value + (exponent&3)];
240
    e -= (exponent >> 2);
241
    assert(e>=1);
242
    if (e > 31)
243
        return 0;
244
    m = (m + (1 << (e-1))) >> e;
245

    
246
    return m;
247
}
248

    
249
/* all integer n^(4/3) computation code */
250
/* number of series coefficients kept in dev_4_3_coefs[] */
#define DEV_ORDER 13

/* fixed point format used by the integer n^(4/3) code */
#define POW_FRAC_BITS 24
#define POW_FRAC_ONE    (1 << POW_FRAC_BITS)
#define POW_FIX(a)   ((int)((a) * POW_FRAC_ONE))
#define POW_MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> POW_FRAC_BITS)

static int dev_4_3_coefs[DEV_ORDER];

#if 0 /* unused */
static int pow_mult3[3] = {
    POW_FIX(1.0),
    POW_FIX(1.25992104989487316476),
    POW_FIX(1.58740105196819947474),
};
#endif

/**
 * Fill dev_4_3_coefs[] with the recurrence
 *   c[i] = c[i-1] * (4/3 - i) / (i + 1),  starting from 1.0,
 * evaluated in POW_FRAC_BITS fixed point.
 */
static void int_pow_init(void)
{
    int i, a;

    a = POW_FIX(1.0);
    for(i=0;i<DEV_ORDER;i++) {
        a = POW_MULL(a, POW_FIX(4.0 / 3.0) - i * POW_FIX(1.0)) / (i + 1);
        dev_4_3_coefs[i] = a;
    }
}
277

    
278
#if 0 /* unused, remove? */
/* return the mantissa and the binary exponent */
/* NOTE: disabled code; it also depends on pow_mult3[], which is disabled too */
static int int_pow(int i, int *exp_ptr)
{
    int e, er, eq, j;
    int a, a1;

    /* renormalize */
    a = i;
    e = POW_FRAC_BITS;
    while (a < (1 << (POW_FRAC_BITS - 1))) {
        a = a << 1;
        e--;
    }
    a -= (1 << POW_FRAC_BITS);
    a1 = 0;
    for(j = DEV_ORDER - 1; j >= 0; j--)
        a1 = POW_MULL(a, dev_4_3_coefs[j] + a1);
    a = (1 << POW_FRAC_BITS) + a1;
    /* exponent compute (exact) */
    e = e * 4;
    er = e % 3;
    eq = e / 3;
    a = POW_MULL(a, pow_mult3[er]);
    while (a >= 2 * POW_FRAC_ONE) {
        a = a >> 1;
        eq++;
    }
    /* convert to float */
    while (a < POW_FRAC_ONE) {
        a = a << 1;
        eq--;
    }
    /* now POW_FRAC_ONE <= a < 2 * POW_FRAC_ONE */
#if POW_FRAC_BITS > FRAC_BITS
    a = (a + (1 << (POW_FRAC_BITS - FRAC_BITS - 1))) >> (POW_FRAC_BITS - FRAC_BITS);
    /* correct overflow */
    if (a >= 2 * (1 << FRAC_BITS)) {
        a = a >> 1;
        eq++;
    }
#endif
    *exp_ptr = eq;
    return a;
}
#endif
324

    
325
/**
 * Codec init callback.
 *
 * Selects the output sample format and the antialias implementation, builds
 * the shared static tables on the first call for which avctx->parse_only is
 * not set, and resets the per-stream input buffer state.
 *
 * @param avctx codec context; avctx->priv_data is the MPADecodeContext
 * @return 0 on success, -1 if a static table could not be allocated
 */
static int decode_init(AVCodecContext * avctx)
{
    MPADecodeContext *s = avctx->priv_data;
    static int init=0;      /* set once the shared tables have been built */
    int i, j, k;

#if defined(USE_HIGHPRECISION) && defined(CONFIG_AUDIO_NONSHORT)
    avctx->sample_fmt= SAMPLE_FMT_S32;
#else
    avctx->sample_fmt= SAMPLE_FMT_S16;
#endif

    if(avctx->antialias_algo != FF_AA_FLOAT)
        s->compute_antialias= compute_antialias_integer;
    else
        s->compute_antialias= compute_antialias_float;

    if (!init && !avctx->parse_only) {
        /* scale factors table for layer 1/2 */
        for(i=0;i<64;i++) {
            int shift, mod;
            /* 1.0 (i = 3) is normalized to 2 ^ FRAC_BITS */
            shift = (i / 3);
            mod = i % 3;
            scale_factor_modshift[i] = mod | (shift << 2);
        }

        /* scale factor multiply for layer 1 */
        for(i=0;i<15;i++) {
            int n, norm;
            n = i + 2;
            norm = ((int64_t_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
            scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
            scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
            scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
            dprintf("%d: norm=%x s=%x %x %x\n",
                    i, norm,
                    scale_factor_mult[i][0],
                    scale_factor_mult[i][1],
                    scale_factor_mult[i][2]);
        }

        ff_mpa_synth_init(window);

        /* huffman decode tables */
        for(i=1;i<16;i++) {
            const HuffTable *h = &mpa_huff_tables[i];
            int xsize, x, y;
            uint8_t tmp_bits [256];
            uint16_t tmp_codes[256];

            memset(tmp_bits , 0, sizeof(tmp_bits ));
            memset(tmp_codes, 0, sizeof(tmp_codes));

            xsize = h->xsize;

            /* spread the xsize * xsize table over a 16 x 16 grid so that the
               vlc symbol is (x << 4) | y */
            j = 0;
            for(x=0;x<xsize;x++) {
                for(y=0;y<xsize;y++){
                    tmp_bits [(x << 4) | y]= h->bits [j  ];
                    tmp_codes[(x << 4) | y]= h->codes[j++];
                }
            }

            /* XXX: fail test */
            init_vlc(&huff_vlc[i], 8, 256,
                     tmp_bits, 1, 1, tmp_codes, 2, 2, 1);
        }
        for(i=0;i<2;i++) {
            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
                     mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
        }

        /* running sums of band_size_long: start index of each long band */
        for(i=0;i<9;i++) {
            k = 0;
            for(j=0;j<22;j++) {
                band_index_long[i][j] = k;
                k += band_size_long[i][j];
            }
            band_index_long[i][22] = k;
        }

        /* compute n ^ (4/3) and store it in mantissa/exp format */
        table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
        if(!table_4_3_exp)
            return -1;
        table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
        if(!table_4_3_value)
            return -1;

        int_pow_init();
        /* entry i holds (i/4)^(4/3) * 2^((i&3)/4); entry 0 stays zeroed */
        for(i=1;i<TABLE_4_3_SIZE;i++) {
            double f, fm;
            int e, m;
            f = pow((double)(i/4), 4.0 / 3.0) * pow(2, (i&3)*0.25);
            fm = frexp(f, &e);
            m = (uint32_t)(fm*(1LL<<31) + 0.5);
            e+= FRAC_BITS - 31 + 5;

            /* normalized to FRAC_BITS */
            table_4_3_value[i] = m;
            table_4_3_exp[i] = -e;
        }

        for(i=0;i<7;i++) {
            float f;
            int v;
            if (i != 6) {
                f = tan((double)i * M_PI / 12.0);
                v = FIXR(f / (1.0 + f));
            } else {
                v = FIXR(1.0);
            }
            is_table[0][i] = v;
            is_table[1][6 - i] = v;
        }
        /* invalid values */
        for(i=7;i<16;i++)
            is_table[0][i] = is_table[1][i] = 0;

        for(i=0;i<16;i++) {
            double f;
            int e, side;

            for(j=0;j<2;j++) {
                e = -(j + 1) * ((i + 1) >> 1);
                f = pow(2.0, e / 4.0);
                side = i & 1;
                is_table_lsf[j][side ^ 1][i] = FIXR(f);
                is_table_lsf[j][side][i] = FIXR(1.0);
                dprintf("is_table_lsf %d %d: %x %x\n",
                        i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
            }
        }

        /* antialias coefficients, fixed point (scaled by 1/4) and float */
        for(i=0;i<8;i++) {
            float ci, cs, ca;
            ci = ci_table[i];
            cs = 1.0 / sqrt(1.0 + ci * ci);
            ca = cs * ci;
            csa_table[i][0] = FIXHR(cs/4);
            csa_table[i][1] = FIXHR(ca/4);
            csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
            csa_table_float[i][0] = cs;
            csa_table_float[i][1] = ca;
            csa_table_float[i][2] = ca + cs;
            csa_table_float[i][3] = ca - cs;
        }

        /* compute mdct windows */
        for(i=0;i<36;i++) {
            for(j=0; j<4; j++){
                double d;

                /* window 2 only keeps every third point, stored at i/3 */
                if(j==2 && i%3 != 1)
                    continue;

                d= sin(M_PI * (i + 0.5) / 36.0);
                if(j==1){
                    if     (i>=30) d= 0;
                    else if(i>=24) d= sin(M_PI * (i - 18 + 0.5) / 12.0);
                    else if(i>=18) d= 1;
                }else if(j==3){
                    if     (i<  6) d= 0;
                    else if(i< 12) d= sin(M_PI * (i -  6 + 0.5) / 12.0);
                    else if(i< 18) d= 1;
                }
                //merge last stage of imdct into the window coefficients
                d*= 0.5 / cos(M_PI*(2*i + 19)/72);

                if(j==2)
                    mdct_win[j][i/3] = FIXHR((d / (1<<5)));
                else
                    mdct_win[j][i  ] = FIXHR((d / (1<<5)));
            }
        }

        /* NOTE: we do frequency inversion after the MDCT by changing
           the sign of the right window coefs */
        for(j=0;j<4;j++) {
            for(i=0;i<36;i+=2) {
                mdct_win[j + 4][i] = mdct_win[j][i];
                mdct_win[j + 4][i + 1] = -mdct_win[j][i + 1];
            }
        }

#if defined(DEBUG)
        for(j=0;j<8;j++) {
            av_log(avctx, AV_LOG_DEBUG, "win%d=\n", j);
            for(i=0;i<36;i++)
                av_log(avctx, AV_LOG_DEBUG, "%f, ", (double)mdct_win[j][i] / FRAC_ONE);
            av_log(avctx, AV_LOG_DEBUG, "\n");
        }
#endif
        init = 1;
    }

    /* per-stream state: start with buffer 0, leaving BACKSTEP_SIZE bytes of
       room in front of the frame data */
    s->inbuf_index = 0;
    s->inbuf = &s->inbuf1[s->inbuf_index][BACKSTEP_SIZE];
    s->inbuf_ptr = s->inbuf;
#ifdef DEBUG
    s->frame_count = 0;
#endif
    if (avctx->codec_id == CODEC_ID_MP3ADU)
        s->adu_mode = 1;
    return 0;
}
539

    
540
/* tab[i][j] = 1.0 / (2.0 * cos(pi*(2*k+1) / 2^(6 - j))) */
541

    
542
/* cos(i*pi/64) */
543

    
544
/*
 * DCT32 twiddle factors as FIXHR constants. Values above 1.0 are
 * pre-divided by a larger power of two; the matching BF() call in dct32()
 * passes the log2 of that divisor as its shift argument.
 */
#define COS0_0  FIXHR(0.50060299823519630134/2)
#define COS0_1  FIXHR(0.50547095989754365998/2)
#define COS0_2  FIXHR(0.51544730992262454697/2)
#define COS0_3  FIXHR(0.53104259108978417447/2)
#define COS0_4  FIXHR(0.55310389603444452782/2)
#define COS0_5  FIXHR(0.58293496820613387367/2)
#define COS0_6  FIXHR(0.62250412303566481615/2)
#define COS0_7  FIXHR(0.67480834145500574602/2)
#define COS0_8  FIXHR(0.74453627100229844977/2)
#define COS0_9  FIXHR(0.83934964541552703873/2)
#define COS0_10 FIXHR(0.97256823786196069369/2)
#define COS0_11 FIXHR(1.16943993343288495515/4)
#define COS0_12 FIXHR(1.48416461631416627724/4)
#define COS0_13 FIXHR(2.05778100995341155085/8)
#define COS0_14 FIXHR(3.40760841846871878570/8)
#define COS0_15 FIXHR(10.19000812354805681150/32)

#define COS1_0 FIXHR(0.50241928618815570551/2)
#define COS1_1 FIXHR(0.52249861493968888062/2)
#define COS1_2 FIXHR(0.56694403481635770368/2)
#define COS1_3 FIXHR(0.64682178335999012954/2)
#define COS1_4 FIXHR(0.78815462345125022473/2)
#define COS1_5 FIXHR(1.06067768599034747134/4)
#define COS1_6 FIXHR(1.72244709823833392782/4)
#define COS1_7 FIXHR(5.10114861868916385802/16)

#define COS2_0 FIXHR(0.50979557910415916894/2)
#define COS2_1 FIXHR(0.60134488693504528054/2)
#define COS2_2 FIXHR(0.89997622313641570463/2)
#define COS2_3 FIXHR(2.56291544774150617881/8)

#define COS3_0 FIXHR(0.54119610014619698439/2)
#define COS3_1 FIXHR(1.30656296487637652785/4)

#define COS4_0 FIXHR(0.70710678118654752439/2)
    
580
/* butterfly operator */
581
/*
 * Butterfly on tab[a] / tab[b]: tab[a] becomes the sum, tab[b] the
 * difference multiplied by c. s is the left shift that undoes the scaling
 * of the FIXHR constant c. Needs int tmp0, tmp1 and tab[] in scope.
 * All statement macros are wrapped in do { } while (0) so that they act as
 * a single statement when followed by a semicolon.
 */
#define BF(a, b, c, s)\
do {\
    tmp0 = tab[a] + tab[b];\
    tmp1 = tab[a] - tab[b];\
    tab[a] = tmp0;\
    tab[b] = MULH(tmp1<<(s), c);\
} while (0)

#define BF1(a, b, c, d)\
do {\
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
    tab[c] += tab[d];\
} while (0)

#define BF2(a, b, c, d)\
do {\
    BF(a, b, COS4_0, 1);\
    BF(c, d,-COS4_0, 1);\
    tab[c] += tab[d];\
    tab[a] += tab[c];\
    tab[c] += tab[b];\
    tab[b] += tab[d];\
} while (0)

#define ADD(a, b) (tab[a] += tab[b])
607

    
608
/* DCT32 without 1/sqrt(2) coef zero scaling. */
609
static void dct32(int32_t *out, int32_t *tab)
610
{
611
    int tmp0, tmp1;
612

    
613
    /* pass 1 */
614
    BF( 0, 31, COS0_0 , 1);
615
    BF(15, 16, COS0_15, 5);
616
    /* pass 2 */
617
    BF( 0, 15, COS1_0 , 1);
618
    BF(16, 31,-COS1_0 , 1);
619
    /* pass 1 */
620
    BF( 7, 24, COS0_7 , 1);
621
    BF( 8, 23, COS0_8 , 1);
622
    /* pass 2 */
623
    BF( 7,  8, COS1_7 , 4);
624
    BF(23, 24,-COS1_7 , 4);
625
    /* pass 3 */
626
    BF( 0,  7, COS2_0 , 1);
627
    BF( 8, 15,-COS2_0 , 1);
628
    BF(16, 23, COS2_0 , 1);
629
    BF(24, 31,-COS2_0 , 1);
630
    /* pass 1 */
631
    BF( 3, 28, COS0_3 , 1);
632
    BF(12, 19, COS0_12, 2);
633
    /* pass 2 */
634
    BF( 3, 12, COS1_3 , 1);
635
    BF(19, 28,-COS1_3 , 1);
636
    /* pass 1 */
637
    BF( 4, 27, COS0_4 , 1);
638
    BF(11, 20, COS0_11, 2);
639
    /* pass 2 */
640
    BF( 4, 11, COS1_4 , 1);
641
    BF(20, 27,-COS1_4 , 1);
642
    /* pass 3 */
643
    BF( 3,  4, COS2_3 , 3);
644
    BF(11, 12,-COS2_3 , 3);
645
    BF(19, 20, COS2_3 , 3);
646
    BF(27, 28,-COS2_3 , 3);
647
    /* pass 4 */
648
    BF( 0,  3, COS3_0 , 1);
649
    BF( 4,  7,-COS3_0 , 1);
650
    BF( 8, 11, COS3_0 , 1);
651
    BF(12, 15,-COS3_0 , 1);
652
    BF(16, 19, COS3_0 , 1);
653
    BF(20, 23,-COS3_0 , 1);
654
    BF(24, 27, COS3_0 , 1);
655
    BF(28, 31,-COS3_0 , 1);
656

    
657

    
658

    
659
    /* pass 1 */
660
    BF( 1, 30, COS0_1 , 1);
661
    BF(14, 17, COS0_14, 3);
662
    /* pass 2 */
663
    BF( 1, 14, COS1_1 , 1);
664
    BF(17, 30,-COS1_1 , 1);
665
    /* pass 1 */
666
    BF( 6, 25, COS0_6 , 1);
667
    BF( 9, 22, COS0_9 , 1);
668
    /* pass 2 */
669
    BF( 6,  9, COS1_6 , 2);
670
    BF(22, 25,-COS1_6 , 2);
671
    /* pass 3 */
672
    BF( 1,  6, COS2_1 , 1);
673
    BF( 9, 14,-COS2_1 , 1);
674
    BF(17, 22, COS2_1 , 1);
675
    BF(25, 30,-COS2_1 , 1);
676

    
677
    /* pass 1 */
678
    BF( 2, 29, COS0_2 , 1);
679
    BF(13, 18, COS0_13, 3);
680
    /* pass 2 */
681
    BF( 2, 13, COS1_2 , 1);
682
    BF(18, 29,-COS1_2 , 1);
683
    /* pass 1 */
684
    BF( 5, 26, COS0_5 , 1);
685
    BF(10, 21, COS0_10, 1);
686
    /* pass 2 */
687
    BF( 5, 10, COS1_5 , 2);
688
    BF(21, 26,-COS1_5 , 2);
689
    /* pass 3 */
690
    BF( 2,  5, COS2_2 , 1);
691
    BF(10, 13,-COS2_2 , 1);
692
    BF(18, 21, COS2_2 , 1);
693
    BF(26, 29,-COS2_2 , 1);
694
    /* pass 4 */
695
    BF( 1,  2, COS3_1 , 2);
696
    BF( 5,  6,-COS3_1 , 2);
697
    BF( 9, 10, COS3_1 , 2);
698
    BF(13, 14,-COS3_1 , 2);
699
    BF(17, 18, COS3_1 , 2);
700
    BF(21, 22,-COS3_1 , 2);
701
    BF(25, 26, COS3_1 , 2);
702
    BF(29, 30,-COS3_1 , 2);
703

    
704
    /* pass 5 */
705
    BF1( 0,  1,  2,  3);
706
    BF2( 4,  5,  6,  7);
707
    BF1( 8,  9, 10, 11);
708
    BF2(12, 13, 14, 15);
709
    BF1(16, 17, 18, 19);
710
    BF2(20, 21, 22, 23);
711
    BF1(24, 25, 26, 27);
712
    BF2(28, 29, 30, 31);
713

    
714
    /* pass 6 */
715

    
716
    ADD( 8, 12);
717
    ADD(12, 10);
718
    ADD(10, 14);
719
    ADD(14,  9);
720
    ADD( 9, 13);
721
    ADD(13, 11);
722
    ADD(11, 15);
723

    
724
    out[ 0] = tab[0];
725
    out[16] = tab[1];
726
    out[ 8] = tab[2];
727
    out[24] = tab[3];
728
    out[ 4] = tab[4];
729
    out[20] = tab[5];
730
    out[12] = tab[6];
731
    out[28] = tab[7];
732
    out[ 2] = tab[8];
733
    out[18] = tab[9];
734
    out[10] = tab[10];
735
    out[26] = tab[11];
736
    out[ 6] = tab[12];
737
    out[22] = tab[13];
738
    out[14] = tab[14];
739
    out[30] = tab[15];
740

    
741
    ADD(24, 28);
742
    ADD(28, 26);
743
    ADD(26, 30);
744
    ADD(30, 25);
745
    ADD(25, 29);
746
    ADD(29, 27);
747
    ADD(27, 31);
748

    
749
    out[ 1] = tab[16] + tab[24];
750
    out[17] = tab[17] + tab[25];
751
    out[ 9] = tab[18] + tab[26];
752
    out[25] = tab[19] + tab[27];
753
    out[ 5] = tab[20] + tab[28];
754
    out[21] = tab[21] + tab[29];
755
    out[13] = tab[22] + tab[30];
756
    out[29] = tab[23] + tab[31];
757
    out[ 3] = tab[24] + tab[20];
758
    out[19] = tab[25] + tab[21];
759
    out[11] = tab[26] + tab[22];
760
    out[27] = tab[27] + tab[23];
761
    out[ 7] = tab[28] + tab[18];
762
    out[23] = tab[29] + tab[19];
763
    out[15] = tab[30] + tab[17];
764
    out[31] = tab[31];
765
}
766

    
767
#if FRAC_BITS <= 15
768

    
769
static inline int round_sample(int *sum)
770
{
771
    int sum1;
772
    sum1 = (*sum) >> OUT_SHIFT;
773
    *sum &= (1<<OUT_SHIFT)-1;
774
    if (sum1 < OUT_MIN)
775
        sum1 = OUT_MIN;
776
    else if (sum1 > OUT_MAX)
777
        sum1 = OUT_MAX;
778
    return sum1;
779
}
780

    
781
#   if defined(ARCH_POWERPC_405)
782
        /* signed 16x16 -> 32 multiply add accumulate */
783
#       define MACS(rt, ra, rb) \
784
            asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
785

    
786
        /* signed 16x16 -> 32 multiply */
787
#       define MULS(ra, rb) \
788
            ({ int __rt; asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); __rt; })
789
#   else
790
        /* signed 16x16 -> 32 multiply add accumulate */
791
#       define MACS(rt, ra, rb) rt += (ra) * (rb)
792

    
793
        /* signed 16x16 -> 32 multiply */
794
#       define MULS(ra, rb) ((ra) * (rb))
795
#   endif
796
#else
797

    
798
static inline int round_sample(int64_t *sum)
799
{
800
    int sum1;
801
    sum1 = (int)((*sum) >> OUT_SHIFT);
802
    *sum &= (1<<OUT_SHIFT)-1;
803
    if (sum1 < OUT_MIN)
804
        sum1 = OUT_MIN;
805
    else if (sum1 > OUT_MAX)
806
        sum1 = OUT_MAX;
807
    return sum1;
808
}
809

    
810
#   define MULS(ra, rb) MUL64(ra, rb)
811
#endif
812

    
813
/* accumulate (op is += or -=) eight window * sample products, the taps
   being 64 entries apart, into sum */
#define SUM8(sum, op, w, p) \
do {                                            \
    sum op MULS((w)[0 * 64], (p)[0 * 64]);\
    sum op MULS((w)[1 * 64], (p)[1 * 64]);\
    sum op MULS((w)[2 * 64], (p)[2 * 64]);\
    sum op MULS((w)[3 * 64], (p)[3 * 64]);\
    sum op MULS((w)[4 * 64], (p)[4 * 64]);\
    sum op MULS((w)[5 * 64], (p)[5 * 64]);\
    sum op MULS((w)[6 * 64], (p)[6 * 64]);\
    sum op MULS((w)[7 * 64], (p)[7 * 64]);\
} while (0)

/* same as SUM8 for two accumulators at once, with each sample loaded only
   once and multiplied by one coefficient from w1 and one from w2 */
#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
do {                                            \
    int smp_;\
    smp_ = (p)[0 * 64];\
    sum1 op1 MULS((w1)[0 * 64], smp_);\
    sum2 op2 MULS((w2)[0 * 64], smp_);\
    smp_ = (p)[1 * 64];\
    sum1 op1 MULS((w1)[1 * 64], smp_);\
    sum2 op2 MULS((w2)[1 * 64], smp_);\
    smp_ = (p)[2 * 64];\
    sum1 op1 MULS((w1)[2 * 64], smp_);\
    sum2 op2 MULS((w2)[2 * 64], smp_);\
    smp_ = (p)[3 * 64];\
    sum1 op1 MULS((w1)[3 * 64], smp_);\
    sum2 op2 MULS((w2)[3 * 64], smp_);\
    smp_ = (p)[4 * 64];\
    sum1 op1 MULS((w1)[4 * 64], smp_);\
    sum2 op2 MULS((w2)[4 * 64], smp_);\
    smp_ = (p)[5 * 64];\
    sum1 op1 MULS((w1)[5 * 64], smp_);\
    sum2 op2 MULS((w2)[5 * 64], smp_);\
    smp_ = (p)[6 * 64];\
    sum1 op1 MULS((w1)[6 * 64], smp_);\
    sum2 op2 MULS((w2)[6 * 64], smp_);\
    smp_ = (p)[7 * 64];\
    sum1 op1 MULS((w1)[7 * 64], smp_);\
    sum2 op2 MULS((w2)[7 * 64], smp_);\
} while (0)
853

    
854
/**
 * Build the 512 entry synthesis window from the 257 entries of
 * mpa_enwindow[].
 *
 * Entries 0..256 are copied (rescaled when WFRAC_BITS < 16); entries
 * 257..511 are the mirror image window[512 - i], negated unless i is a
 * multiple of 64.
 *
 * @param window destination array of 512 MPA_INT
 */
void ff_mpa_synth_init(MPA_INT *window)
{
    int i;

    /* max = 18760, max sum over all 16 coefs : 44736 */
    for(i=0;i<257;i++) {
        int v;
        v = mpa_enwindow[i];
#if WFRAC_BITS < 16
        v = (v + (1 << (16 - WFRAC_BITS - 1))) >> (16 - WFRAC_BITS);
#endif
        window[i] = v;
        if ((i & 63) != 0)
            v = -v;
        if (i != 0)
            window[512 - i] = v;
    }
}
872

    
873
/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
874
   32 samples. */
875
/* XXX: optimize by avoiding ring buffer usage */
876
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
877
                         MPA_INT *window, int *dither_state,
878
                         OUT_INT *samples, int incr,
879
                         int32_t sb_samples[SBLIMIT])
880
{
881
    int32_t tmp[32];
882
    register MPA_INT *synth_buf;
883
    register const MPA_INT *w, *w2, *p;
884
    int j, offset, v;
885
    OUT_INT *samples2;
886
#if FRAC_BITS <= 15
887
    int sum, sum2;
888
#else
889
    int64_t sum, sum2;
890
#endif
891

    
892
    dct32(tmp, sb_samples);
893

    
894
    offset = *synth_buf_offset;
895
    synth_buf = synth_buf_ptr + offset;
896

    
897
    for(j=0;j<32;j++) {
898
        v = tmp[j];
899
#if FRAC_BITS <= 15
900
        /* NOTE: can cause a loss in precision if very high amplitude
901
           sound */
902
        if (v > 32767)
903
            v = 32767;
904
        else if (v < -32768)
905
            v = -32768;
906
#endif
907
        synth_buf[j] = v;
908
    }
909
    /* copy to avoid wrap */
910
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(MPA_INT));
911

    
912
    samples2 = samples + 31 * incr;
913
    w = window;
914
    w2 = window + 31;
915

    
916
    sum = *dither_state;
917
    p = synth_buf + 16;
918
    SUM8(sum, +=, w, p);
919
    p = synth_buf + 48;
920
    SUM8(sum, -=, w + 32, p);
921
    *samples = round_sample(&sum);
922
    samples += incr;
923
    w++;
924

    
925
    /* we calculate two samples at the same time to avoid one memory
926
       access per two sample */
927
    for(j=1;j<16;j++) {
928
        sum2 = 0;
929
        p = synth_buf + 16 + j;
930
        SUM8P2(sum, +=, sum2, -=, w, w2, p);
931
        p = synth_buf + 48 - j;
932
        SUM8P2(sum, -=, sum2, -=, w + 32, w2 + 32, p);
933

    
934
        *samples = round_sample(&sum);
935
        samples += incr;
936
        sum += sum2;
937
        *samples2 = round_sample(&sum);
938
        samples2 -= incr;
939
        w++;
940
        w2--;
941
    }
942

    
943
    p = synth_buf + 32;
944
    SUM8(sum, -=, w + 32, p);
945
    *samples = round_sample(&sum);
946
    *dither_state= sum;
947

    
948
    offset = (offset - 32) & 511;
949
    *synth_buf_offset = offset;
950
}
951

    
952
#define C3 FIXHR(0.86602540378443864676/2)
953

    
954
/* 0.5 / cos(pi*(2*i+1)/36) */
955
static const int icos36[9] = {
956
    FIXR(0.50190991877167369479),
957
    FIXR(0.51763809020504152469), //0
958
    FIXR(0.55168895948124587824),
959
    FIXR(0.61038729438072803416),
960
    FIXR(0.70710678118654752439), //1
961
    FIXR(0.87172339781054900991),
962
    FIXR(1.18310079157624925896),
963
    FIXR(1.93185165257813657349), //2
964
    FIXR(5.73685662283492756461),
965
};
966

    
967
/* 0.5 / cos(pi*(2*i+1)/36) */
968
static const int icos36h[9] = {
969
    FIXHR(0.50190991877167369479/2),
970
    FIXHR(0.51763809020504152469/2), //0
971
    FIXHR(0.55168895948124587824/2),
972
    FIXHR(0.61038729438072803416/2),
973
    FIXHR(0.70710678118654752439/2), //1
974
    FIXHR(0.87172339781054900991/2),
975
    FIXHR(1.18310079157624925896/4),
976
    FIXHR(1.93185165257813657349/4), //2
977
//    FIXHR(5.73685662283492756461),
978
};
979

    
980
/* 12 points IMDCT. We compute it "by hand" by factorizing obvious
981
   cases. */
982
static void imdct12(int *out, int *in)
983
{
984
    int in0, in1, in2, in3, in4, in5, t1, t2;
985

    
986
    in0= in[0*3];
987
    in1= in[1*3] + in[0*3];
988
    in2= in[2*3] + in[1*3];
989
    in3= in[3*3] + in[2*3];
990
    in4= in[4*3] + in[3*3];
991
    in5= in[5*3] + in[4*3];
992
    in5 += in3;
993
    in3 += in1;
994

    
995
    in2= MULH(2*in2, C3);
996
    in3= MULH(4*in3, C3);
997

    
998
    t1 = in0 - in4;
999
    t2 = MULH(2*(in1 - in5), icos36h[4]);
1000

    
1001
    out[ 7]=
1002
    out[10]= t1 + t2;
1003
    out[ 1]=
1004
    out[ 4]= t1 - t2;
1005

    
1006
    in0 += in4>>1;
1007
    in4 = in0 + in2;
1008
    in5 += 2*in1;
1009
    in1 = MULH(in5 + in3, icos36h[1]);
1010
    out[ 8]=
1011
    out[ 9]= in4 + in1;
1012
    out[ 2]=
1013
    out[ 3]= in4 - in1;
1014

    
1015
    in0 -= in2;
1016
    in5 = MULH(2*(in5 - in3), icos36h[7]);
1017
    out[ 0]=
1018
    out[ 5]= in0 - in5;
1019
    out[ 6]=
1020
    out[11]= in0 + in5;
1021
}
1022

    
1023
/* Ck = cos(pi*k/18) / 2 for k = 1..8, each converted with FIXHR().
   C3 repeats, token for token, the definition given before imdct12(). */
#define C1 FIXHR(0.98480775301220805936/2)
#define C2 FIXHR(0.93969262078590838405/2)
#define C3 FIXHR(0.86602540378443864676/2)
#define C4 FIXHR(0.76604444311897803520/2)
#define C5 FIXHR(0.64278760968653932632/2)
#define C6 FIXHR(0.5/2)
#define C7 FIXHR(0.34202014332566873304/2)
#define C8 FIXHR(0.17364817766693034885/2)
1032

    
1033

    
1034
/* using Lee like decomposition followed by hand coded 9 points DCT */
1035
static void imdct36(int *out, int *buf, int *in, int *win)
1036
{
1037
    int i, j, t0, t1, t2, t3, s0, s1, s2, s3;
1038
    int tmp[18], *tmp1, *in1;
1039

    
1040
    for(i=17;i>=1;i--)
1041
        in[i] += in[i-1];
1042
    for(i=17;i>=3;i-=2)
1043
        in[i] += in[i-2];
1044

    
1045
    for(j=0;j<2;j++) {
1046
        tmp1 = tmp + j;
1047
        in1 = in + j;
1048
#if 0
1049
//more accurate but slower
1050
        int64_t t0, t1, t2, t3;
1051
        t2 = in1[2*4] + in1[2*8] - in1[2*2];
1052

1053
        t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32;
1054
        t1 = in1[2*0] - in1[2*6];
1055
        tmp1[ 6] = t1 - (t2>>1);
1056
        tmp1[16] = t1 + t2;
1057

1058
        t0 = MUL64(2*(in1[2*2] + in1[2*4]),    C2);
1059
        t1 = MUL64(   in1[2*4] - in1[2*8] , -2*C8);
1060
        t2 = MUL64(2*(in1[2*2] + in1[2*8]),   -C4);
1061

1062
        tmp1[10] = (t3 - t0 - t2) >> 32;
1063
        tmp1[ 2] = (t3 + t0 + t1) >> 32;
1064
        tmp1[14] = (t3 + t2 - t1) >> 32;
1065

1066
        tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
1067
        t2 = MUL64(2*(in1[2*1] + in1[2*5]),    C1);
1068
        t3 = MUL64(   in1[2*5] - in1[2*7] , -2*C7);
1069
        t0 = MUL64(2*in1[2*3], C3);
1070

1071
        t1 = MUL64(2*(in1[2*1] + in1[2*7]),   -C5);
1072

1073
        tmp1[ 0] = (t2 + t3 + t0) >> 32;
1074
        tmp1[12] = (t2 + t1 - t0) >> 32;
1075
        tmp1[ 8] = (t3 - t1 - t0) >> 32;
1076
#else
1077
        t2 = in1[2*4] + in1[2*8] - in1[2*2];
1078

    
1079
        t3 = in1[2*0] + (in1[2*6]>>1);
1080
        t1 = in1[2*0] - in1[2*6];
1081
        tmp1[ 6] = t1 - (t2>>1);
1082
        tmp1[16] = t1 + t2;
1083

    
1084
        t0 = MULH(2*(in1[2*2] + in1[2*4]),    C2);
1085
        t1 = MULH(   in1[2*4] - in1[2*8] , -2*C8);
1086
        t2 = MULH(2*(in1[2*2] + in1[2*8]),   -C4);
1087

    
1088
        tmp1[10] = t3 - t0 - t2;
1089
        tmp1[ 2] = t3 + t0 + t1;
1090
        tmp1[14] = t3 + t2 - t1;
1091

    
1092
        tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
1093
        t2 = MULH(2*(in1[2*1] + in1[2*5]),    C1);
1094
        t3 = MULH(   in1[2*5] - in1[2*7] , -2*C7);
1095
        t0 = MULH(2*in1[2*3], C3);
1096

    
1097
        t1 = MULH(2*(in1[2*1] + in1[2*7]),   -C5);
1098

    
1099
        tmp1[ 0] = t2 + t3 + t0;
1100
        tmp1[12] = t2 + t1 - t0;
1101
        tmp1[ 8] = t3 - t1 - t0;
1102
#endif
1103
    }
1104

    
1105
    i = 0;
1106
    for(j=0;j<4;j++) {
1107
        t0 = tmp[i];
1108
        t1 = tmp[i + 2];
1109
        s0 = t1 + t0;
1110
        s2 = t1 - t0;
1111

    
1112
        t2 = tmp[i + 1];
1113
        t3 = tmp[i + 3];
1114
        s1 = MULH(2*(t3 + t2), icos36h[j]);
1115
        s3 = MULL(t3 - t2, icos36[8 - j]);
1116

    
1117
        t0 = s0 + s1;
1118
        t1 = s0 - s1;
1119
        out[(9 + j)*SBLIMIT] =  MULH(t1, win[9 + j]) + buf[9 + j];
1120
        out[(8 - j)*SBLIMIT] =  MULH(t1, win[8 - j]) + buf[8 - j];
1121
        buf[9 + j] = MULH(t0, win[18 + 9 + j]);
1122
        buf[8 - j] = MULH(t0, win[18 + 8 - j]);
1123

    
1124
        t0 = s2 + s3;
1125
        t1 = s2 - s3;
1126
        out[(9 + 8 - j)*SBLIMIT] =  MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j];
1127
        out[(        j)*SBLIMIT] =  MULH(t1, win[        j]) + buf[        j];
1128
        buf[9 + 8 - j] = MULH(t0, win[18 + 9 + 8 - j]);
1129
        buf[      + j] = MULH(t0, win[18         + j]);
1130
        i += 4;
1131
    }
1132

    
1133
    s0 = tmp[16];
1134
    s1 = MULH(2*tmp[17], icos36h[4]);
1135
    t0 = s0 + s1;
1136
    t1 = s0 - s1;
1137
    out[(9 + 4)*SBLIMIT] =  MULH(t1, win[9 + 4]) + buf[9 + 4];
1138
    out[(8 - 4)*SBLIMIT] =  MULH(t1, win[8 - 4]) + buf[8 - 4];
1139
    buf[9 + 4] = MULH(t0, win[18 + 9 + 4]);
1140
    buf[8 - 4] = MULH(t0, win[18 + 8 - 4]);
1141
}
1142

    
1143
/* header decoding. MUST check the header before because no
1144
   consistency check is done there. Return 1 if free format found and
1145
   that the frame size must be computed externally */
1146
static int decode_header(MPADecodeContext *s, uint32_t header)
1147
{
1148
    int sample_rate, frame_size, mpeg25, padding;
1149
    int sample_rate_index, bitrate_index;
1150
    if (header & (1<<20)) {
1151
        s->lsf = (header & (1<<19)) ? 0 : 1;
1152
        mpeg25 = 0;
1153
    } else {
1154
        s->lsf = 1;
1155
        mpeg25 = 1;
1156
    }
1157

    
1158
    s->layer = 4 - ((header >> 17) & 3);
1159
    /* extract frequency */
1160
    sample_rate_index = (header >> 10) & 3;
1161
    sample_rate = mpa_freq_tab[sample_rate_index] >> (s->lsf + mpeg25);
1162
    sample_rate_index += 3 * (s->lsf + mpeg25);
1163
    s->sample_rate_index = sample_rate_index;
1164
    s->error_protection = ((header >> 16) & 1) ^ 1;
1165
    s->sample_rate = sample_rate;
1166

    
1167
    bitrate_index = (header >> 12) & 0xf;
1168
    padding = (header >> 9) & 1;
1169
    //extension = (header >> 8) & 1;
1170
    s->mode = (header >> 6) & 3;
1171
    s->mode_ext = (header >> 4) & 3;
1172
    //copyright = (header >> 3) & 1;
1173
    //original = (header >> 2) & 1;
1174
    //emphasis = header & 3;
1175

    
1176
    if (s->mode == MPA_MONO)
1177
        s->nb_channels = 1;
1178
    else
1179
        s->nb_channels = 2;
1180

    
1181
    if (bitrate_index != 0) {
1182
        frame_size = mpa_bitrate_tab[s->lsf][s->layer - 1][bitrate_index];
1183
        s->bit_rate = frame_size * 1000;
1184
        switch(s->layer) {
1185
        case 1:
1186
            frame_size = (frame_size * 12000) / sample_rate;
1187
            frame_size = (frame_size + padding) * 4;
1188
            break;
1189
        case 2:
1190
            frame_size = (frame_size * 144000) / sample_rate;
1191
            frame_size += padding;
1192
            break;
1193
        default:
1194
        case 3:
1195
            frame_size = (frame_size * 144000) / (sample_rate << s->lsf);
1196
            frame_size += padding;
1197
            break;
1198
        }
1199
        s->frame_size = frame_size;
1200
    } else {
1201
        /* if no frame size computed, signal it */
1202
        if (!s->free_format_frame_size)
1203
            return 1;
1204
        /* free format: compute bitrate and real frame size from the
1205
           frame size we extracted by reading the bitstream */
1206
        s->frame_size = s->free_format_frame_size;
1207
        switch(s->layer) {
1208
        case 1:
1209
            s->frame_size += padding  * 4;
1210
            s->bit_rate = (s->frame_size * sample_rate) / 48000;
1211
            break;
1212
        case 2:
1213
            s->frame_size += padding;
1214
            s->bit_rate = (s->frame_size * sample_rate) / 144000;
1215
            break;
1216
        default:
1217
        case 3:
1218
            s->frame_size += padding;
1219
            s->bit_rate = (s->frame_size * (sample_rate << s->lsf)) / 144000;
1220
            break;
1221
        }
1222
    }
1223

    
1224
#if defined(DEBUG)
1225
    dprintf("layer%d, %d Hz, %d kbits/s, ",
1226
           s->layer, s->sample_rate, s->bit_rate);
1227
    if (s->nb_channels == 2) {
1228
        if (s->layer == 3) {
1229
            if (s->mode_ext & MODE_EXT_MS_STEREO)
1230
                dprintf("ms-");
1231
            if (s->mode_ext & MODE_EXT_I_STEREO)
1232
                dprintf("i-");
1233
        }
1234
        dprintf("stereo");
1235
    } else {
1236
        dprintf("mono");
1237
    }
1238
    dprintf("\n");
1239
#endif
1240
    return 0;
1241
}
1242

    
1243
/* useful helper to get mpeg audio stream infos. Return -1 if error in
1244
   header, otherwise the coded frame size in bytes */
1245
int mpa_decode_header(AVCodecContext *avctx, uint32_t head)
1246
{
1247
    MPADecodeContext s1, *s = &s1;
1248
    memset( s, 0, sizeof(MPADecodeContext) );
1249

    
1250
    if (ff_mpa_check_header(head) != 0)
1251
        return -1;
1252

    
1253
    if (decode_header(s, head) != 0) {
1254
        return -1;
1255
    }
1256

    
1257
    switch(s->layer) {
1258
    case 1:
1259
        avctx->frame_size = 384;
1260
        break;
1261
    case 2:
1262
        avctx->frame_size = 1152;
1263
        break;
1264
    default:
1265
    case 3:
1266
        if (s->lsf)
1267
            avctx->frame_size = 576;
1268
        else
1269
            avctx->frame_size = 1152;
1270
        break;
1271
    }
1272

    
1273
    avctx->sample_rate = s->sample_rate;
1274
    avctx->channels = s->nb_channels;
1275
    avctx->bit_rate = s->bit_rate;
1276
    avctx->sub_id = s->layer;
1277
    return s->frame_size;
1278
}
1279

    
1280
/* return the number of decoded frames */
1281
static int mp_decode_layer1(MPADecodeContext *s)
1282
{
1283
    int bound, i, v, n, ch, j, mant;
1284
    uint8_t allocation[MPA_MAX_CHANNELS][SBLIMIT];
1285
    uint8_t scale_factors[MPA_MAX_CHANNELS][SBLIMIT];
1286

    
1287
    if (s->mode == MPA_JSTEREO)
1288
        bound = (s->mode_ext + 1) * 4;
1289
    else
1290
        bound = SBLIMIT;
1291

    
1292
    /* allocation bits */
1293
    for(i=0;i<bound;i++) {
1294
        for(ch=0;ch<s->nb_channels;ch++) {
1295
            allocation[ch][i] = get_bits(&s->gb, 4);
1296
        }
1297
    }
1298
    for(i=bound;i<SBLIMIT;i++) {
1299
        allocation[0][i] = get_bits(&s->gb, 4);
1300
    }
1301

    
1302
    /* scale factors */
1303
    for(i=0;i<bound;i++) {
1304
        for(ch=0;ch<s->nb_channels;ch++) {
1305
            if (allocation[ch][i])
1306
                scale_factors[ch][i] = get_bits(&s->gb, 6);
1307
        }
1308
    }
1309
    for(i=bound;i<SBLIMIT;i++) {
1310
        if (allocation[0][i]) {
1311
            scale_factors[0][i] = get_bits(&s->gb, 6);
1312
            scale_factors[1][i] = get_bits(&s->gb, 6);
1313
        }
1314
    }
1315

    
1316
    /* compute samples */
1317
    for(j=0;j<12;j++) {
1318
        for(i=0;i<bound;i++) {
1319
            for(ch=0;ch<s->nb_channels;ch++) {
1320
                n = allocation[ch][i];
1321
                if (n) {
1322
                    mant = get_bits(&s->gb, n + 1);
1323
                    v = l1_unscale(n, mant, scale_factors[ch][i]);
1324
                } else {
1325
                    v = 0;
1326
                }
1327
                s->sb_samples[ch][j][i] = v;
1328
            }
1329
        }
1330
        for(i=bound;i<SBLIMIT;i++) {
1331
            n = allocation[0][i];
1332
            if (n) {
1333
                mant = get_bits(&s->gb, n + 1);
1334
                v = l1_unscale(n, mant, scale_factors[0][i]);
1335
                s->sb_samples[0][j][i] = v;
1336
                v = l1_unscale(n, mant, scale_factors[1][i]);
1337
                s->sb_samples[1][j][i] = v;
1338
            } else {
1339
                s->sb_samples[0][j][i] = 0;
1340
                s->sb_samples[1][j][i] = 0;
1341
            }
1342
        }
1343
    }
1344
    return 12;
1345
}
1346

    
1347
/* Select the layer 2 allocation table number (0..4).
 *
 * bitrate is in kb/s and is divided by nb_channels to get the rate per
 * channel; freq is the sampling frequency in Hz.  When lsf is non zero
 * the result is always 4.
 */
int l2_select_table(int bitrate, int nb_channels, int freq, int lsf)
{
    const int per_channel = bitrate / nb_channels;

    if (lsf)
        return 4;

    if (per_channel >= 56 && (freq == 48000 || per_channel <= 80))
        return 0;
    if (freq != 48000 && per_channel >= 96)
        return 1;
    if (freq != 32000 && per_channel <= 48)
        return 2;
    return 3;
}
1368

    
1369
/* Layer 2 decoding: reads bit allocation, scale factor selection, scale
 * factors and 36 rows of samples from s->gb into
 * s->sb_samples[ch][0..35][0..SBLIMIT-1].
 *
 * Subbands below 'bound' carry separate data per channel; subbands in
 * [bound, sblimit) share allocation and mantissas between both channels;
 * subbands from sblimit upwards are set to zero.
 *
 * Returns 3 * 12, the number of sample rows written.
 */
static int mp_decode_layer2(MPADecodeContext *s)
{
    const unsigned char *alloc_table;
    unsigned char bit_alloc[MPA_MAX_CHANNELS][SBLIMIT];
    unsigned char scale_code[MPA_MAX_CHANNELS][SBLIMIT];
    unsigned char scale_factors[MPA_MAX_CHANNELS][SBLIMIT][3], *sf;
    int sblimit; /* number of used subbands */
    int table, bound;
    int sb, ch, part, grp, row, n;
    int pos, width, alloc, qindex, bits, steps, scale, code;

    /* select decoding table */
    table = l2_select_table(s->bit_rate / 1000, s->nb_channels,
                            s->sample_rate, s->lsf);
    sblimit = sblimit_table[table];
    alloc_table = alloc_tables[table];

    bound = (s->mode == MPA_JSTEREO) ? (s->mode_ext + 1) * 4 : sblimit;

    dprintf("bound=%d sblimit=%d\n", bound, sblimit);

    /* sanity check */
    if (bound > sblimit)
        bound = sblimit;

    /* parse bit allocation; 'pos' walks the allocation table, whose
       entry for a subband starts with the field width in bits */
    pos = 0;
    for (sb = 0; sb < bound; sb++) {
        width = alloc_table[pos];
        for (ch = 0; ch < s->nb_channels; ch++)
            bit_alloc[ch][sb] = get_bits(&s->gb, width);
        pos += 1 << width;
    }
    for (sb = bound; sb < sblimit; sb++) {
        width = alloc_table[pos];
        code = get_bits(&s->gb, width);
        bit_alloc[0][sb] = code;
        bit_alloc[1][sb] = code;
        pos += 1 << width;
    }

#ifdef DEBUG
    for (ch = 0; ch < s->nb_channels; ch++) {
        for (sb = 0; sb < sblimit; sb++)
            dprintf(" %d", bit_alloc[ch][sb]);
        dprintf("\n");
    }
#endif

    /* scale codes */
    for (sb = 0; sb < sblimit; sb++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (bit_alloc[ch][sb])
                scale_code[ch][sb] = get_bits(&s->gb, 2);
        }
    }

    /* scale factors: the scale code tells how many of the three values
       are transmitted and which ones are repeated */
    for (sb = 0; sb < sblimit; sb++) {
        for (ch = 0; ch < s->nb_channels; ch++) {
            if (!bit_alloc[ch][sb])
                continue;
            sf = scale_factors[ch][sb];
            switch (scale_code[ch][sb]) {
            default:
            case 0:
                sf[0] = get_bits(&s->gb, 6);
                sf[1] = get_bits(&s->gb, 6);
                sf[2] = get_bits(&s->gb, 6);
                break;
            case 1:
                sf[0] = get_bits(&s->gb, 6);
                sf[1] = sf[0];
                sf[2] = get_bits(&s->gb, 6);
                break;
            case 2:
                sf[0] = get_bits(&s->gb, 6);
                sf[1] = sf[0];
                sf[2] = sf[0];
                break;
            case 3:
                sf[0] = get_bits(&s->gb, 6);
                sf[2] = get_bits(&s->gb, 6);
                sf[1] = sf[2];
                break;
            }
        }
    }

#ifdef DEBUG
    for (ch = 0; ch < s->nb_channels; ch++) {
        for (sb = 0; sb < sblimit; sb++) {
            if (bit_alloc[ch][sb]) {
                sf = scale_factors[ch][sb];
                dprintf(" %d %d %d", sf[0], sf[1], sf[2]);
            } else {
                dprintf(" -");
            }
        }
        dprintf("\n");
    }
#endif

    /* samples: 3 parts (one scale factor each) of 4 groups of 3 rows */
    for (part = 0; part < 3; part++) {
        for (grp = 0; grp < 12; grp += 3) {
            row = part * 12 + grp;
            pos = 0;

            /* subbands with independent channels */
            for (sb = 0; sb < bound; sb++) {
                width = alloc_table[pos];
                for (ch = 0; ch < s->nb_channels; ch++) {
                    alloc = bit_alloc[ch][sb];
                    if (!alloc) {
                        for (n = 0; n < 3; n++)
                            s->sb_samples[ch][row + n][sb] = 0;
                        continue;
                    }
                    scale = scale_factors[ch][sb][part];
                    qindex = alloc_table[pos + alloc];
                    bits = quant_bits[qindex];
                    if (bits < 0) {
                        /* 3 values at the same time */
                        code = get_bits(&s->gb, -bits);
                        steps = quant_steps[qindex];
                        s->sb_samples[ch][row + 0][sb] =
                            l2_unscale_group(steps, code % steps, scale);
                        code = code / steps;
                        s->sb_samples[ch][row + 1][sb] =
                            l2_unscale_group(steps, code % steps, scale);
                        code = code / steps;
                        s->sb_samples[ch][row + 2][sb] =
                            l2_unscale_group(steps, code, scale);
                    } else {
                        for (n = 0; n < 3; n++) {
                            code = get_bits(&s->gb, bits);
                            s->sb_samples[ch][row + n][sb] =
                                l1_unscale(bits - 1, code, scale);
                        }
                    }
                }
                /* next subband in alloc table */
                pos += 1 << width;
            }

            /* XXX: find a way to avoid this duplication of code */
            /* subbands whose mantissas are shared by both channels */
            for (sb = bound; sb < sblimit; sb++) {
                width = alloc_table[pos];
                alloc = bit_alloc[0][sb];
                if (alloc) {
                    int mant, scale0, scale1;

                    scale0 = scale_factors[0][sb][part];
                    scale1 = scale_factors[1][sb][part];
                    qindex = alloc_table[pos + alloc];
                    bits = quant_bits[qindex];
                    if (bits < 0) {
                        /* 3 values at the same time */
                        code = get_bits(&s->gb, -bits);
                        steps = quant_steps[qindex];
                        for (n = 0; n < 2; n++) {
                            mant = code % steps;
                            code = code / steps;
                            s->sb_samples[0][row + n][sb] =
                                l2_unscale_group(steps, mant, scale0);
                            s->sb_samples[1][row + n][sb] =
                                l2_unscale_group(steps, mant, scale1);
                        }
                        s->sb_samples[0][row + 2][sb] =
                            l2_unscale_group(steps, code, scale0);
                        s->sb_samples[1][row + 2][sb] =
                            l2_unscale_group(steps, code, scale1);
                    } else {
                        for (n = 0; n < 3; n++) {
                            mant = get_bits(&s->gb, bits);
                            s->sb_samples[0][row + n][sb] =
                                l1_unscale(bits - 1, mant, scale0);
                            s->sb_samples[1][row + n][sb] =
                                l1_unscale(bits - 1, mant, scale1);
                        }
                    }
                } else {
                    for (n = 0; n < 3; n++)
                        s->sb_samples[0][row + n][sb] = 0;
                    for (n = 0; n < 3; n++)
                        s->sb_samples[1][row + n][sb] = 0;
                }
                /* next subband in alloc table */
                pos += 1 << width;
            }

            /* fill remaining samples to zero */
            for (sb = sblimit; sb < SBLIMIT; sb++) {
                for (ch = 0; ch < s->nb_channels; ch++) {
                    for (n = 0; n < 3; n++)
                        s->sb_samples[ch][row + n][sb] = 0;
                }
            }
        }
    }
    return 3 * 12;
}
1578

    
1579
/*
1580
 * Seek back in the stream for backstep bytes (at most 511 bytes)
1581
 */
1582
static void seek_to_maindata(MPADecodeContext *s, unsigned int backstep)
1583
{
1584
    uint8_t *ptr;
1585

    
1586
    /* compute current position in stream */
1587
    ptr = (uint8_t *)(s->gb.buffer + (get_bits_count(&s->gb)>>3));
1588

    
1589
    /* copy old data before current one */
1590
    ptr -= backstep;
1591
    memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] +
1592
           BACKSTEP_SIZE + s->old_frame_size - backstep, backstep);
1593
    /* init get bits again */
1594
    init_get_bits(&s->gb, ptr, (s->frame_size + backstep)*8);
1595

    
1596
    /* prepare next buffer */
1597
    s->inbuf_index ^= 1;
1598
    s->inbuf = &s->inbuf1[s->inbuf_index][BACKSTEP_SIZE];
1599
    s->old_frame_size = s->frame_size;
1600
}
1601

    
1602
/* Split sf into four mixed-radix digits: slen[3] uses radix n3, slen[2]
   radix n2, slen[1] radix n1, and slen[0] receives what is left.  A zero
   n3 or n2 skips that digit and stores 0 for it; n1 must not be zero. */
static inline void lsf_sf_expand(int *slen,
                                 int sf, int n1, int n2, int n3)
{
    int rest = sf;

    slen[3] = 0;
    if (n3) {
        slen[3] = rest % n3;
        rest /= n3;
    }

    slen[2] = 0;
    if (n2) {
        slen[2] = rest % n2;
        rest /= n2;
    }

    slen[1] = rest % n1;
    slen[0] = rest / n1;
}
1621

    
1622
static void exponents_from_scale_factors(MPADecodeContext *s,
1623
                                         GranuleDef *g,
1624
                                         int16_t *exponents)
1625
{
1626
    const uint8_t *bstab, *pretab;
1627
    int len, i, j, k, l, v0, shift, gain, gains[3];
1628
    int16_t *exp_ptr;
1629

    
1630
    exp_ptr = exponents;
1631
    gain = g->global_gain - 210;
1632
    shift = g->scalefac_scale + 1;
1633

    
1634
    bstab = band_size_long[s->sample_rate_index];
1635
    pretab = mpa_pretab[g->preflag];
1636
    for(i=0;i<g->long_end;i++) {
1637
        v0 = gain - ((g->scale_factors[i] + pretab[i]) << shift);
1638
        len = bstab[i];
1639
        for(j=len;j>0;j--)
1640
            *exp_ptr++ = v0;
1641
    }
1642

    
1643
    if (g->short_start < 13) {
1644
        bstab = band_size_short[s->sample_rate_index];
1645
        gains[0] = gain - (g->subblock_gain[0] << 3);
1646
        gains[1] = gain - (g->subblock_gain[1] << 3);
1647
        gains[2] = gain - (g->subblock_gain[2] << 3);
1648
        k = g->long_end;
1649
        for(i=g->short_start;i<13;i++) {
1650
            len = bstab[i];
1651
            for(l=0;l<3;l++) {
1652
                v0 = gains[l] - (g->scale_factors[k++] << shift);
1653
                for(j=len;j>0;j--)
1654
                *exp_ptr++ = v0;
1655
            }
1656
        }
1657
    }
1658
}
1659

    
1660
/* handle n = 0 too */
1661
static inline int get_bitsz(GetBitContext *s, int n)
1662
{
1663
    if (n == 0)
1664
        return 0;
1665
    else
1666
        return get_bits(s, n);
1667
}
1668

    
1669
/*
 * Decode the Huffman coded values of granule g into g->sb_hybrid[0..575].
 *
 * exponents : per-coefficient exponents passed to l3_unscale()
 * end_pos   : bit position (as reported by get_bits_count()) at which
 *             the data of this granule ends
 *
 * The three "big values" regions are decoded as pairs, then the
 * remaining high frequencies as quadruples whose non zero values have
 * magnitude 1; whatever is left up to index 575 is cleared.
 *
 * Returns 0 on success, -1 if the quadruple VLC yields a negative code.
 */
static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
                          int16_t *exponents, int end_pos)
{
    int s_index;
    int linbits, code, x, y, l, v, i, j, k, pos;
    GetBitContext last_gb;
    VLC *vlc;

    /* low frequencies (called big values) */
    s_index = 0;
    for(i=0;i<3;i++) {
        j = g->region_size[i];
        if (j == 0)
            continue;
        /* select vlc table */
        k = g->table_select[i];
        l = mpa_huff_data[k][0];
        linbits = mpa_huff_data[k][1];
        vlc = &huff_vlc[l];

        if(!l){
            /* VLC index 0: nothing is read for this region.  It spans j
               pairs, i.e. 2*j coefficients, so all 2*j must be cleared
               (clearing only j left the second half with stale data). */
            memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid)*2*j);
            s_index += 2*j;
            continue;
        }

        /* read huffcode and compute each couple */
        for(;j>0;j--) {
            if (get_bits_count(&s->gb) >= end_pos)
                break;
            y = get_vlc2(&s->gb, vlc->table, 8, 3);
            x = y >> 4;
            y = y & 0x0f;

            dprintf("region=%d n=%d x=%d y=%d exp=%d\n",
                    i, g->region_size[i] - j, x, y, exponents[s_index]);
            if (x) {
                /* 15 is the escape value: linbits extra bits follow */
                if (x == 15)
                    x += get_bitsz(&s->gb, linbits);
                v = l3_unscale(x, exponents[s_index]);
                if (get_bits1(&s->gb))
                    v = -v;
            } else {
                v = 0;
            }
            g->sb_hybrid[s_index++] = v;
            if (y) {
                if (y == 15)
                    y += get_bitsz(&s->gb, linbits);
                v = l3_unscale(y, exponents[s_index]);
                if (get_bits1(&s->gb))
                    v = -v;
            } else {
                v = 0;
            }
            g->sb_hybrid[s_index++] = v;
        }
    }

    /* high frequencies */
    vlc = &huff_quad_vlc[g->count1table_select];
    /* a NULL buffer marks "no quadruple decoded yet" */
    last_gb.buffer = NULL;
    while (s_index <= 572) {
        pos = get_bits_count(&s->gb);
        if (pos >= end_pos) {
            if (pos > end_pos && last_gb.buffer != NULL) {
                /* some encoders generate an incorrect size for this
                   part. We must go back into the data */
                s_index -= 4;
                s->gb = last_gb;
            }
            break;
        }
        /* remember the reader state so the last quadruple can be undone */
        last_gb= s->gb;

        code = get_vlc2(&s->gb, vlc->table, vlc->bits, 2);
        dprintf("t=%d code=%d\n", g->count1table_select, code);
        if (code < 0)
            return -1;
        /* bits 3..0 of code flag which of the four values are non zero */
        for(i=0;i<4;i++) {
            if (code & (8 >> i)) {
                /* non zero value. Could use a hand coded function for
                   'one' value */
                v = l3_unscale(1, exponents[s_index]);
                if(get_bits1(&s->gb))
                    v = -v;
            } else {
                v = 0;
            }
            g->sb_hybrid[s_index++] = v;
        }
    }
    /* clear everything that was not coded */
    memset(&g->sb_hybrid[s_index], 0, sizeof(*g->sb_hybrid)*(576 - s_index));
    return 0;
}
1764

    
1765
/* Reorder short blocks from bitstream order to interleaved order. It
1766
   would be faster to do it in parsing, but the code would be far more
1767
   complicated */
1768
static void reorder_block(MPADecodeContext *s, GranuleDef *g)
1769
{
1770
    int i, j, k, len;
1771
    int32_t *ptr, *dst, *ptr1;
1772
    int32_t tmp[576];
1773

    
1774
    if (g->block_type != 2)
1775
        return;
1776

    
1777
    if (g->switch_point) {
1778
        if (s->sample_rate_index != 8) {
1779
            ptr = g->sb_hybrid + 36;
1780
        } else {
1781
            ptr = g->sb_hybrid + 48;
1782
        }
1783
    } else {
1784
        ptr = g->sb_hybrid;
1785
    }
1786

    
1787
    for(i=g->short_start;i<13;i++) {
1788
        len = band_size_short[s->sample_rate_index][i];
1789
        ptr1 = ptr;
1790
        for(k=0;k<3;k++) {
1791
            dst = tmp + k;
1792
            for(j=len;j>0;j--) {
1793
                *dst = *ptr++;
1794
                dst += 3;
1795
            }
1796
        }
1797
        memcpy(ptr1, tmp, len * 3 * sizeof(int32_t));
1798
    }
1799
}
1800

    
1801
/* 1/sqrt(2), converted with FIXR() */
#define ISQRT2 FIXR(0.70710678118654752440)
1802

    
1803
static void compute_stereo(MPADecodeContext *s,
1804
                           GranuleDef *g0, GranuleDef *g1)
1805
{
1806
    int i, j, k, l;
1807
    int32_t v1, v2;
1808
    int sf_max, tmp0, tmp1, sf, len, non_zero_found;
1809
    int32_t (*is_tab)[16];
1810
    int32_t *tab0, *tab1;
1811
    int non_zero_found_short[3];
1812

    
1813
    /* intensity stereo */
1814
    if (s->mode_ext & MODE_EXT_I_STEREO) {
1815
        if (!s->lsf) {
1816
            is_tab = is_table;
1817
            sf_max = 7;
1818
        } else {
1819
            is_tab = is_table_lsf[g1->scalefac_compress & 1];
1820
            sf_max = 16;
1821
        }
1822

    
1823
        tab0 = g0->sb_hybrid + 576;
1824
        tab1 = g1->sb_hybrid + 576;
1825

    
1826
        non_zero_found_short[0] = 0;
1827
        non_zero_found_short[1] = 0;
1828
        non_zero_found_short[2] = 0;
1829
        k = (13 - g1->short_start) * 3 + g1->long_end - 3;
1830
        for(i = 12;i >= g1->short_start;i--) {
1831
            /* for last band, use previous scale factor */
1832
            if (i != 11)
1833
                k -= 3;
1834
            len = band_size_short[s->sample_rate_index][i];
1835
            for(l=2;l>=0;l--) {
1836
                tab0 -= len;
1837
                tab1 -= len;
1838
                if (!non_zero_found_short[l]) {
1839
                    /* test if non zero band. if so, stop doing i-stereo */
1840
                    for(j=0;j<len;j++) {
1841
                        if (tab1[j] != 0) {
1842
                            non_zero_found_short[l] = 1;
1843
                            goto found1;
1844
                        }
1845
                    }
1846
                    sf = g1->scale_factors[k + l];
1847
                    if (sf >= sf_max)
1848
                        goto found1;
1849

    
1850
                    v1 = is_tab[0][sf];
1851
                    v2 = is_tab[1][sf];
1852
                    for(j=0;j<len;j++) {
1853
                        tmp0 = tab0[j];
1854
                        tab0[j] = MULL(tmp0, v1);
1855
                        tab1[j] = MULL(tmp0, v2);
1856
                    }
1857
                } else {
1858
                found1:
1859
                    if (s->mode_ext & MODE_EXT_MS_STEREO) {
1860
                        /* lower part of the spectrum : do ms stereo
1861
                           if enabled */
1862
                        for(j=0;j<len;j++) {
1863
                            tmp0 = tab0[j];
1864
                            tmp1 = tab1[j];
1865
                            tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
1866
                            tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
1867
                        }
1868
                    }
1869
                }
1870
            }
1871
        }
1872

    
1873
        non_zero_found = non_zero_found_short[0] |
1874
            non_zero_found_short[1] |
1875
            non_zero_found_short[2];
1876

    
1877
        for(i = g1->long_end - 1;i >= 0;i--) {
1878
            len = band_size_long[s->sample_rate_index][i];
1879
            tab0 -= len;
1880
            tab1 -= len;
1881
            /* test if non zero band. if so, stop doing i-stereo */
1882
            if (!non_zero_found) {
1883
                for(j=0;j<len;j++) {
1884
                    if (tab1[j] != 0) {
1885
                        non_zero_found = 1;
1886
                        goto found2;
1887
                    }
1888
                }
1889
                /* for last band, use previous scale factor */
1890
                k = (i == 21) ? 20 : i;
1891
                sf = g1->scale_factors[k];
1892
                if (sf >= sf_max)
1893
                    goto found2;
1894
                v1 = is_tab[0][sf];
1895
                v2 = is_tab[1][sf];
1896
                for(j=0;j<len;j++) {
1897
                    tmp0 = tab0[j];
1898
                    tab0[j] = MULL(tmp0, v1);
1899
                    tab1[j] = MULL(tmp0, v2);
1900
                }
1901
            } else {
1902
            found2:
1903
                if (s->mode_ext & MODE_EXT_MS_STEREO) {
1904
                    /* lower part of the spectrum : do ms stereo
1905
                       if enabled */
1906
                    for(j=0;j<len;j++) {
1907
                        tmp0 = tab0[j];
1908
                        tmp1 = tab1[j];
1909
                        tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
1910
                        tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
1911
                    }
1912
                }
1913
            }
1914
        }
1915
    } else if (s->mode_ext & MODE_EXT_MS_STEREO) {
1916
        /* ms stereo ONLY */
1917
        /* NOTE: the 1/sqrt(2) normalization factor is included in the
1918
           global gain */
1919
        tab0 = g0->sb_hybrid;
1920
        tab1 = g1->sb_hybrid;
1921
        for(i=0;i<576;i++) {
1922
            tmp0 = tab0[i];
1923
            tmp1 = tab1[i];
1924
            tab0[i] = tmp0 + tmp1;
1925
            tab1[i] = tmp0 - tmp1;
1926
        }
1927
    }
1928
}
1929

    
1930
static void compute_antialias_integer(MPADecodeContext *s,
1931
                              GranuleDef *g)
1932
{
1933
    int32_t *ptr, *csa;
1934
    int n, i;
1935

    
1936
    /* we antialias only "long" bands */
1937
    if (g->block_type == 2) {
1938
        if (!g->switch_point)
1939
            return;
1940
        /* XXX: check this for 8000Hz case */
1941
        n = 1;
1942
    } else {
1943
        n = SBLIMIT - 1;
1944
    }
1945

    
1946
    ptr = g->sb_hybrid + 18;
1947
    for(i = n;i > 0;i--) {
1948
        int tmp0, tmp1, tmp2;
1949
        csa = &csa_table[0][0];
1950
#define INT_AA(j) \
1951
            tmp0 = ptr[-1-j];\
1952
            tmp1 = ptr[   j];\
1953
            tmp2= MULH(tmp0 + tmp1, csa[0+4*j]);\
1954
            ptr[-1-j] = 4*(tmp2 - MULH(tmp1, csa[2+4*j]));\
1955
            ptr[   j] = 4*(tmp2 + MULH(tmp0, csa[3+4*j]));
1956

    
1957
        INT_AA(0)
1958
        INT_AA(1)
1959
        INT_AA(2)
1960
        INT_AA(3)
1961
        INT_AA(4)
1962
        INT_AA(5)
1963
        INT_AA(6)
1964
        INT_AA(7)
1965

    
1966
        ptr += 18;
1967
    }
1968
}
1969

    
1970
static void compute_antialias_float(MPADecodeContext *s,
1971
                              GranuleDef *g)
1972
{
1973
    int32_t *ptr;
1974
    int n, i;
1975

    
1976
    /* we antialias only "long" bands */
1977
    if (g->block_type == 2) {
1978
        if (!g->switch_point)
1979
            return;
1980
        /* XXX: check this for 8000Hz case */
1981
        n = 1;
1982
    } else {
1983
        n = SBLIMIT - 1;
1984
    }
1985

    
1986
    ptr = g->sb_hybrid + 18;
1987
    for(i = n;i > 0;i--) {
1988
        float tmp0, tmp1;
1989
        float *csa = &csa_table_float[0][0];
1990
#define FLOAT_AA(j)\
1991
        tmp0= ptr[-1-j];\
1992
        tmp1= ptr[   j];\
1993
        ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\
1994
        ptr[   j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]);
1995

    
1996
        FLOAT_AA(0)
1997
        FLOAT_AA(1)
1998
        FLOAT_AA(2)
1999
        FLOAT_AA(3)
2000
        FLOAT_AA(4)
2001
        FLOAT_AA(5)
2002
        FLOAT_AA(6)
2003
        FLOAT_AA(7)
2004

    
2005
        ptr += 18;
2006
    }
2007
}
2008

    
2009
static void compute_imdct(MPADecodeContext *s,
2010
                          GranuleDef *g,
2011
                          int32_t *sb_samples,
2012
                          int32_t *mdct_buf)
2013
{
2014
    int32_t *ptr, *win, *win1, *buf, *out_ptr, *ptr1;
2015
    int32_t out2[12];
2016
    int i, j, mdct_long_end, v, sblimit;
2017

    
2018
    /* find last non zero block */
2019
    ptr = g->sb_hybrid + 576;
2020
    ptr1 = g->sb_hybrid + 2 * 18;
2021
    while (ptr >= ptr1) {
2022
        ptr -= 6;
2023
        v = ptr[0] | ptr[1] | ptr[2] | ptr[3] | ptr[4] | ptr[5];
2024
        if (v != 0)
2025
            break;
2026
    }
2027
    sblimit = ((ptr - g->sb_hybrid) / 18) + 1;
2028

    
2029
    if (g->block_type == 2) {
2030
        /* XXX: check for 8000 Hz */
2031
        if (g->switch_point)
2032
            mdct_long_end = 2;
2033
        else
2034
            mdct_long_end = 0;
2035
    } else {
2036
        mdct_long_end = sblimit;
2037
    }
2038

    
2039
    buf = mdct_buf;
2040
    ptr = g->sb_hybrid;
2041
    for(j=0;j<mdct_long_end;j++) {
2042
        /* apply window & overlap with previous buffer */
2043
        out_ptr = sb_samples + j;
2044
        /* select window */
2045
        if (g->switch_point && j < 2)
2046
            win1 = mdct_win[0];
2047
        else
2048
            win1 = mdct_win[g->block_type];
2049
        /* select frequency inversion */
2050
        win = win1 + ((4 * 36) & -(j & 1));
2051
        imdct36(out_ptr, buf, ptr, win);
2052
        out_ptr += 18*SBLIMIT;
2053
        ptr += 18;
2054
        buf += 18;
2055
    }
2056
    for(j=mdct_long_end;j<sblimit;j++) {
2057
        /* select frequency inversion */
2058
        win = mdct_win[2] + ((4 * 36) & -(j & 1));
2059
        out_ptr = sb_samples + j;
2060

    
2061
        for(i=0; i<6; i++){
2062
            *out_ptr = buf[i];
2063
            out_ptr += SBLIMIT;
2064
        }
2065
        imdct12(out2, ptr + 0);
2066
        for(i=0;i<6;i++) {
2067
            *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*1];
2068
            buf[i + 6*2] = MULH(out2[i + 6], win[i + 6]);
2069
            out_ptr += SBLIMIT;
2070
        }
2071
        imdct12(out2, ptr + 1);
2072
        for(i=0;i<6;i++) {
2073
            *out_ptr = MULH(out2[i], win[i]) + buf[i + 6*2];
2074
            buf[i + 6*0] = MULH(out2[i + 6], win[i + 6]);
2075
            out_ptr += SBLIMIT;
2076
        }
2077
        imdct12(out2, ptr + 2);
2078
        for(i=0;i<6;i++) {
2079
            buf[i + 6*0] = MULH(out2[i], win[i]) + buf[i + 6*0];
2080
            buf[i + 6*1] = MULH(out2[i + 6], win[i + 6]);
2081
            buf[i + 6*2] = 0;
2082
        }
2083
        ptr += 18;
2084
        buf += 18;
2085
    }
2086
    /* zero bands */
2087
    for(j=sblimit;j<SBLIMIT;j++) {
2088
        /* overlap */
2089
        out_ptr = sb_samples + j;
2090
        for(i=0;i<18;i++) {
2091
            *out_ptr = buf[i];
2092
            buf[i] = 0;
2093
            out_ptr += SBLIMIT;
2094
        }
2095
        buf += 18;
2096
    }
2097
}
2098

    
2099
#if defined(DEBUG)
/*
 * Debug helper: append n samples from tab to /tmp/out<fnum>.<hp|lp>.pcm.
 *   fnum: dump stream number, must be in [0, 15]; other values are ignored
 *   tab:  samples with FRAC_BITS fractional bits
 *   n:    number of samples
 * The file is opened on first use of a given fnum and kept open. Samples
 * are written as raw int32_t values normalized to 23 fractional bits.
 * For fnum == 0 the samples are additionally logged as text, 18 per line.
 */
void sample_dump(int fnum, int32_t *tab, int n)
{
    static FILE *files[16];
    FILE *f;
    char buf[512];
    int i;
    int32_t v;

    /* files[] has 16 slots; refuse to index outside of it */
    if (fnum < 0 || fnum >= (int)(sizeof(files) / sizeof(files[0])))
        return;

    f = files[fnum];
    if (!f) {
        snprintf(buf, sizeof(buf), "/tmp/out%d.%s.pcm",
                fnum,
#ifdef USE_HIGHPRECISION
                "hp"
#else
                "lp"
#endif
                );
        /* raw sample data: open in binary mode so that no newline
           translation is applied on platforms that distinguish it */
        f = fopen(buf, "wb");
        if (!f)
            return;
        files[fnum] = f;
    }

    if (fnum == 0) {
        static int pos = 0;
        av_log(NULL, AV_LOG_DEBUG, "pos=%d\n", pos);
        for(i=0;i<n;i++) {
            av_log(NULL, AV_LOG_DEBUG, " %0.4f", (double)tab[i] / FRAC_ONE);
            if ((i % 18) == 17)
                av_log(NULL, AV_LOG_DEBUG, "\n");
        }
        pos += n;
    }
    for(i=0;i<n;i++) {
        /* normalize to 23 frac bits; shift as unsigned because left
           shifting a negative signed value is undefined */
        v = (int32_t)((uint32_t)tab[i] << (23 - FRAC_BITS));
        fwrite(&v, 1, sizeof(int32_t), f);
    }
}
#endif
2140

    
2141

    
2142
/* main layer3 decoding function */
2143
static int mp_decode_layer3(MPADecodeContext *s)
2144
{
2145
    int nb_granules, main_data_begin, private_bits;
2146
    int gr, ch, blocksplit_flag, i, j, k, n, bits_pos, bits_left;
2147
    GranuleDef granules[2][2], *g;
2148
    int16_t exponents[576];
2149

    
2150
    /* read side info */
2151
    if (s->lsf) {
2152
        main_data_begin = get_bits(&s->gb, 8);
2153
        if (s->nb_channels == 2)
2154
            private_bits = get_bits(&s->gb, 2);
2155
        else
2156
            private_bits = get_bits(&s->gb, 1);
2157
        nb_granules = 1;
2158
    } else {
2159
        main_data_begin = get_bits(&s->gb, 9);
2160
        if (s->nb_channels == 2)
2161
            private_bits = get_bits(&s->gb, 3);
2162
        else
2163
            private_bits = get_bits(&s->gb, 5);
2164
        nb_granules = 2;
2165
        for(ch=0;ch<s->nb_channels;ch++) {
2166
            granules[ch][0].scfsi = 0; /* all scale factors are transmitted */
2167
            granules[ch][1].scfsi = get_bits(&s->gb, 4);
2168
        }
2169
    }
2170

    
2171
    for(gr=0;gr<nb_granules;gr++) {
2172
        for(ch=0;ch<s->nb_channels;ch++) {
2173
            dprintf("gr=%d ch=%d: side_info\n", gr, ch);
2174
            g = &granules[ch][gr];
2175
            g->part2_3_length = get_bits(&s->gb, 12);
2176
            g->big_values = get_bits(&s->gb, 9);
2177
            g->global_gain = get_bits(&s->gb, 8);
2178
            /* if MS stereo only is selected, we precompute the
2179
               1/sqrt(2) renormalization factor */
2180
            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) ==
2181
                MODE_EXT_MS_STEREO)
2182
                g->global_gain -= 2;
2183
            if (s->lsf)
2184
                g->scalefac_compress = get_bits(&s->gb, 9);
2185
            else
2186
                g->scalefac_compress = get_bits(&s->gb, 4);
2187
            blocksplit_flag = get_bits(&s->gb, 1);
2188
            if (blocksplit_flag) {
2189
                g->block_type = get_bits(&s->gb, 2);
2190
                if (g->block_type == 0)
2191
                    return -1;
2192
                g->switch_point = get_bits(&s->gb, 1);
2193
                for(i=0;i<2;i++)
2194
                    g->table_select[i] = get_bits(&s->gb, 5);
2195
                for(i=0;i<3;i++)
2196
                    g->subblock_gain[i] = get_bits(&s->gb, 3);
2197
                /* compute huffman coded region sizes */
2198
                if (g->block_type == 2)
2199
                    g->region_size[0] = (36 / 2);
2200
                else {
2201
                    if (s->sample_rate_index <= 2)
2202
                        g->region_size[0] = (36 / 2);
2203
                    else if (s->sample_rate_index != 8)
2204
                        g->region_size[0] = (54 / 2);
2205
                    else
2206
                        g->region_size[0] = (108 / 2);
2207
                }
2208
                g->region_size[1] = (576 / 2);
2209
            } else {
2210
                int region_address1, region_address2, l;
2211
                g->block_type = 0;
2212
                g->switch_point = 0;
2213
                for(i=0;i<3;i++)
2214
                    g->table_select[i] = get_bits(&s->gb, 5);
2215
                /* compute huffman coded region sizes */
2216
                region_address1 = get_bits(&s->gb, 4);
2217
                region_address2 = get_bits(&s->gb, 3);
2218
                dprintf("region1=%d region2=%d\n",
2219
                        region_address1, region_address2);
2220
                g->region_size[0] =
2221
                    band_index_long[s->sample_rate_index][region_address1 + 1] >> 1;
2222
                l = region_address1 + region_address2 + 2;
2223
                /* should not overflow */
2224
                if (l > 22)
2225
                    l = 22;
2226
                g->region_size[1] =
2227
                    band_index_long[s->sample_rate_index][l] >> 1;
2228
            }
2229
            /* convert region offsets to region sizes and truncate
2230
               size to big_values */
2231
            g->region_size[2] = (576 / 2);
2232
            j = 0;
2233
            for(i=0;i<3;i++) {
2234
                k = FFMIN(g->region_size[i], g->big_values);
2235
                g->region_size[i] = k - j;
2236
                j = k;
2237
            }
2238

    
2239
            /* compute band indexes */
2240
            if (g->block_type == 2) {
2241
                if (g->switch_point) {
2242
                    /* if switched mode, we handle the 36 first samples as
2243
                       long blocks.  For 8000Hz, we handle the 48 first
2244
                       exponents as long blocks (XXX: check this!) */
2245
                    if (s->sample_rate_index <= 2)
2246
                        g->long_end = 8;
2247
                    else if (s->sample_rate_index != 8)
2248
                        g->long_end = 6;
2249
                    else
2250
                        g->long_end = 4; /* 8000 Hz */
2251

    
2252
                    g->short_start = 2 + (s->sample_rate_index != 8);
2253
                } else {
2254
                    g->long_end = 0;
2255
                    g->short_start = 0;
2256
                }
2257
            } else {
2258
                g->short_start = 13;
2259
                g->long_end = 22;
2260
            }
2261

    
2262
            g->preflag = 0;
2263
            if (!s->lsf)
2264
                g->preflag = get_bits(&s->gb, 1);
2265
            g->scalefac_scale = get_bits(&s->gb, 1);
2266
            g->count1table_select = get_bits(&s->gb, 1);
2267
            dprintf("block_type=%d switch_point=%d\n",
2268
                    g->block_type, g->switch_point);
2269
        }
2270
    }
2271

    
2272
  if (!s->adu_mode) {
2273
    /* now we get bits from the main_data_begin offset */
2274
    dprintf("seekback: %d\n", main_data_begin);
2275
    seek_to_maindata(s, main_data_begin);
2276
  }
2277

    
2278
    for(gr=0;gr<nb_granules;gr++) {
2279
        for(ch=0;ch<s->nb_channels;ch++) {
2280
            g = &granules[ch][gr];
2281

    
2282
            bits_pos = get_bits_count(&s->gb);
2283

    
2284
            if (!s->lsf) {
2285
                uint8_t *sc;
2286
                int slen, slen1, slen2;
2287

    
2288
                /* MPEG1 scale factors */
2289
                slen1 = slen_table[0][g->scalefac_compress];
2290
                slen2 = slen_table[1][g->scalefac_compress];
2291
                dprintf("slen1=%d slen2=%d\n", slen1, slen2);
2292
                if (g->block_type == 2) {
2293
                    n = g->switch_point ? 17 : 18;
2294
                    j = 0;
2295
                    for(i=0;i<n;i++)
2296
                        g->scale_factors[j++] = get_bitsz(&s->gb, slen1);
2297
                    for(i=0;i<18;i++)
2298
                        g->scale_factors[j++] = get_bitsz(&s->gb, slen2);
2299
                    for(i=0;i<3;i++)
2300
                        g->scale_factors[j++] = 0;
2301
                } else {
2302
                    sc = granules[ch][0].scale_factors;
2303
                    j = 0;
2304
                    for(k=0;k<4;k++) {
2305
                        n = (k == 0 ? 6 : 5);
2306
                        if ((g->scfsi & (0x8 >> k)) == 0) {
2307
                            slen = (k < 2) ? slen1 : slen2;
2308
                            for(i=0;i<n;i++)
2309
                                g->scale_factors[j++] = get_bitsz(&s->gb, slen);
2310
                        } else {
2311
                            /* simply copy from last granule */
2312
                            for(i=0;i<n;i++) {
2313
                                g->scale_factors[j] = sc[j];
2314
                                j++;
2315
                            }
2316
                        }
2317
                    }
2318
                    g->scale_factors[j++] = 0;
2319
                }
2320
#if defined(DEBUG)
2321
                {
2322
                    dprintf("scfsi=%x gr=%d ch=%d scale_factors:\n",
2323
                           g->scfsi, gr, ch);
2324
                    for(i=0;i<j;i++)
2325
                        dprintf(" %d", g->scale_factors[i]);
2326
                    dprintf("\n");
2327
                }
2328
#endif
2329
            } else {
2330
                int tindex, tindex2, slen[4], sl, sf;
2331

    
2332
                /* LSF scale factors */
2333
                if (g->block_type == 2) {
2334
                    tindex = g->switch_point ? 2 : 1;
2335
                } else {
2336
                    tindex = 0;
2337
                }
2338
                sf = g->scalefac_compress;
2339
                if ((s->mode_ext & MODE_EXT_I_STEREO) && ch == 1) {
2340
                    /* intensity stereo case */
2341
                    sf >>= 1;
2342
                    if (sf < 180) {
2343
                        lsf_sf_expand(slen, sf, 6, 6, 0);
2344
                        tindex2 = 3;
2345
                    } else if (sf < 244) {
2346
                        lsf_sf_expand(slen, sf - 180, 4, 4, 0);
2347
                        tindex2 = 4;
2348
                    } else {
2349
                        lsf_sf_expand(slen, sf - 244, 3, 0, 0);
2350
                        tindex2 = 5;
2351
                    }
2352
                } else {
2353
                    /* normal case */
2354
                    if (sf < 400) {
2355
                        lsf_sf_expand(slen, sf, 5, 4, 4);
2356
                        tindex2 = 0;
2357
                    } else if (sf < 500) {
2358
                        lsf_sf_expand(slen, sf - 400, 5, 4, 0);
2359
                        tindex2 = 1;
2360
                    } else {
2361
                        lsf_sf_expand(slen, sf - 500, 3, 0, 0);
2362
                        tindex2 = 2;
2363
                        g->preflag = 1;
2364
                    }
2365
                }
2366

    
2367
                j = 0;
2368
                for(k=0;k<4;k++) {
2369
                    n = lsf_nsf_table[tindex2][tindex][k];
2370
                    sl = slen[k];
2371
                    for(i=0;i<n;i++)
2372
                        g->scale_factors[j++] = get_bitsz(&s->gb, sl);
2373
                }
2374
                /* XXX: should compute exact size */
2375
                for(;j<40;j++)
2376
                    g->scale_factors[j] = 0;
2377
#if defined(DEBUG)
2378
                {
2379
                    dprintf("gr=%d ch=%d scale_factors:\n",
2380
                           gr, ch);
2381
                    for(i=0;i<40;i++)
2382
                        dprintf(" %d", g->scale_factors[i]);
2383
                    dprintf("\n");
2384
                }
2385
#endif
2386
            }
2387

    
2388
            exponents_from_scale_factors(s, g, exponents);
2389

    
2390
            /* read Huffman coded residue */
2391
            if (huffman_decode(s, g, exponents,
2392
                               bits_pos + g->part2_3_length) < 0)
2393
                return -1;
2394
#if defined(DEBUG)
2395
            sample_dump(0, g->sb_hybrid, 576);
2396
#endif
2397

    
2398
            /* skip extension bits */
2399
            bits_left = g->part2_3_length - (get_bits_count(&s->gb) - bits_pos);
2400
            if (bits_left < 0) {
2401
                dprintf("bits_left=%d\n", bits_left);
2402
                return -1;
2403
            }
2404
            while (bits_left >= 16) {
2405
                skip_bits(&s->gb, 16);
2406
                bits_left -= 16;
2407
            }
2408
            if (bits_left > 0)
2409
                skip_bits(&s->gb, bits_left);
2410
        } /* ch */
2411

    
2412
        if (s->nb_channels == 2)
2413
            compute_stereo(s, &granules[0][gr], &granules[1][gr]);
2414

    
2415
        for(ch=0;ch<s->nb_channels;ch++) {
2416
            g = &granules[ch][gr];
2417

    
2418
            reorder_block(s, g);
2419
#if defined(DEBUG)
2420
            sample_dump(0, g->sb_hybrid, 576);
2421
#endif
2422
            s->compute_antialias(s, g);
2423
#if defined(DEBUG)
2424
            sample_dump(1, g->sb_hybrid, 576);
2425
#endif
2426
            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
2427
#if defined(DEBUG)
2428
            sample_dump(2, &s->sb_samples[ch][18 * gr][0], 576);
2429
#endif
2430
        }
2431
    } /* gr */
2432
    return nb_granules * 18;
2433
}
2434

    
2435
static int mp_decode_frame(MPADecodeContext *s,
2436
                           OUT_INT *samples)
2437
{
2438
    int i, nb_frames, ch;
2439
    OUT_INT *samples_ptr;
2440

    
2441
    init_get_bits(&s->gb, s->inbuf + HEADER_SIZE,
2442
                  (s->inbuf_ptr - s->inbuf - HEADER_SIZE)*8);
2443

    
2444
    /* skip error protection field */
2445
    if (s->error_protection)
2446
        get_bits(&s->gb, 16);
2447

    
2448
    dprintf("frame %d:\n", s->frame_count);
2449
    switch(s->layer) {
2450
    case 1:
2451
        nb_frames = mp_decode_layer1(s);
2452
        break;
2453
    case 2:
2454
        nb_frames = mp_decode_layer2(s);
2455
        break;
2456
    case 3:
2457
    default:
2458
        nb_frames = mp_decode_layer3(s);
2459
        break;
2460
    }
2461
#if defined(DEBUG)
2462
    for(i=0;i<nb_frames;i++) {
2463
        for(ch=0;ch<s->nb_channels;ch++) {
2464
            int j;
2465
            dprintf("%d-%d:", i, ch);
2466
            for(j=0;j<SBLIMIT;j++)
2467
                dprintf(" %0.6f", (double)s->sb_samples[ch][i][j] / FRAC_ONE);
2468
            dprintf("\n");
2469
        }
2470
    }
2471
#endif
2472
    /* apply the synthesis filter */
2473
    for(ch=0;ch<s->nb_channels;ch++) {
2474
        samples_ptr = samples + ch;
2475
        for(i=0;i<nb_frames;i++) {
2476
            ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
2477
                         window, &s->dither_state,
2478
                         samples_ptr, s->nb_channels,
2479
                         s->sb_samples[ch][i]);
2480
            samples_ptr += 32 * s->nb_channels;
2481
        }
2482
    }
2483
#ifdef DEBUG
2484
    s->frame_count++;
2485
#endif
2486
    return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels;
2487
}
2488

    
2489
/*
 * Feed input bytes to the decoder and output at most one frame.
 *   data:      output sample buffer (or, with avctx->parse_only, receives
 *              a pointer to the buffered raw frame)
 *   data_size: set to the number of output bytes when a frame is produced
 *              without error; left untouched otherwise
 *   buf, buf_size: input bytes
 * Input is accumulated in s->inbuf. s->frame_size drives the state:
 *    0  no header yet: collect HEADER_SIZE bytes, resync byte by byte
 *   -1  free format: search the next matching header to learn the size
 *   >0  collect the rest of the frame, then decode it
 * Returns the number of input bytes consumed.
 */
static int decode_frame(AVCodecContext * avctx,
                        void *data, int *data_size,
                        uint8_t * buf, int buf_size)
{
    MPADecodeContext *s = avctx->priv_data;
    uint32_t header;
    uint8_t *buf_ptr;
    int len, out_size;
    OUT_INT *out_samples = data;

    buf_ptr = buf;
    while (buf_size > 0) {
        len = s->inbuf_ptr - s->inbuf;
        if (s->frame_size == 0) {
            /* special case for next header for first frame in free
               format case (XXX: find a simpler method) */
            if (s->free_format_next_header != 0) {
                s->inbuf[0] = s->free_format_next_header >> 24;
                s->inbuf[1] = s->free_format_next_header >> 16;
                s->inbuf[2] = s->free_format_next_header >> 8;
                s->inbuf[3] = s->free_format_next_header;
                s->inbuf_ptr = s->inbuf + 4;
                s->free_format_next_header = 0;
                goto got_header;
            }
            /* no header seen : find one. We need at least HEADER_SIZE
               bytes to parse it */
            len = HEADER_SIZE - len;
            if (len > buf_size)
                len = buf_size;
            if (len > 0) {
                memcpy(s->inbuf_ptr, buf_ptr, len);
                buf_ptr += len;
                buf_size -= len;
                s->inbuf_ptr += len;
            }
            if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
            got_header:
                /* assemble big-endian; the bytes are widened to uint32_t
                   first because shifting a promoted int by 24 overflows
                   for values >= 0x80 */
                header = ((uint32_t)s->inbuf[0] << 24) |
                         ((uint32_t)s->inbuf[1] << 16) |
                         ((uint32_t)s->inbuf[2] << 8) |
                          (uint32_t)s->inbuf[3];

                if (ff_mpa_check_header(header) < 0) {
                    /* no sync found : move by one byte (inefficient, but simple!) */
                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
                    s->inbuf_ptr--;
                    dprintf("skip %x\n", header);
                    /* reset free format frame size to give a chance
                       to get a new bitrate */
                    s->free_format_frame_size = 0;
                } else {
                    if (decode_header(s, header) == 1) {
                        /* free format: prepare to compute frame size */
                        s->frame_size = -1;
                    }
                    /* update codec info */
                    avctx->sample_rate = s->sample_rate;
                    avctx->channels = s->nb_channels;
                    avctx->bit_rate = s->bit_rate;
                    avctx->sub_id = s->layer;
                    switch(s->layer) {
                    case 1:
                        avctx->frame_size = 384;
                        break;
                    case 2:
                        avctx->frame_size = 1152;
                        break;
                    case 3:
                        if (s->lsf)
                            avctx->frame_size = 576;
                        else
                            avctx->frame_size = 1152;
                        break;
                    }
                }
            }
        } else if (s->frame_size == -1) {
            /* free format : find next sync to compute frame size */
            len = MPA_MAX_CODED_FRAME_SIZE - len;
            if (len > buf_size)
                len = buf_size;
            if (len == 0) {
                /* frame too long: resync */
                s->frame_size = 0;
                memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
                s->inbuf_ptr--;
            } else {
                uint8_t *p, *pend;
                uint32_t header1;
                int padding;

                memcpy(s->inbuf_ptr, buf_ptr, len);
                /* header of the frame being measured; the search below
                   never modifies s->inbuf, so compute it once */
                header1 = ((uint32_t)s->inbuf[0] << 24) |
                          ((uint32_t)s->inbuf[1] << 16) |
                          ((uint32_t)s->inbuf[2] << 8) |
                           (uint32_t)s->inbuf[3];
                /* check for header; start 3 bytes back so that a header
                   straddling the previous and the new data is seen */
                p = s->inbuf_ptr - 3;
                pend = s->inbuf_ptr + len - 4;
                while (p <= pend) {
                    header = ((uint32_t)p[0] << 24) |
                             ((uint32_t)p[1] << 16) |
                             ((uint32_t)p[2] << 8) |
                              (uint32_t)p[3];
                    /* check with high probability that we have a
                       valid header */
                    if ((header & SAME_HEADER_MASK) ==
                        (header1 & SAME_HEADER_MASK)) {
                        /* header found: update pointers */
                        len = (p + 4) - s->inbuf_ptr;
                        buf_ptr += len;
                        buf_size -= len;
                        s->inbuf_ptr = p;
                        /* compute frame size */
                        s->free_format_next_header = header;
                        s->free_format_frame_size = s->inbuf_ptr - s->inbuf;
                        padding = (header1 >> 9) & 1;
                        if (s->layer == 1)
                            s->free_format_frame_size -= padding * 4;
                        else
                            s->free_format_frame_size -= padding;
                        dprintf("free frame size=%d padding=%d\n",
                                s->free_format_frame_size, padding);
                        decode_header(s, header1);
                        goto next_data;
                    }
                    p++;
                }
                /* not found: simply increase pointers */
                buf_ptr += len;
                s->inbuf_ptr += len;
                buf_size -= len;
            }
        } else if (len < s->frame_size) {
            /* never buffer more than the input buffer can hold */
            if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
                s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
            len = s->frame_size - len;
            if (len > buf_size)
                len = buf_size;
            memcpy(s->inbuf_ptr, buf_ptr, len);
            buf_ptr += len;
            s->inbuf_ptr += len;
            buf_size -= len;
        }
    next_data:
        if (s->frame_size > 0 &&
            (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
            if (avctx->parse_only) {
                /* simply return the frame data */
                *(uint8_t **)data = s->inbuf;
                out_size = s->inbuf_ptr - s->inbuf;
            } else {
                out_size = mp_decode_frame(s, out_samples);
            }
            /* the buffered frame is consumed whether or not it decoded */
            s->inbuf_ptr = s->inbuf;
            s->frame_size = 0;
            if(out_size>=0)
                *data_size = out_size;
            else
                av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
            break;
        }
    }
    return buf_ptr - buf;
}
2649

    
2650

    
2651
/**
 * Decode one ADU-framed MP3 packet.
 *
 * The packet is copied into the decoder input buffer (truncated to
 * MPA_MAX_CODED_FRAME_SIZE bytes), the sync word is restored in the header,
 * and the frame is then either handed back unmodified (parse_only mode) or
 * decoded into the output buffer.
 *
 * @param avctx     codec context, its priv_data is the MPADecodeContext
 * @param data      output sample buffer; in parse_only mode it instead
 *                  receives a pointer to the internal copy of the frame
 * @param data_size receives the number of bytes produced, 0 when the packet
 *                  is discarded or decoding fails
 * @param buf       input packet
 * @param buf_size  size of the input packet in bytes
 * @return buf_size; the whole packet is always reported as consumed
 */
static int decode_frame_adu(AVCodecContext * avctx,
                        void *data, int *data_size,
                        uint8_t * buf, int buf_size)
{
    MPADecodeContext *s = avctx->priv_data;
    uint32_t header;
    int len, out_size;
    OUT_INT *out_samples = data;

    // Discard too short frames
    if (buf_size < HEADER_SIZE) {
        *data_size = 0;
        return buf_size;
    }

    // Never copy more than the input buffer can hold
    len = buf_size;
    if (len > MPA_MAX_CODED_FRAME_SIZE)
        len = MPA_MAX_CODED_FRAME_SIZE;

    memcpy(s->inbuf, buf, len);
    s->inbuf_ptr = s->inbuf + len;

    // Get header and restore sync word.
    // The bytes are widened to uint32_t first: shifting a promoted (signed)
    // int left by 24 is undefined once the top bit of the byte is set.
    header = ((uint32_t)s->inbuf[0] << 24) | ((uint32_t)s->inbuf[1] << 16) |
             ((uint32_t)s->inbuf[2] <<  8) |  (uint32_t)s->inbuf[3] |
             0xffe00000;

    if (ff_mpa_check_header(header) < 0) { // Bad header, discard frame
        *data_size = 0;
        return buf_size;
    }

    decode_header(s, header);
    /* update codec info */
    avctx->sample_rate = s->sample_rate;
    avctx->channels = s->nb_channels;
    avctx->bit_rate = s->bit_rate;
    avctx->sub_id = s->layer;

    // The frame size is the (possibly truncated) packet size, not the value
    // decode_header() derived from the header.
    avctx->frame_size = s->frame_size = len;

    if (avctx->parse_only) {
        /* simply return the frame data */
        *(uint8_t **)data = s->inbuf;
        out_size = s->inbuf_ptr - s->inbuf;
    } else {
        out_size = mp_decode_frame(s, out_samples);
    }

    // Never report a negative amount of output to the caller; the regular
    // decode_frame() path guards against a negative result the same way.
    if (out_size < 0) {
        av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n");
        out_size = 0;
    }

    *data_size = out_size;
    return buf_size;
}
2704

    
2705

    
2706
/*
 * The three tables below are indexed by the channel configuration number
 * (passed via codecdata). Entries that are not listed explicitly are
 * zero-initialized.
 */

/* number of mp3 decoder instances */
static int mp3Frames[16] = {
    0, 1, 1, 2, 3, 3, 4, 5, 2
};

/* total output channels */
static int mp3Channels[16] = {
    0, 1, 2, 3, 4, 5, 6, 8, 4
};

/* offsets into output buffer, assume output order is FL FR BL BR C LFE */
static int chan_offset[9][5] = {
    { 0 },
    { 0 },                /* C */
    { 0 },                /* FLR */
    { 2, 0 },             /* C FLR */
    { 2, 0, 3 },          /* C FLR BS */
    { 4, 0, 2 },          /* C FLR BLRS */
    { 4, 0, 2, 5 },       /* C FLR BLRS LFE */
    { 4, 0, 2, 6, 5 },    /* C FLR BLRS BLR LFE */
    { 0, 2 }              /* FLR BLRS */
};
2721

    
2722

    
2723
/**
 * Initialize the mp3on4 decoder.
 *
 * The channel configuration is read from bits 3..6 of the second extradata
 * byte; it selects how many MPADecodeContext instances are needed and how
 * many output channels are produced. One context is allocated per instance
 * and all of them are switched to ADU mode.
 *
 * @param avctx codec context, its priv_data is the MP3On4DecodeContext
 * @return 0 on success, -1 on missing/invalid extradata, allocation failure
 *         or failure of decode_init(). On failure every context allocated
 *         here is released again and s->frames is reset to 0.
 */
static int decode_init_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    int i, ret;
    int allocated = 0; /* number of entries of s->mp3decctx[] that are valid */

    if ((avctx->extradata_size < 2) || (avctx->extradata == NULL)) {
        av_log(avctx, AV_LOG_ERROR, "Codec extradata missing or too short.\n");
        return -1;
    }

    s->chan_cfg = (((unsigned char *)avctx->extradata)[1] >> 3) & 0x0f;
    s->frames = mp3Frames[s->chan_cfg];
    if(!s->frames) {
        av_log(avctx, AV_LOG_ERROR, "Invalid channel config number.\n");
        return -1;
    }
    avctx->channels = mp3Channels[s->chan_cfg];

    /* Init the first mp3 decoder in the standard way, so that all tables get
     * built. We replace avctx->priv_data with the context of the first
     * decoder so that decode_init() does not have to be changed.
     * Other decoders will be inited here copying data from the first context
     */
    // Allocate zeroed memory for the first decoder context
    s->mp3decctx[0] = av_mallocz(sizeof(MPADecodeContext));
    if (!s->mp3decctx[0])
        goto alloc_fail;
    allocated = 1;

    // Put decoder context in place to make decode_init() happy
    avctx->priv_data = s->mp3decctx[0];
    ret = decode_init(avctx);
    // Restore mp3on4 context pointer (also on failure)
    avctx->priv_data = s;
    if (ret < 0)
        goto fail;
    s->mp3decctx[0]->adu_mode = 1; // Set adu mode

    /* Create a separate codec/context for each frame (first is already ok).
     * Each frame is 1 or 2 channels - up to 5 frames allowed
     */
    for (i = 1; i < s->frames; i++) {
        s->mp3decctx[i] = av_mallocz(sizeof(MPADecodeContext));
        if (!s->mp3decctx[i])
            goto alloc_fail;
        allocated = i + 1;

        s->mp3decctx[i]->compute_antialias = s->mp3decctx[0]->compute_antialias;
        s->mp3decctx[i]->inbuf = &s->mp3decctx[i]->inbuf1[0][BACKSTEP_SIZE];
        s->mp3decctx[i]->inbuf_ptr = s->mp3decctx[i]->inbuf;
        s->mp3decctx[i]->adu_mode = 1;
    }

    return 0;

alloc_fail:
    av_log(avctx, AV_LOG_ERROR, "Cannot allocate mp3on4 decoder context.\n");
fail:
    /* Undo the partial setup; with frames reset to 0 a later
     * decode_close_mp3on4() call has nothing left to free. */
    for (i = 0; i < allocated; i++) {
        av_free(s->mp3decctx[i]);
        s->mp3decctx[i] = NULL;
    }
    s->frames = 0;
    return -1;
}
2768

    
2769

    
2770
/**
 * Release the per-frame decoder contexts owned by the mp3on4 decoder.
 *
 * @param avctx codec context, its priv_data is the MP3On4DecodeContext
 * @return always 0
 */
static int decode_close_mp3on4(AVCodecContext * avctx)
{
    MP3On4DecodeContext *ctx = avctx->priv_data;
    int idx = 0;

    /* walk over the s->frames sub-decoders and free those that exist */
    while (idx < ctx->frames) {
        MPADecodeContext *sub = ctx->mp3decctx[idx];

        if (sub != NULL)
            av_free(sub);
        idx++;
    }

    return 0;
}
2781

    
2782

    
2783
/**
 * Decode one mp3on4 packet.
 *
 * A packet carries s->frames mp3 sub-frames back to back. The first 12 bits
 * of each sub-frame hold its size in bytes; they are replaced by an all-ones
 * sync word before the header is parsed. Every sub-frame is decoded by its
 * own MPADecodeContext. With more than one sub-frame the decoded samples are
 * interleaved into the output buffer at the channel offsets given by
 * chan_offset[]; with a single sub-frame the decoder writes straight into
 * the output buffer.
 *
 * @param avctx     codec context, its priv_data is the MP3On4DecodeContext
 * @param data      output sample buffer
 * @param data_size receives the number of bytes produced, 0 when the packet
 *                  is discarded
 * @param buf       input packet
 * @param buf_size  size of the input packet in bytes
 * @return buf_size; the whole packet is always reported as consumed
 */
static int decode_frame_mp3on4(AVCodecContext * avctx,
                        void *data, int *data_size,
                        uint8_t * buf, int buf_size)
{
    MP3On4DecodeContext *s = avctx->priv_data;
    MPADecodeContext *m;
    int len, out_size = 0;
    uint32_t header;
    OUT_INT *out_samples = data;
    OUT_INT decoded_buf[MPA_FRAME_SIZE * MPA_MAX_CHANNELS];
    OUT_INT *outptr, *bp;
    int fsize;
    unsigned char *start2 = buf, *start;
    int fr, i, j, n;
    int off = avctx->channels;
    int *coff = chan_offset[s->chan_cfg];

    len = buf_size;

    // Discard too short frames
    if (buf_size < HEADER_SIZE) {
        *data_size = 0;
        return buf_size;
    }

    // If only one decoder interleave is not needed
    outptr = s->frames == 1 ? out_samples : decoded_buf;

    for (fr = 0; fr < s->frames; fr++) {
        start = start2;

        // The size field and the header must lie inside the packet;
        // a truncated packet is discarded instead of being read past its end.
        if (len < HEADER_SIZE) {
            *data_size = 0;
            return buf_size;
        }

        fsize = (start[0] << 4) | (start[1] >> 4);
        if (fsize > len)
            fsize = len;
        // A sub-frame shorter than its own header cannot be decoded
        if (fsize < HEADER_SIZE) {
            *data_size = 0;
            return buf_size;
        }
        // Advance by the clamped size so the pointer never leaves the packet
        start2 += fsize;
        len -= fsize;
        if (fsize > MPA_MAX_CODED_FRAME_SIZE)
            fsize = MPA_MAX_CODED_FRAME_SIZE;
        m = s->mp3decctx[fr];
        assert (m != NULL);
        /* copy original to new */
        m->inbuf_ptr = m->inbuf + fsize;
        memcpy(m->inbuf, start, fsize);

        // Get header; the top 12 bits (size field) become the sync word.
        // Bytes are widened first, a signed shift into the sign bit is
        // undefined.
        header = ((uint32_t)m->inbuf[0] << 24) | ((uint32_t)m->inbuf[1] << 16) |
                 ((uint32_t)m->inbuf[2] <<  8) |  (uint32_t)m->inbuf[3] |
                 0xfff00000;

        if (ff_mpa_check_header(header) < 0) { // Bad header, discard block
            *data_size = 0;
            return buf_size;
        }

        decode_header(m, header);
        // Decode into outptr: the caller's buffer when there is a single
        // decoder, the scratch buffer when interleaving is required.
        mp_decode_frame(m, outptr);

        n = MPA_FRAME_SIZE * m->nb_channels;
        out_size += n * sizeof(OUT_INT);
        if(s->frames > 1) {
            /* interleave output data */
            bp = out_samples + coff[fr];
            if(m->nb_channels == 1) {
                for(j = 0; j < n; j++) {
                    *bp = decoded_buf[j];
                    bp += off;
                }
            } else {
                /* two samples (one per channel) per output step */
                for(j = 0; j < n; j += 2) {
                    bp[0] = decoded_buf[j];
                    bp[1] = decoded_buf[j + 1];
                    bp += off;
                }
            }
        }
    }

    /* update codec info */
    avctx->sample_rate = s->mp3decctx[0]->sample_rate;
    avctx->frame_size= buf_size;
    avctx->bit_rate = 0;
    for (i = 0; i < s->frames; i++)
        avctx->bit_rate += s->mp3decctx[i]->bit_rate;

    *data_size = out_size;
    return buf_size;
}
2868

    
2869

    
2870
AVCodec mp2_decoder =
2871
{
2872
    "mp2",
2873
    CODEC_TYPE_AUDIO,
2874
    CODEC_ID_MP2,
2875
    sizeof(MPADecodeContext),
2876
    decode_init,
2877
    NULL,
2878
    NULL,
2879
    decode_frame,
2880
    CODEC_CAP_PARSE_ONLY,
2881
};
2882

    
2883
AVCodec mp3_decoder =
2884
{
2885
    "mp3",
2886
    CODEC_TYPE_AUDIO,
2887
    CODEC_ID_MP3,
2888
    sizeof(MPADecodeContext),
2889
    decode_init,
2890
    NULL,
2891
    NULL,
2892
    decode_frame,
2893
    CODEC_CAP_PARSE_ONLY,
2894
};
2895

    
2896
AVCodec mp3adu_decoder =
2897
{
2898
    "mp3adu",
2899
    CODEC_TYPE_AUDIO,
2900
    CODEC_ID_MP3ADU,
2901
    sizeof(MPADecodeContext),
2902
    decode_init,
2903
    NULL,
2904
    NULL,
2905
    decode_frame_adu,
2906
    CODEC_CAP_PARSE_ONLY,
2907
};
2908

    
2909
AVCodec mp3on4_decoder =
2910
{
2911
    "mp3on4",
2912
    CODEC_TYPE_AUDIO,
2913
    CODEC_ID_MP3ON4,
2914
    sizeof(MP3On4DecodeContext),
2915
    decode_init_mp3on4,
2916
    NULL,
2917
    decode_close_mp3on4,
2918
    decode_frame_mp3on4,
2919
    0
2920
};