Statistics
| Branch: | Revision:

ffmpeg / libavcodec / aacenc.c @ 7d485f16

History | View | Annotate | Download (11.7 KB)

1
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21

    
22
/**
 * @file libavcodec/aacenc.c
 * AAC encoder
 */
26

    
27
/***********************************
 *              TODOs:
 * psy model selection with some option
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/
33

    
34
#include "avcodec.h"
35
#include "get_bits.h"
36
#include "dsputil.h"
37
#include "mpeg4audio.h"
38

    
39
#include "aacpsy.h"
40
#include "aac.h"
41
#include "aactab.h"
42

    
43
/*
 * Band size tables for long (1024 coefficient) windows.
 *
 * Each table lists the width, in coefficients, of consecutive bands; the
 * entries of every table add up to 1024. The numeric suffix presumably names
 * the sample rate (in kHz) the table was designed for.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/**
 * Long window band size tables, indexed by the MPEG-4 sample rate index
 * (the index found in ff_mpeg4audio_sample_rates by aac_encode_init()).
 * Only 12 indexes are covered; neighbouring rates share a table.
 */
static const uint8_t * const swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};
92

    
93
/*
 * Band size tables for short (128 coefficient) windows.
 *
 * Each table lists the width, in coefficients, of consecutive bands; the
 * entries of every table add up to 128.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

/**
 * Short window band size tables, indexed by the MPEG-4 sample rate index,
 * with the same 12 entry layout as the long window lookup table.
 */
static const uint8_t * const swb_size_128[] = {
    /* the last entry on the following row would be swb_size_128_64, but that
       table is a duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};
121

    
122
/**
 * bits needed to code codebook run value for long windows
 *
 * Indexed by run length: 5 bits up to 30, 10 bits up to 62, 15 bits for 63.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * bits needed to code codebook run value for short windows
 *
 * Indexed by run length: 3 bits up to 6, 6 bits up to 14, 9 bits for 15.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};

/** run value cost tables: entry 0 is for long windows, entry 1 for short ones */
static const uint8_t* const run_value_bits[2] = {
    run_value_bits_long, run_value_bits_short
};
138

    
139
/** default channel configurations */
140
static const uint8_t aac_chan_configs[6][5] = {
141
 {1, TYPE_SCE},                               // 1 channel  - single channel element
142
 {1, TYPE_CPE},                               // 2 channels - channel pair
143
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
144
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
145
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
146
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
147
};
148

    
149
/**
 * structure used in optimal codebook search
 */
typedef struct BandCodingPath {
    int prev_idx; ///< index of the previous path point
    int codebook; ///< codebook for coding band run
    int bits;     ///< number of bits needed to code given number of bands
} BandCodingPath;
157

    
158
/**
159
 * AAC encoder context
160
 */
161
typedef struct {
162
    PutBitContext pb;
163
    MDCTContext mdct1024;                        ///< long (1024 samples) frame transform context
164
    MDCTContext mdct128;                         ///< short (128 samples) frame transform context
165
    DSPContext  dsp;
166
    DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
167
    int16_t* samples;                            ///< saved preprocessed input
168

    
169
    int samplerate_index;                        ///< MPEG-4 samplerate index
170

    
171
    ChannelElement *cpe;                         ///< channel elements
172
    AACPsyContext psy;                           ///< psychoacoustic model context
173
    int last_frame;
174
} AACEncContext;
175

    
176
/**
177
 * Make AAC audio config object.
178
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
179
 */
180
static void put_audio_specific_config(AVCodecContext *avctx)
181
{
182
    PutBitContext pb;
183
    AACEncContext *s = avctx->priv_data;
184

    
185
    init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
186
    put_bits(&pb, 5, 2); //object type - AAC-LC
187
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
188
    put_bits(&pb, 4, avctx->channels);
189
    //GASpecificConfig
190
    put_bits(&pb, 1, 0); //frame length - 1024 samples
191
    put_bits(&pb, 1, 0); //does not depend on core coder
192
    put_bits(&pb, 1, 0); //is not extension
193
    flush_put_bits(&pb);
194
}
195

    
196
/**
 * Initialize the encoder.
 *
 * Validates the sample rate and channel count, sets up the transforms,
 * windows and sample buffers, builds the 2 byte extradata and initializes
 * the psychoacoustic model.
 *
 * @param avctx codec context; sample_rate and channels are read,
 *              frame_size, extradata and extradata_size are written
 * @return 0 on success, -1 if the parameters are unsupported, an allocation
 *         fails or the psychoacoustic model cannot be initialized; on
 *         failure everything allocated here is released again
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    /* Only sample rate indexes that have a band size table can be encoded;
     * swb_size_1024 and swb_size_128 have the same number of entries. */
    const int num_rates = sizeof(swb_size_1024) / sizeof(swb_size_1024[0]);
    int i, num_elements;

    avctx->frame_size = 1024;

    for(i = 0; i < num_rates; i++)
        if(avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
            break;
    if(i == num_rates){
        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
        return -1;
    }
    /* channels - 1 indexes aac_chan_configs, so both bounds matter */
    if(avctx->channels < 1 || avctx->channels > 6){
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
        return -1;
    }
    s->samplerate_index = i;
    num_elements        = aac_chan_configs[avctx->channels-1][0];

    dsputil_init(&s->dsp, avctx);
    ff_mdct_init(&s->mdct1024, 11, 0, 1.0);
    ff_mdct_init(&s->mdct128,   8, 0, 1.0);
    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_sine_window_init(ff_sine_1024, 1024);
    ff_sine_window_init(ff_sine_128, 128);

    s->samples       = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
    s->cpe           = av_mallocz(sizeof(ChannelElement) * num_elements);
    avctx->extradata = av_malloc(2);
    if(!s->samples || !s->cpe || !avctx->extradata){
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers.\n");
        goto fail;
    }
    avctx->extradata_size = 2;
    put_audio_specific_config(avctx);

    /* The model is set up last so that no later step can fail and require
     * tearing it down again. */
    if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
                       num_elements, 0,
                       swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){
        av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
        goto fail;
    }
    return 0;

fail:
    av_freep(&s->samples);
    av_freep(&s->cpe);
    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;
    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);
    return -1;
}
238

    
239
/**
240
 * Encode ics_info element.
241
 * @see Table 4.6 (syntax of ics_info)
242
 */
243
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
244
{
245
    int i;
246

    
247
    put_bits(&s->pb, 1, 0);                // ics_reserved bit
248
    put_bits(&s->pb, 2, info->window_sequence[0]);
249
    put_bits(&s->pb, 1, info->use_kb_window[0]);
250
    if(info->window_sequence[0] != EIGHT_SHORT_SEQUENCE){
251
        put_bits(&s->pb, 6, info->max_sfb);
252
        put_bits(&s->pb, 1, 0);            // no prediction
253
    }else{
254
        put_bits(&s->pb, 4, info->max_sfb);
255
        for(i = 1; i < info->num_windows; i++)
256
            put_bits(&s->pb, 1, info->group_len[i]);
257
    }
258
}
259

    
260
/**
261
 * Calculate the number of bits needed to code all coefficient signs in current band.
262
 */
263
static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce,
264
                                    int group_len, int start, int size)
265
{
266
    int bits = 0;
267
    int i, w;
268
    for(w = 0; w < group_len; w++){
269
        for(i = 0; i < size; i++){
270
            if(sce->icoefs[start + i])
271
                bits++;
272
        }
273
        start += 128;
274
    }
275
    return bits;
276
}
277

    
278
/**
279
 * Encode pulse data.
280
 */
281
static void encode_pulses(AACEncContext *s, Pulse *pulse)
282
{
283
    int i;
284

    
285
    put_bits(&s->pb, 1, !!pulse->num_pulse);
286
    if(!pulse->num_pulse) return;
287

    
288
    put_bits(&s->pb, 2, pulse->num_pulse - 1);
289
    put_bits(&s->pb, 6, pulse->start);
290
    for(i = 0; i < pulse->num_pulse; i++){
291
        put_bits(&s->pb, 5, pulse->pos[i]);
292
        put_bits(&s->pb, 4, pulse->amp[i]);
293
    }
294
}
295

    
296
/**
297
 * Encode spectral coefficients processed by psychoacoustic model.
298
 */
299
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
300
{
301
    int start, i, w, w2, wg;
302

    
303
    w = 0;
304
    for(wg = 0; wg < sce->ics.num_window_groups; wg++){
305
        start = 0;
306
        for(i = 0; i < sce->ics.max_sfb; i++){
307
            if(sce->zeroes[w*16 + i]){
308
                start += sce->ics.swb_sizes[i];
309
                continue;
310
            }
311
            for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){
312
                encode_band_coeffs(s, sce, start + w2*128,
313
                                   sce->ics.swb_sizes[i],
314
                                   sce->band_type[w*16 + i]);
315
            }
316
            start += sce->ics.swb_sizes[i];
317
        }
318
        w += sce->ics.group_len[wg];
319
    }
320
}
321

    
322
/**
323
 * Write some auxiliary information about the created AAC file.
324
 */
325
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name)
326
{
327
    int i, namelen, padbits;
328

    
329
    namelen = strlen(name) + 2;
330
    put_bits(&s->pb, 3, TYPE_FIL);
331
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
332
    if(namelen >= 15)
333
        put_bits(&s->pb, 8, namelen - 16);
334
    put_bits(&s->pb, 4, 0); //extension type - filler
335
    padbits = 8 - (put_bits_count(&s->pb) & 7);
336
    align_put_bits(&s->pb);
337
    for(i = 0; i < namelen - 2; i++)
338
        put_bits(&s->pb, 8, name[i]);
339
    put_bits(&s->pb, 12 - padbits, 0);
340
}
341

    
342
/**
 * Free everything that aac_encode_init() set up.
 *
 * @param avctx codec context of the encoder being closed
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);
    ff_aac_psy_end(&s->psy);
    av_freep(&s->samples);
    av_freep(&s->cpe);
    /* extradata is allocated by aac_encode_init(); av_freep() resets the
     * pointer, so a later free of the same field stays harmless */
    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;
    return 0;
}
353

    
354
AVCodec aac_encoder = {
355
    "aac",
356
    CODEC_TYPE_AUDIO,
357
    CODEC_ID_AAC,
358
    sizeof(AACEncContext),
359
    aac_encode_init,
360
    aac_encode_frame,
361
    aac_encode_end,
362
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
363
    .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
364
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
365
};