Statistics
| Branch: | Revision:

ffmpeg / libavcodec / aacenc.c @ cda00def

History | View | Annotate | Download (11.4 KB)

1
/*
2
 * AAC encoder
3
 * Copyright (C) 2008 Konstantin Shishkov
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file aacenc.c
24
 * AAC encoder
25
 */
26

    
27
/***********************************
28
 *              TODOs:
29
 * psy model selection with some option
30
 * add sane pulse detection
31
 ***********************************/
32

    
33
#include "avcodec.h"
34
#include "bitstream.h"
35
#include "dsputil.h"
36
#include "mpeg4audio.h"
37

    
38
#include "aacpsy.h"
39
#include "aac.h"
40
#include "aactab.h"
41

    
42
/*
 * Band size tables for long (1024-coefficient) windows.
 * Each table lists consecutive band widths; the widths in every table add
 * up to 1024. The numeric suffix looks like the nominal sampling rate in
 * kHz -- NOTE(review): inferred from the names, confirm against the spec.
 */
static const uint8_t swb_size_1024_96[] = {
     4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,
    12, 12, 12, 12, 12,
    16, 16,
    24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
     4,  4,  4,  4,  4,  4,  4,
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 12, 12,
    16, 16, 16,
    20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 16,
    20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16,
    20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20,
    24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/*
 * Long-window band size table for each of the first twelve sample rate
 * indices; aac_encode_init() indexes this with the position of the sample
 * rate in ff_mpeg4audio_sample_rates[].
 */
static const uint8_t *swb_size_1024[] = {
    swb_size_1024_96, /* index  0 */
    swb_size_1024_96, /* index  1 */
    swb_size_1024_64, /* index  2 */
    swb_size_1024_48, /* index  3 */
    swb_size_1024_48, /* index  4 */
    swb_size_1024_32, /* index  5 */
    swb_size_1024_24, /* index  6 */
    swb_size_1024_24, /* index  7 */
    swb_size_1024_16, /* index  8 */
    swb_size_1024_16, /* index  9 */
    swb_size_1024_16, /* index 10 */
    swb_size_1024_8   /* index 11 */
};
91

    
92
/*
 * Band size tables for short (128-coefficient) windows.
 * The widths in every table add up to 128.
 */
static const uint8_t swb_size_128_96[] = {
     4,  4,  4,  4,  4,  4,
     8,  8,  8,
    16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
     4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12, 12,
    16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,
    12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
     4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,
    12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
     4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 16, 20, 20
};

/*
 * Short-window band size table for each of the first twelve sample rate
 * indices. Index 2 would be swb_size_128_64, but that table is a duplicate
 * of swb_size_128_96, so the latter is reused.
 */
static const uint8_t *swb_size_128[] = {
    swb_size_128_96, /* index  0 */
    swb_size_128_96, /* index  1 */
    swb_size_128_96, /* index  2 (stands in for swb_size_128_64) */
    swb_size_128_48, /* index  3 */
    swb_size_128_48, /* index  4 */
    swb_size_128_48, /* index  5 */
    swb_size_128_24, /* index  6 */
    swb_size_128_24, /* index  7 */
    swb_size_128_16, /* index  8 */
    swb_size_128_16, /* index  9 */
    swb_size_128_16, /* index 10 */
    swb_size_128_8   /* index 11 */
};
120

    
121
/**
 * Bits needed to code a codebook run value for long windows.
 * Entries 0..30 cost 5 bits, entries 31..62 cost 10 bits, entry 63 costs 15.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * Bits needed to code a codebook run value for short windows.
 * Entries 0..6 cost 3 bits, entries 7..14 cost 6 bits, entry 15 costs 9.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6,
    6, 6, 6, 6, 6, 6, 6, 9
};

/** Run value cost tables: [0] is the long-window table, [1] the short-window one. */
static const uint8_t *run_value_bits[2] = {
    run_value_bits_long,
    run_value_bits_short
};
137

    
138
/** default channel configurations */
139
static const uint8_t aac_chan_configs[6][5] = {
140
 {1, TYPE_SCE},                               // 1 channel  - single channel element
141
 {1, TYPE_CPE},                               // 2 channels - channel pair
142
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
143
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
144
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
145
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
146
};
147

    
148
/**
 * One node of the path built during the optimal codebook search.
 */
typedef struct BandCodingPath {
    int prev_idx; /**< index of the previous point on the path */
    int codebook; /**< codebook used to code this run of bands */
    int bits;     /**< number of bits needed to code the given number of bands */
} BandCodingPath;
156

    
157
/**
158
 * AAC encoder context
159
 */
160
typedef struct {
161
    PutBitContext pb;
162
    MDCTContext mdct1024;                        ///< long (1024 samples) frame transform context
163
    MDCTContext mdct128;                         ///< short (128 samples) frame transform context
164
    DSPContext  dsp;
165
    DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
166
    int16_t* samples;                            ///< saved preprocessed input
167

    
168
    int samplerate_index;                        ///< MPEG-4 samplerate index
169

    
170
    ChannelElement *cpe;                         ///< channel elements
171
    AACPsyContext psy;                           ///< psychoacoustic model context
172
    int last_frame;
173
} AACEncContext;
174

    
175
/**
176
 * Make AAC audio config object.
177
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
178
 */
179
static void put_audio_specific_config(AVCodecContext *avctx)
180
{
181
    PutBitContext pb;
182
    AACEncContext *s = avctx->priv_data;
183

    
184
    init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
185
    put_bits(&pb, 5, 2); //object type - AAC-LC
186
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
187
    put_bits(&pb, 4, avctx->channels);
188
    //GASpecificConfig
189
    put_bits(&pb, 1, 0); //frame length - 1024 samples
190
    put_bits(&pb, 1, 0); //does not depend on core coder
191
    put_bits(&pb, 1, 0); //is not extension
192
    flush_put_bits(&pb);
193
}
194

    
195
/**
 * Initialize the AAC encoder.
 *
 * Validates the sample rate and channel count, selects the band tables for
 * the sample rate, sets up the transforms and windows, allocates the sample
 * and channel element buffers, initializes the psychoacoustic model and
 * writes the 2-byte AudioSpecificConfig into avctx->extradata.
 *
 * @param avctx codec context to initialize
 * @return 0 on success, -1 on an unsupported parameter, allocation failure
 *         or psychoacoustic model failure; everything allocated here is
 *         released again before a failure is reported
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, chan_elems;

    avctx->frame_size = 1024;

    for(i = 0; i < 16; i++)
        if(avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
            break;
    // The band tables only cover the first entries of the sample rate list,
    // so a match beyond them (or no match at all, i == 16) is unsupported.
    if(i >= (int)(sizeof(swb_size_1024) / sizeof(swb_size_1024[0])) ||
       i >= (int)(sizeof(swb_size_128)  / sizeof(swb_size_128[0]))){
        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
        return -1;
    }
    // channels - 1 indexes aac_chan_configs[], so zero/negative counts must go too
    if(avctx->channels < 1 || avctx->channels > 6){
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
        return -1;
    }
    chan_elems = aac_chan_configs[avctx->channels-1][0];

    s->samplerate_index = i;
    s->swb_sizes1024 = swb_size_1024[i];
    s->swb_num1024   = ff_aac_num_swb_1024[i];
    s->swb_sizes128  = swb_size_128[i];
    s->swb_num128    = ff_aac_num_swb_128[i];

    dsputil_init(&s->dsp, avctx);
    ff_mdct_init(&s->mdct1024, 11, 0);
    ff_mdct_init(&s->mdct128,   8, 0);
    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_sine_window_init(ff_sine_1024, 1024);
    ff_sine_window_init(ff_sine_128, 128);

    s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
    s->cpe     = av_mallocz(sizeof(ChannelElement) * chan_elems);
    if(!s->samples || !s->cpe){
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers.\n");
        goto fail;
    }
    if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
                       chan_elems, 0,
                       s->swb_sizes1024, s->swb_num1024, s->swb_sizes128, s->swb_num128) < 0){
        av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
        goto fail;
    }
    avctx->extradata = av_malloc(2);
    if(!avctx->extradata){
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate extradata.\n");
        avctx->extradata_size = 0;
        ff_aac_psy_end(&s->psy);
        goto fail;
    }
    avctx->extradata_size = 2;
    put_audio_specific_config(avctx);
    return 0;

fail:
    // av_freep() accepts pointers to NULL, so partially completed setups are fine
    av_freep(&s->cpe);
    av_freep(&s->samples);
    ff_mdct_end(&s->mdct128);
    ff_mdct_end(&s->mdct1024);
    return -1;
}
241

    
242
/**
243
 * Encode ics_info element.
244
 * @see Table 4.6 (syntax of ics_info)
245
 */
246
static void put_ics_info(AVCodecContext *avctx, IndividualChannelStream *info)
247
{
248
    AACEncContext *s = avctx->priv_data;
249
    int i;
250

    
251
    put_bits(&s->pb, 1, 0);                // ics_reserved bit
252
    put_bits(&s->pb, 2, info->window_sequence[0]);
253
    put_bits(&s->pb, 1, info->use_kb_window[0]);
254
    if(info->window_sequence[0] != EIGHT_SHORT_SEQUENCE){
255
        put_bits(&s->pb, 6, info->max_sfb);
256
        put_bits(&s->pb, 1, 0);            // no prediction
257
    }else{
258
        put_bits(&s->pb, 4, info->max_sfb);
259
        for(i = 1; i < info->num_windows; i++)
260
            put_bits(&s->pb, 1, info->group_len[i]);
261
    }
262
}
263

    
264
/**
265
 * Encode pulse data.
266
 */
267
static void encode_pulses(AACEncContext *s, Pulse *pulse)
268
{
269
    int i;
270

    
271
    put_bits(&s->pb, 1, !!pulse->num_pulse);
272
    if(!pulse->num_pulse) return;
273

    
274
    put_bits(&s->pb, 2, pulse->num_pulse - 1);
275
    put_bits(&s->pb, 6, pulse->start);
276
    for(i = 0; i < pulse->num_pulse; i++){
277
        put_bits(&s->pb, 5, pulse->pos[i]);
278
        put_bits(&s->pb, 4, pulse->amp[i]);
279
    }
280
}
281

    
282
/**
283
 * Encode spectral coefficients processed by psychoacoustic model.
284
 */
285
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
286
{
287
    int start, i, w, w2, wg;
288

    
289
    w = 0;
290
    for(wg = 0; wg < sce->ics.num_window_groups; wg++){
291
        start = 0;
292
        for(i = 0; i < sce->ics.max_sfb; i++){
293
            if(sce->zeroes[w*16 + i]){
294
                start += sce->ics.swb_sizes[i];
295
                continue;
296
            }
297
            for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){
298
                encode_band_coeffs(s, cpe, channel, start + w2*128,
299
                                   sce->ics.swb_sizes[i],
300
                                   sce->band_type[w*16 + i]);
301
            }
302
            start += sce->ics.swb_sizes[i];
303
        }
304
        w += sce->ics.group_len[wg];
305
    }
306
}
307

    
308
/**
309
 * Write some auxiliary information about the created AAC file.
310
 */
311
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name)
312
{
313
    int i, namelen, padbits;
314

    
315
    namelen = strlen(name) + 2;
316
    put_bits(&s->pb, 3, TYPE_FIL);
317
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
318
    if(namelen >= 15)
319
        put_bits(&s->pb, 8, namelen - 16);
320
    put_bits(&s->pb, 4, 0); //extension type - filler
321
    padbits = 8 - (put_bits_count(&s->pb) & 7);
322
    align_put_bits(&s->pb);
323
    for(i = 0; i < namelen - 2; i++)
324
        put_bits(&s->pb, 8, name[i]);
325
    put_bits(&s->pb, 12 - padbits, 0);
326
}
327

    
328
/**
 * Release everything owned by the encoder context: both MDCT contexts, the
 * psychoacoustic model, the saved sample buffer and the channel elements.
 *
 * @param avctx codec context being closed
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);

    /* psychoacoustic model */
    ff_aac_psy_end(&enc->psy);

    /* buffers; av_freep() also resets the pointers to NULL */
    av_freep(&enc->samples);
    av_freep(&enc->cpe);

    return 0;
}
339

    
340
AVCodec aac_encoder = {
341
    "aac",
342
    CODEC_TYPE_AUDIO,
343
    CODEC_ID_AAC,
344
    sizeof(AACEncContext),
345
    aac_encode_init,
346
    aac_encode_frame,
347
    aac_encode_end,
348
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
349
    .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE},
350
    .long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
351
};