Revision 78e65cd7 libavcodec/aacenc.c

View differences:

libavcodec/aacenc.c
26 26

  
27 27
/***********************************
28 28
 *              TODOs:
29
 * psy model selection with some option
30 29
 * add sane pulse detection
31 30
 * add temporal noise shaping
32 31
 ***********************************/
33 32

  
34 33
#include "avcodec.h"
35
#include "get_bits.h"
34
#include "put_bits.h"
36 35
#include "dsputil.h"
37 36
#include "mpeg4audio.h"
38 37

  
39
#include "aacpsy.h"
40 38
#include "aac.h"
41 39
#include "aactab.h"
40
#include "aacenc.h"
41

  
42
#include "psymodel.h"
42 43

  
43 44
static const uint8_t swb_size_1024_96[] = {
44 45
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
......
83 84
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
84 85
};
85 86

  
86
static const uint8_t * const swb_size_1024[] = {
87
static const uint8_t *swb_size_1024[] = {
87 88
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
88 89
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
89 90
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
......
110 111
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
111 112
};
112 113

  
113
static const uint8_t * const swb_size_128[] = {
114
static const uint8_t *swb_size_128[] = {
114 115
    /* the last entry on the following row is swb_size_128_64 but is a
115 116
       duplicate of swb_size_128_96 */
116 117
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
......
119 120
    swb_size_128_16, swb_size_128_16, swb_size_128_8
120 121
};
121 122

  
122
/** bits needed to code codebook run value for long windows */
123
static const uint8_t run_value_bits_long[64] = {
124
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
125
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
126
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
127
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
128
};
129

  
130
/** bits needed to code codebook run value for short windows */
131
static const uint8_t run_value_bits_short[16] = {
132
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
133
};
134

  
135
static const uint8_t* const run_value_bits[2] = {
136
    run_value_bits_long, run_value_bits_short
137
};
138

  
139 123
/** default channel configurations */
140 124
static const uint8_t aac_chan_configs[6][5] = {
141 125
 {1, TYPE_SCE},                               // 1 channel  - single channel element
......
147 131
};
148 132

  
149 133
/**
150
 * structure used in optimal codebook search
151
 */
152
typedef struct BandCodingPath {
153
    int prev_idx; ///< pointer to the previous path point
154
    int codebook; ///< codebook for coding band run
155
    int bits;     ///< number of bits needed to code the given number of bands
156
} BandCodingPath;
157

  
158
/**
159
 * AAC encoder context
160
 */
161
typedef struct {
162
    PutBitContext pb;
163
    MDCTContext mdct1024;                        ///< long (1024 samples) frame transform context
164
    MDCTContext mdct128;                         ///< short (128 samples) frame transform context
165
    DSPContext  dsp;
166
    DECLARE_ALIGNED_16(FFTSample, output[2048]); ///< temporary buffer for MDCT input coefficients
167
    int16_t* samples;                            ///< saved preprocessed input
168

  
169
    int samplerate_index;                        ///< MPEG-4 samplerate index
170

  
171
    ChannelElement *cpe;                         ///< channel elements
172
    AACPsyContext psy;                           ///< psychoacoustic model context
173
    int last_frame;
174
} AACEncContext;
175

  
176
/**
177 134
 * Make AAC audio config object.
178 135
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
179 136
 */
......
197 154
{
198 155
    AACEncContext *s = avctx->priv_data;
199 156
    int i;
157
    const uint8_t *sizes[2];
158
    int lengths[2];
200 159

  
201 160
    avctx->frame_size = 1024;
202 161

  
......
224 183

  
225 184
    s->samples = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
226 185
    s->cpe = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
227
    if(ff_aac_psy_init(&s->psy, avctx, AAC_PSY_3GPP,
228
                       aac_chan_configs[avctx->channels-1][0], 0,
229
                       swb_size_1024[i], ff_aac_num_swb_1024[i], swb_size_128[i], ff_aac_num_swb_128[i]) < 0){
230
        av_log(avctx, AV_LOG_ERROR, "Cannot initialize selected model.\n");
231
        return -1;
232
    }
233 186
    avctx->extradata = av_malloc(2);
234 187
    avctx->extradata_size = 2;
235 188
    put_audio_specific_config(avctx);
189

  
190
    sizes[0] = swb_size_1024[i];
191
    sizes[1] = swb_size_128[i];
192
    lengths[0] = ff_aac_num_swb_1024[i];
193
    lengths[1] = ff_aac_num_swb_128[i];
194
    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
195
    s->psypp = ff_psy_preprocess_init(avctx);
196
    s->coder = &ff_aac_coders[0];
197

  
198
    s->lambda = avctx->global_quality ? avctx->global_quality : 120;
199
#if !CONFIG_HARDCODED_TABLES
200
    for (i = 0; i < 428; i++)
201
        ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
202
#endif /* CONFIG_HARDCODED_TABLES */
203

  
204
    if (avctx->channels > 5)
205
        av_log(avctx, AV_LOG_ERROR, "This encoder does not yet enforce the restrictions on LFEs. "
206
               "The output will most likely be an illegal bitstream.\n");
207

  
236 208
    return 0;
237 209
}
238 210

  
211
static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s,
212
                                  SingleChannelElement *sce, short *audio, int channel)
213
{
214
    int i, j, k;
215
    const float * lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
216
    const float * swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
217
    const float * pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
218

  
219
    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
220
        memcpy(s->output, sce->saved, sizeof(float)*1024);
221
        if(sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE){
222
            memset(s->output, 0, sizeof(s->output[0]) * 448);
223
            for(i = 448; i < 576; i++)
224
                s->output[i] = sce->saved[i] * pwindow[i - 448];
225
            for(i = 576; i < 704; i++)
226
                s->output[i] = sce->saved[i];
227
        }
228
        if(sce->ics.window_sequence[0] != LONG_START_SEQUENCE){
229
            j = channel;
230
            for (i = 0; i < 1024; i++, j += avctx->channels){
231
                s->output[i+1024]         = audio[j] * lwindow[1024 - i - 1];
232
                sce->saved[i] = audio[j] * lwindow[i];
233
            }
234
        }else{
235
            j = channel;
236
            for(i = 0; i < 448; i++, j += avctx->channels)
237
                s->output[i+1024]         = audio[j];
238
            for(i = 448; i < 576; i++, j += avctx->channels)
239
                s->output[i+1024]         = audio[j] * swindow[576 - i - 1];
240
            memset(s->output+1024+576, 0, sizeof(s->output[0]) * 448);
241
            j = channel;
242
            for(i = 0; i < 1024; i++, j += avctx->channels)
243
                sce->saved[i] = audio[j];
244
        }
245
        ff_mdct_calc(&s->mdct1024, sce->coeffs, s->output);
246
    }else{
247
        j = channel;
248
        for (k = 0; k < 1024; k += 128) {
249
            for(i = 448 + k; i < 448 + k + 256; i++)
250
                s->output[i - 448 - k] = (i < 1024)
251
                                         ? sce->saved[i]
252
                                         : audio[channel + (i-1024)*avctx->channels];
253
            s->dsp.vector_fmul        (s->output,     k ?  swindow : pwindow, 128);
254
            s->dsp.vector_fmul_reverse(s->output+128, s->output+128, swindow, 128);
255
            ff_mdct_calc(&s->mdct128, sce->coeffs + k, s->output);
256
        }
257
        j = channel;
258
        for(i = 0; i < 1024; i++, j += avctx->channels)
259
            sce->saved[i] = audio[j];
260
    }
261
}
262

  
239 263
/**
240 264
 * Encode ics_info element.
241 265
 * @see Table 4.6 (syntax of ics_info)
242 266
 */
243 267
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
244 268
{
245
    int i;
269
    int w;
246 270

  
247 271
    put_bits(&s->pb, 1, 0);                // ics_reserved bit
248 272
    put_bits(&s->pb, 2, info->window_sequence[0]);
......
252 276
        put_bits(&s->pb, 1, 0);            // no prediction
253 277
    }else{
254 278
        put_bits(&s->pb, 4, info->max_sfb);
255
        for(i = 1; i < info->num_windows; i++)
256
            put_bits(&s->pb, 1, info->group_len[i]);
279
        for(w = 1; w < 8; w++){
280
            put_bits(&s->pb, 1, !info->group_len[w]);
281
        }
257 282
    }
258 283
}
259 284

  
260 285
/**
261
 * Calculate the number of bits needed to code all coefficient signs in current band.
286
 * Encode MS data.
287
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
262 288
 */
263
static int calculate_band_sign_bits(AACEncContext *s, SingleChannelElement *sce,
264
                                    int group_len, int start, int size)
289
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
265 290
{
266
    int bits = 0;
267 291
    int i, w;
268
    for(w = 0; w < group_len; w++){
269
        for(i = 0; i < size; i++){
270
            if(sce->icoefs[start + i])
271
                bits++;
292

  
293
    put_bits(pb, 2, cpe->ms_mode);
294
    if(cpe->ms_mode == 1){
295
        for(w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]){
296
            for(i = 0; i < cpe->ch[0].ics.max_sfb; i++)
297
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
298
        }
299
    }
300
}
301

  
302
/**
303
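 * Apply M/S to the bands flagged in ms_mask, derive max_sfb for each channel, merge the zero-band flags within each window group and select ms_mode for common-window pairs.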
304
 */
305
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans)
306
{
307
    int i, w, w2, g, ch;
308
    int start, sum, maxsfb, cmaxsfb;
309

  
310
    for(ch = 0; ch < chans; ch++){
311
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
312
        start = 0;
313
        maxsfb = 0;
314
        cpe->ch[ch].pulse.num_pulse = 0;
315
        for(w = 0; w < ics->num_windows*16; w += 16){
316
            for(g = 0; g < ics->num_swb; g++){
317
                sum = 0;
318
                //apply M/S
319
                if(!ch && cpe->ms_mask[w + g]){
320
                    for(i = 0; i < ics->swb_sizes[g]; i++){
321
                        cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
322
                        cpe->ch[1].coeffs[start+i] =  cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
323
                    }
324
                }
325
                start += ics->swb_sizes[g];
326
            }
327
            for(cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--);
328
            maxsfb = FFMAX(maxsfb, cmaxsfb);
329
        }
330
        ics->max_sfb = maxsfb;
331

  
332
        //adjust zero bands for window groups
333
        for(w = 0; w < ics->num_windows; w += ics->group_len[w]){
334
            for(g = 0; g < ics->max_sfb; g++){
335
                i = 1;
336
                for(w2 = w; w2 < w + ics->group_len[w]; w2++){
337
                    if(!cpe->ch[ch].zeroes[w2*16 + g]){
338
                        i = 0;
339
                        break;
340
                    }
341
                }
342
                cpe->ch[ch].zeroes[w*16 + g] = i;
343
            }
344
        }
345
    }
346

  
347
    if(chans > 1 && cpe->common_window){
348
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
349
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
350
        int msc = 0;
351
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
352
        ics1->max_sfb = ics0->max_sfb;
353
        for(w = 0; w < ics0->num_windows*16; w += 16)
354
            for(i = 0; i < ics0->max_sfb; i++)
355
                if(cpe->ms_mask[w+i]) msc++;
356
        if(msc == 0 || ics0->max_sfb == 0) cpe->ms_mode = 0;
357
        else cpe->ms_mode = msc < ics0->max_sfb ? 1 : 2;
358
    }
359
}
360

  
361
/**
362
 * Encode scalefactor band coding type.
363
 */
364
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
365
{
366
    int w;
367

  
368
    for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
369
        s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
370
    }
371
}
372

  
373
/**
374
 * Encode scalefactors.
375
 */
376
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce)
377
{
378
    int off = sce->sf_idx[0], diff;
379
    int i, w;
380

  
381
    for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
382
        for(i = 0; i < sce->ics.max_sfb; i++){
383
            if(!sce->zeroes[w*16 + i]){
384
                diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
385
                if(diff < 0 || diff > 120) av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
386
                off = sce->sf_idx[w*16 + i];
387
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
388
            }
272 389
        }
273
        start += 128;
274 390
    }
275
    return bits;
276 391
}
277 392

  
278 393
/**
......
298 413
 */
299 414
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
300 415
{
301
    int start, i, w, w2, wg;
416
    int start, i, w, w2;
302 417

  
303
    w = 0;
304
    for(wg = 0; wg < sce->ics.num_window_groups; wg++){
418
    for(w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]){
305 419
        start = 0;
306 420
        for(i = 0; i < sce->ics.max_sfb; i++){
307 421
            if(sce->zeroes[w*16 + i]){
308 422
                start += sce->ics.swb_sizes[i];
309 423
                continue;
310 424
            }
311
            for(w2 = w; w2 < w + sce->ics.group_len[wg]; w2++){
312
                encode_band_coeffs(s, sce, start + w2*128,
313
                                   sce->ics.swb_sizes[i],
314
                                   sce->band_type[w*16 + i]);
425
            for(w2 = w; w2 < w + sce->ics.group_len[w]; w2++){
426
                s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
427
                                         sce->ics.swb_sizes[i],
428
                                         sce->sf_idx[w*16 + i],
429
                                         sce->band_type[w*16 + i],
430
                                         s->lambda);
315 431
            }
316 432
            start += sce->ics.swb_sizes[i];
317 433
        }
318
        w += sce->ics.group_len[wg];
319 434
    }
320 435
}
321 436

  
322 437
/**
438
 * Encode one channel of audio data.
439
 */
440
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, int common_window)
441
{
442
    put_bits(&s->pb, 8, sce->sf_idx[0]);
443
    if(!common_window) put_ics_info(s, &sce->ics);
444
    encode_band_info(s, sce);
445
    encode_scale_factors(avctx, s, sce);
446
    encode_pulses(s, &sce->pulse);
447
    put_bits(&s->pb, 1, 0); //tns
448
    put_bits(&s->pb, 1, 0); //ssr
449
    encode_spectral_coeffs(s, sce);
450
    return 0;
451
}
452

  
453
/**
323 454
 * Write some auxiliary information about the created AAC file.
324 455
 */
325 456
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, const char *name)
......
339 470
    put_bits(&s->pb, 12 - padbits, 0);
340 471
}
341 472

  
473
static int aac_encode_frame(AVCodecContext *avctx,
474
                            uint8_t *frame, int buf_size, void *data)
475
{
476
    AACEncContext *s = avctx->priv_data;
477
    int16_t *samples = s->samples, *samples2, *la;
478
    ChannelElement *cpe;
479
    int i, j, chans, tag, start_ch;
480
    const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
481
    int chan_el_counter[4];
482

  
483
    if(s->last_frame)
484
        return 0;
485
    if(data){
486
        if(!s->psypp){
487
            memcpy(s->samples + 1024 * avctx->channels, data, 1024 * avctx->channels * sizeof(s->samples[0]));
488
        }else{
489
            start_ch = 0;
490
            samples2 = s->samples + 1024 * avctx->channels;
491
            for(i = 0; i < chan_map[0]; i++){
492
                tag = chan_map[i+1];
493
                chans = tag == TYPE_CPE ? 2 : 1;
494
                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch, samples2 + start_ch, start_ch, chans);
495
                start_ch += chans;
496
            }
497
        }
498
    }
499
    if(!avctx->frame_number){
500
        memcpy(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
501
        return 0;
502
    }
503

  
504
    init_put_bits(&s->pb, frame, buf_size*8);
505
    if((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT)){
506
        put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
507
    }
508
    start_ch = 0;
509
    memset(chan_el_counter, 0, sizeof(chan_el_counter));
510
    for(i = 0; i < chan_map[0]; i++){
511
        FFPsyWindowInfo wi[2];
512
        tag = chan_map[i+1];
513
        chans = tag == TYPE_CPE ? 2 : 1;
514
        cpe = &s->cpe[i];
515
        samples2 = samples + start_ch;
516
        la = samples2 + 1024 * avctx->channels + start_ch;
517
        if(!data) la = NULL;
518
        for(j = 0; j < chans; j++){
519
            IndividualChannelStream *ics = &cpe->ch[j].ics;
520
            int k;
521
            wi[j] = ff_psy_suggest_window(&s->psy, samples2, la, start_ch + j, ics->window_sequence[0]);
522
            ics->window_sequence[1] = ics->window_sequence[0];
523
            ics->window_sequence[0] = wi[j].window_type[0];
524
            ics->use_kb_window[1]   = ics->use_kb_window[0];
525
            ics->use_kb_window[0]   = wi[j].window_shape;
526
            ics->num_windows        = wi[j].num_windows;
527
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
528
            ics->num_swb            = s->psy.num_bands[ics->num_windows == 8];
529
            for(k = 0; k < ics->num_windows; k++)
530
                ics->group_len[k] = wi[j].grouping[k];
531

  
532
            s->cur_channel = start_ch + j;
533
            apply_window_and_mdct(avctx, s, &cpe->ch[j], samples2, j);
534
            s->coder->search_for_quantizers(avctx, s, &cpe->ch[j], s->lambda);
535
        }
536
        cpe->common_window = 0;
537
        if(chans > 1
538
            && wi[0].window_type[0] == wi[1].window_type[0]
539
            && wi[0].window_shape   == wi[1].window_shape){
540

  
541
            cpe->common_window = 1;
542
            for(j = 0; j < wi[0].num_windows; j++){
543
                if(wi[0].grouping[j] != wi[1].grouping[j]){
544
                    cpe->common_window = 0;
545
                    break;
546
                }
547
            }
548
        }
549
        if(cpe->common_window && s->coder->search_for_ms)
550
            s->coder->search_for_ms(s, cpe, s->lambda);
551
        adjust_frame_information(s, cpe, chans);
552
        put_bits(&s->pb, 3, tag);
553
        put_bits(&s->pb, 4, chan_el_counter[tag]++);
554
        if(chans == 2){
555
            put_bits(&s->pb, 1, cpe->common_window);
556
            if(cpe->common_window){
557
                put_ics_info(s, &cpe->ch[0].ics);
558
                encode_ms_info(&s->pb, cpe);
559
            }
560
        }
561
        for(j = 0; j < chans; j++){
562
            s->cur_channel = start_ch + j;
563
            ff_psy_set_band_info(&s->psy, s->cur_channel, cpe->ch[j].coeffs, &wi[j]);
564
            encode_individual_channel(avctx, s, &cpe->ch[j], cpe->common_window);
565
        }
566
        start_ch += chans;
567
    }
568

  
569
    put_bits(&s->pb, 3, TYPE_END);
570
    flush_put_bits(&s->pb);
571
    avctx->frame_bits = put_bits_count(&s->pb);
572

  
573
    // rate control stuff
574
    if(!(avctx->flags & CODEC_FLAG_QSCALE)){
575
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
576
        s->lambda *= ratio;
577
    }
578

  
579
    if (avctx->frame_bits > 6144*avctx->channels) {
580
        av_log(avctx, AV_LOG_ERROR, "input buffer violation %d > %d.\n", avctx->frame_bits, 6144*avctx->channels);
581
    }
582

  
583
    if(!data)
584
        s->last_frame = 1;
585
    memcpy(s->samples, s->samples + 1024 * avctx->channels, 1024 * avctx->channels * sizeof(s->samples[0]));
586
    return put_bits_count(&s->pb)>>3;
587
}
588

  
342 589
static av_cold int aac_encode_end(AVCodecContext *avctx)
343 590
{
344 591
    AACEncContext *s = avctx->priv_data;
345 592

  
346 593
    ff_mdct_end(&s->mdct1024);
347 594
    ff_mdct_end(&s->mdct128);
348
    ff_aac_psy_end(&s->psy);
595
    ff_psy_end(&s->psy);
596
    ff_psy_preprocess_end(s->psypp);
349 597
    av_freep(&s->samples);
350 598
    av_freep(&s->cpe);
351 599
    return 0;

Also available in: Unified diff