ffmpeg / libavcodec / aacenc.c @ 6eabb0d3
History | View | Annotate | Download (22.7 KB)
1 |
/*
|
---|---|
2 |
* AAC encoder
|
3 |
* Copyright (C) 2008 Konstantin Shishkov
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
/**
|
23 |
* @file
|
24 |
* AAC encoder
|
25 |
*/
|
26 |
|
27 |
/***********************************
|
28 |
* TODOs:
|
29 |
* add sane pulse detection
|
30 |
* add temporal noise shaping
|
31 |
***********************************/
|
32 |
|
33 |
#include "avcodec.h" |
34 |
#include "put_bits.h" |
35 |
#include "dsputil.h" |
36 |
#include "mpeg4audio.h" |
37 |
|
38 |
#include "aac.h" |
39 |
#include "aactab.h" |
40 |
#include "aacenc.h" |
41 |
|
42 |
#include "psymodel.h" |
43 |
|
44 |
#define AAC_MAX_CHANNELS 6 |
45 |
|
46 |
/*
 * Scalefactor band widths for long windows, one table per band layout.
 * The entries of every table add up to 1024 coefficients.
 * NOTE(review): the numeric suffix presumably names a sampling-rate class
 * in kHz -- confirm against the band tables of the AAC specification.
 */
static const uint8_t swb_size_1024_96[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,
    12, 12, 12, 12, 12,
    16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,
    12, 12, 12,
    16, 16, 16,
    20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12,
    16, 16, 16,
    20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
     8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16,
    20, 20, 20,
    24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20,
    24, 24, 24,
    28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/* Long-window band table selected by the encoder's sample rate index. */
static const uint8_t *swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96,                   /* index  0..1  */
    swb_size_1024_64,                                     /* index  2     */
    swb_size_1024_48, swb_size_1024_48,                   /* index  3..4  */
    swb_size_1024_32,                                     /* index  5     */
    swb_size_1024_24, swb_size_1024_24,                   /* index  6..7  */
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_16, /* index  8..10 */
    swb_size_1024_8                                       /* index 11     */
};
95 |
|
96 |
/*
 * Scalefactor band widths for short windows, one table per band layout.
 * The entries of every table add up to 128 coefficients.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4,
    8, 8, 8,
    16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12, 12,
    16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12,
    16, 16,
    20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4,
    8, 8,
    12, 12,
    16,
    20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8,
    12,
    16,
    20, 20
};

/* Short-window band table selected by the encoder's sample rate index. */
static const uint8_t *swb_size_128[] = {
    /* the third entry of the first row would be swb_size_128_64, which is
       a duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24,
    swb_size_128_16, swb_size_128_16, swb_size_128_16,
    swb_size_128_8
};
124 |
|
125 |
/** default channel configurations */
|
126 |
static const uint8_t aac_chan_configs[6][5] = { |
127 |
{1, TYPE_SCE}, // 1 channel - single channel element |
128 |
{1, TYPE_CPE}, // 2 channels - channel pair |
129 |
{2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo |
130 |
{3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center |
131 |
{3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo |
132 |
{4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE |
133 |
}; |
134 |
|
135 |
/**
|
136 |
* Make AAC audio config object.
|
137 |
* @see 1.6.2.1 "Syntax - AudioSpecificConfig"
|
138 |
*/
|
139 |
static void put_audio_specific_config(AVCodecContext *avctx) |
140 |
{ |
141 |
PutBitContext pb; |
142 |
AACEncContext *s = avctx->priv_data; |
143 |
|
144 |
init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
|
145 |
put_bits(&pb, 5, 2); //object type - AAC-LC |
146 |
put_bits(&pb, 4, s->samplerate_index); //sample rate index |
147 |
put_bits(&pb, 4, avctx->channels);
|
148 |
//GASpecificConfig
|
149 |
put_bits(&pb, 1, 0); //frame length - 1024 samples |
150 |
put_bits(&pb, 1, 0); //does not depend on core coder |
151 |
put_bits(&pb, 1, 0); //is not extension |
152 |
|
153 |
//Explicitly Mark SBR absent
|
154 |
put_bits(&pb, 11, 0x27b); //sync extension |
155 |
put_bits(&pb, 5, AOT_SBR);
|
156 |
put_bits(&pb, 1, 0); |
157 |
flush_put_bits(&pb); |
158 |
} |
159 |
|
160 |
/**
 * Initialize the AAC encoder.
 *
 * Validates sample rate, channel count, profile and bit rate, sets up the
 * MDCTs and windows, allocates the sample/element buffers and the
 * extradata, and initializes the psychoacoustic model.
 *
 * @param avctx codec context to initialize
 * @return 0 on success, -1 on unsupported parameters or allocation failure
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i;
    const uint8_t *sizes[2];
    int lengths[2];
    const int num_band_tables = sizeof(swb_size_1024) / sizeof(swb_size_1024[0]);

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
            break;
    // The index is later used to pick a band table, so it must also be
    // within swb_size_1024/swb_size_128; a non-positive rate must never
    // be accepted even if the rate table contains such an entry.
    if (avctx->sample_rate <= 0 || i >= num_band_tables ||
        i >= (int)(sizeof(swb_size_128) / sizeof(swb_size_128[0]))) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
        return -1;
    }
    // channels - 1 indexes aac_chan_configs, so reject values below 1 too
    if (avctx->channels < 1 || avctx->channels > AAC_MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
        return -1;
    }
    if (avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported profile %d\n", avctx->profile);
        return -1;
    }
    if (1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * avctx->channels) {
        av_log(avctx, AV_LOG_ERROR, "Too many bits per frame requested\n");
        return -1;
    }
    s->samplerate_index = i;

    dsputil_init(&s->dsp, avctx);
    ff_mdct_init(&s->mdct1024, 11, 0, 1.0);
    ff_mdct_init(&s->mdct128,   8, 0, 1.0);
    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    // two frames of interleaved samples: current frame + lookahead
    s->samples       = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
    s->cpe           = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
    avctx->extradata = av_mallocz(5 + FF_INPUT_BUFFER_PADDING_SIZE);
    if (!s->samples || !s->cpe || !avctx->extradata) {
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers\n");
        // release everything acquired so far; av_freep() resets the
        // pointers, so a later cleanup pass cannot free them twice
        av_freep(&avctx->extradata);
        av_freep(&s->cpe);
        av_freep(&s->samples);
        ff_mdct_end(&s->mdct128);
        ff_mdct_end(&s->mdct1024);
        return -1;
    }
    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[2];

    s->lambda = avctx->global_quality ? avctx->global_quality : 120;

    ff_aac_tableinit();

    return 0;
}
219 |
|
220 |
static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s, |
221 |
SingleChannelElement *sce, short *audio)
|
222 |
{ |
223 |
int i, k;
|
224 |
const int chans = avctx->channels; |
225 |
const float * lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; |
226 |
const float * swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; |
227 |
const float * pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; |
228 |
|
229 |
if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { |
230 |
memcpy(s->output, sce->saved, sizeof(float)*1024); |
231 |
if (sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE) { |
232 |
memset(s->output, 0, sizeof(s->output[0]) * 448); |
233 |
for (i = 448; i < 576; i++) |
234 |
s->output[i] = sce->saved[i] * pwindow[i - 448];
|
235 |
for (i = 576; i < 704; i++) |
236 |
s->output[i] = sce->saved[i]; |
237 |
} |
238 |
if (sce->ics.window_sequence[0] != LONG_START_SEQUENCE) { |
239 |
for (i = 0; i < 1024; i++) { |
240 |
s->output[i+1024] = audio[i * chans] * lwindow[1024 - i - 1]; |
241 |
sce->saved[i] = audio[i * chans] * lwindow[i]; |
242 |
} |
243 |
} else {
|
244 |
for (i = 0; i < 448; i++) |
245 |
s->output[i+1024] = audio[i * chans];
|
246 |
for (; i < 576; i++) |
247 |
s->output[i+1024] = audio[i * chans] * swindow[576 - i - 1]; |
248 |
memset(s->output+1024+576, 0, sizeof(s->output[0]) * 448); |
249 |
for (i = 0; i < 1024; i++) |
250 |
sce->saved[i] = audio[i * chans]; |
251 |
} |
252 |
ff_mdct_calc(&s->mdct1024, sce->coeffs, s->output); |
253 |
} else {
|
254 |
for (k = 0; k < 1024; k += 128) { |
255 |
for (i = 448 + k; i < 448 + k + 256; i++) |
256 |
s->output[i - 448 - k] = (i < 1024) |
257 |
? sce->saved[i] |
258 |
: audio[(i-1024)*chans];
|
259 |
s->dsp.vector_fmul (s->output, s->output, k ? swindow : pwindow, 128);
|
260 |
s->dsp.vector_fmul_reverse(s->output+128, s->output+128, swindow, 128); |
261 |
ff_mdct_calc(&s->mdct128, sce->coeffs + k, s->output); |
262 |
} |
263 |
for (i = 0; i < 1024; i++) |
264 |
sce->saved[i] = audio[i * chans]; |
265 |
} |
266 |
} |
267 |
|
268 |
/**
|
269 |
* Encode ics_info element.
|
270 |
* @see Table 4.6 (syntax of ics_info)
|
271 |
*/
|
272 |
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info) |
273 |
{ |
274 |
int w;
|
275 |
|
276 |
put_bits(&s->pb, 1, 0); // ics_reserved bit |
277 |
put_bits(&s->pb, 2, info->window_sequence[0]); |
278 |
put_bits(&s->pb, 1, info->use_kb_window[0]); |
279 |
if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) { |
280 |
put_bits(&s->pb, 6, info->max_sfb);
|
281 |
put_bits(&s->pb, 1, 0); // no prediction |
282 |
} else {
|
283 |
put_bits(&s->pb, 4, info->max_sfb);
|
284 |
for (w = 1; w < 8; w++) |
285 |
put_bits(&s->pb, 1, !info->group_len[w]);
|
286 |
} |
287 |
} |
288 |
|
289 |
/**
|
290 |
* Encode MS data.
|
291 |
* @see 4.6.8.1 "Joint Coding - M/S Stereo"
|
292 |
*/
|
293 |
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe) |
294 |
{ |
295 |
int i, w;
|
296 |
|
297 |
put_bits(pb, 2, cpe->ms_mode);
|
298 |
if (cpe->ms_mode == 1) |
299 |
for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]) |
300 |
for (i = 0; i < cpe->ch[0].ics.max_sfb; i++) |
301 |
put_bits(pb, 1, cpe->ms_mask[w*16 + i]); |
302 |
} |
303 |
|
304 |
/**
|
305 |
* Produce integer coefficients from scalefactors provided by the model.
|
306 |
*/
|
307 |
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans) |
308 |
{ |
309 |
int i, w, w2, g, ch;
|
310 |
int start, maxsfb, cmaxsfb;
|
311 |
|
312 |
for (ch = 0; ch < chans; ch++) { |
313 |
IndividualChannelStream *ics = &cpe->ch[ch].ics; |
314 |
start = 0;
|
315 |
maxsfb = 0;
|
316 |
cpe->ch[ch].pulse.num_pulse = 0;
|
317 |
for (w = 0; w < ics->num_windows*16; w += 16) { |
318 |
for (g = 0; g < ics->num_swb; g++) { |
319 |
//apply M/S
|
320 |
if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
|
321 |
for (i = 0; i < ics->swb_sizes[g]; i++) { |
322 |
cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0; |
323 |
cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i]; |
324 |
} |
325 |
} |
326 |
start += ics->swb_sizes[g]; |
327 |
} |
328 |
for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--) |
329 |
; |
330 |
maxsfb = FFMAX(maxsfb, cmaxsfb); |
331 |
} |
332 |
ics->max_sfb = maxsfb; |
333 |
|
334 |
//adjust zero bands for window groups
|
335 |
for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { |
336 |
for (g = 0; g < ics->max_sfb; g++) { |
337 |
i = 1;
|
338 |
for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
|
339 |
if (!cpe->ch[ch].zeroes[w2*16 + g]) { |
340 |
i = 0;
|
341 |
break;
|
342 |
} |
343 |
} |
344 |
cpe->ch[ch].zeroes[w*16 + g] = i;
|
345 |
} |
346 |
} |
347 |
} |
348 |
|
349 |
if (chans > 1 && cpe->common_window) { |
350 |
IndividualChannelStream *ics0 = &cpe->ch[0].ics;
|
351 |
IndividualChannelStream *ics1 = &cpe->ch[1].ics;
|
352 |
int msc = 0; |
353 |
ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb); |
354 |
ics1->max_sfb = ics0->max_sfb; |
355 |
for (w = 0; w < ics0->num_windows*16; w += 16) |
356 |
for (i = 0; i < ics0->max_sfb; i++) |
357 |
if (cpe->ms_mask[w+i])
|
358 |
msc++; |
359 |
if (msc == 0 || ics0->max_sfb == 0) |
360 |
cpe->ms_mode = 0;
|
361 |
else
|
362 |
cpe->ms_mode = msc < ics0->max_sfb ? 1 : 2; |
363 |
} |
364 |
} |
365 |
|
366 |
/**
|
367 |
* Encode scalefactor band coding type.
|
368 |
*/
|
369 |
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce) |
370 |
{ |
371 |
int w;
|
372 |
|
373 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) |
374 |
s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda); |
375 |
} |
376 |
|
377 |
/**
|
378 |
* Encode scalefactors.
|
379 |
*/
|
380 |
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, |
381 |
SingleChannelElement *sce) |
382 |
{ |
383 |
int off = sce->sf_idx[0], diff; |
384 |
int i, w;
|
385 |
|
386 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
387 |
for (i = 0; i < sce->ics.max_sfb; i++) { |
388 |
if (!sce->zeroes[w*16 + i]) { |
389 |
diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
|
390 |
if (diff < 0 || diff > 120) |
391 |
av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
|
392 |
off = sce->sf_idx[w*16 + i];
|
393 |
put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]); |
394 |
} |
395 |
} |
396 |
} |
397 |
} |
398 |
|
399 |
/**
|
400 |
* Encode pulse data.
|
401 |
*/
|
402 |
static void encode_pulses(AACEncContext *s, Pulse *pulse) |
403 |
{ |
404 |
int i;
|
405 |
|
406 |
put_bits(&s->pb, 1, !!pulse->num_pulse);
|
407 |
if (!pulse->num_pulse)
|
408 |
return;
|
409 |
|
410 |
put_bits(&s->pb, 2, pulse->num_pulse - 1); |
411 |
put_bits(&s->pb, 6, pulse->start);
|
412 |
for (i = 0; i < pulse->num_pulse; i++) { |
413 |
put_bits(&s->pb, 5, pulse->pos[i]);
|
414 |
put_bits(&s->pb, 4, pulse->amp[i]);
|
415 |
} |
416 |
} |
417 |
|
418 |
/**
|
419 |
* Encode spectral coefficients processed by psychoacoustic model.
|
420 |
*/
|
421 |
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) |
422 |
{ |
423 |
int start, i, w, w2;
|
424 |
|
425 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
426 |
start = 0;
|
427 |
for (i = 0; i < sce->ics.max_sfb; i++) { |
428 |
if (sce->zeroes[w*16 + i]) { |
429 |
start += sce->ics.swb_sizes[i]; |
430 |
continue;
|
431 |
} |
432 |
for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
|
433 |
s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
|
434 |
sce->ics.swb_sizes[i], |
435 |
sce->sf_idx[w*16 + i],
|
436 |
sce->band_type[w*16 + i],
|
437 |
s->lambda); |
438 |
start += sce->ics.swb_sizes[i]; |
439 |
} |
440 |
} |
441 |
} |
442 |
|
443 |
/**
|
444 |
* Encode one channel of audio data.
|
445 |
*/
|
446 |
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, |
447 |
SingleChannelElement *sce, |
448 |
int common_window)
|
449 |
{ |
450 |
put_bits(&s->pb, 8, sce->sf_idx[0]); |
451 |
if (!common_window)
|
452 |
put_ics_info(s, &sce->ics); |
453 |
encode_band_info(s, sce); |
454 |
encode_scale_factors(avctx, s, sce); |
455 |
encode_pulses(s, &sce->pulse); |
456 |
put_bits(&s->pb, 1, 0); //tns |
457 |
put_bits(&s->pb, 1, 0); //ssr |
458 |
encode_spectral_coeffs(s, sce); |
459 |
return 0; |
460 |
} |
461 |
|
462 |
/**
|
463 |
* Write some auxiliary information about the created AAC file.
|
464 |
*/
|
465 |
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, |
466 |
const char *name) |
467 |
{ |
468 |
int i, namelen, padbits;
|
469 |
|
470 |
namelen = strlen(name) + 2;
|
471 |
put_bits(&s->pb, 3, TYPE_FIL);
|
472 |
put_bits(&s->pb, 4, FFMIN(namelen, 15)); |
473 |
if (namelen >= 15) |
474 |
put_bits(&s->pb, 8, namelen - 16); |
475 |
put_bits(&s->pb, 4, 0); //extension type - filler |
476 |
padbits = 8 - (put_bits_count(&s->pb) & 7); |
477 |
align_put_bits(&s->pb); |
478 |
for (i = 0; i < namelen - 2; i++) |
479 |
put_bits(&s->pb, 8, name[i]);
|
480 |
put_bits(&s->pb, 12 - padbits, 0); |
481 |
} |
482 |
|
483 |
/**
 * Encode one frame of audio.
 *
 * s->samples holds two frames of interleaved samples; new input (1024
 * samples per channel) is stored in the second half while the first half
 * is encoded. The first call therefore only primes the buffer and returns
 * 0. A call with data == NULL encodes the buffered frame and sets
 * s->last_frame, after which every further call returns 0.
 *
 * @param avctx    codec context
 * @param frame    output buffer for the encoded frame
 * @param buf_size size of the output buffer in bytes
 * @param data     interleaved 16-bit input samples, or NULL to flush
 * @return bit count of the written frame shifted right by 3, or 0 when
 *         nothing was written
 */
static int aac_encode_frame(AVCodecContext *avctx,
                            uint8_t *frame, int buf_size, void *data)
{
    AACEncContext *s = avctx->priv_data;
    const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
    const int block_len = 1024 * avctx->channels; // samples in one interleaved frame
    int16_t *samples = s->samples, *samples2, *la;
    ChannelElement *cpe;
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
    int chan_el_counter[4];
    int i, j, chans, tag, start_ch;

    if (s->last_frame)
        return 0;

    /* store the new input in the second half of the sample buffer */
    if (data) {
        if (s->psypp) {
            samples2 = s->samples + block_len;
            start_ch = 0;
            for (i = 0; i < chan_map[0]; i++) {
                tag   = chan_map[i+1];
                chans = tag == TYPE_CPE ? 2 : 1;
                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch,
                                  samples2 + start_ch, start_ch, chans);
                start_ch += chans;
            }
        } else {
            memcpy(s->samples + block_len, data,
                   block_len * sizeof(s->samples[0]));
        }
    }

    /* very first call: only move the input into place, emit nothing */
    if (!avctx->frame_number) {
        memcpy(s->samples, s->samples + block_len,
               block_len * sizeof(s->samples[0]));
        return 0;
    }

    /* choose windows and transform every channel */
    start_ch = 0;
    for (i = 0; i < chan_map[0]; i++) {
        FFPsyWindowInfo *wi = &windows[start_ch];

        tag   = chan_map[i+1];
        chans = tag == TYPE_CPE ? 2 : 1;
        cpe   = &s->cpe[i];
        for (j = 0; j < chans; j++) {
            IndividualChannelStream *ics = &cpe->ch[j].ics;
            const int cur_channel = start_ch + j;
            int k;

            samples2 = samples + cur_channel;
            // no lookahead samples exist while flushing
            la = data ? samples2 + (448+64) * avctx->channels : NULL;

            if (tag == TYPE_LFE) {
                wi[j].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[j].window_shape   = 0;
                wi[j].num_windows    = 1;
                wi[j].grouping[0]    = 1;
            } else {
                wi[j] = ff_psy_suggest_window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }

            // slot [1] remembers the previous frame's settings
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[j].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[j].window_shape;
            ics->num_windows        = wi[j].num_windows;
            ics->swb_sizes          = s->psy.bands[ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? 12 : s->psy.num_bands[ics->num_windows == 8];
            for (k = 0; k < ics->num_windows; k++)
                ics->group_len[k] = wi[j].grouping[k];

            apply_window_and_mdct(avctx, s, &cpe->ch[j], samples2);
        }
        start_ch += chans;
    }

    /* write the frame; repeat with an adjusted lambda while it is too large */
    for (;;) {
        int frame_bits;

        init_put_bits(&s->pb, frame, buf_size*8);
        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);

        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        start_ch = 0;
        for (i = 0; i < chan_map[0]; i++) {
            FFPsyWindowInfo *wi = &windows[start_ch];

            tag   = chan_map[i+1];
            chans = tag == TYPE_CPE ? 2 : 1;
            cpe   = &s->cpe[i];

            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);

            for (j = 0; j < chans; j++) {
                s->cur_channel = start_ch + j;
                ff_psy_set_band_info(&s->psy, s->cur_channel, cpe->ch[j].coeffs, &wi[j]);
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[j], s->lambda);
            }

            // a pair shares one ics_info only if type, shape and grouping match
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (j = 0; j < wi[0].num_windows; j++) {
                    if (wi[0].grouping[j] != wi[1].grouping[j]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }

            s->cur_channel = start_ch;
            if (cpe->common_window && s->coder->search_for_ms)
                s->coder->search_for_ms(s, cpe, s->lambda);
            adjust_frame_information(s, cpe, chans);

            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (j = 0; j < chans; j++) {
                s->cur_channel = start_ch + j;
                encode_individual_channel(avctx, s, &cpe->ch[j], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        // 3 bits stay reserved for the end tag written below
        if (frame_bits <= 6144 * avctx->channels - 3)
            break;

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
    }

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!data)
        s->last_frame = 1;

    /* the lookahead half becomes the frame encoded by the next call */
    memcpy(s->samples, s->samples + block_len,
           block_len * sizeof(s->samples[0]));
    return put_bits_count(&s->pb)>>3;
}
630 |
|
631 |
/**
 * Release everything the encoder context owns: both MDCT contexts, the
 * psychoacoustic model and its preprocessor, the sample buffer and the
 * channel element array.
 *
 * @param avctx codec context being closed
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);

    /* psychoacoustic model */
    ff_psy_end(&enc->psy);
    ff_psy_preprocess_end(enc->psypp);

    /* buffers */
    av_freep(&enc->samples);
    av_freep(&enc->cpe);

    return 0;
}
643 |
|
644 |
AVCodec aac_encoder = { |
645 |
"aac",
|
646 |
AVMEDIA_TYPE_AUDIO, |
647 |
CODEC_ID_AAC, |
648 |
sizeof(AACEncContext),
|
649 |
aac_encode_init, |
650 |
aac_encode_frame, |
651 |
aac_encode_end, |
652 |
.capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY | CODEC_CAP_EXPERIMENTAL, |
653 |
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, |
654 |
.long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
|
655 |
}; |