ffmpeg / libavcodec / aacenc.c @ 495af353
History | View | Annotate | Download (22.6 KB)
1 |
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
21 |
|
22 |
/**
 * @file
 * AAC encoder
 */
|
26 |
|
27 |
/***********************************
 *              TODOs:
 * add sane pulse detection
 * add temporal noise shaping
 ***********************************/
|
32 |
|
33 |
#include "avcodec.h" |
34 |
#include "put_bits.h" |
35 |
#include "dsputil.h" |
36 |
#include "mpeg4audio.h" |
37 |
|
38 |
#include "aac.h" |
39 |
#include "aactab.h" |
40 |
#include "aacenc.h" |
41 |
|
42 |
#include "psymodel.h" |
43 |
|
44 |
/*
 * Scalefactor band widths, in spectral coefficients, for long (1024-sample)
 * windows. Each table lists one width per band and sums to 1024. Rows are
 * grouped by band width for readability.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8, 8,
    12, 12, 12, 12, 12,
    16, 16,
    24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8,
    12, 12, 12,
    16, 16, 16,
    20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40,
    40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12,
    16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12,
    16, 16, 16,
    20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52,
    64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16,
    20, 20, 20,
    24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60,
    64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16,
    20, 20, 20, 20,
    24, 24, 24,
    28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/*
 * Long-window band tables, selected by the sample rate index chosen in
 * aac_encode_init(). Only 12 indices have a table.
 */
static const uint8_t *swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8
};
93 |
|
94 |
/*
 * Scalefactor band widths, in spectral coefficients, for short (128-sample)
 * windows. Each table lists one width per band and sums to 128.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4,
    8, 8, 8,
    16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12, 12,
    16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8,
    12, 12,
    16, 16,
    20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4,
    8, 8,
    12, 12,
    16,
    20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4,
    8, 8, 8, 8,
    12,
    16,
    20, 20
};

/*
 * Short-window band tables, selected by the sample rate index chosen in
 * aac_encode_init(). Only 12 indices have a table.
 */
static const uint8_t *swb_size_128[] = {
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8
};
122 |
|
123 |
/** default channel configurations */
|
124 |
static const uint8_t aac_chan_configs[6][5] = { |
125 |
{1, TYPE_SCE}, // 1 channel - single channel element |
126 |
{1, TYPE_CPE}, // 2 channels - channel pair |
127 |
{2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo |
128 |
{3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center |
129 |
{3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo |
130 |
{4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE |
131 |
}; |
132 |
|
133 |
/**
|
134 |
* Make AAC audio config object.
|
135 |
* @see 1.6.2.1 "Syntax - AudioSpecificConfig"
|
136 |
*/
|
137 |
static void put_audio_specific_config(AVCodecContext *avctx) |
138 |
{ |
139 |
PutBitContext pb; |
140 |
AACEncContext *s = avctx->priv_data; |
141 |
|
142 |
init_put_bits(&pb, avctx->extradata, avctx->extradata_size*8);
|
143 |
put_bits(&pb, 5, 2); //object type - AAC-LC |
144 |
put_bits(&pb, 4, s->samplerate_index); //sample rate index |
145 |
put_bits(&pb, 4, avctx->channels);
|
146 |
//GASpecificConfig
|
147 |
put_bits(&pb, 1, 0); //frame length - 1024 samples |
148 |
put_bits(&pb, 1, 0); //does not depend on core coder |
149 |
put_bits(&pb, 1, 0); //is not extension |
150 |
flush_put_bits(&pb); |
151 |
} |
152 |
|
153 |
/**
 * Initialize the AAC encoder.
 *
 * Validates sample rate, channel count, profile and bitrate, sets up the
 * MDCT contexts and window tables, allocates the sample and channel element
 * buffers, writes the AudioSpecificConfig into avctx->extradata and
 * initializes the psychoacoustic model.
 *
 * @param avctx codec context; avctx->priv_data must be an AACEncContext
 * @return 0 on success, a negative value on unsupported parameters or
 *         allocation failure
 */
static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i;
    const uint8_t *sizes[2];
    int lengths[2];
    /* only sample rate indices that have a band table can be encoded */
    const int nb_rates = sizeof(swb_size_1024) / sizeof(swb_size_1024[0]);

    avctx->frame_size = 1024;

    for (i = 0; i < nb_rates; i++)
        if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
            break;
    if (i == nb_rates) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
        return -1;
    }
    /* aac_chan_configs is indexed by channels-1, so reject channels < 1 too */
    if (avctx->channels < 1 || avctx->channels > 6) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported number of channels: %d\n", avctx->channels);
        return -1;
    }
    if (avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW) {
        av_log(avctx, AV_LOG_ERROR, "Unsupported profile %d\n", avctx->profile);
        return -1;
    }
    if (1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * avctx->channels) {
        av_log(avctx, AV_LOG_ERROR, "Too many bits per frame requested\n");
        return -1;
    }
    s->samplerate_index = i;

    dsputil_init(&s->dsp, avctx);
    ff_mdct_init(&s->mdct1024, 11, 0, 1.0);
    ff_mdct_init(&s->mdct128,   8, 0, 1.0);
    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    /* two frames worth of interleaved samples: current + look-ahead */
    s->samples       = av_malloc(2 * 1024 * avctx->channels * sizeof(s->samples[0]));
    s->cpe           = av_mallocz(sizeof(ChannelElement) * aac_chan_configs[avctx->channels-1][0]);
    avctx->extradata = av_malloc(2);
    if (!s->samples || !s->cpe || !avctx->extradata) {
        av_log(avctx, AV_LOG_ERROR, "Cannot allocate encoder buffers\n");
        /* release whatever was set up so a failed init does not leak */
        av_freep(&s->samples);
        av_freep(&s->cpe);
        av_freep(&avctx->extradata);
        ff_mdct_end(&s->mdct1024);
        ff_mdct_end(&s->mdct128);
        return AVERROR(ENOMEM);
    }
    avctx->extradata_size = 2;
    put_audio_specific_config(avctx);

    sizes[0]   = swb_size_1024[i];
    sizes[1]   = swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    ff_psy_init(&s->psy, avctx, 2, sizes, lengths);
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[0];

    s->lambda = avctx->global_quality ? avctx->global_quality : 120;
#if !CONFIG_HARDCODED_TABLES
    for (i = 0; i < 428; i++)
        ff_aac_pow2sf_tab[i] = pow(2, (i - 200)/4.);
#endif /* CONFIG_HARDCODED_TABLES */

    if (avctx->channels > 5)
        av_log(avctx, AV_LOG_ERROR, "This encoder does not yet enforce the restrictions on LFEs. "
               "The output will most likely be an illegal bitstream.\n");

    return 0;
}
218 |
|
219 |
static void apply_window_and_mdct(AVCodecContext *avctx, AACEncContext *s, |
220 |
SingleChannelElement *sce, short *audio, int channel) |
221 |
{ |
222 |
int i, j, k;
|
223 |
const float * lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; |
224 |
const float * swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; |
225 |
const float * pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; |
226 |
|
227 |
if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { |
228 |
memcpy(s->output, sce->saved, sizeof(float)*1024); |
229 |
if (sce->ics.window_sequence[0] == LONG_STOP_SEQUENCE) { |
230 |
memset(s->output, 0, sizeof(s->output[0]) * 448); |
231 |
for (i = 448; i < 576; i++) |
232 |
s->output[i] = sce->saved[i] * pwindow[i - 448];
|
233 |
for (i = 576; i < 704; i++) |
234 |
s->output[i] = sce->saved[i]; |
235 |
} |
236 |
if (sce->ics.window_sequence[0] != LONG_START_SEQUENCE) { |
237 |
j = channel; |
238 |
for (i = 0; i < 1024; i++, j += avctx->channels) { |
239 |
s->output[i+1024] = audio[j] * lwindow[1024 - i - 1]; |
240 |
sce->saved[i] = audio[j] * lwindow[i]; |
241 |
} |
242 |
} else {
|
243 |
j = channel; |
244 |
for (i = 0; i < 448; i++, j += avctx->channels) |
245 |
s->output[i+1024] = audio[j];
|
246 |
for (i = 448; i < 576; i++, j += avctx->channels) |
247 |
s->output[i+1024] = audio[j] * swindow[576 - i - 1]; |
248 |
memset(s->output+1024+576, 0, sizeof(s->output[0]) * 448); |
249 |
j = channel; |
250 |
for (i = 0; i < 1024; i++, j += avctx->channels) |
251 |
sce->saved[i] = audio[j]; |
252 |
} |
253 |
ff_mdct_calc(&s->mdct1024, sce->coeffs, s->output); |
254 |
} else {
|
255 |
j = channel; |
256 |
for (k = 0; k < 1024; k += 128) { |
257 |
for (i = 448 + k; i < 448 + k + 256; i++) |
258 |
s->output[i - 448 - k] = (i < 1024) |
259 |
? sce->saved[i] |
260 |
: audio[channel + (i-1024)*avctx->channels];
|
261 |
s->dsp.vector_fmul (s->output, k ? swindow : pwindow, 128);
|
262 |
s->dsp.vector_fmul_reverse(s->output+128, s->output+128, swindow, 128); |
263 |
ff_mdct_calc(&s->mdct128, sce->coeffs + k, s->output); |
264 |
} |
265 |
j = channel; |
266 |
for (i = 0; i < 1024; i++, j += avctx->channels) |
267 |
sce->saved[i] = audio[j]; |
268 |
} |
269 |
} |
270 |
|
271 |
/**
|
272 |
* Encode ics_info element.
|
273 |
* @see Table 4.6 (syntax of ics_info)
|
274 |
*/
|
275 |
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info) |
276 |
{ |
277 |
int w;
|
278 |
|
279 |
put_bits(&s->pb, 1, 0); // ics_reserved bit |
280 |
put_bits(&s->pb, 2, info->window_sequence[0]); |
281 |
put_bits(&s->pb, 1, info->use_kb_window[0]); |
282 |
if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) { |
283 |
put_bits(&s->pb, 6, info->max_sfb);
|
284 |
put_bits(&s->pb, 1, 0); // no prediction |
285 |
} else {
|
286 |
put_bits(&s->pb, 4, info->max_sfb);
|
287 |
for (w = 1; w < 8; w++) |
288 |
put_bits(&s->pb, 1, !info->group_len[w]);
|
289 |
} |
290 |
} |
291 |
|
292 |
/**
|
293 |
* Encode MS data.
|
294 |
* @see 4.6.8.1 "Joint Coding - M/S Stereo"
|
295 |
*/
|
296 |
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe) |
297 |
{ |
298 |
int i, w;
|
299 |
|
300 |
put_bits(pb, 2, cpe->ms_mode);
|
301 |
if (cpe->ms_mode == 1) |
302 |
for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w]) |
303 |
for (i = 0; i < cpe->ch[0].ics.max_sfb; i++) |
304 |
put_bits(pb, 1, cpe->ms_mask[w*16 + i]); |
305 |
} |
306 |
|
307 |
/**
|
308 |
* Produce integer coefficients from scalefactors provided by the model.
|
309 |
*/
|
310 |
static void adjust_frame_information(AACEncContext *apc, ChannelElement *cpe, int chans) |
311 |
{ |
312 |
int i, w, w2, g, ch;
|
313 |
int start, sum, maxsfb, cmaxsfb;
|
314 |
|
315 |
for (ch = 0; ch < chans; ch++) { |
316 |
IndividualChannelStream *ics = &cpe->ch[ch].ics; |
317 |
start = 0;
|
318 |
maxsfb = 0;
|
319 |
cpe->ch[ch].pulse.num_pulse = 0;
|
320 |
for (w = 0; w < ics->num_windows*16; w += 16) { |
321 |
for (g = 0; g < ics->num_swb; g++) { |
322 |
sum = 0;
|
323 |
//apply M/S
|
324 |
if (!ch && cpe->ms_mask[w + g]) {
|
325 |
for (i = 0; i < ics->swb_sizes[g]; i++) { |
326 |
cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0; |
327 |
cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i]; |
328 |
} |
329 |
} |
330 |
start += ics->swb_sizes[g]; |
331 |
} |
332 |
for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--) |
333 |
; |
334 |
maxsfb = FFMAX(maxsfb, cmaxsfb); |
335 |
} |
336 |
ics->max_sfb = maxsfb; |
337 |
|
338 |
//adjust zero bands for window groups
|
339 |
for (w = 0; w < ics->num_windows; w += ics->group_len[w]) { |
340 |
for (g = 0; g < ics->max_sfb; g++) { |
341 |
i = 1;
|
342 |
for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
|
343 |
if (!cpe->ch[ch].zeroes[w2*16 + g]) { |
344 |
i = 0;
|
345 |
break;
|
346 |
} |
347 |
} |
348 |
cpe->ch[ch].zeroes[w*16 + g] = i;
|
349 |
} |
350 |
} |
351 |
} |
352 |
|
353 |
if (chans > 1 && cpe->common_window) { |
354 |
IndividualChannelStream *ics0 = &cpe->ch[0].ics;
|
355 |
IndividualChannelStream *ics1 = &cpe->ch[1].ics;
|
356 |
int msc = 0; |
357 |
ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb); |
358 |
ics1->max_sfb = ics0->max_sfb; |
359 |
for (w = 0; w < ics0->num_windows*16; w += 16) |
360 |
for (i = 0; i < ics0->max_sfb; i++) |
361 |
if (cpe->ms_mask[w+i])
|
362 |
msc++; |
363 |
if (msc == 0 || ics0->max_sfb == 0) |
364 |
cpe->ms_mode = 0;
|
365 |
else
|
366 |
cpe->ms_mode = msc < ics0->max_sfb ? 1 : 2; |
367 |
} |
368 |
} |
369 |
|
370 |
/**
|
371 |
* Encode scalefactor band coding type.
|
372 |
*/
|
373 |
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce) |
374 |
{ |
375 |
int w;
|
376 |
|
377 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) |
378 |
s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda); |
379 |
} |
380 |
|
381 |
/**
|
382 |
* Encode scalefactors.
|
383 |
*/
|
384 |
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s, |
385 |
SingleChannelElement *sce) |
386 |
{ |
387 |
int off = sce->sf_idx[0], diff; |
388 |
int i, w;
|
389 |
|
390 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
391 |
for (i = 0; i < sce->ics.max_sfb; i++) { |
392 |
if (!sce->zeroes[w*16 + i]) { |
393 |
diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
|
394 |
if (diff < 0 || diff > 120) |
395 |
av_log(avctx, AV_LOG_ERROR, "Scalefactor difference is too big to be coded\n");
|
396 |
off = sce->sf_idx[w*16 + i];
|
397 |
put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]); |
398 |
} |
399 |
} |
400 |
} |
401 |
} |
402 |
|
403 |
/**
|
404 |
* Encode pulse data.
|
405 |
*/
|
406 |
static void encode_pulses(AACEncContext *s, Pulse *pulse) |
407 |
{ |
408 |
int i;
|
409 |
|
410 |
put_bits(&s->pb, 1, !!pulse->num_pulse);
|
411 |
if (!pulse->num_pulse)
|
412 |
return;
|
413 |
|
414 |
put_bits(&s->pb, 2, pulse->num_pulse - 1); |
415 |
put_bits(&s->pb, 6, pulse->start);
|
416 |
for (i = 0; i < pulse->num_pulse; i++) { |
417 |
put_bits(&s->pb, 5, pulse->pos[i]);
|
418 |
put_bits(&s->pb, 4, pulse->amp[i]);
|
419 |
} |
420 |
} |
421 |
|
422 |
/**
|
423 |
* Encode spectral coefficients processed by psychoacoustic model.
|
424 |
*/
|
425 |
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce) |
426 |
{ |
427 |
int start, i, w, w2;
|
428 |
|
429 |
for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { |
430 |
start = 0;
|
431 |
for (i = 0; i < sce->ics.max_sfb; i++) { |
432 |
if (sce->zeroes[w*16 + i]) { |
433 |
start += sce->ics.swb_sizes[i]; |
434 |
continue;
|
435 |
} |
436 |
for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
|
437 |
s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
|
438 |
sce->ics.swb_sizes[i], |
439 |
sce->sf_idx[w*16 + i],
|
440 |
sce->band_type[w*16 + i],
|
441 |
s->lambda); |
442 |
start += sce->ics.swb_sizes[i]; |
443 |
} |
444 |
} |
445 |
} |
446 |
|
447 |
/**
|
448 |
* Encode one channel of audio data.
|
449 |
*/
|
450 |
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s, |
451 |
SingleChannelElement *sce, |
452 |
int common_window)
|
453 |
{ |
454 |
put_bits(&s->pb, 8, sce->sf_idx[0]); |
455 |
if (!common_window)
|
456 |
put_ics_info(s, &sce->ics); |
457 |
encode_band_info(s, sce); |
458 |
encode_scale_factors(avctx, s, sce); |
459 |
encode_pulses(s, &sce->pulse); |
460 |
put_bits(&s->pb, 1, 0); //tns |
461 |
put_bits(&s->pb, 1, 0); //ssr |
462 |
encode_spectral_coeffs(s, sce); |
463 |
return 0; |
464 |
} |
465 |
|
466 |
/**
|
467 |
* Write some auxiliary information about the created AAC file.
|
468 |
*/
|
469 |
static void put_bitstream_info(AVCodecContext *avctx, AACEncContext *s, |
470 |
const char *name) |
471 |
{ |
472 |
int i, namelen, padbits;
|
473 |
|
474 |
namelen = strlen(name) + 2;
|
475 |
put_bits(&s->pb, 3, TYPE_FIL);
|
476 |
put_bits(&s->pb, 4, FFMIN(namelen, 15)); |
477 |
if (namelen >= 15) |
478 |
put_bits(&s->pb, 8, namelen - 16); |
479 |
put_bits(&s->pb, 4, 0); //extension type - filler |
480 |
padbits = 8 - (put_bits_count(&s->pb) & 7); |
481 |
align_put_bits(&s->pb); |
482 |
for (i = 0; i < namelen - 2; i++) |
483 |
put_bits(&s->pb, 8, name[i]);
|
484 |
put_bits(&s->pb, 12 - padbits, 0); |
485 |
} |
486 |
|
487 |
/**
 * Encode one frame of 1024 samples per channel.
 *
 * The encoder runs one frame behind its input: new samples are stored in the
 * second half of s->samples as look-ahead while the first half is encoded.
 * The first call therefore produces no output. A call with data == NULL
 * encodes the last buffered frame; every later call returns 0.
 *
 * @param avctx    codec context
 * @param frame    output buffer for the coded frame
 * @param buf_size size of the output buffer in bytes
 * @param data     interleaved 16-bit input samples, or NULL to flush
 * @return number of bytes written to frame, 0 if no frame was produced
 */
static int aac_encode_frame(AVCodecContext *avctx,
                            uint8_t *frame, int buf_size, void *data)
{
    AACEncContext *s = avctx->priv_data;
    int16_t *samples = s->samples, *samples2, *la;
    ChannelElement *cpe;
    int i, j, chans, tag, start_ch;
    const uint8_t *chan_map = aac_chan_configs[avctx->channels-1];
    int chan_el_counter[4];
    FFPsyWindowInfo windows[avctx->channels];

    if (s->last_frame)
        return 0;
    if (data) {
        /* store the new samples as look-ahead, optionally preprocessed */
        if (!s->psypp) {
            memcpy(s->samples + 1024 * avctx->channels, data,
                   1024 * avctx->channels * sizeof(s->samples[0]));
        } else {
            start_ch = 0;
            samples2 = s->samples + 1024 * avctx->channels;
            for (i = 0; i < chan_map[0]; i++) {
                tag   = chan_map[i+1];
                chans = tag == TYPE_CPE ? 2 : 1;
                ff_psy_preprocess(s->psypp, (uint16_t*)data + start_ch,
                                  samples2 + start_ch, start_ch, chans);
                start_ch += chans;
            }
        }
    }
    if (!avctx->frame_number) {
        /* first call: only prime the delay buffer */
        memcpy(s->samples, s->samples + 1024 * avctx->channels,
               1024 * avctx->channels * sizeof(s->samples[0]));
        return 0;
    }

    /* window decision and MDCT for every channel */
    start_ch = 0;
    for (i = 0; i < chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        samples2 = samples + start_ch;
        /* samples2 already carries the channel offset; adding start_ch again
         * would select the wrong channel and can leave the sample buffer */
        la       = samples2 + 1024 * avctx->channels;
        if (!data)
            la = NULL;
        for (j = 0; j < chans; j++) {
            IndividualChannelStream *ics = &cpe->ch[j].ics;
            int k;
            wi[j] = ff_psy_suggest_window(&s->psy, samples2, la, start_ch + j, ics->window_sequence[0]);
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[j].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[j].window_shape;
            ics->num_windows        = wi[j].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = s->psy.num_bands[ics->num_windows == 8];
            for (k = 0; k < ics->num_windows; k++)
                ics->group_len[k] = wi[j].grouping[k];

            s->cur_channel = start_ch + j;
            apply_window_and_mdct(avctx, s, &cpe->ch[j], samples2, j);
        }
        start_ch += chans;
    }
    /* code the frame; repeat with an adjusted lambda while it is too large */
    do {
        int frame_bits;
        /* init_put_bits() expects the buffer size in bytes */
        init_put_bits(&s->pb, frame, buf_size);
        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
            put_bitstream_info(avctx, s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            tag      = chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            for (j = 0; j < chans; j++) {
                s->cur_channel = start_ch + j;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[j], s->lambda);
            }
            /* a pair shares ics_info only if type, shape and grouping match */
            cpe->common_window = 0;
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (j = 0; j < wi[0].num_windows; j++) {
                    if (wi[0].grouping[j] != wi[1].grouping[j]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            s->cur_channel = start_ch;
            if (cpe->common_window && s->coder->search_for_ms)
                s->coder->search_for_ms(s, cpe, s->lambda);
            adjust_frame_information(s, cpe, chans);
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    encode_ms_info(&s->pb, cpe);
                }
            }
            for (j = 0; j < chans; j++) {
                s->cur_channel = start_ch + j;
                ff_psy_set_band_info(&s->psy, s->cur_channel, cpe->ch[j].coeffs, &wi[j]);
                encode_individual_channel(avctx, s, &cpe->ch[j], cpe->common_window);
            }
            start_ch += chans;
        }

        frame_bits = put_bits_count(&s->pb);
        /* leave room for the 3-bit TYPE_END written below */
        if (frame_bits <= 6144 * avctx->channels - 3)
            break;

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!data)
        s->last_frame = 1;
    /* the look-ahead half becomes the frame encoded by the next call */
    memcpy(s->samples, s->samples + 1024 * avctx->channels,
           1024 * avctx->channels * sizeof(s->samples[0]));
    return put_bits_count(&s->pb)>>3;
}
626 |
|
627 |
/**
 * Release everything the encoder set up: both MDCT contexts, the
 * psychoacoustic model and its preprocessor, and the sample and channel
 * element buffers.
 *
 * @return always 0
 */
static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *enc = avctx->priv_data;

    /* transforms */
    ff_mdct_end(&enc->mdct1024);
    ff_mdct_end(&enc->mdct128);

    /* psychoacoustic model */
    ff_psy_end(&enc->psy);
    ff_psy_preprocess_end(enc->psypp);

    /* buffers */
    av_freep(&enc->samples);
    av_freep(&enc->cpe);

    return 0;
}
639 |
|
640 |
AVCodec aac_encoder = { |
641 |
"aac",
|
642 |
AVMEDIA_TYPE_AUDIO, |
643 |
CODEC_ID_AAC, |
644 |
sizeof(AACEncContext),
|
645 |
aac_encode_init, |
646 |
aac_encode_frame, |
647 |
aac_encode_end, |
648 |
.capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, |
649 |
.sample_fmts = (const enum SampleFormat[]){SAMPLE_FMT_S16,SAMPLE_FMT_NONE}, |
650 |
.long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
|
651 |
}; |