ffmpeg / libavcodec / wmaenc.c @ d36beb3f
History | View | Annotate | Download (12.5 KB)
1 |
/*
|
---|---|
2 |
* WMA compatible encoder
|
3 |
* Copyright (c) 2007 Michael Niedermayer
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
#include "avcodec.h" |
23 |
#include "wma.h" |
24 |
|
25 |
#undef NDEBUG
|
26 |
#include <assert.h> |
27 |
|
28 |
|
29 |
static int encode_init(AVCodecContext * avctx){ |
30 |
WMACodecContext *s = avctx->priv_data; |
31 |
int i, flags1, flags2;
|
32 |
uint8_t *extradata; |
33 |
|
34 |
s->avctx = avctx; |
35 |
|
36 |
if(avctx->channels > MAX_CHANNELS)
|
37 |
return -1; |
38 |
|
39 |
if(avctx->bit_rate < 24*1000) |
40 |
return -1; |
41 |
|
42 |
/* extract flag infos */
|
43 |
flags1 = 0;
|
44 |
flags2 = 1;
|
45 |
if (avctx->codec->id == CODEC_ID_WMAV1) {
|
46 |
extradata= av_malloc(4);
|
47 |
avctx->extradata_size= 4;
|
48 |
AV_WL16(extradata, flags1); |
49 |
AV_WL16(extradata+2, flags2);
|
50 |
} else if (avctx->codec->id == CODEC_ID_WMAV2) { |
51 |
extradata= av_mallocz(10);
|
52 |
avctx->extradata_size= 10;
|
53 |
AV_WL32(extradata, flags1); |
54 |
AV_WL16(extradata+4, flags2);
|
55 |
}else
|
56 |
assert(0);
|
57 |
avctx->extradata= extradata; |
58 |
s->use_exp_vlc = flags2 & 0x0001;
|
59 |
s->use_bit_reservoir = flags2 & 0x0002;
|
60 |
s->use_variable_block_len = flags2 & 0x0004;
|
61 |
|
62 |
ff_wma_init(avctx, flags2); |
63 |
|
64 |
/* init MDCT */
|
65 |
for(i = 0; i < s->nb_block_sizes; i++) |
66 |
ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0); |
67 |
|
68 |
avctx->block_align= |
69 |
s->block_align= avctx->bit_rate*(int64_t)s->frame_len / (avctx->sample_rate*8);
|
70 |
//av_log(NULL, AV_LOG_ERROR, "%d %d %d %d\n", s->block_align, avctx->bit_rate, s->frame_len, avctx->sample_rate);
|
71 |
avctx->frame_size= s->frame_len; |
72 |
|
73 |
return 0; |
74 |
} |
75 |
|
76 |
|
77 |
static void apply_window_and_mdct(AVCodecContext * avctx, const signed short * audio, int len) { |
78 |
WMACodecContext *s = avctx->priv_data; |
79 |
int window_index= s->frame_len_bits - s->block_len_bits;
|
80 |
int i, j, channel;
|
81 |
const float * win = s->windows[window_index]; |
82 |
int window_len = 1 << s->block_len_bits; |
83 |
float n = window_len/2; |
84 |
|
85 |
for (channel = 0; channel < avctx->channels; channel++) { |
86 |
memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len); |
87 |
j = channel; |
88 |
for (i = 0; i < len; i++, j += avctx->channels){ |
89 |
s->output[i+window_len] = audio[j] / n * win[window_len - i - 1];
|
90 |
s->frame_out[channel][i] = audio[j] / n * win[i]; |
91 |
} |
92 |
ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output); |
93 |
} |
94 |
} |
95 |
|
96 |
//FIXME use for decoding too
|
97 |
static void init_exp(WMACodecContext *s, int ch, const int *exp_param){ |
98 |
int n;
|
99 |
const uint16_t *ptr;
|
100 |
float v, *q, max_scale, *q_end;
|
101 |
|
102 |
ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
103 |
q = s->exponents[ch]; |
104 |
q_end = q + s->block_len; |
105 |
max_scale = 0;
|
106 |
while (q < q_end) {
|
107 |
/* XXX: use a table */
|
108 |
v = pow(10, *exp_param++ * (1.0 / 16.0)); |
109 |
max_scale= FFMAX(max_scale, v); |
110 |
n = *ptr++; |
111 |
do {
|
112 |
*q++ = v; |
113 |
} while (--n);
|
114 |
} |
115 |
s->max_exponent[ch] = max_scale; |
116 |
} |
117 |
|
118 |
static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param){ |
119 |
int last_exp;
|
120 |
const uint16_t *ptr;
|
121 |
float *q, *q_end;
|
122 |
|
123 |
ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
124 |
q = s->exponents[ch]; |
125 |
q_end = q + s->block_len; |
126 |
if (s->version == 1) { |
127 |
last_exp= *exp_param++; |
128 |
assert(last_exp-10 >= 0 && last_exp-10 < 32); |
129 |
put_bits(&s->pb, 5, last_exp - 10); |
130 |
q+= *ptr++; |
131 |
}else
|
132 |
last_exp = 36;
|
133 |
while (q < q_end) {
|
134 |
int exp = *exp_param++;
|
135 |
int code = exp - last_exp + 60; |
136 |
assert(code >= 0 && code < 120); |
137 |
put_bits(&s->pb, ff_aac_scalefactor_bits[code], ff_aac_scalefactor_code[code]); |
138 |
/* XXX: use a table */
|
139 |
q+= *ptr++; |
140 |
last_exp= exp; |
141 |
} |
142 |
} |
143 |
|
144 |
static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], int total_gain){ |
145 |
int v, bsize, ch, coef_nb_bits, parse_exponents;
|
146 |
float mdct_norm;
|
147 |
int nb_coefs[MAX_CHANNELS];
|
148 |
static const int fixed_exp[25]={20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20}; |
149 |
|
150 |
//FIXME remove duplication relative to decoder
|
151 |
if (s->use_variable_block_len) {
|
152 |
assert(0); //FIXME not implemented |
153 |
}else{
|
154 |
/* fixed block len */
|
155 |
s->next_block_len_bits = s->frame_len_bits; |
156 |
s->prev_block_len_bits = s->frame_len_bits; |
157 |
s->block_len_bits = s->frame_len_bits; |
158 |
} |
159 |
|
160 |
s->block_len = 1 << s->block_len_bits;
|
161 |
// assert((s->block_pos + s->block_len) <= s->frame_len);
|
162 |
bsize = s->frame_len_bits - s->block_len_bits; |
163 |
|
164 |
//FIXME factor
|
165 |
v = s->coefs_end[bsize] - s->coefs_start; |
166 |
for(ch = 0; ch < s->nb_channels; ch++) |
167 |
nb_coefs[ch] = v; |
168 |
{ |
169 |
int n4 = s->block_len / 2; |
170 |
mdct_norm = 1.0 / (float)n4; |
171 |
if (s->version == 1) { |
172 |
mdct_norm *= sqrt(n4); |
173 |
} |
174 |
} |
175 |
|
176 |
if (s->nb_channels == 2) { |
177 |
put_bits(&s->pb, 1, s->ms_stereo= 1); |
178 |
} |
179 |
|
180 |
for(ch = 0; ch < s->nb_channels; ch++) { |
181 |
s->channel_coded[ch] = 1; //FIXME only set channel_coded when needed, instead of always |
182 |
if (s->channel_coded[ch]) {
|
183 |
init_exp(s, ch, fixed_exp); |
184 |
} |
185 |
} |
186 |
|
187 |
for(ch = 0; ch < s->nb_channels; ch++) { |
188 |
if (s->channel_coded[ch]) {
|
189 |
WMACoef *coefs1; |
190 |
float *coefs, *exponents, mult;
|
191 |
int i, n;
|
192 |
|
193 |
coefs1 = s->coefs1[ch]; |
194 |
exponents = s->exponents[ch]; |
195 |
mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; |
196 |
mult *= mdct_norm; |
197 |
coefs = src_coefs[ch]; |
198 |
if (s->use_noise_coding && 0) { |
199 |
assert(0); //FIXME not implemented |
200 |
} else {
|
201 |
coefs += s->coefs_start; |
202 |
n = nb_coefs[ch]; |
203 |
for(i = 0;i < n; i++){ |
204 |
double t= *coefs++ / (exponents[i] * mult);
|
205 |
if(t<-32768 || t>32767) |
206 |
return -1; |
207 |
|
208 |
coefs1[i] = lrint(t); |
209 |
} |
210 |
} |
211 |
} |
212 |
} |
213 |
|
214 |
v = 0;
|
215 |
for(ch = 0; ch < s->nb_channels; ch++) { |
216 |
int a = s->channel_coded[ch];
|
217 |
put_bits(&s->pb, 1, a);
|
218 |
v |= a; |
219 |
} |
220 |
|
221 |
if (!v)
|
222 |
return 1; |
223 |
|
224 |
for(v= total_gain-1; v>=127; v-= 127) |
225 |
put_bits(&s->pb, 7, 127); |
226 |
put_bits(&s->pb, 7, v);
|
227 |
|
228 |
coef_nb_bits= ff_wma_total_gain_to_bits(total_gain); |
229 |
|
230 |
if (s->use_noise_coding) {
|
231 |
for(ch = 0; ch < s->nb_channels; ch++) { |
232 |
if (s->channel_coded[ch]) {
|
233 |
int i, n;
|
234 |
n = s->exponent_high_sizes[bsize]; |
235 |
for(i=0;i<n;i++) { |
236 |
put_bits(&s->pb, 1, s->high_band_coded[ch][i]= 0); |
237 |
if (0) |
238 |
nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; |
239 |
} |
240 |
} |
241 |
} |
242 |
} |
243 |
|
244 |
parse_exponents = 1;
|
245 |
if (s->block_len_bits != s->frame_len_bits) {
|
246 |
put_bits(&s->pb, 1, parse_exponents);
|
247 |
} |
248 |
|
249 |
if (parse_exponents) {
|
250 |
for(ch = 0; ch < s->nb_channels; ch++) { |
251 |
if (s->channel_coded[ch]) {
|
252 |
if (s->use_exp_vlc) {
|
253 |
encode_exp_vlc(s, ch, fixed_exp); |
254 |
} else {
|
255 |
assert(0); //FIXME not implemented |
256 |
// encode_exp_lsp(s, ch);
|
257 |
} |
258 |
} |
259 |
} |
260 |
} else {
|
261 |
assert(0); //FIXME not implemented |
262 |
} |
263 |
|
264 |
for(ch = 0; ch < s->nb_channels; ch++) { |
265 |
if (s->channel_coded[ch]) {
|
266 |
int run, tindex;
|
267 |
WMACoef *ptr, *eptr; |
268 |
tindex = (ch == 1 && s->ms_stereo);
|
269 |
ptr = &s->coefs1[ch][0];
|
270 |
eptr = ptr + nb_coefs[ch]; |
271 |
|
272 |
run=0;
|
273 |
for(;ptr < eptr; ptr++){
|
274 |
if(*ptr){
|
275 |
int level= *ptr;
|
276 |
int abs_level= FFABS(level);
|
277 |
int code= 0; |
278 |
if(abs_level <= s->coef_vlcs[tindex]->max_level){
|
279 |
if(run < s->coef_vlcs[tindex]->levels[abs_level-1]) |
280 |
code= run + s->int_table[tindex][abs_level-1];
|
281 |
} |
282 |
|
283 |
assert(code < s->coef_vlcs[tindex]->n); |
284 |
put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code], s->coef_vlcs[tindex]->huffcodes[code]); |
285 |
|
286 |
if(code == 0){ |
287 |
if(1<<coef_nb_bits <= abs_level) |
288 |
return -1; |
289 |
|
290 |
|
291 |
//Workaround minor rounding differences for the regression tests, FIXME we should find and replace the problematic float by fixpoint for reg tests
|
292 |
if(abs_level == 0x71B && (s->avctx->flags & CODEC_FLAG_BITEXACT)) abs_level=0x71A; |
293 |
|
294 |
put_bits(&s->pb, coef_nb_bits, abs_level); |
295 |
put_bits(&s->pb, s->frame_len_bits, run); |
296 |
} |
297 |
put_bits(&s->pb, 1, level < 0); //FIXME the sign is fliped somewhere |
298 |
run=0;
|
299 |
}else{
|
300 |
run++; |
301 |
} |
302 |
} |
303 |
if(run)
|
304 |
put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1], s->coef_vlcs[tindex]->huffcodes[1]); |
305 |
} |
306 |
if (s->version == 1 && s->nb_channels >= 2) { |
307 |
align_put_bits(&s->pb); |
308 |
} |
309 |
} |
310 |
return 0; |
311 |
} |
312 |
|
313 |
static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE], uint8_t *buf, int buf_size, int total_gain){ |
314 |
init_put_bits(&s->pb, buf, buf_size); |
315 |
|
316 |
if (s->use_bit_reservoir) {
|
317 |
assert(0);//FIXME not implemented |
318 |
}else{
|
319 |
if(encode_block(s, src_coefs, total_gain) < 0) |
320 |
return INT_MAX;
|
321 |
} |
322 |
|
323 |
align_put_bits(&s->pb); |
324 |
|
325 |
return put_bits_count(&s->pb)/8 - s->block_align; |
326 |
} |
327 |
|
328 |
static int encode_superframe(AVCodecContext *avctx, |
329 |
unsigned char *buf, int buf_size, void *data){ |
330 |
WMACodecContext *s = avctx->priv_data; |
331 |
const short *samples = data; |
332 |
int i, total_gain;
|
333 |
|
334 |
s->block_len_bits= s->frame_len_bits; //required by non variable block len
|
335 |
s->block_len = 1 << s->block_len_bits;
|
336 |
|
337 |
apply_window_and_mdct(avctx, samples, avctx->frame_size); |
338 |
|
339 |
if (s->ms_stereo) {
|
340 |
float a, b;
|
341 |
int i;
|
342 |
|
343 |
for(i = 0; i < s->block_len; i++) { |
344 |
a = s->coefs[0][i]*0.5; |
345 |
b = s->coefs[1][i]*0.5; |
346 |
s->coefs[0][i] = a + b;
|
347 |
s->coefs[1][i] = a - b;
|
348 |
} |
349 |
} |
350 |
|
351 |
#if 1 |
352 |
total_gain= 128;
|
353 |
for(i=64; i; i>>=1){ |
354 |
int error= encode_frame(s, s->coefs, buf, buf_size, total_gain-i);
|
355 |
if(error<0) |
356 |
total_gain-= i; |
357 |
} |
358 |
#else
|
359 |
total_gain= 90;
|
360 |
best= encode_frame(s, s->coefs, buf, buf_size, total_gain); |
361 |
for(i=32; i; i>>=1){ |
362 |
int scoreL= encode_frame(s, s->coefs, buf, buf_size, total_gain-i);
|
363 |
int scoreR= encode_frame(s, s->coefs, buf, buf_size, total_gain+i);
|
364 |
av_log(NULL, AV_LOG_ERROR, "%d %d %d (%d)\n", scoreL, best, scoreR, total_gain); |
365 |
if(scoreL < FFMIN(best, scoreR)){
|
366 |
best = scoreL; |
367 |
total_gain -= i; |
368 |
}else if(scoreR < best){ |
369 |
best = scoreR; |
370 |
total_gain += i; |
371 |
} |
372 |
} |
373 |
#endif
|
374 |
|
375 |
encode_frame(s, s->coefs, buf, buf_size, total_gain); |
376 |
assert((put_bits_count(&s->pb) & 7) == 0); |
377 |
i= s->block_align - (put_bits_count(&s->pb)+7)/8; |
378 |
assert(i>=0);
|
379 |
while(i--)
|
380 |
put_bits(&s->pb, 8, 'N'); |
381 |
|
382 |
flush_put_bits(&s->pb); |
383 |
return put_bits_ptr(&s->pb) - s->pb.buf;
|
384 |
} |
385 |
|
386 |
AVCodec ff_wmav1_encoder = |
387 |
{ |
388 |
"wmav1",
|
389 |
AVMEDIA_TYPE_AUDIO, |
390 |
CODEC_ID_WMAV1, |
391 |
sizeof(WMACodecContext),
|
392 |
encode_init, |
393 |
encode_superframe, |
394 |
ff_wma_end, |
395 |
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, |
396 |
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
|
397 |
}; |
398 |
|
399 |
AVCodec ff_wmav2_encoder = |
400 |
{ |
401 |
"wmav2",
|
402 |
AVMEDIA_TYPE_AUDIO, |
403 |
CODEC_ID_WMAV2, |
404 |
sizeof(WMACodecContext),
|
405 |
encode_init, |
406 |
encode_superframe, |
407 |
ff_wma_end, |
408 |
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, |
409 |
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
|
410 |
}; |