ffmpeg / libavcodec / nellymoserenc.c @ b8d62672
History | View | Annotate | Download (12.8 KB)
1 |
/*
 * Nellymoser encoder
 * This code is developed as part of Google Summer of Code 2008 Program.
 *
 * Copyright (c) 2008 Bartlomiej Wolowiec
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
23 |
|
24 |
/**
 * @file nellymoserenc.c
 * Nellymoser encoder
 * by Bartlomiej Wolowiec
 *
 * Generic codec information: libavcodec/nellymoserdec.c
 *
 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
 * (Copyright Joseph Artsimovich and UAB "DKD")
 *
 * For more information about the Nellymoser format, visit:
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
 */
|
37 |
|
38 |
#include "nellymoser.h" |
39 |
#include "avcodec.h" |
40 |
#include "dsputil.h" |
41 |
|
42 |
#define BITSTREAM_WRITER_LE
|
43 |
#include "bitstream.h" |
44 |
|
45 |
/** Number of entries in pow_table: one per value of the low 11 bits of a power index. */
#define POW_TABLE_SIZE   (1 << 11)
/** Exponent bias folded into pow_table and undone again when the table is used. */
#define POW_TABLE_OFFSET 3
47 |
|
48 |
typedef struct NellyMoserEncodeContext { |
49 |
AVCodecContext *avctx; |
50 |
int last_frame;
|
51 |
int bufsel;
|
52 |
int have_saved;
|
53 |
DSPContext dsp; |
54 |
MDCTContext mdct_ctx; |
55 |
DECLARE_ALIGNED_16(float, mdct_out[NELLY_SAMPLES]);
|
56 |
DECLARE_ALIGNED_16(float, buf[2][3 * NELLY_BUF_LEN]); ///< sample buffer |
57 |
} NellyMoserEncodeContext; |
58 |
|
59 |
/**
 * pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled by encode_init().
 * The user of the table divides by 1 << (... + POW_TABLE_OFFSET), which cancels the offset.
 */
static float pow_table[POW_TABLE_SIZE];
60 |
|
61 |
/**
 * First-guess index into ff_nelly_init_table for the base exponent.
 * Indexed by the find_best() macro with (lrintf(value) >> 8) - 20, clipped to 0..95.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
69 |
|
70 |
/**
 * First-guess index into ff_nelly_delta_table for an exponent delta.
 * Indexed by the find_best() macro with (lrintf(value) >> 8) + 37, clipped to 0..77.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
77 |
|
78 |
/**
 * First-guess quantizer indices, one consecutive group per sample bit width.
 * The group used for a width of b bits spans
 * quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1.
 */
static const uint8_t quant_lut[230] = {
    /* offset 0 (1 entry) */
     0,

    /* offset 1 (3 entries) */
     0,  1,  2,

    /* offset 4 (7 entries) */
     0,  1,  2,  3,  4,  5,  6,

    /* offset 11 (21 entries) */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* offset 32 (49 entries) */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* offset 81 (149 entries) */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
104 |
|
105 |
/*
 * Per-bit-width parameters used to turn a scaled coefficient into a
 * quant_lut position: position = coeff * mul[bits] + add[bits], clipped to
 * the range offset[bits] .. offset[bits + 1] - 1.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
108 |
|
109 |
/**
 * Windows the buffered samples and computes the two MDCTs of one block.
 *
 * The first transform is written to s->mdct_out[0..], the second to
 * s->mdct_out[NELLY_BUF_LEN..]. The first transform works on a scratch copy;
 * the second one windows s->buf[s->bufsel] in place, so that buffer's
 * contents are consumed by this call.
 *
 * File-private helper: it has internal linkage because its only parameter
 * type is private to this file.
 *
 * @param s encoder context
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    DECLARE_ALIGNED_16(float, in_buff[NELLY_SAMPLES]);

    /* First transform: segments 0 and 1 of the current buffer, windowed into
     * the scratch array (rising sine on segment 0, reversed sine on segment 1). */
    memcpy(in_buff, s->buf[s->bufsel], NELLY_BUF_LEN * sizeof(float));
    s->dsp.vector_fmul(in_buff, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128,
                               NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, in_buff);

    /* Second transform: segment 1 of the current buffer followed by segment 0
     * of the other buffer; the windowed copy of the latter is stored in
     * segment 2 of the current buffer so that the input is contiguous. */
    s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128,
                               NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN);
}
124 |
|
125 |
/**
 * Validates the stream parameters and initializes the encoder state.
 *
 * Only mono input is accepted. Sample rates other than 8000, 11025, 22050
 * and 44100 Hz are rejected unless strict_std_compliance is below
 * FF_COMPLIANCE_NORMAL.
 *
 * @param avctx codec context; frame_size is set to NELLY_SAMPLES
 * @return 0 on success, -1 on unsupported parameters or failed MDCT setup
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int i;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
        return -1;
    }

    if (avctx->sample_rate != 8000 && avctx->sample_rate != 11025 &&
        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 11025, 22050 and 44100 sample rate\n");
        return -1;
    }

    avctx->frame_size = NELLY_SAMPLES;
    s->avctx = avctx;

    /* The MDCT setup allocates tables and can fail; do not continue with a
     * half-initialized transform context. */
    if (ff_mdct_init(&s->mdct_ctx, 8, 0) < 0) {
        av_log(avctx, AV_LOG_ERROR, "MDCT initialization failed\n");
        return -1;
    }
    dsputil_init(&s->dsp, avctx);

    /* Generate overlap window */
    ff_sine_window_init(ff_sine_128, 128);

    /* pow_table[i] = -2^(-i / 2048); POW_TABLE_OFFSET cancels the -3.0 here
     * and is re-applied as a shift where the table is used. */
    for (i = 0; i < POW_TABLE_SIZE; i++)
        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);

    return 0;
}
154 |
|
155 |
/**
 * Releases the resources held by the encoder's MDCT context.
 *
 * @param avctx codec context whose private data is the encoder state
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *const ctx = avctx->priv_data;

    ff_mdct_end(&ctx->mdct_ctx);

    return 0;
}
162 |
|
163 |
/**
 * Finds the entry of a table that is closest to val and stores its index in
 * the caller's local variable best_idx.
 *
 * LUT supplies a first guess from (lrintf(val) >> 8) + LUT_add (clipped to
 * 0 .. LUT_size - 1); the guess is then moved up by one if the next table
 * entry is closer. The expansion is a single statement, so the macro is safe
 * inside unbraced if/else bodies. val is evaluated more than once and must
 * not have side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
168 |
|
169 |
/**
 * Selects the exponent index of every band greedily, one band at a time.
 *
 * Band 0 takes the ff_nelly_init_table entry found for cand[0]; every later
 * band takes the ff_nelly_delta_table entry found for the difference between
 * its candidate and the power accumulated so far.
 *
 * @param s         encoder context (not used by this function)
 * @param cand      candidate exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;        /* written by find_best() */
    int running_power;
    int b;

    /* base exponent */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0]  = best_idx;
    running_power = ff_nelly_init_table[best_idx];

    /* remaining bands are coded as deltas against the running power */
    for (b = 1; b < NELLY_BANDS; b++) {
        const float residual = cand[b] - running_power;

        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[b]   = best_idx;
        running_power += ff_nelly_delta_table[best_idx];
    }
}
186 |
|
187 |
/** Number of distinct power values tracked per band by get_exponent_dynamic(). */
#define OPT_SIZE ((1 << 15) + 3000)
188 |
|
189 |
/**
 * Cost of representing x by y: the squared difference.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; currently has no influence on the result
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
195 |
|
196 |
/**
 * Selects the exponent index of every band by dynamic programming.
 *
 * opt[band][p] holds the lowest accumulated squared error of any index
 * sequence that reaches power value p at the given band; path[band][p] holds
 * the table index taken in the last step of that sequence. After the forward
 * pass the cheapest end state is traced back to fill idx_table.
 *
 * @param s         encoder context (not used by this function)
 * @param cand      candidate exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    /* NOTE(review): two automatic arrays of NELLY_BANDS * OPT_SIZE elements
     * each; this is a very large stack frame. Moving them into the encoder
     * context would need a struct change and is left for a separate patch. */
    float opt[NELLY_BANDS][OPT_SIZE];
    int path[NELLY_BANDS][OPT_SIZE];

    /* Mark every state unreachable. Each row is indexed within its own
     * bounds instead of running a flat index through opt[0]. */
    for (band = 0; band < NELLY_BANDS; band++) {
        for (i = 0; i < OPT_SIZE; i++) {
            opt[band][i] = INFINITY;
        }
    }

    /* Band 0: every entry of the init table is a possible start state. */
    for (i = 0; i < 64; i++) {
        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        /* Widen the search window until at least one state was reached. */
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            /* NOTE(review): idx_min is centred on cand[band] while idx_max is
             * centred on cand[band - 1]; kept as in the original. */
            idx_min = FFMAX(0, cand[band] - q);
            /* Highest valid column is OPT_SIZE - 1; clamping to OPT_SIZE
             * would let idx address one element past the row below. */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if ( isinf(opt[band - 1][i]) )
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    /* stop at the first delta that overshoots the window;
                     * presumably relies on the delta table being ascending */
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx] = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        assert(c); //FIXME
    }

    /* Cheapest end state in the last band. */
    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }
    /* Trace back: undo each delta to find the predecessor state. */
    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
258 |
|
259 |
/**
|
260 |
* Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
|
261 |
* @param s encoder context
|
262 |
* @param output output buffer
|
263 |
* @param output_size size of output buffer
|
264 |
*/
|
265 |
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) |
266 |
{ |
267 |
PutBitContext pb; |
268 |
int i, j, band, block, best_idx, power_idx = 0; |
269 |
float power_val, coeff, coeff_sum;
|
270 |
float pows[NELLY_FILL_LEN];
|
271 |
int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
|
272 |
float cand[NELLY_BANDS];
|
273 |
|
274 |
apply_mdct(s); |
275 |
|
276 |
init_put_bits(&pb, output, output_size * 8);
|
277 |
|
278 |
i = 0;
|
279 |
for (band = 0; band < NELLY_BANDS; band++) { |
280 |
coeff_sum = 0;
|
281 |
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { |
282 |
coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] |
283 |
+ s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; |
284 |
} |
285 |
cand[band] = |
286 |
log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; |
287 |
} |
288 |
|
289 |
if (s->avctx->trellis) {
|
290 |
get_exponent_dynamic(s, cand, idx_table); |
291 |
} else {
|
292 |
get_exponent_greedy(s, cand, idx_table); |
293 |
} |
294 |
|
295 |
i = 0;
|
296 |
for (band = 0; band < NELLY_BANDS; band++) { |
297 |
if (band) {
|
298 |
power_idx += ff_nelly_delta_table[idx_table[band]]; |
299 |
put_bits(&pb, 5, idx_table[band]);
|
300 |
} else {
|
301 |
power_idx = ff_nelly_init_table[idx_table[0]];
|
302 |
put_bits(&pb, 6, idx_table[0]); |
303 |
} |
304 |
power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); |
305 |
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { |
306 |
s->mdct_out[i] *= power_val; |
307 |
s->mdct_out[i + NELLY_BUF_LEN] *= power_val; |
308 |
pows[i] = power_idx; |
309 |
} |
310 |
} |
311 |
|
312 |
ff_nelly_get_sample_bits(pows, bits); |
313 |
|
314 |
for (block = 0; block < 2; block++) { |
315 |
for (i = 0; i < NELLY_FILL_LEN; i++) { |
316 |
if (bits[i] > 0) { |
317 |
const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; |
318 |
coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; |
319 |
best_idx = |
320 |
quant_lut[av_clip ( |
321 |
coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], |
322 |
quant_lut_offset[bits[i]], |
323 |
quant_lut_offset[bits[i]+1] - 1 |
324 |
)]; |
325 |
if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) |
326 |
best_idx++; |
327 |
|
328 |
put_bits(&pb, bits[i], best_idx); |
329 |
} |
330 |
} |
331 |
if (!block)
|
332 |
put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
|
333 |
} |
334 |
|
335 |
flush_put_bits(&pb); |
336 |
} |
337 |
|
338 |
/**
 * Buffers one frame of 16-bit samples and encodes the previously buffered one.
 *
 * The encoder runs one frame behind its input: the first call only stores
 * the samples and produces no output. A call with data == NULL flushes the
 * last stored frame; every call after that returns 0.
 *
 * @param avctx    codec context
 * @param frame    output buffer for the encoded block
 * @param buf_size size of the output buffer in bytes
 * @param data     avctx->frame_size int16_t samples, or NULL to flush
 * @return NELLY_BLOCK_LEN when a block was written, 0 when no output was
 *         produced, -1 if the output buffer is too small for a block
 */
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int16_t *samples = data;
    int i;

    if (s->last_frame)
        return 0;

    if (data) {
        for (i = 0; i < avctx->frame_size; i++) {
            s->buf[s->bufsel][i] = samples[i];
        }
        /* a short (last) frame is padded with silence */
        for (; i < NELLY_SAMPLES; i++) {
            s->buf[s->bufsel][i] = 0;
        }
        s->bufsel = 1 - s->bufsel;
        if (!s->have_saved) {
            s->have_saved = 1;
            return 0;
        }
    } else {
        /* flush: only the first NELLY_BUF_LEN samples of the newer buffer
         * are read when the older one is encoded */
        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
        s->bufsel = 1 - s->bufsel;
        s->last_frame = 1;
    }

    if (s->have_saved) {
        /* a block occupies NELLY_BLOCK_LEN bytes; refuse to write past the
         * end of a smaller buffer */
        if (buf_size < NELLY_BLOCK_LEN) {
            av_log(avctx, AV_LOG_ERROR, "output buffer too small\n");
            return -1;
        }
        encode_block(s, frame, buf_size);
        return NELLY_BLOCK_LEN;
    }
    return 0;
}
371 |
|
372 |
AVCodec nellymoser_encoder = { |
373 |
.name = "nellymoser",
|
374 |
.type = CODEC_TYPE_AUDIO, |
375 |
.id = CODEC_ID_NELLYMOSER, |
376 |
.priv_data_size = sizeof(NellyMoserEncodeContext),
|
377 |
.init = encode_init, |
378 |
.encode = encode_frame, |
379 |
.close = encode_end, |
380 |
.capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, |
381 |
.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao Codec"),
|
382 |
}; |