ffmpeg / libavcodec / nellymoserenc.c @ 015f9f1a
History | View | Annotate | Download (13.3 KB)
1 |
/*
|
---|---|
2 |
* Nellymoser encoder
|
3 |
* This code is developed as part of Google Summer of Code 2008 Program.
|
4 |
*
|
5 |
* Copyright (c) 2008 Bartlomiej Wolowiec
|
6 |
*
|
7 |
* This file is part of FFmpeg.
|
8 |
*
|
9 |
* FFmpeg is free software; you can redistribute it and/or
|
10 |
* modify it under the terms of the GNU Lesser General Public
|
11 |
* License as published by the Free Software Foundation; either
|
12 |
* version 2.1 of the License, or (at your option) any later version.
|
13 |
*
|
14 |
* FFmpeg is distributed in the hope that it will be useful,
|
15 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
17 |
* Lesser General Public License for more details.
|
18 |
*
|
19 |
* You should have received a copy of the GNU Lesser General Public
|
20 |
* License along with FFmpeg; if not, write to the Free Software
|
21 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
22 |
*/
|
23 |
|
24 |
/**
|
25 |
* @file
|
26 |
* Nellymoser encoder
|
27 |
* by Bartlomiej Wolowiec
|
28 |
*
|
29 |
* Generic codec information: libavcodec/nellymoserdec.c
|
30 |
*
|
31 |
* Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
|
32 |
* (Copyright Joseph Artsimovich and UAB "DKD")
|
33 |
*
|
34 |
* for more information about nellymoser format, visit:
|
35 |
* http://wiki.multimedia.cx/index.php?title=Nellymoser
|
36 |
*/
|
37 |
|
38 |
#include "nellymoser.h" |
39 |
#include "avcodec.h" |
40 |
#include "dsputil.h" |
41 |
#include "fft.h" |
42 |
|
43 |
#define BITSTREAM_WRITER_LE
|
44 |
#include "put_bits.h" |
45 |
|
46 |
#define POW_TABLE_SIZE (1<<11) |
47 |
#define POW_TABLE_OFFSET 3 |
48 |
#define OPT_SIZE ((1<<15) + 3000) |
49 |
|
50 |
typedef struct NellyMoserEncodeContext { |
51 |
AVCodecContext *avctx; |
52 |
int last_frame;
|
53 |
int bufsel;
|
54 |
int have_saved;
|
55 |
DSPContext dsp; |
56 |
FFTContext mdct_ctx; |
57 |
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; |
58 |
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; |
59 |
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer |
60 |
float (*opt )[NELLY_BANDS];
|
61 |
uint8_t (*path)[NELLY_BANDS]; |
62 |
} NellyMoserEncodeContext; |
63 |
|
64 |
static float pow_table[POW_TABLE_SIZE]; ///< -pow(2, -i / 2048.0 - 3.0); |
65 |
|
66 |
static const uint8_t sf_lut[96] = { |
67 |
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, |
68 |
5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, |
69 |
15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, |
70 |
27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, |
71 |
41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, |
72 |
54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, |
73 |
}; |
74 |
|
75 |
static const uint8_t sf_delta_lut[78] = { |
76 |
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, |
77 |
4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, |
78 |
13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, |
79 |
23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, |
80 |
28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, |
81 |
}; |
82 |
|
83 |
static const uint8_t quant_lut[230] = { |
84 |
0,
|
85 |
|
86 |
0, 1, 2, |
87 |
|
88 |
0, 1, 2, 3, 4, 5, 6, |
89 |
|
90 |
0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, |
91 |
12, 13, 13, 13, 14, |
92 |
|
93 |
0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, |
94 |
8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, |
95 |
22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, |
96 |
30,
|
97 |
|
98 |
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, |
99 |
4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, |
100 |
10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, |
101 |
15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, |
102 |
21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, |
103 |
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, |
104 |
46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, |
105 |
53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, |
106 |
58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, |
107 |
61, 61, 61, 61, 62, |
108 |
}; |
109 |
|
110 |
static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; |
111 |
static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; |
112 |
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; |
113 |
|
114 |
static void apply_mdct(NellyMoserEncodeContext *s) |
115 |
{ |
116 |
s->dsp.vector_fmul(s->in_buff, s->buf[s->bufsel], ff_sine_128, NELLY_BUF_LEN); |
117 |
s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, ff_sine_128, |
118 |
NELLY_BUF_LEN); |
119 |
ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff); |
120 |
|
121 |
s->dsp.vector_fmul(s->buf[s->bufsel] + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN, |
122 |
ff_sine_128, NELLY_BUF_LEN); |
123 |
s->dsp.vector_fmul_reverse(s->buf[s->bufsel] + 2 * NELLY_BUF_LEN, s->buf[1 - s->bufsel], ff_sine_128, |
124 |
NELLY_BUF_LEN); |
125 |
ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, s->buf[s->bufsel] + NELLY_BUF_LEN); |
126 |
} |
127 |
|
128 |
static av_cold int encode_init(AVCodecContext *avctx) |
129 |
{ |
130 |
NellyMoserEncodeContext *s = avctx->priv_data; |
131 |
int i;
|
132 |
|
133 |
if (avctx->channels != 1) { |
134 |
av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
|
135 |
return -1; |
136 |
} |
137 |
|
138 |
if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 && |
139 |
avctx->sample_rate != 11025 &&
|
140 |
avctx->sample_rate != 22050 && avctx->sample_rate != 44100 && |
141 |
avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) { |
142 |
av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
|
143 |
return -1; |
144 |
} |
145 |
|
146 |
avctx->frame_size = NELLY_SAMPLES; |
147 |
s->avctx = avctx; |
148 |
ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0); |
149 |
dsputil_init(&s->dsp, avctx); |
150 |
|
151 |
/* Generate overlap window */
|
152 |
ff_sine_window_init(ff_sine_128, 128);
|
153 |
for (i = 0; i < POW_TABLE_SIZE; i++) |
154 |
pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); |
155 |
|
156 |
if (s->avctx->trellis) {
|
157 |
s->opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); |
158 |
s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
|
159 |
} |
160 |
|
161 |
return 0; |
162 |
} |
163 |
|
164 |
static av_cold int encode_end(AVCodecContext *avctx) |
165 |
{ |
166 |
NellyMoserEncodeContext *s = avctx->priv_data; |
167 |
|
168 |
ff_mdct_end(&s->mdct_ctx); |
169 |
|
170 |
if (s->avctx->trellis) {
|
171 |
av_free(s->opt); |
172 |
av_free(s->path); |
173 |
} |
174 |
|
175 |
return 0; |
176 |
} |
177 |
|
178 |
#define find_best(val, table, LUT, LUT_add, LUT_size) \
|
179 |
best_idx = \ |
180 |
LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size - 1)]; \ |
181 |
if (fabs(val - table[best_idx]) > fabs(val - table[best_idx + 1])) \ |
182 |
best_idx++; |
183 |
|
184 |
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) |
185 |
{ |
186 |
int band, best_idx, power_idx = 0; |
187 |
float power_candidate;
|
188 |
|
189 |
//base exponent
|
190 |
find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96); |
191 |
idx_table[0] = best_idx;
|
192 |
power_idx = ff_nelly_init_table[best_idx]; |
193 |
|
194 |
for (band = 1; band < NELLY_BANDS; band++) { |
195 |
power_candidate = cand[band] - power_idx; |
196 |
find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); |
197 |
idx_table[band] = best_idx; |
198 |
power_idx += ff_nelly_delta_table[best_idx]; |
199 |
} |
200 |
} |
201 |
|
202 |
static inline float distance(float x, float y, int band) |
203 |
{ |
204 |
//return pow(fabs(x-y), 2.0);
|
205 |
float tmp = x - y;
|
206 |
return tmp * tmp;
|
207 |
} |
208 |
|
209 |
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) |
210 |
{ |
211 |
int i, j, band, best_idx;
|
212 |
float power_candidate, best_val;
|
213 |
|
214 |
float (*opt )[NELLY_BANDS] = s->opt ;
|
215 |
uint8_t(*path)[NELLY_BANDS] = s->path; |
216 |
|
217 |
for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { |
218 |
opt[0][i] = INFINITY;
|
219 |
} |
220 |
|
221 |
for (i = 0; i < 64; i++) { |
222 |
opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); |
223 |
path[0][ff_nelly_init_table[i]] = i;
|
224 |
} |
225 |
|
226 |
for (band = 1; band < NELLY_BANDS; band++) { |
227 |
int q, c = 0; |
228 |
float tmp;
|
229 |
int idx_min, idx_max, idx;
|
230 |
power_candidate = cand[band]; |
231 |
for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { |
232 |
idx_min = FFMAX(0, cand[band] - q);
|
233 |
idx_max = FFMIN(OPT_SIZE, cand[band - 1] + q);
|
234 |
for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) { |
235 |
if ( isinf(opt[band - 1][i]) ) |
236 |
continue;
|
237 |
for (j = 0; j < 32; j++) { |
238 |
idx = i + ff_nelly_delta_table[j]; |
239 |
if (idx > idx_max)
|
240 |
break;
|
241 |
if (idx >= idx_min) {
|
242 |
tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
|
243 |
if (opt[band][idx] > tmp) {
|
244 |
opt[band][idx] = tmp; |
245 |
path[band][idx] = j; |
246 |
c = 1;
|
247 |
} |
248 |
} |
249 |
} |
250 |
} |
251 |
} |
252 |
assert(c); //FIXME
|
253 |
} |
254 |
|
255 |
best_val = INFINITY; |
256 |
best_idx = -1;
|
257 |
band = NELLY_BANDS - 1;
|
258 |
for (i = 0; i < OPT_SIZE; i++) { |
259 |
if (best_val > opt[band][i]) {
|
260 |
best_val = opt[band][i]; |
261 |
best_idx = i; |
262 |
} |
263 |
} |
264 |
for (band = NELLY_BANDS - 1; band >= 0; band--) { |
265 |
idx_table[band] = path[band][best_idx]; |
266 |
if (band) {
|
267 |
best_idx -= ff_nelly_delta_table[path[band][best_idx]]; |
268 |
} |
269 |
} |
270 |
} |
271 |
|
272 |
/**
|
273 |
* Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
|
274 |
* @param s encoder context
|
275 |
* @param output output buffer
|
276 |
* @param output_size size of output buffer
|
277 |
*/
|
278 |
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) |
279 |
{ |
280 |
PutBitContext pb; |
281 |
int i, j, band, block, best_idx, power_idx = 0; |
282 |
float power_val, coeff, coeff_sum;
|
283 |
float pows[NELLY_FILL_LEN];
|
284 |
int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
|
285 |
float cand[NELLY_BANDS];
|
286 |
|
287 |
apply_mdct(s); |
288 |
|
289 |
init_put_bits(&pb, output, output_size * 8);
|
290 |
|
291 |
i = 0;
|
292 |
for (band = 0; band < NELLY_BANDS; band++) { |
293 |
coeff_sum = 0;
|
294 |
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { |
295 |
coeff_sum += s->mdct_out[i ] * s->mdct_out[i ] |
296 |
+ s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN]; |
297 |
} |
298 |
cand[band] = |
299 |
log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; |
300 |
} |
301 |
|
302 |
if (s->avctx->trellis) {
|
303 |
get_exponent_dynamic(s, cand, idx_table); |
304 |
} else {
|
305 |
get_exponent_greedy(s, cand, idx_table); |
306 |
} |
307 |
|
308 |
i = 0;
|
309 |
for (band = 0; band < NELLY_BANDS; band++) { |
310 |
if (band) {
|
311 |
power_idx += ff_nelly_delta_table[idx_table[band]]; |
312 |
put_bits(&pb, 5, idx_table[band]);
|
313 |
} else {
|
314 |
power_idx = ff_nelly_init_table[idx_table[0]];
|
315 |
put_bits(&pb, 6, idx_table[0]); |
316 |
} |
317 |
power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); |
318 |
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { |
319 |
s->mdct_out[i] *= power_val; |
320 |
s->mdct_out[i + NELLY_BUF_LEN] *= power_val; |
321 |
pows[i] = power_idx; |
322 |
} |
323 |
} |
324 |
|
325 |
ff_nelly_get_sample_bits(pows, bits); |
326 |
|
327 |
for (block = 0; block < 2; block++) { |
328 |
for (i = 0; i < NELLY_FILL_LEN; i++) { |
329 |
if (bits[i] > 0) { |
330 |
const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1; |
331 |
coeff = s->mdct_out[block * NELLY_BUF_LEN + i]; |
332 |
best_idx = |
333 |
quant_lut[av_clip ( |
334 |
coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], |
335 |
quant_lut_offset[bits[i]], |
336 |
quant_lut_offset[bits[i]+1] - 1 |
337 |
)]; |
338 |
if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1])) |
339 |
best_idx++; |
340 |
|
341 |
put_bits(&pb, bits[i], best_idx); |
342 |
} |
343 |
} |
344 |
if (!block)
|
345 |
put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
|
346 |
} |
347 |
|
348 |
flush_put_bits(&pb); |
349 |
} |
350 |
|
351 |
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) |
352 |
{ |
353 |
NellyMoserEncodeContext *s = avctx->priv_data; |
354 |
const int16_t *samples = data;
|
355 |
int i;
|
356 |
|
357 |
if (s->last_frame)
|
358 |
return 0; |
359 |
|
360 |
if (data) {
|
361 |
for (i = 0; i < avctx->frame_size; i++) { |
362 |
s->buf[s->bufsel][i] = samples[i]; |
363 |
} |
364 |
for (; i < NELLY_SAMPLES; i++) {
|
365 |
s->buf[s->bufsel][i] = 0;
|
366 |
} |
367 |
s->bufsel = 1 - s->bufsel;
|
368 |
if (!s->have_saved) {
|
369 |
s->have_saved = 1;
|
370 |
return 0; |
371 |
} |
372 |
} else {
|
373 |
memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN); |
374 |
s->bufsel = 1 - s->bufsel;
|
375 |
s->last_frame = 1;
|
376 |
} |
377 |
|
378 |
if (s->have_saved) {
|
379 |
encode_block(s, frame, buf_size); |
380 |
return NELLY_BLOCK_LEN;
|
381 |
} |
382 |
return 0; |
383 |
} |
384 |
|
385 |
AVCodec nellymoser_encoder = { |
386 |
.name = "nellymoser",
|
387 |
.type = AVMEDIA_TYPE_AUDIO, |
388 |
.id = CODEC_ID_NELLYMOSER, |
389 |
.priv_data_size = sizeof(NellyMoserEncodeContext),
|
390 |
.init = encode_init, |
391 |
.encode = encode_frame, |
392 |
.close = encode_end, |
393 |
.capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY, |
394 |
.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
|
395 |
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, |
396 |
}; |