ffmpeg / libavcodec / nellymoserenc.c @ 84dc2d8a
History  View  Annotate  Download (13.2 KB)
1 
/*


2 
* Nellymoser encoder

3 
* This code is developed as part of Google Summer of Code 2008 Program.

4 
*

5 
* Copyright (c) 2008 Bartlomiej Wolowiec

6 
*

7 
* This file is part of FFmpeg.

8 
*

9 
* FFmpeg is free software; you can redistribute it and/or

10 
* modify it under the terms of the GNU Lesser General Public

11 
* License as published by the Free Software Foundation; either

12 
* version 2.1 of the License, or (at your option) any later version.

13 
*

14 
* FFmpeg is distributed in the hope that it will be useful,

15 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

16 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

17 
* Lesser General Public License for more details.

18 
*

19 
* You should have received a copy of the GNU Lesser General Public

20 
* License along with FFmpeg; if not, write to the Free Software

21 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

22 
*/

23  
24 
/**

25 
* @file libavcodec/nellymoserenc.c

26 
* Nellymoser encoder

27 
* by Bartlomiej Wolowiec

28 
*

29 
* Generic codec information: libavcodec/nellymoserdec.c

30 
*

31 
* Some information also from: http://samples.mplayerhq.hu/Acodecs/Nelly_Moser/ASAO/ASAO.zip

32 
* (Copyright Joseph Artsimovich and UAB "DKD")

33 
*

34 
* for more information about nellymoser format, visit:

35 
* http://wiki.multimedia.cx/index.php?title=Nellymoser

36 
*/

37  
38 
#include "nellymoser.h" 
39 
#include "avcodec.h" 
40 
#include "dsputil.h" 
41  
42 
#define BITSTREAM_WRITER_LE

43 
#include "put_bits.h" 
44  
45 
#define POW_TABLE_SIZE (1<<11) 
46 
#define POW_TABLE_OFFSET 3 
47 
#define OPT_SIZE ((1<<15) + 3000) 
48  
49 
typedef struct NellyMoserEncodeContext { 
50 
AVCodecContext *avctx; 
51 
int last_frame;

52 
int bufsel;

53 
int have_saved;

54 
DSPContext dsp; 
55 
FFTContext mdct_ctx; 
56 
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; 
57 
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; 
58 
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer 
59 
float (*opt )[NELLY_BANDS];

60 
uint8_t (*path)[NELLY_BANDS]; 
61 
} NellyMoserEncodeContext; 
62  
63 
static float pow_table[POW_TABLE_SIZE]; ///< pow(2, i / 2048.0  3.0); 
64  
65 
static const uint8_t sf_lut[96] = { 
66 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 
67 
5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 
68 
15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 
69 
27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 
70 
41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 
71 
54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 
72 
}; 
73  
74 
static const uint8_t sf_delta_lut[78] = { 
75 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 
76 
4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 
77 
13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 
78 
23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 
79 
28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 
80 
}; 
81  
82 
static const uint8_t quant_lut[230] = { 
83 
0,

84  
85 
0, 1, 2, 
86  
87 
0, 1, 2, 3, 4, 5, 6, 
88  
89 
0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 
90 
12, 13, 13, 13, 14, 
91  
92 
0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 
93 
8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
94 
22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 
95 
30,

96  
97 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 
98 
4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 
99 
10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 
100 
15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 
101 
21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 
102 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 
103 
46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 
104 
53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 
105 
58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 
106 
61, 61, 61, 61, 62, 
107 
}; 
108  
109 
static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 
110 
static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 
111 
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 
112  
113 
static void apply_mdct(NellyMoserEncodeContext *s) 
114 
{ 
115 
memcpy(s>in_buff, s>buf[s>bufsel], NELLY_BUF_LEN * sizeof(float)); 
116 
s>dsp.vector_fmul(s>in_buff, ff_sine_128, NELLY_BUF_LEN); 
117 
s>dsp.vector_fmul_reverse(s>in_buff + NELLY_BUF_LEN, s>buf[s>bufsel] + NELLY_BUF_LEN, ff_sine_128, 
118 
NELLY_BUF_LEN); 
119 
ff_mdct_calc(&s>mdct_ctx, s>mdct_out, s>in_buff); 
120  
121 
s>dsp.vector_fmul(s>buf[s>bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN); 
122 
s>dsp.vector_fmul_reverse(s>buf[s>bufsel] + 2 * NELLY_BUF_LEN, s>buf[1  s>bufsel], ff_sine_128, 
123 
NELLY_BUF_LEN); 
124 
ff_mdct_calc(&s>mdct_ctx, s>mdct_out + NELLY_BUF_LEN, s>buf[s>bufsel] + NELLY_BUF_LEN); 
125 
} 
126  
127 
static av_cold int encode_init(AVCodecContext *avctx) 
128 
{ 
129 
NellyMoserEncodeContext *s = avctx>priv_data; 
130 
int i;

131  
132 
if (avctx>channels != 1) { 
133 
av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");

134 
return 1; 
135 
} 
136  
137 
if (avctx>sample_rate != 8000 && avctx>sample_rate != 16000 && 
138 
avctx>sample_rate != 11025 &&

139 
avctx>sample_rate != 22050 && avctx>sample_rate != 44100 && 
140 
avctx>strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 
141 
av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");

142 
return 1; 
143 
} 
144  
145 
avctx>frame_size = NELLY_SAMPLES; 
146 
s>avctx = avctx; 
147 
ff_mdct_init(&s>mdct_ctx, 8, 0, 1.0); 
148 
dsputil_init(&s>dsp, avctx); 
149  
150 
/* Generate overlap window */

151 
ff_sine_window_init(ff_sine_128, 128);

152 
for (i = 0; i < POW_TABLE_SIZE; i++) 
153 
pow_table[i] = pow(2, i / 2048.0  3.0 + POW_TABLE_OFFSET); 
154  
155 
if (s>avctx>trellis) {

156 
s>opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 
157 
s>path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));

158 
} 
159  
160 
return 0; 
161 
} 
162  
163 
static av_cold int encode_end(AVCodecContext *avctx) 
164 
{ 
165 
NellyMoserEncodeContext *s = avctx>priv_data; 
166  
167 
ff_mdct_end(&s>mdct_ctx); 
168  
169 
if (s>avctx>trellis) {

170 
av_free(s>opt); 
171 
av_free(s>path); 
172 
} 
173  
174 
return 0; 
175 
} 
176  
177 
#define find_best(val, table, LUT, LUT_add, LUT_size) \

178 
best_idx = \ 
179 
LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size  1)]; \ 
180 
if (fabs(val  table[best_idx]) > fabs(val  table[best_idx + 1])) \ 
181 
best_idx++; 
182  
183 
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 
184 
{ 
185 
int band, best_idx, power_idx = 0; 
186 
float power_candidate;

187  
188 
//base exponent

189 
find_best(cand[0], ff_nelly_init_table, sf_lut, 20, 96); 
190 
idx_table[0] = best_idx;

191 
power_idx = ff_nelly_init_table[best_idx]; 
192  
193 
for (band = 1; band < NELLY_BANDS; band++) { 
194 
power_candidate = cand[band]  power_idx; 
195 
find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 
196 
idx_table[band] = best_idx; 
197 
power_idx += ff_nelly_delta_table[best_idx]; 
198 
} 
199 
} 
200  
201 
static inline float distance(float x, float y, int band) 
202 
{ 
203 
//return pow(fabs(xy), 2.0);

204 
float tmp = x  y;

205 
return tmp * tmp;

206 
} 
207  
208 
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 
209 
{ 
210 
int i, j, band, best_idx;

211 
float power_candidate, best_val;

212  
213 
float (*opt )[NELLY_BANDS] = s>opt ;

214 
uint8_t(*path)[NELLY_BANDS] = s>path; 
215  
216 
for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 
217 
opt[0][i] = INFINITY;

218 
} 
219  
220 
for (i = 0; i < 64; i++) { 
221 
opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 
222 
path[0][ff_nelly_init_table[i]] = i;

223 
} 
224  
225 
for (band = 1; band < NELLY_BANDS; band++) { 
226 
int q, c = 0; 
227 
float tmp;

228 
int idx_min, idx_max, idx;

229 
power_candidate = cand[band]; 
230 
for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 
231 
idx_min = FFMAX(0, cand[band]  q);

232 
idx_max = FFMIN(OPT_SIZE, cand[band  1] + q);

233 
for (i = FFMAX(0, cand[band  1]  q); i < FFMIN(OPT_SIZE, cand[band  1] + q); i++) { 
234 
if ( isinf(opt[band  1][i]) ) 
235 
continue;

236 
for (j = 0; j < 32; j++) { 
237 
idx = i + ff_nelly_delta_table[j]; 
238 
if (idx > idx_max)

239 
break;

240 
if (idx >= idx_min) {

241 
tmp = opt[band  1][i] + distance(idx, power_candidate, band);

242 
if (opt[band][idx] > tmp) {

243 
opt[band][idx] = tmp; 
244 
path[band][idx] = j; 
245 
c = 1;

246 
} 
247 
} 
248 
} 
249 
} 
250 
} 
251 
assert(c); //FIXME

252 
} 
253  
254 
best_val = INFINITY; 
255 
best_idx = 1;

256 
band = NELLY_BANDS  1;

257 
for (i = 0; i < OPT_SIZE; i++) { 
258 
if (best_val > opt[band][i]) {

259 
best_val = opt[band][i]; 
260 
best_idx = i; 
261 
} 
262 
} 
263 
for (band = NELLY_BANDS  1; band >= 0; band) { 
264 
idx_table[band] = path[band][best_idx]; 
265 
if (band) {

266 
best_idx = ff_nelly_delta_table[path[band][best_idx]]; 
267 
} 
268 
} 
269 
} 
270  
271 
/**

272 
* Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values

273 
* @param s encoder context

274 
* @param output output buffer

275 
* @param output_size size of output buffer

276 
*/

277 
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 
278 
{ 
279 
PutBitContext pb; 
280 
int i, j, band, block, best_idx, power_idx = 0; 
281 
float power_val, coeff, coeff_sum;

282 
float pows[NELLY_FILL_LEN];

283 
int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];

284 
float cand[NELLY_BANDS];

285  
286 
apply_mdct(s); 
287  
288 
init_put_bits(&pb, output, output_size * 8);

289  
290 
i = 0;

291 
for (band = 0; band < NELLY_BANDS; band++) { 
292 
coeff_sum = 0;

293 
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 
294 
coeff_sum += s>mdct_out[i ] * s>mdct_out[i ] 
295 
+ s>mdct_out[i + NELLY_BUF_LEN] * s>mdct_out[i + NELLY_BUF_LEN]; 
296 
} 
297 
cand[band] = 
298 
log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 
299 
} 
300  
301 
if (s>avctx>trellis) {

302 
get_exponent_dynamic(s, cand, idx_table); 
303 
} else {

304 
get_exponent_greedy(s, cand, idx_table); 
305 
} 
306  
307 
i = 0;

308 
for (band = 0; band < NELLY_BANDS; band++) { 
309 
if (band) {

310 
power_idx += ff_nelly_delta_table[idx_table[band]]; 
311 
put_bits(&pb, 5, idx_table[band]);

312 
} else {

313 
power_idx = ff_nelly_init_table[idx_table[0]];

314 
put_bits(&pb, 6, idx_table[0]); 
315 
} 
316 
power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 
317 
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 
318 
s>mdct_out[i] *= power_val; 
319 
s>mdct_out[i + NELLY_BUF_LEN] *= power_val; 
320 
pows[i] = power_idx; 
321 
} 
322 
} 
323  
324 
ff_nelly_get_sample_bits(pows, bits); 
325  
326 
for (block = 0; block < 2; block++) { 
327 
for (i = 0; i < NELLY_FILL_LEN; i++) { 
328 
if (bits[i] > 0) { 
329 
const float *table = ff_nelly_dequantization_table + (1 << bits[i])  1; 
330 
coeff = s>mdct_out[block * NELLY_BUF_LEN + i]; 
331 
best_idx = 
332 
quant_lut[av_clip ( 
333 
coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 
334 
quant_lut_offset[bits[i]], 
335 
quant_lut_offset[bits[i]+1]  1 
336 
)]; 
337 
if (fabs(coeff  table[best_idx]) > fabs(coeff  table[best_idx + 1])) 
338 
best_idx++; 
339  
340 
put_bits(&pb, bits[i], best_idx); 
341 
} 
342 
} 
343 
if (!block)

344 
put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS  put_bits_count(&pb), 0);

345 
} 
346  
347 
flush_put_bits(&pb); 
348 
} 
349  
350 
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 
351 
{ 
352 
NellyMoserEncodeContext *s = avctx>priv_data; 
353 
int16_t *samples = data; 
354 
int i;

355  
356 
if (s>last_frame)

357 
return 0; 
358  
359 
if (data) {

360 
for (i = 0; i < avctx>frame_size; i++) { 
361 
s>buf[s>bufsel][i] = samples[i]; 
362 
} 
363 
for (; i < NELLY_SAMPLES; i++) {

364 
s>buf[s>bufsel][i] = 0;

365 
} 
366 
s>bufsel = 1  s>bufsel;

367 
if (!s>have_saved) {

368 
s>have_saved = 1;

369 
return 0; 
370 
} 
371 
} else {

372 
memset(s>buf[s>bufsel], 0, sizeof(s>buf[0][0]) * NELLY_BUF_LEN); 
373 
s>bufsel = 1  s>bufsel;

374 
s>last_frame = 1;

375 
} 
376  
377 
if (s>have_saved) {

378 
encode_block(s, frame, buf_size); 
379 
return NELLY_BLOCK_LEN;

380 
} 
381 
return 0; 
382 
} 
383  
384 
AVCodec nellymoser_encoder = { 
385 
.name = "nellymoser",

386 
.type = CODEC_TYPE_AUDIO, 
387 
.id = CODEC_ID_NELLYMOSER, 
388 
.priv_data_size = sizeof(NellyMoserEncodeContext),

389 
.init = encode_init, 
390 
.encode = encode_frame, 
391 
.close = encode_end, 
392 
.capabilities = CODEC_CAP_SMALL_LAST_FRAME  CODEC_CAP_DELAY, 
393 
.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),

394 
}; 