ffmpeg / libavcodec / nellymoserenc.c @ 5d6e4c16
History  View  Annotate  Download (13.3 KB)
1 
/*


2 
* Nellymoser encoder

3 
* This code is developed as part of Google Summer of Code 2008 Program.

4 
*

5 
* Copyright (c) 2008 Bartlomiej Wolowiec

6 
*

7 
* This file is part of FFmpeg.

8 
*

9 
* FFmpeg is free software; you can redistribute it and/or

10 
* modify it under the terms of the GNU Lesser General Public

11 
* License as published by the Free Software Foundation; either

12 
* version 2.1 of the License, or (at your option) any later version.

13 
*

14 
* FFmpeg is distributed in the hope that it will be useful,

15 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

16 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

17 
* Lesser General Public License for more details.

18 
*

19 
* You should have received a copy of the GNU Lesser General Public

20 
* License along with FFmpeg; if not, write to the Free Software

21 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 021101301 USA

22 
*/

23  
24 
/**

25 
* @file

26 
* Nellymoser encoder

27 
* by Bartlomiej Wolowiec

28 
*

29 
* Generic codec information: libavcodec/nellymoserdec.c

30 
*

31 
* Some information also from: http://samples.mplayerhq.hu/Acodecs/Nelly_Moser/ASAO/ASAO.zip

32 
* (Copyright Joseph Artsimovich and UAB "DKD")

33 
*

34 
* for more information about nellymoser format, visit:

35 
* http://wiki.multimedia.cx/index.php?title=Nellymoser

36 
*/

37  
38 
#include "nellymoser.h" 
39 
#include "avcodec.h" 
40 
#include "dsputil.h" 
41 
#include "fft.h" 
42  
43 
#define BITSTREAM_WRITER_LE

44 
#include "put_bits.h" 
45  
46 
#define POW_TABLE_SIZE (1<<11) 
47 
#define POW_TABLE_OFFSET 3 
48 
#define OPT_SIZE ((1<<15) + 3000) 
49  
50 
typedef struct NellyMoserEncodeContext { 
51 
AVCodecContext *avctx; 
52 
int last_frame;

53 
int bufsel;

54 
int have_saved;

55 
DSPContext dsp; 
56 
FFTContext mdct_ctx; 
57 
DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES]; 
58 
DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES]; 
59 
DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN]; ///< sample buffer 
60 
float (*opt )[NELLY_BANDS];

61 
uint8_t (*path)[NELLY_BANDS]; 
62 
} NellyMoserEncodeContext; 
63  
64 
static float pow_table[POW_TABLE_SIZE]; ///< pow(2, i / 2048.0  3.0); 
65  
66 
static const uint8_t sf_lut[96] = { 
67 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 
68 
5, 5, 5, 6, 7, 7, 8, 8, 9, 10, 11, 11, 12, 13, 13, 14, 
69 
15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, 
70 
27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 
71 
41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, 
72 
54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, 
73 
}; 
74  
75 
static const uint8_t sf_delta_lut[78] = { 
76 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 
77 
4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10, 10, 11, 11, 12, 
78 
13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, 
79 
23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, 
80 
28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 
81 
}; 
82  
83 
static const uint8_t quant_lut[230] = { 
84 
0,

85  
86 
0, 1, 2, 
87  
88 
0, 1, 2, 3, 4, 5, 6, 
89  
90 
0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 11, 
91 
12, 13, 13, 13, 14, 
92  
93 
0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 
94 
8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 
95 
22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29, 
96 
30,

97  
98 
0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 
99 
4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 9, 9, 
100 
10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15, 
101 
15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20, 
102 
21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 
103 
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 
104 
46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 
105 
53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57, 
106 
58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61, 
107 
61, 61, 61, 61, 62, 
108 
}; 
109  
110 
static const float quant_lut_mul[7] = { 0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6 }; 
111 
static const float quant_lut_add[7] = { 0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0 }; 
112 
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 }; 
113  
114 
static void apply_mdct(NellyMoserEncodeContext *s) 
115 
{ 
116 
memcpy(s>in_buff, s>buf[s>bufsel], NELLY_BUF_LEN * sizeof(float)); 
117 
s>dsp.vector_fmul(s>in_buff, ff_sine_128, NELLY_BUF_LEN); 
118 
s>dsp.vector_fmul_reverse(s>in_buff + NELLY_BUF_LEN, s>buf[s>bufsel] + NELLY_BUF_LEN, ff_sine_128, 
119 
NELLY_BUF_LEN); 
120 
ff_mdct_calc(&s>mdct_ctx, s>mdct_out, s>in_buff); 
121  
122 
s>dsp.vector_fmul(s>buf[s>bufsel] + NELLY_BUF_LEN, ff_sine_128, NELLY_BUF_LEN); 
123 
s>dsp.vector_fmul_reverse(s>buf[s>bufsel] + 2 * NELLY_BUF_LEN, s>buf[1  s>bufsel], ff_sine_128, 
124 
NELLY_BUF_LEN); 
125 
ff_mdct_calc(&s>mdct_ctx, s>mdct_out + NELLY_BUF_LEN, s>buf[s>bufsel] + NELLY_BUF_LEN); 
126 
} 
127  
128 
static av_cold int encode_init(AVCodecContext *avctx) 
129 
{ 
130 
NellyMoserEncodeContext *s = avctx>priv_data; 
131 
int i;

132  
133 
if (avctx>channels != 1) { 
134 
av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");

135 
return 1; 
136 
} 
137  
138 
if (avctx>sample_rate != 8000 && avctx>sample_rate != 16000 && 
139 
avctx>sample_rate != 11025 &&

140 
avctx>sample_rate != 22050 && avctx>sample_rate != 44100 && 
141 
avctx>strict_std_compliance >= FF_COMPLIANCE_NORMAL) { 
142 
av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");

143 
return 1; 
144 
} 
145  
146 
avctx>frame_size = NELLY_SAMPLES; 
147 
s>avctx = avctx; 
148 
ff_mdct_init(&s>mdct_ctx, 8, 0, 1.0); 
149 
dsputil_init(&s>dsp, avctx); 
150  
151 
/* Generate overlap window */

152 
ff_sine_window_init(ff_sine_128, 128);

153 
for (i = 0; i < POW_TABLE_SIZE; i++) 
154 
pow_table[i] = pow(2, i / 2048.0  3.0 + POW_TABLE_OFFSET); 
155  
156 
if (s>avctx>trellis) {

157 
s>opt = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float )); 
158 
s>path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));

159 
} 
160  
161 
return 0; 
162 
} 
163  
164 
static av_cold int encode_end(AVCodecContext *avctx) 
165 
{ 
166 
NellyMoserEncodeContext *s = avctx>priv_data; 
167  
168 
ff_mdct_end(&s>mdct_ctx); 
169  
170 
if (s>avctx>trellis) {

171 
av_free(s>opt); 
172 
av_free(s>path); 
173 
} 
174  
175 
return 0; 
176 
} 
177  
178 
#define find_best(val, table, LUT, LUT_add, LUT_size) \

179 
best_idx = \ 
180 
LUT[av_clip ((lrintf(val) >> 8) + LUT_add, 0, LUT_size  1)]; \ 
181 
if (fabs(val  table[best_idx]) > fabs(val  table[best_idx + 1])) \ 
182 
best_idx++; 
183  
184 
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table) 
185 
{ 
186 
int band, best_idx, power_idx = 0; 
187 
float power_candidate;

188  
189 
//base exponent

190 
find_best(cand[0], ff_nelly_init_table, sf_lut, 20, 96); 
191 
idx_table[0] = best_idx;

192 
power_idx = ff_nelly_init_table[best_idx]; 
193  
194 
for (band = 1; band < NELLY_BANDS; band++) { 
195 
power_candidate = cand[band]  power_idx; 
196 
find_best(power_candidate, ff_nelly_delta_table, sf_delta_lut, 37, 78); 
197 
idx_table[band] = best_idx; 
198 
power_idx += ff_nelly_delta_table[best_idx]; 
199 
} 
200 
} 
201  
202 
static inline float distance(float x, float y, int band) 
203 
{ 
204 
//return pow(fabs(xy), 2.0);

205 
float tmp = x  y;

206 
return tmp * tmp;

207 
} 
208  
209 
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table) 
210 
{ 
211 
int i, j, band, best_idx;

212 
float power_candidate, best_val;

213  
214 
float (*opt )[NELLY_BANDS] = s>opt ;

215 
uint8_t(*path)[NELLY_BANDS] = s>path; 
216  
217 
for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) { 
218 
opt[0][i] = INFINITY;

219 
} 
220  
221 
for (i = 0; i < 64; i++) { 
222 
opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0); 
223 
path[0][ff_nelly_init_table[i]] = i;

224 
} 
225  
226 
for (band = 1; band < NELLY_BANDS; band++) { 
227 
int q, c = 0; 
228 
float tmp;

229 
int idx_min, idx_max, idx;

230 
power_candidate = cand[band]; 
231 
for (q = 1000; !c && q < OPT_SIZE; q <<= 2) { 
232 
idx_min = FFMAX(0, cand[band]  q);

233 
idx_max = FFMIN(OPT_SIZE, cand[band  1] + q);

234 
for (i = FFMAX(0, cand[band  1]  q); i < FFMIN(OPT_SIZE, cand[band  1] + q); i++) { 
235 
if ( isinf(opt[band  1][i]) ) 
236 
continue;

237 
for (j = 0; j < 32; j++) { 
238 
idx = i + ff_nelly_delta_table[j]; 
239 
if (idx > idx_max)

240 
break;

241 
if (idx >= idx_min) {

242 
tmp = opt[band  1][i] + distance(idx, power_candidate, band);

243 
if (opt[band][idx] > tmp) {

244 
opt[band][idx] = tmp; 
245 
path[band][idx] = j; 
246 
c = 1;

247 
} 
248 
} 
249 
} 
250 
} 
251 
} 
252 
assert(c); //FIXME

253 
} 
254  
255 
best_val = INFINITY; 
256 
best_idx = 1;

257 
band = NELLY_BANDS  1;

258 
for (i = 0; i < OPT_SIZE; i++) { 
259 
if (best_val > opt[band][i]) {

260 
best_val = opt[band][i]; 
261 
best_idx = i; 
262 
} 
263 
} 
264 
for (band = NELLY_BANDS  1; band >= 0; band) { 
265 
idx_table[band] = path[band][best_idx]; 
266 
if (band) {

267 
best_idx = ff_nelly_delta_table[path[band][best_idx]]; 
268 
} 
269 
} 
270 
} 
271  
272 
/**

273 
* Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values

274 
* @param s encoder context

275 
* @param output output buffer

276 
* @param output_size size of output buffer

277 
*/

278 
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size) 
279 
{ 
280 
PutBitContext pb; 
281 
int i, j, band, block, best_idx, power_idx = 0; 
282 
float power_val, coeff, coeff_sum;

283 
float pows[NELLY_FILL_LEN];

284 
int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];

285 
float cand[NELLY_BANDS];

286  
287 
apply_mdct(s); 
288  
289 
init_put_bits(&pb, output, output_size * 8);

290  
291 
i = 0;

292 
for (band = 0; band < NELLY_BANDS; band++) { 
293 
coeff_sum = 0;

294 
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 
295 
coeff_sum += s>mdct_out[i ] * s>mdct_out[i ] 
296 
+ s>mdct_out[i + NELLY_BUF_LEN] * s>mdct_out[i + NELLY_BUF_LEN]; 
297 
} 
298 
cand[band] = 
299 
log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2; 
300 
} 
301  
302 
if (s>avctx>trellis) {

303 
get_exponent_dynamic(s, cand, idx_table); 
304 
} else {

305 
get_exponent_greedy(s, cand, idx_table); 
306 
} 
307  
308 
i = 0;

309 
for (band = 0; band < NELLY_BANDS; band++) { 
310 
if (band) {

311 
power_idx += ff_nelly_delta_table[idx_table[band]]; 
312 
put_bits(&pb, 5, idx_table[band]);

313 
} else {

314 
power_idx = ff_nelly_init_table[idx_table[0]];

315 
put_bits(&pb, 6, idx_table[0]); 
316 
} 
317 
power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET)); 
318 
for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) { 
319 
s>mdct_out[i] *= power_val; 
320 
s>mdct_out[i + NELLY_BUF_LEN] *= power_val; 
321 
pows[i] = power_idx; 
322 
} 
323 
} 
324  
325 
ff_nelly_get_sample_bits(pows, bits); 
326  
327 
for (block = 0; block < 2; block++) { 
328 
for (i = 0; i < NELLY_FILL_LEN; i++) { 
329 
if (bits[i] > 0) { 
330 
const float *table = ff_nelly_dequantization_table + (1 << bits[i])  1; 
331 
coeff = s>mdct_out[block * NELLY_BUF_LEN + i]; 
332 
best_idx = 
333 
quant_lut[av_clip ( 
334 
coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]], 
335 
quant_lut_offset[bits[i]], 
336 
quant_lut_offset[bits[i]+1]  1 
337 
)]; 
338 
if (fabs(coeff  table[best_idx]) > fabs(coeff  table[best_idx + 1])) 
339 
best_idx++; 
340  
341 
put_bits(&pb, bits[i], best_idx); 
342 
} 
343 
} 
344 
if (!block)

345 
put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS  put_bits_count(&pb), 0);

346 
} 
347  
348 
flush_put_bits(&pb); 
349 
} 
350  
351 
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data) 
352 
{ 
353 
NellyMoserEncodeContext *s = avctx>priv_data; 
354 
const int16_t *samples = data;

355 
int i;

356  
357 
if (s>last_frame)

358 
return 0; 
359  
360 
if (data) {

361 
for (i = 0; i < avctx>frame_size; i++) { 
362 
s>buf[s>bufsel][i] = samples[i]; 
363 
} 
364 
for (; i < NELLY_SAMPLES; i++) {

365 
s>buf[s>bufsel][i] = 0;

366 
} 
367 
s>bufsel = 1  s>bufsel;

368 
if (!s>have_saved) {

369 
s>have_saved = 1;

370 
return 0; 
371 
} 
372 
} else {

373 
memset(s>buf[s>bufsel], 0, sizeof(s>buf[0][0]) * NELLY_BUF_LEN); 
374 
s>bufsel = 1  s>bufsel;

375 
s>last_frame = 1;

376 
} 
377  
378 
if (s>have_saved) {

379 
encode_block(s, frame, buf_size); 
380 
return NELLY_BLOCK_LEN;

381 
} 
382 
return 0; 
383 
} 
384  
385 
AVCodec nellymoser_encoder = { 
386 
.name = "nellymoser",

387 
.type = AVMEDIA_TYPE_AUDIO, 
388 
.id = CODEC_ID_NELLYMOSER, 
389 
.priv_data_size = sizeof(NellyMoserEncodeContext),

390 
.init = encode_init, 
391 
.encode = encode_frame, 
392 
.close = encode_end, 
393 
.capabilities = CODEC_CAP_SMALL_LAST_FRAME  CODEC_CAP_DELAY, 
394 
.long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),

395 
.sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE}, 
396 
}; 