ffmpeg / libavcodec / alacenc.c @ b2755007
History | View | Annotate | Download (15.4 KB)
1 |
/**
 * ALAC audio encoder
 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
21 |
|
22 |
#include "avcodec.h" |
23 |
#include "bitstream.h" |
24 |
#include "put_bits.h" |
25 |
#include "dsputil.h" |
26 |
#include "lpc.h" |
27 |
#include "mathops.h" |
28 |
|
29 |
/* Frame geometry and buffer dimensions. */
#define DEFAULT_FRAME_SIZE        4096  /* samples per channel per frame; also the sample buffer length */
#define DEFAULT_SAMPLE_SIZE       16    /* bits per coded sample */
#define MAX_CHANNELS              8     /* number of per-channel sample buffers / LPC contexts */
#define ALAC_EXTRADATA_SIZE       36    /* bytes of extradata exported by the encoder */
#define ALAC_FRAME_HEADER_SIZE    55    /* bits: 3 + 16 + 1 + 2 + 1 + 32, see write_frame_header() */
#define ALAC_FRAME_FOOTER_SIZE    3     /* bits: the 3-bit end marker written after each frame */

/* Entropy coder and linear prediction limits. */
#define ALAC_ESCAPE_CODE          0x1FF /* 9-bit prefix announcing a verbatim value, see encode_scalar() */
#define ALAC_MAX_LPC_ORDER        30    /* upper bound accepted for the prediction orders */
#define DEFAULT_MAX_PRED_ORDER    6
#define DEFAULT_MIN_PRED_ORDER    4
#define ALAC_MAX_LPC_PRECISION    9     /* coefficient precision passed to ff_lpc_calc_coefs() */
#define ALAC_MAX_LPC_SHIFT        9     /* maximum shift passed to ff_lpc_calc_coefs() */

/* Stereo decorrelation modes, as returned by estimate_stereo_mode(). */
#define ALAC_CHMODE_LEFT_RIGHT    0
#define ALAC_CHMODE_LEFT_SIDE     1
#define ALAC_CHMODE_RIGHT_SIDE    2
#define ALAC_CHMODE_MID_SIDE      3
47 |
|
48 |
/** Parameters of the adaptive Rice-style entropy coder. */
typedef struct RiceContext {
    int history_mult;     /* multiplier used when updating the running history */
    int initial_history;  /* history value at the start of each coded channel */
    int k_modifier;       /* upper clamp applied to the Rice parameter k */
    int rice_modifier;    /* 3-bit value written into each channel's header */
} RiceContext;
54 |
|
55 |
typedef struct LPCContext { |
56 |
int lpc_order;
|
57 |
int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; |
58 |
int lpc_quant;
|
59 |
} LPCContext; |
60 |
|
61 |
typedef struct AlacEncodeContext { |
62 |
int compression_level;
|
63 |
int min_prediction_order;
|
64 |
int max_prediction_order;
|
65 |
int max_coded_frame_size;
|
66 |
int write_sample_size;
|
67 |
int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE]; |
68 |
int32_t predictor_buf[DEFAULT_FRAME_SIZE]; |
69 |
int interlacing_shift;
|
70 |
int interlacing_leftweight;
|
71 |
PutBitContext pbctx; |
72 |
RiceContext rc; |
73 |
LPCContext lpc[MAX_CHANNELS]; |
74 |
DSPContext dspctx; |
75 |
AVCodecContext *avctx; |
76 |
} AlacEncodeContext; |
77 |
|
78 |
|
79 |
/**
 * Deinterleave the 16-bit input samples into the per-channel sample buffers.
 *
 * @param s             encoder context; the channel count and frame size are
 *                      taken from s->avctx
 * @param input_samples interleaved input, one int16_t per channel per time step
 */
static void init_sample_buffers(AlacEncodeContext *s, int16_t *input_samples)
{
    const int channels   = s->avctx->channels;
    const int frame_size = s->avctx->frame_size;
    int ch, i;

    for (ch = 0; ch < channels; ch++) {
        int32_t *dst = s->sample_buf[ch];

        for (i = 0; i < frame_size; i++)
            dst[i] = input_samples[i * channels + ch];
    }
}
91 |
|
92 |
/**
 * Write one non-negative value with the adaptive Rice-style code.
 *
 * @param s                 encoder context (bit writer and Rice parameters)
 * @param x                 value to encode
 * @param k                 Rice parameter; clamped to s->rc.k_modifier
 * @param write_sample_size number of bits used for x when the escape code is taken
 */
static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size)
{
    int divisor, q, r;

    k       = FFMIN(k, s->rc.k_modifier);
    divisor = (1 << k) - 1;
    q       = x / divisor;
    r       = x % divisor;

    if (q > 8) {
        // quotient too long for the unary prefix: write escape code and sample value directly
        put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
        put_bits(&s->pbctx, write_sample_size, x);
        return;
    }

    // unary prefix: q one-bits followed by a terminating zero
    if (q)
        put_bits(&s->pbctx, q, (1 << q) - 1);
    put_bits(&s->pbctx, 1, 0);

    // remainder: a zero remainder is one bit shorter than a non-zero one
    if (k != 1) {
        if (r > 0)
            put_bits(&s->pbctx, k, r + 1);
        else
            put_bits(&s->pbctx, k - 1, 0);
    }
}
118 |
|
119 |
/**
 * Write the 55-bit frame header.
 *
 * @param s           encoder context; channel count and frame size come from s->avctx
 * @param is_verbatim value of the "audio block is verbatim" flag bit
 */
static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
{
    PutBitContext *pb = &s->pbctx;

    put_bits(pb,  3, s->avctx->channels - 1);   // No. of channels -1
    put_bits(pb, 16, 0);                        // Seems to be zero
    put_bits(pb,  1, 1);                        // Sample count is in the header
    put_bits(pb,  2, 0);                        // FIXME: Wasted bytes field
    put_bits(pb,  1, is_verbatim);              // Audio block is verbatim
    put_bits(pb, 32, s->avctx->frame_size);     // No. of samples in the frame
}
128 |
|
129 |
/**
 * Compute the LPC order, shift and coefficients for one channel and store
 * them in s->lpc[ch].
 *
 * @param s  encoder context; s->sample_buf[ch] is analysed
 * @param ch channel index
 */
static void calc_predictor_params(AlacEncodeContext *s, int ch)
{
    int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
    int shift[MAX_LPC_ORDER];
    LPCContext *lpc = &s->lpc[ch];
    int opt_order;

    opt_order = ff_lpc_calc_coefs(&s->dspctx, s->sample_buf[ch],
                                  s->avctx->frame_size,
                                  s->min_prediction_order,
                                  s->max_prediction_order,
                                  ALAC_MAX_LPC_PRECISION, coefs, shift, 1,
                                  ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);

    // row/entry (order - 1) holds the result for the chosen order
    lpc->lpc_order = opt_order;
    lpc->lpc_quant = shift[opt_order-1];
    memcpy(lpc->lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
}
142 |
|
143 |
/** Magnitude of a 64-bit value, returned unsigned so it can be accumulated directly. */
static inline uint64_t alac_abs64(int64_t v)
{
    return v < 0 ? -(uint64_t)v : (uint64_t)v;
}

/**
 * Pick the stereo decorrelation mode with the smallest estimated cost.
 *
 * The cost of a channel is the sum of absolute 2nd order residuals; it is
 * evaluated for left, right, mid ((l+r)>>1) and side (l-r).
 * Residuals are computed in 64-bit so that they cannot overflow for any
 * int32_t input.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return the ALAC_CHMODE_* value (0..3) with the lowest score; ties go to
 *         the lowest mode number
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    int i, best;
    int64_t lt, rt;
    uint64_t sum[4];
    uint64_t score[4];

    /* calculate sum of 2nd order residual for each channel */
    sum[0] = sum[1] = sum[2] = sum[3] = 0;
    for (i = 2; i < n; i++) {
        lt = (int64_t)left_ch[i]  - 2 * (int64_t)left_ch[i-1]  + left_ch[i-2];
        rt = (int64_t)right_ch[i] - 2 * (int64_t)right_ch[i-1] + right_ch[i-2];
        sum[2] += alac_abs64((lt + rt) >> 1);
        sum[3] += alac_abs64(lt - rt);
        sum[0] += alac_abs64(lt);
        sum[1] += alac_abs64(rt);
    }

    /* calculate score for each mode */
    score[0] = sum[0] + sum[1];     /* left/right  */
    score[1] = sum[0] + sum[3];     /* left/side   */
    score[2] = sum[1] + sum[3];     /* right/side  */
    score[3] = sum[2] + sum[3];     /* mid/side    */

    /* return mode with lowest score */
    best = 0;
    for (i = 1; i < 4; i++) {
        if (score[i] < score[best])
            best = i;
    }
    return best;
}
176 |
|
177 |
/**
 * Decorrelate the two channels in place using the mode chosen by
 * estimate_stereo_mode() and record the matching interlacing parameters.
 *
 * Operates on s->sample_buf[0] (left) and s->sample_buf[1] (right) and sets
 * s->interlacing_leftweight and s->interlacing_shift.
 *
 * @param s encoder context
 */
static void alac_stereo_decorrelation(AlacEncodeContext *s)
{
    int32_t *left  = s->sample_buf[0];
    int32_t *right = s->sample_buf[1];
    const int n    = s->avctx->frame_size;
    int i, mode;
    int32_t tmp;

    mode = estimate_stereo_mode(left, right, n);

    switch (mode) {
    case ALAC_CHMODE_LEFT_RIGHT:
        /* channels are stored unchanged */
        s->interlacing_leftweight = 0;
        s->interlacing_shift      = 0;
        break;

    case ALAC_CHMODE_LEFT_SIDE:
        for (i = 0; i < n; i++)
            right[i] = left[i] - right[i];
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 0;
        break;

    case ALAC_CHMODE_RIGHT_SIDE:
        for (i = 0; i < n; i++) {
            tmp      = right[i];
            right[i] = left[i] - right[i];
            /* (side >> 31) is -1 for a negative side value, 0 otherwise */
            left[i]  = tmp + (right[i] >> 31);
        }
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 31;
        break;

    default:    /* ALAC_CHMODE_MID_SIDE */
        for (i = 0; i < n; i++) {
            tmp      = left[i];
            left[i]  = (tmp + right[i]) >> 1;
            right[i] = tmp - right[i];
        }
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 1;
        break;
    }
}
221 |
|
222 |
/**
 * Compute the prediction residual of one channel into s->predictor_buf.
 *
 * Works on a local copy of the channel's LPCContext, so the coefficient
 * adaptation performed here does not modify s->lpc[ch].
 * If lpc_order is 0 or negative, predictor_buf is left untouched.
 *
 * @param s  encoder context
 * @param ch channel index into s->sample_buf and s->lpc
 */
static void alac_linear_predictor(AlacEncodeContext *s, int ch)
{
    int i;
    LPCContext lpc = s->lpc[ch];

    if (lpc.lpc_order == 31) {
        // order 31 is handled as plain first-order differencing
        s->predictor_buf[0] = s->sample_buf[ch][0];

        for (i = 1; i < s->avctx->frame_size; i++)
            s->predictor_buf[i] = s->sample_buf[ch][i] - s->sample_buf[ch][i-1];

        return;
    }

    // generalised linear predictor

    if (lpc.lpc_order > 0) {
        int32_t *samples  = s->sample_buf[ch];
        int32_t *residual = s->predictor_buf;

        // generate warm-up samples
        residual[0] = samples[0];
        for (i = 1; i <= lpc.lpc_order; i++)
            residual[i] = samples[i] - samples[i-1];

        // perform lpc on remaining samples
        // `samples` slides forward by one each iteration, so samples[0] is
        // the oldest sample of the window and samples[lpc_order+1] the one
        // being predicted
        for (i = lpc.lpc_order + 1; i < s->avctx->frame_size; i++) {
            // rounding offset; NOTE(review): assumes lpc_quant >= 1 - confirm
            int sum = 1 << (lpc.lpc_quant - 1), res_val, j;

            for (j = 0; j < lpc.lpc_order; j++) {
                sum += (samples[lpc.lpc_order-j] - samples[0]) *
                       lpc.lpc_coeff[j];
            }

            sum >>= lpc.lpc_quant;
            sum += samples[0];
            residual[i] = sign_extend(samples[lpc.lpc_order+1] - sum,
                                      s->write_sample_size);
            res_val = residual[i];

            if (res_val) {
                // nudge the local coefficients by one step each, from the
                // last coefficient downwards, while the remaining error
                // keeps the sign of the original residual
                int index = lpc.lpc_order - 1;
                int neg = (res_val < 0);

                while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
                    int val = samples[0] - samples[lpc.lpc_order - index];
                    int sign = (val ? FFSIGN(val) : 0);

                    if (neg)
                        sign *= -1;

                    lpc.lpc_coeff[index] -= sign;
                    val *= sign;
                    res_val -= ((val >> lpc.lpc_quant) *
                                (lpc.lpc_order - index));
                    index--;
                }
            }
            samples++;
        }
    }
}
284 |
|
285 |
/**
 * Entropy-code the residuals in s->predictor_buf for the current frame.
 *
 * Each residual is mapped to a non-negative integer (v >= 0 becomes 2v,
 * v < 0 becomes -2v-1) and written with encode_scalar() using a Rice
 * parameter derived from a running history. Whenever the history drops
 * below 128, the following run of zero residuals is coded as a single
 * run length.
 *
 * @param s encoder context
 */
static void alac_entropy_coder(AlacEncodeContext *s)
{
    const int frame_size = s->avctx->frame_size;
    unsigned int history = s->rc.initial_history;
    int sign_modifier = 0, i, k;
    int32_t *samples = s->predictor_buf;

    for (i = 0; i < frame_size;) {
        int x;

        k = av_log2((history >> 9) + 3);

        // fold the signed residual into a non-negative value
        x  = -2 * (*samples) - 1;
        x ^= (x >> 31);

        samples++;
        i++;

        encode_scalar(s, x - sign_modifier, k, s->write_sample_size);

        history += x * s->rc.history_mult
                   - ((history * s->rc.history_mult) >> 9);

        sign_modifier = 0;
        if (x > 0xFFFF)
            history = 0xFFFF;

        if ((history < 128) && (i < frame_size)) {
            unsigned int block_size = 0;

            k = 7 - av_log2(history) + ((history + 16) >> 6);

            // check the bound first so that the element one past the end
            // of the frame is never read
            while ((i < frame_size) && (*samples == 0)) {
                samples++;
                i++;
                block_size++;
            }
            encode_scalar(s, block_size, k, 16);

            sign_modifier = (block_size <= 0xFFFF);

            history = 0;
        }
    }
}
330 |
|
331 |
/**
 * Write the body of a compressed frame: interlacing parameters, one
 * predictor header per channel, then the entropy-coded residual of every
 * channel.
 *
 * @param s encoder context; s->sample_buf must already hold the frame
 */
static void write_compressed_frame(AlacEncodeContext *s)
{
    PutBitContext *pb  = &s->pbctx;
    const int channels = s->avctx->channels;
    int ch, j;

    /* only simple mid/side decorrelation supported as of now */
    if (channels == 2)
        alac_stereo_decorrelation(s);
    put_bits(pb, 8, s->interlacing_shift);
    put_bits(pb, 8, s->interlacing_leftweight);

    for (ch = 0; ch < channels; ch++) {
        const LPCContext *lpc = &s->lpc[ch];

        calc_predictor_params(s, ch);

        put_bits(pb, 4, 0);  // prediction type : currently only type 0 has been RE'd
        put_bits(pb, 4, lpc->lpc_quant);

        put_bits(pb, 3, s->rc.rice_modifier);
        put_bits(pb, 5, lpc->lpc_order);

        // predictor coeff. table
        for (j = 0; j < lpc->lpc_order; j++)
            put_sbits(pb, 16, lpc->lpc_coeff[j]);
    }

    // apply lpc and entropy coding to audio samples
    for (ch = 0; ch < channels; ch++) {
        alac_linear_predictor(s, ch);
        alac_entropy_coder(s);
    }
}
363 |
|
364 |
/**
 * Initialize the encoder: validate the requested configuration, set the
 * default Rice and prediction parameters, and build the extradata.
 *
 * All validation happens before any allocation, and every allocation is
 * checked, so no memory is leaked when initialization fails.
 *
 * @param avctx codec context; frame_size, bits_per_coded_sample, extradata,
 *              extradata_size and coded_frame are set on success
 * @return 0 on success, -1 on invalid parameters or allocation failure
 */
static av_cold int alac_encode_init(AVCodecContext *avctx)
{
    AlacEncodeContext *s = avctx->priv_data;
    uint8_t *alac_extradata;

    avctx->frame_size            = DEFAULT_FRAME_SIZE;
    avctx->bits_per_coded_sample = DEFAULT_SAMPLE_SIZE;

    if (avctx->sample_fmt != SAMPLE_FMT_S16) {
        av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");
        return -1;
    }

    // sample_buf and lpc only have room for MAX_CHANNELS channels
    if (avctx->channels < 1 || avctx->channels > MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", avctx->channels);
        return -1;
    }

    // Set default compression level
    if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
        s->compression_level = 1;
    else
        s->compression_level = av_clip(avctx->compression_level, 0, 1);

    // Initialize default Rice parameters
    s->rc.history_mult    = 40;
    s->rc.initial_history = 10;
    s->rc.k_modifier      = 14;
    s->rc.rice_modifier   = 4;

    s->max_coded_frame_size = (ALAC_FRAME_HEADER_SIZE + ALAC_FRAME_FOOTER_SIZE +
                               avctx->frame_size*avctx->channels*avctx->bits_per_coded_sample)>>3;

    s->write_sample_size = avctx->bits_per_coded_sample + avctx->channels - 1; // FIXME: consider wasted_bytes

    // Prediction orders: a negative request keeps the default
    s->min_prediction_order = DEFAULT_MIN_PRED_ORDER;
    if (avctx->min_prediction_order >= 0) {
        if (avctx->min_prediction_order < MIN_LPC_ORDER ||
            avctx->min_prediction_order > ALAC_MAX_LPC_ORDER) {
            av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n", avctx->min_prediction_order);
            return -1;
        }

        s->min_prediction_order = avctx->min_prediction_order;
    }

    s->max_prediction_order = DEFAULT_MAX_PRED_ORDER;
    if (avctx->max_prediction_order >= 0) {
        if (avctx->max_prediction_order < MIN_LPC_ORDER ||
            avctx->max_prediction_order > ALAC_MAX_LPC_ORDER) {
            av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n", avctx->max_prediction_order);
            return -1;
        }

        s->max_prediction_order = avctx->max_prediction_order;
    }

    if (s->max_prediction_order < s->min_prediction_order) {
        av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
               s->min_prediction_order, s->max_prediction_order);
        return -1;
    }

    // Everything is validated; allocate and fill the extradata
    alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE+1);
    if (!alac_extradata) {
        av_log(avctx, AV_LOG_ERROR, "could not allocate extradata\n");
        return -1;
    }

    AV_WB32(alac_extradata,    ALAC_EXTRADATA_SIZE);
    AV_WB32(alac_extradata+4,  MKBETAG('a','l','a','c'));
    AV_WB32(alac_extradata+12, avctx->frame_size);
    AV_WB8 (alac_extradata+17, avctx->bits_per_coded_sample);
    AV_WB8 (alac_extradata+21, avctx->channels);
    AV_WB32(alac_extradata+24, s->max_coded_frame_size);
    AV_WB32(alac_extradata+28, avctx->sample_rate*avctx->channels*avctx->bits_per_coded_sample); // average bitrate
    AV_WB32(alac_extradata+32, avctx->sample_rate);

    // Set relevant extradata fields
    if (s->compression_level > 0) {
        AV_WB8(alac_extradata+18, s->rc.history_mult);
        AV_WB8(alac_extradata+19, s->rc.initial_history);
        AV_WB8(alac_extradata+20, s->rc.k_modifier);
    }

    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame) {
        av_log(avctx, AV_LOG_ERROR, "could not allocate coded frame\n");
        av_freep(&alac_extradata);
        return -1;
    }
    avctx->coded_frame->key_frame = 1;

    avctx->extradata      = alac_extradata;
    avctx->extradata_size = ALAC_EXTRADATA_SIZE;

    s->avctx = avctx;
    dsputil_init(&s->dspctx, avctx);

    return 0;
}
449 |
|
450 |
/**
 * Encode one frame of interleaved 16-bit samples.
 *
 * A compressed frame is attempted first unless the compression level is 0.
 * If the result exceeds s->max_coded_frame_size, the frame is written again
 * from the start of the buffer in verbatim mode.
 *
 * @param avctx    codec context
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes; must be at least
 *                 twice s->max_coded_frame_size
 * @param data     interleaved int16_t input samples
 * @return number of bytes written, or -1 on error
 */
static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
                             int buf_size, void *data)
{
    AlacEncodeContext *s = avctx->priv_data;
    PutBitContext *pb = &s->pbctx;
    int i, out_bytes, verbatim_flag = 0;

    if (avctx->frame_size > DEFAULT_FRAME_SIZE) {
        av_log(avctx, AV_LOG_ERROR, "input frame size exceeded\n");
        return -1;
    }

    if (buf_size < 2*s->max_coded_frame_size) {
        av_log(avctx, AV_LOG_ERROR, "buffer size is too small\n");
        return -1;
    }

verbatim:
    // (re)start writing at the beginning of the output buffer
    init_put_bits(pb, frame, buf_size);

    if ((s->compression_level == 0) || verbatim_flag) {
        // Verbatim mode
        int16_t *samples = data;
        const int total  = avctx->frame_size*avctx->channels;

        write_frame_header(s, 1);
        for (i = 0; i < total; i++)
            put_sbits(pb, 16, *samples++);
    } else {
        init_sample_buffers(s, data);
        write_frame_header(s, 0);
        write_compressed_frame(s);
    }

    // 3-bit frame footer
    put_bits(pb, 3, 7);
    flush_put_bits(pb);
    out_bytes = put_bits_count(pb) >> 3;

    if (out_bytes > s->max_coded_frame_size) {
        /* frame too large. use verbatim mode */
        if (verbatim_flag || (s->compression_level == 0)) {
            /* still too large. must be an error. */
            av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");
            return -1;
        }
        verbatim_flag = 1;
        goto verbatim;
    }

    return out_bytes;
}
500 |
|
501 |
/**
 * Release the extradata and the coded frame owned by the codec context.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int alac_encode_close(AVCodecContext *avctx)
{
    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;
    av_freep(&avctx->coded_frame);
    return 0;
}
508 |
|
509 |
AVCodec alac_encoder = { |
510 |
"alac",
|
511 |
CODEC_TYPE_AUDIO, |
512 |
CODEC_ID_ALAC, |
513 |
sizeof(AlacEncodeContext),
|
514 |
alac_encode_init, |
515 |
alac_encode_frame, |
516 |
alac_encode_close, |
517 |
.capabilities = CODEC_CAP_SMALL_LAST_FRAME, |
518 |
.long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
|
519 |
}; |