ffmpeg / libavcodec / alacenc.c @ 2912e87a
History  View  Annotate  Download (16.1 KB)
/**
 * ALAC audio encoder
 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */


#include <errno.h>

#include "avcodec.h"
#include "put_bits.h"
#include "dsputil.h"
#include "lpc.h"
#include "mathops.h"

/* Frame geometry and container constants. */
#define DEFAULT_FRAME_SIZE        4096  /* samples per channel; also sizes the work buffers */
#define DEFAULT_SAMPLE_SIZE       16    /* bits per coded sample */
#define MAX_CHANNELS              8     /* first dimension of the per-channel buffers */
#define ALAC_EXTRADATA_SIZE       36    /* bytes of extradata written at init */
#define ALAC_FRAME_HEADER_SIZE    55
#define ALAC_FRAME_FOOTER_SIZE    3

/* Entropy coder and linear predictor limits. */
#define ALAC_ESCAPE_CODE          0x1FF /* 9-bit marker preceding a raw value */
#define ALAC_MAX_LPC_ORDER        30
#define DEFAULT_MAX_PRED_ORDER    6
#define DEFAULT_MIN_PRED_ORDER    4
#define ALAC_MAX_LPC_PRECISION    9
#define ALAC_MAX_LPC_SHIFT        9

/* Stereo decorrelation modes, in the order scored by estimate_stereo_mode(). */
#define ALAC_CHMODE_LEFT_RIGHT    0
#define ALAC_CHMODE_LEFT_SIDE     1
#define ALAC_CHMODE_RIGHT_SIDE    2
#define ALAC_CHMODE_MID_SIDE      3

/**
 * Parameters of the adaptive entropy coder.
 * They are set once in alac_encode_init().
 */
typedef struct RiceContext {
    int history_mult;     /* weight applied to each coded value when updating the history */
    int initial_history;  /* history value at the start of every entropy-coded channel */
    int k_modifier;       /* upper bound applied to the code parameter k */
    int rice_modifier;    /* written as a 3-bit field in each channel's predictor header */
} RiceContext;

54 
typedef struct AlacLPCContext { 
55 
int lpc_order;

56 
int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; 
57 
int lpc_quant;

58 
} AlacLPCContext; 
59  
60 
typedef struct AlacEncodeContext { 
61 
int compression_level;

62 
int min_prediction_order;

63 
int max_prediction_order;

64 
int max_coded_frame_size;

65 
int write_sample_size;

66 
int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE]; 
67 
int32_t predictor_buf[DEFAULT_FRAME_SIZE]; 
68 
int interlacing_shift;

69 
int interlacing_leftweight;

70 
PutBitContext pbctx; 
71 
RiceContext rc; 
72 
AlacLPCContext lpc[MAX_CHANNELS]; 
73 
LPCContext lpc_ctx; 
74 
AVCodecContext *avctx; 
75 
} AlacEncodeContext; 
76  
77  
78 
static void init_sample_buffers(AlacEncodeContext *s, const int16_t *input_samples) 
79 
{ 
80 
int ch, i;

81  
82 
for(ch=0;ch<s>avctx>channels;ch++) { 
83 
const int16_t *sptr = input_samples + ch;

84 
for(i=0;i<s>avctx>frame_size;i++) { 
85 
s>sample_buf[ch][i] = *sptr; 
86 
sptr += s>avctx>channels; 
87 
} 
88 
} 
89 
} 
90  
91 
static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size) 
92 
{ 
93 
int divisor, q, r;

94  
95 
k = FFMIN(k, s>rc.k_modifier); 
96 
divisor = (1<<k)  1; 
97 
q = x / divisor; 
98 
r = x % divisor; 
99  
100 
if(q > 8) { 
101 
// write escape code and sample value directly

102 
put_bits(&s>pbctx, 9, ALAC_ESCAPE_CODE);

103 
put_bits(&s>pbctx, write_sample_size, x); 
104 
} else {

105 
if(q)

106 
put_bits(&s>pbctx, q, (1<<q)  1); 
107 
put_bits(&s>pbctx, 1, 0); 
108  
109 
if(k != 1) { 
110 
if(r > 0) 
111 
put_bits(&s>pbctx, k, r+1);

112 
else

113 
put_bits(&s>pbctx, k1, 0); 
114 
} 
115 
} 
116 
} 
117  
118 
static void write_frame_header(AlacEncodeContext *s, int is_verbatim) 
119 
{ 
120 
put_bits(&s>pbctx, 3, s>avctx>channels1); // No. of channels 1 
121 
put_bits(&s>pbctx, 16, 0); // Seems to be zero 
122 
put_bits(&s>pbctx, 1, 1); // Sample count is in the header 
123 
put_bits(&s>pbctx, 2, 0); // FIXME: Wasted bytes field 
124 
put_bits(&s>pbctx, 1, is_verbatim); // Audio block is verbatim 
125 
put_bits32(&s>pbctx, s>avctx>frame_size); // No. of samples in the frame

126 
} 
127  
128 
static void calc_predictor_params(AlacEncodeContext *s, int ch) 
129 
{ 
130 
int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER]; 
131 
int shift[MAX_LPC_ORDER];

132 
int opt_order;

133  
134 
if (s>compression_level == 1) { 
135 
s>lpc[ch].lpc_order = 6;

136 
s>lpc[ch].lpc_quant = 6;

137 
s>lpc[ch].lpc_coeff[0] = 160; 
138 
s>lpc[ch].lpc_coeff[1] = 190; 
139 
s>lpc[ch].lpc_coeff[2] = 170; 
140 
s>lpc[ch].lpc_coeff[3] = 130; 
141 
s>lpc[ch].lpc_coeff[4] = 80; 
142 
s>lpc[ch].lpc_coeff[5] = 25; 
143 
} else {

144 
opt_order = ff_lpc_calc_coefs(&s>lpc_ctx, s>sample_buf[ch], 
145 
s>avctx>frame_size, 
146 
s>min_prediction_order, 
147 
s>max_prediction_order, 
148 
ALAC_MAX_LPC_PRECISION, coefs, shift, 
149 
AV_LPC_TYPE_LEVINSON, 0,

150 
ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);

151  
152 
s>lpc[ch].lpc_order = opt_order; 
153 
s>lpc[ch].lpc_quant = shift[opt_order1];

154 
memcpy(s>lpc[ch].lpc_coeff, coefs[opt_order1], opt_order*sizeof(int)); 
155 
} 
156 
} 
157  
/**
 * Pick the stereo decorrelation mode expected to give the smallest residual.
 *
 * Sums the absolute second-order differences of the left, right, mid and
 * side signals over the frame and scores each of the four channel pairings.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return one of the ALAC_CHMODE_* values; ties go to the lower value
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    int i, best;
    int32_t lt, rt;
    uint64_t sum[4];
    uint64_t score[4];

    /* calculate sum of 2nd order residual for each channel */
    sum[0] = sum[1] = sum[2] = sum[3] = 0;
    for (i = 2; i < n; i++) {
        lt = left_ch[i]  - 2*left_ch[i-1]  + left_ch[i-2];
        rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
        sum[2] += FFABS((lt + rt) >> 1);    /* mid */
        sum[3] += FFABS(lt - rt);           /* side */
        sum[0] += FFABS(lt);                /* left */
        sum[1] += FFABS(rt);                /* right */
    }

    /* calculate score for each mode */
    score[0] = sum[0] + sum[1];
    score[1] = sum[0] + sum[3];
    score[2] = sum[1] + sum[3];
    score[3] = sum[2] + sum[3];

    /* return mode with lowest score */
    best = 0;
    for (i = 1; i < 4; i++) {
        if (score[i] < score[best]) {
            best = i;
        }
    }
    return best;
}

192 
static void alac_stereo_decorrelation(AlacEncodeContext *s) 
193 
{ 
194 
int32_t *left = s>sample_buf[0], *right = s>sample_buf[1]; 
195 
int i, mode, n = s>avctx>frame_size;

196 
int32_t tmp; 
197  
198 
mode = estimate_stereo_mode(left, right, n); 
199  
200 
switch(mode)

201 
{ 
202 
case ALAC_CHMODE_LEFT_RIGHT:

203 
s>interlacing_leftweight = 0;

204 
s>interlacing_shift = 0;

205 
break;

206  
207 
case ALAC_CHMODE_LEFT_SIDE:

208 
for(i=0; i<n; i++) { 
209 
right[i] = left[i]  right[i]; 
210 
} 
211 
s>interlacing_leftweight = 1;

212 
s>interlacing_shift = 0;

213 
break;

214  
215 
case ALAC_CHMODE_RIGHT_SIDE:

216 
for(i=0; i<n; i++) { 
217 
tmp = right[i]; 
218 
right[i] = left[i]  right[i]; 
219 
left[i] = tmp + (right[i] >> 31);

220 
} 
221 
s>interlacing_leftweight = 1;

222 
s>interlacing_shift = 31;

223 
break;

224  
225 
default:

226 
for(i=0; i<n; i++) { 
227 
tmp = left[i]; 
228 
left[i] = (tmp + right[i]) >> 1;

229 
right[i] = tmp  right[i]; 
230 
} 
231 
s>interlacing_leftweight = 1;

232 
s>interlacing_shift = 1;

233 
break;

234 
} 
235 
} 
236  
237 
static void alac_linear_predictor(AlacEncodeContext *s, int ch) 
238 
{ 
239 
int i;

240 
AlacLPCContext lpc = s>lpc[ch]; 
241  
242 
if(lpc.lpc_order == 31) { 
243 
s>predictor_buf[0] = s>sample_buf[ch][0]; 
244  
245 
for(i=1; i<s>avctx>frame_size; i++) 
246 
s>predictor_buf[i] = s>sample_buf[ch][i]  s>sample_buf[ch][i1];

247  
248 
return;

249 
} 
250  
251 
// generalised linear predictor

252  
253 
if(lpc.lpc_order > 0) { 
254 
int32_t *samples = s>sample_buf[ch]; 
255 
int32_t *residual = s>predictor_buf; 
256  
257 
// generate warmup samples

258 
residual[0] = samples[0]; 
259 
for(i=1;i<=lpc.lpc_order;i++) 
260 
residual[i] = samples[i]  samples[i1];

261  
262 
// perform lpc on remaining samples

263 
for(i = lpc.lpc_order + 1; i < s>avctx>frame_size; i++) { 
264 
int sum = 1 << (lpc.lpc_quant  1), res_val, j; 
265  
266 
for (j = 0; j < lpc.lpc_order; j++) { 
267 
sum += (samples[lpc.lpc_orderj]  samples[0]) *

268 
lpc.lpc_coeff[j]; 
269 
} 
270  
271 
sum >>= lpc.lpc_quant; 
272 
sum += samples[0];

273 
residual[i] = sign_extend(samples[lpc.lpc_order+1]  sum,

274 
s>write_sample_size); 
275 
res_val = residual[i]; 
276  
277 
if(res_val) {

278 
int index = lpc.lpc_order  1; 
279 
int neg = (res_val < 0); 
280  
281 
while(index >= 0 && (neg ? (res_val < 0):(res_val > 0))) { 
282 
int val = samples[0]  samples[lpc.lpc_order  index]; 
283 
int sign = (val ? FFSIGN(val) : 0); 
284  
285 
if(neg)

286 
sign*=1;

287  
288 
lpc.lpc_coeff[index] = sign; 
289 
val *= sign; 
290 
res_val = ((val >> lpc.lpc_quant) * 
291 
(lpc.lpc_order  index)); 
292 
index; 
293 
} 
294 
} 
295 
samples++; 
296 
} 
297 
} 
298 
} 
299  
300 
static void alac_entropy_coder(AlacEncodeContext *s) 
301 
{ 
302 
unsigned int history = s>rc.initial_history; 
303 
int sign_modifier = 0, i, k; 
304 
int32_t *samples = s>predictor_buf; 
305  
306 
for(i=0;i < s>avctx>frame_size;) { 
307 
int x;

308  
309 
k = av_log2((history >> 9) + 3); 
310  
311 
x = 2*(*samples)1; 
312 
x ^= (x>>31);

313  
314 
samples++; 
315 
i++; 
316  
317 
encode_scalar(s, x  sign_modifier, k, s>write_sample_size); 
318  
319 
history += x * s>rc.history_mult 
320 
 ((history * s>rc.history_mult) >> 9);

321  
322 
sign_modifier = 0;

323 
if(x > 0xFFFF) 
324 
history = 0xFFFF;

325  
326 
if((history < 128) && (i < s>avctx>frame_size)) { 
327 
unsigned int block_size = 0; 
328  
329 
k = 7  av_log2(history) + ((history + 16) >> 6); 
330  
331 
while((*samples == 0) && (i < s>avctx>frame_size)) { 
332 
samples++; 
333 
i++; 
334 
block_size++; 
335 
} 
336 
encode_scalar(s, block_size, k, 16);

337  
338 
sign_modifier = (block_size <= 0xFFFF);

339  
340 
history = 0;

341 
} 
342  
343 
} 
344 
} 
345  
346 
static void write_compressed_frame(AlacEncodeContext *s) 
347 
{ 
348 
int i, j;

349  
350 
if(s>avctx>channels == 2) 
351 
alac_stereo_decorrelation(s); 
352 
put_bits(&s>pbctx, 8, s>interlacing_shift);

353 
put_bits(&s>pbctx, 8, s>interlacing_leftweight);

354  
355 
for(i=0;i<s>avctx>channels;i++) { 
356  
357 
calc_predictor_params(s, i); 
358  
359 
put_bits(&s>pbctx, 4, 0); // prediction type : currently only type 0 has been RE'd 
360 
put_bits(&s>pbctx, 4, s>lpc[i].lpc_quant);

361  
362 
put_bits(&s>pbctx, 3, s>rc.rice_modifier);

363 
put_bits(&s>pbctx, 5, s>lpc[i].lpc_order);

364 
// predictor coeff. table

365 
for(j=0;j<s>lpc[i].lpc_order;j++) { 
366 
put_sbits(&s>pbctx, 16, s>lpc[i].lpc_coeff[j]);

367 
} 
368 
} 
369  
370 
// apply lpc and entropy coding to audio samples

371  
372 
for(i=0;i<s>avctx>channels;i++) { 
373 
alac_linear_predictor(s, i); 
374 
alac_entropy_coder(s); 
375 
} 
376 
} 
377  
378 
static av_cold int alac_encode_init(AVCodecContext *avctx) 
379 
{ 
380 
AlacEncodeContext *s = avctx>priv_data; 
381 
int ret;

382 
uint8_t *alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE+1);

383  
384 
avctx>frame_size = DEFAULT_FRAME_SIZE; 
385 
avctx>bits_per_coded_sample = DEFAULT_SAMPLE_SIZE; 
386  
387 
if(avctx>sample_fmt != AV_SAMPLE_FMT_S16) {

388 
av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");

389 
return 1; 
390 
} 
391  
392 
// Set default compression level

393 
if(avctx>compression_level == FF_COMPRESSION_DEFAULT)

394 
s>compression_level = 2;

395 
else

396 
s>compression_level = av_clip(avctx>compression_level, 0, 2); 
397  
398 
// Initialize default Rice parameters

399 
s>rc.history_mult = 40;

400 
s>rc.initial_history = 10;

401 
s>rc.k_modifier = 14;

402 
s>rc.rice_modifier = 4;

403  
404 
s>max_coded_frame_size = 8 + (avctx>frame_size*avctx>channels*avctx>bits_per_coded_sample>>3); 
405  
406 
s>write_sample_size = avctx>bits_per_coded_sample + avctx>channels  1; // FIXME: consider wasted_bytes 
407  
408 
AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE); 
409 
AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c')); 
410 
AV_WB32(alac_extradata+12, avctx>frame_size);

411 
AV_WB8 (alac_extradata+17, avctx>bits_per_coded_sample);

412 
AV_WB8 (alac_extradata+21, avctx>channels);

413 
AV_WB32(alac_extradata+24, s>max_coded_frame_size);

414 
AV_WB32(alac_extradata+28, avctx>sample_rate*avctx>channels*avctx>bits_per_coded_sample); // average bitrate 
415 
AV_WB32(alac_extradata+32, avctx>sample_rate);

416  
417 
// Set relevant extradata fields

418 
if(s>compression_level > 0) { 
419 
AV_WB8(alac_extradata+18, s>rc.history_mult);

420 
AV_WB8(alac_extradata+19, s>rc.initial_history);

421 
AV_WB8(alac_extradata+20, s>rc.k_modifier);

422 
} 
423  
424 
s>min_prediction_order = DEFAULT_MIN_PRED_ORDER; 
425 
if(avctx>min_prediction_order >= 0) { 
426 
if(avctx>min_prediction_order < MIN_LPC_ORDER 

427 
avctx>min_prediction_order > ALAC_MAX_LPC_ORDER) { 
428 
av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n", avctx>min_prediction_order);

429 
return 1; 
430 
} 
431  
432 
s>min_prediction_order = avctx>min_prediction_order; 
433 
} 
434  
435 
s>max_prediction_order = DEFAULT_MAX_PRED_ORDER; 
436 
if(avctx>max_prediction_order >= 0) { 
437 
if(avctx>max_prediction_order < MIN_LPC_ORDER 

438 
avctx>max_prediction_order > ALAC_MAX_LPC_ORDER) { 
439 
av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n", avctx>max_prediction_order);

440 
return 1; 
441 
} 
442  
443 
s>max_prediction_order = avctx>max_prediction_order; 
444 
} 
445  
446 
if(s>max_prediction_order < s>min_prediction_order) {

447 
av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",

448 
s>min_prediction_order, s>max_prediction_order); 
449 
return 1; 
450 
} 
451  
452 
avctx>extradata = alac_extradata; 
453 
avctx>extradata_size = ALAC_EXTRADATA_SIZE; 
454  
455 
avctx>coded_frame = avcodec_alloc_frame(); 
456 
avctx>coded_frame>key_frame = 1;

457  
458 
s>avctx = avctx; 
459 
ret = ff_lpc_init(&s>lpc_ctx, avctx>frame_size, s>max_prediction_order, 
460 
AV_LPC_TYPE_LEVINSON); 
461  
462 
return ret;

463 
} 
464  
465 
static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame, 
466 
int buf_size, void *data) 
467 
{ 
468 
AlacEncodeContext *s = avctx>priv_data; 
469 
PutBitContext *pb = &s>pbctx; 
470 
int i, out_bytes, verbatim_flag = 0; 
471  
472 
if(avctx>frame_size > DEFAULT_FRAME_SIZE) {

473 
av_log(avctx, AV_LOG_ERROR, "input frame size exceeded\n");

474 
return 1; 
475 
} 
476  
477 
if(buf_size < 2*s>max_coded_frame_size) { 
478 
av_log(avctx, AV_LOG_ERROR, "buffer size is too small\n");

479 
return 1; 
480 
} 
481  
482 
verbatim:

483 
init_put_bits(pb, frame, buf_size); 
484  
485 
if((s>compression_level == 0)  verbatim_flag) { 
486 
// Verbatim mode

487 
const int16_t *samples = data;

488 
write_frame_header(s, 1);

489 
for(i=0; i<avctx>frame_size*avctx>channels; i++) { 
490 
put_sbits(pb, 16, *samples++);

491 
} 
492 
} else {

493 
init_sample_buffers(s, data); 
494 
write_frame_header(s, 0);

495 
write_compressed_frame(s); 
496 
} 
497  
498 
put_bits(pb, 3, 7); 
499 
flush_put_bits(pb); 
500 
out_bytes = put_bits_count(pb) >> 3;

501  
502 
if(out_bytes > s>max_coded_frame_size) {

503 
/* frame too large. use verbatim mode */

504 
if(verbatim_flag  (s>compression_level == 0)) { 
505 
/* still too large. must be an error. */

506 
av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");

507 
return 1; 
508 
} 
509 
verbatim_flag = 1;

510 
goto verbatim;

511 
} 
512  
513 
return out_bytes;

514 
} 
515  
516 
static av_cold int alac_encode_close(AVCodecContext *avctx) 
517 
{ 
518 
AlacEncodeContext *s = avctx>priv_data; 
519 
ff_lpc_end(&s>lpc_ctx); 
520 
av_freep(&avctx>extradata); 
521 
avctx>extradata_size = 0;

522 
av_freep(&avctx>coded_frame); 
523 
return 0; 
524 
} 
525  
526 
AVCodec ff_alac_encoder = { 
527 
"alac",

528 
AVMEDIA_TYPE_AUDIO, 
529 
CODEC_ID_ALAC, 
530 
sizeof(AlacEncodeContext),

531 
alac_encode_init, 
532 
alac_encode_frame, 
533 
alac_encode_close, 
534 
.capabilities = CODEC_CAP_SMALL_LAST_FRAME, 
535 
.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE}, 
536 
.long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),

537 
}; 