ffmpeg / libavcodec / alacenc.c @ 72415b2a
History  View  Annotate  Download (15.8 KB)
1 
/**


2 
* ALAC audio encoder

3 
* Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>

4 
*

5 
* This file is part of FFmpeg.

6 
*

7 
* FFmpeg is free software; you can redistribute it and/or

8 
* modify it under the terms of the GNU Lesser General Public

9 
* License as published by the Free Software Foundation; either

10 
* version 2.1 of the License, or (at your option) any later version.

11 
*

12 
* FFmpeg is distributed in the hope that it will be useful,

13 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

14 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 
* Lesser General Public License for more details.

16 
*

17 
* You should have received a copy of the GNU Lesser General Public

18 
* License along with FFmpeg; if not, write to the Free Software

19 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 
*/

21  
22 
#include "avcodec.h" 
23 
#include "get_bits.h" 
24 
#include "put_bits.h" 
25 
#include "dsputil.h" 
26 
#include "lpc.h" 
27 
#include "mathops.h" 
28  
/* Frame geometry: frame_size and bits_per_coded_sample are set to these in
 * alac_encode_init(); MAX_CHANNELS bounds the per-channel buffers. */
#define DEFAULT_FRAME_SIZE        4096
#define DEFAULT_SAMPLE_SIZE       16
#define MAX_CHANNELS              8
/* Size in bytes of the extradata block built in alac_encode_init(). */
#define ALAC_EXTRADATA_SIZE       36
#define ALAC_FRAME_HEADER_SIZE    55
#define ALAC_FRAME_FOOTER_SIZE    3

/* 9-bit code written by encode_scalar() before a value stored verbatim. */
#define ALAC_ESCAPE_CODE          0x1FF
/* Bounds and defaults for the LPC analysis. */
#define ALAC_MAX_LPC_ORDER        30
#define DEFAULT_MAX_PRED_ORDER    6
#define DEFAULT_MIN_PRED_ORDER    4
#define ALAC_MAX_LPC_PRECISION    9
#define ALAC_MAX_LPC_SHIFT        9

/* Stereo decorrelation modes, as returned by estimate_stereo_mode(). */
#define ALAC_CHMODE_LEFT_RIGHT    0
#define ALAC_CHMODE_LEFT_SIDE     1
#define ALAC_CHMODE_RIGHT_SIDE    2
#define ALAC_CHMODE_MID_SIDE      3
47  
/**
 * Parameters of the adaptive entropy coder.
 * All four are set to fixed defaults in alac_encode_init().
 */
typedef struct RiceContext {
    int history_mult;     ///< multiplier applied in the history update of alac_entropy_coder()
    int initial_history;  ///< history value each entropy-coder pass starts from
    int k_modifier;       ///< upper limit applied to k in encode_scalar()
    int rice_modifier;    ///< written as a 3-bit field per channel in write_compressed_frame()
} RiceContext;
54  
55 
typedef struct LPCContext { 
56 
int lpc_order;

57 
int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; 
58 
int lpc_quant;

59 
} LPCContext; 
60  
61 
typedef struct AlacEncodeContext { 
62 
int compression_level;

63 
int min_prediction_order;

64 
int max_prediction_order;

65 
int max_coded_frame_size;

66 
int write_sample_size;

67 
int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE]; 
68 
int32_t predictor_buf[DEFAULT_FRAME_SIZE]; 
69 
int interlacing_shift;

70 
int interlacing_leftweight;

71 
PutBitContext pbctx; 
72 
RiceContext rc; 
73 
LPCContext lpc[MAX_CHANNELS]; 
74 
DSPContext dspctx; 
75 
AVCodecContext *avctx; 
76 
} AlacEncodeContext; 
77  
78  
79 
static void init_sample_buffers(AlacEncodeContext *s, int16_t *input_samples) 
80 
{ 
81 
int ch, i;

82  
83 
for(ch=0;ch<s>avctx>channels;ch++) { 
84 
int16_t *sptr = input_samples + ch; 
85 
for(i=0;i<s>avctx>frame_size;i++) { 
86 
s>sample_buf[ch][i] = *sptr; 
87 
sptr += s>avctx>channels; 
88 
} 
89 
} 
90 
} 
91  
92 
static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size) 
93 
{ 
94 
int divisor, q, r;

95  
96 
k = FFMIN(k, s>rc.k_modifier); 
97 
divisor = (1<<k)  1; 
98 
q = x / divisor; 
99 
r = x % divisor; 
100  
101 
if(q > 8) { 
102 
// write escape code and sample value directly

103 
put_bits(&s>pbctx, 9, ALAC_ESCAPE_CODE);

104 
put_bits(&s>pbctx, write_sample_size, x); 
105 
} else {

106 
if(q)

107 
put_bits(&s>pbctx, q, (1<<q)  1); 
108 
put_bits(&s>pbctx, 1, 0); 
109  
110 
if(k != 1) { 
111 
if(r > 0) 
112 
put_bits(&s>pbctx, k, r+1);

113 
else

114 
put_bits(&s>pbctx, k1, 0); 
115 
} 
116 
} 
117 
} 
118  
119 
static void write_frame_header(AlacEncodeContext *s, int is_verbatim) 
120 
{ 
121 
put_bits(&s>pbctx, 3, s>avctx>channels1); // No. of channels 1 
122 
put_bits(&s>pbctx, 16, 0); // Seems to be zero 
123 
put_bits(&s>pbctx, 1, 1); // Sample count is in the header 
124 
put_bits(&s>pbctx, 2, 0); // FIXME: Wasted bytes field 
125 
put_bits(&s>pbctx, 1, is_verbatim); // Audio block is verbatim 
126 
put_bits32(&s>pbctx, s>avctx>frame_size); // No. of samples in the frame

127 
} 
128  
129 
static void calc_predictor_params(AlacEncodeContext *s, int ch) 
130 
{ 
131 
int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER]; 
132 
int shift[MAX_LPC_ORDER];

133 
int opt_order;

134  
135 
if (s>compression_level == 1) { 
136 
s>lpc[ch].lpc_order = 6;

137 
s>lpc[ch].lpc_quant = 6;

138 
s>lpc[ch].lpc_coeff[0] = 160; 
139 
s>lpc[ch].lpc_coeff[1] = 190; 
140 
s>lpc[ch].lpc_coeff[2] = 170; 
141 
s>lpc[ch].lpc_coeff[3] = 130; 
142 
s>lpc[ch].lpc_coeff[4] = 80; 
143 
s>lpc[ch].lpc_coeff[5] = 25; 
144 
} else {

145 
opt_order = ff_lpc_calc_coefs(&s>dspctx, s>sample_buf[ch], 
146 
s>avctx>frame_size, 
147 
s>min_prediction_order, 
148 
s>max_prediction_order, 
149 
ALAC_MAX_LPC_PRECISION, coefs, shift, 1,

150 
ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);

151  
152 
s>lpc[ch].lpc_order = opt_order; 
153 
s>lpc[ch].lpc_quant = shift[opt_order1];

154 
memcpy(s>lpc[ch].lpc_coeff, coefs[opt_order1], opt_order*sizeof(int)); 
155 
} 
156 
} 
157  
/**
 * Pick the stereo decorrelation mode with the lowest estimated cost.
 *
 * The cost of a signal is the sum of absolute values of its 2nd-order
 * residual, accumulated for left, right, (left+right)>>1 and left-right.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return one of the ALAC_CHMODE_* values; ties resolve to the lowest value
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    int i, best;
    int32_t lt, rt;
    uint64_t sum[4];
    uint64_t score[4];

    /* calculate sum of 2nd order residual for each channel */
    sum[0] = sum[1] = sum[2] = sum[3] = 0;
    for (i = 2; i < n; i++) {
        lt = left_ch[i]  - 2 * left_ch[i - 1]  + left_ch[i - 2];
        rt = right_ch[i] - 2 * right_ch[i - 1] + right_ch[i - 2];
        sum[2] += FFABS((lt + rt) >> 1);
        sum[3] += FFABS(lt - rt);
        sum[0] += FFABS(lt);
        sum[1] += FFABS(rt);
    }

    /* calculate score for each mode */
    score[0] = sum[0] + sum[1];     /* left / right */
    score[1] = sum[0] + sum[3];     /* left / side  */
    score[2] = sum[1] + sum[3];     /* right / side */
    score[3] = sum[2] + sum[3];     /* mid / side   */

    /* return mode with lowest score */
    best = 0;
    for (i = 1; i < 4; i++) {
        if (score[i] < score[best]) {
            best = i;
        }
    }
    return best;
}
191  
192 
static void alac_stereo_decorrelation(AlacEncodeContext *s) 
193 
{ 
194 
int32_t *left = s>sample_buf[0], *right = s>sample_buf[1]; 
195 
int i, mode, n = s>avctx>frame_size;

196 
int32_t tmp; 
197  
198 
mode = estimate_stereo_mode(left, right, n); 
199  
200 
switch(mode)

201 
{ 
202 
case ALAC_CHMODE_LEFT_RIGHT:

203 
s>interlacing_leftweight = 0;

204 
s>interlacing_shift = 0;

205 
break;

206  
207 
case ALAC_CHMODE_LEFT_SIDE:

208 
for(i=0; i<n; i++) { 
209 
right[i] = left[i]  right[i]; 
210 
} 
211 
s>interlacing_leftweight = 1;

212 
s>interlacing_shift = 0;

213 
break;

214  
215 
case ALAC_CHMODE_RIGHT_SIDE:

216 
for(i=0; i<n; i++) { 
217 
tmp = right[i]; 
218 
right[i] = left[i]  right[i]; 
219 
left[i] = tmp + (right[i] >> 31);

220 
} 
221 
s>interlacing_leftweight = 1;

222 
s>interlacing_shift = 31;

223 
break;

224  
225 
default:

226 
for(i=0; i<n; i++) { 
227 
tmp = left[i]; 
228 
left[i] = (tmp + right[i]) >> 1;

229 
right[i] = tmp  right[i]; 
230 
} 
231 
s>interlacing_leftweight = 1;

232 
s>interlacing_shift = 1;

233 
break;

234 
} 
235 
} 
236  
237 
static void alac_linear_predictor(AlacEncodeContext *s, int ch) 
238 
{ 
239 
int i;

240 
LPCContext lpc = s>lpc[ch]; 
241  
242 
if(lpc.lpc_order == 31) { 
243 
s>predictor_buf[0] = s>sample_buf[ch][0]; 
244  
245 
for(i=1; i<s>avctx>frame_size; i++) 
246 
s>predictor_buf[i] = s>sample_buf[ch][i]  s>sample_buf[ch][i1];

247  
248 
return;

249 
} 
250  
251 
// generalised linear predictor

252  
253 
if(lpc.lpc_order > 0) { 
254 
int32_t *samples = s>sample_buf[ch]; 
255 
int32_t *residual = s>predictor_buf; 
256  
257 
// generate warmup samples

258 
residual[0] = samples[0]; 
259 
for(i=1;i<=lpc.lpc_order;i++) 
260 
residual[i] = samples[i]  samples[i1];

261  
262 
// perform lpc on remaining samples

263 
for(i = lpc.lpc_order + 1; i < s>avctx>frame_size; i++) { 
264 
int sum = 1 << (lpc.lpc_quant  1), res_val, j; 
265  
266 
for (j = 0; j < lpc.lpc_order; j++) { 
267 
sum += (samples[lpc.lpc_orderj]  samples[0]) *

268 
lpc.lpc_coeff[j]; 
269 
} 
270  
271 
sum >>= lpc.lpc_quant; 
272 
sum += samples[0];

273 
residual[i] = sign_extend(samples[lpc.lpc_order+1]  sum,

274 
s>write_sample_size); 
275 
res_val = residual[i]; 
276  
277 
if(res_val) {

278 
int index = lpc.lpc_order  1; 
279 
int neg = (res_val < 0); 
280  
281 
while(index >= 0 && (neg ? (res_val < 0):(res_val > 0))) { 
282 
int val = samples[0]  samples[lpc.lpc_order  index]; 
283 
int sign = (val ? FFSIGN(val) : 0); 
284  
285 
if(neg)

286 
sign*=1;

287  
288 
lpc.lpc_coeff[index] = sign; 
289 
val *= sign; 
290 
res_val = ((val >> lpc.lpc_quant) * 
291 
(lpc.lpc_order  index)); 
292 
index; 
293 
} 
294 
} 
295 
samples++; 
296 
} 
297 
} 
298 
} 
299  
300 
static void alac_entropy_coder(AlacEncodeContext *s) 
301 
{ 
302 
unsigned int history = s>rc.initial_history; 
303 
int sign_modifier = 0, i, k; 
304 
int32_t *samples = s>predictor_buf; 
305  
306 
for(i=0;i < s>avctx>frame_size;) { 
307 
int x;

308  
309 
k = av_log2((history >> 9) + 3); 
310  
311 
x = 2*(*samples)1; 
312 
x ^= (x>>31);

313  
314 
samples++; 
315 
i++; 
316  
317 
encode_scalar(s, x  sign_modifier, k, s>write_sample_size); 
318  
319 
history += x * s>rc.history_mult 
320 
 ((history * s>rc.history_mult) >> 9);

321  
322 
sign_modifier = 0;

323 
if(x > 0xFFFF) 
324 
history = 0xFFFF;

325  
326 
if((history < 128) && (i < s>avctx>frame_size)) { 
327 
unsigned int block_size = 0; 
328  
329 
k = 7  av_log2(history) + ((history + 16) >> 6); 
330  
331 
while((*samples == 0) && (i < s>avctx>frame_size)) { 
332 
samples++; 
333 
i++; 
334 
block_size++; 
335 
} 
336 
encode_scalar(s, block_size, k, 16);

337  
338 
sign_modifier = (block_size <= 0xFFFF);

339  
340 
history = 0;

341 
} 
342  
343 
} 
344 
} 
345  
346 
static void write_compressed_frame(AlacEncodeContext *s) 
347 
{ 
348 
int i, j;

349  
350 
if(s>avctx>channels == 2) 
351 
alac_stereo_decorrelation(s); 
352 
put_bits(&s>pbctx, 8, s>interlacing_shift);

353 
put_bits(&s>pbctx, 8, s>interlacing_leftweight);

354  
355 
for(i=0;i<s>avctx>channels;i++) { 
356  
357 
calc_predictor_params(s, i); 
358  
359 
put_bits(&s>pbctx, 4, 0); // prediction type : currently only type 0 has been RE'd 
360 
put_bits(&s>pbctx, 4, s>lpc[i].lpc_quant);

361  
362 
put_bits(&s>pbctx, 3, s>rc.rice_modifier);

363 
put_bits(&s>pbctx, 5, s>lpc[i].lpc_order);

364 
// predictor coeff. table

365 
for(j=0;j<s>lpc[i].lpc_order;j++) { 
366 
put_sbits(&s>pbctx, 16, s>lpc[i].lpc_coeff[j]);

367 
} 
368 
} 
369  
370 
// apply lpc and entropy coding to audio samples

371  
372 
for(i=0;i<s>avctx>channels;i++) { 
373 
alac_linear_predictor(s, i); 
374 
alac_entropy_coder(s); 
375 
} 
376 
} 
377  
378 
static av_cold int alac_encode_init(AVCodecContext *avctx) 
379 
{ 
380 
AlacEncodeContext *s = avctx>priv_data; 
381 
uint8_t *alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE+1);

382  
383 
avctx>frame_size = DEFAULT_FRAME_SIZE; 
384 
avctx>bits_per_coded_sample = DEFAULT_SAMPLE_SIZE; 
385  
386 
if(avctx>sample_fmt != SAMPLE_FMT_S16) {

387 
av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");

388 
return 1; 
389 
} 
390  
391 
// Set default compression level

392 
if(avctx>compression_level == FF_COMPRESSION_DEFAULT)

393 
s>compression_level = 2;

394 
else

395 
s>compression_level = av_clip(avctx>compression_level, 0, 2); 
396  
397 
// Initialize default Rice parameters

398 
s>rc.history_mult = 40;

399 
s>rc.initial_history = 10;

400 
s>rc.k_modifier = 14;

401 
s>rc.rice_modifier = 4;

402  
403 
s>max_coded_frame_size = 8 + (avctx>frame_size*avctx>channels*avctx>bits_per_coded_sample>>3); 
404  
405 
s>write_sample_size = avctx>bits_per_coded_sample + avctx>channels  1; // FIXME: consider wasted_bytes 
406  
407 
AV_WB32(alac_extradata, ALAC_EXTRADATA_SIZE); 
408 
AV_WB32(alac_extradata+4, MKBETAG('a','l','a','c')); 
409 
AV_WB32(alac_extradata+12, avctx>frame_size);

410 
AV_WB8 (alac_extradata+17, avctx>bits_per_coded_sample);

411 
AV_WB8 (alac_extradata+21, avctx>channels);

412 
AV_WB32(alac_extradata+24, s>max_coded_frame_size);

413 
AV_WB32(alac_extradata+28, avctx>sample_rate*avctx>channels*avctx>bits_per_coded_sample); // average bitrate 
414 
AV_WB32(alac_extradata+32, avctx>sample_rate);

415  
416 
// Set relevant extradata fields

417 
if(s>compression_level > 0) { 
418 
AV_WB8(alac_extradata+18, s>rc.history_mult);

419 
AV_WB8(alac_extradata+19, s>rc.initial_history);

420 
AV_WB8(alac_extradata+20, s>rc.k_modifier);

421 
} 
422  
423 
s>min_prediction_order = DEFAULT_MIN_PRED_ORDER; 
424 
if(avctx>min_prediction_order >= 0) { 
425 
if(avctx>min_prediction_order < MIN_LPC_ORDER 

426 
avctx>min_prediction_order > ALAC_MAX_LPC_ORDER) { 
427 
av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n", avctx>min_prediction_order);

428 
return 1; 
429 
} 
430  
431 
s>min_prediction_order = avctx>min_prediction_order; 
432 
} 
433  
434 
s>max_prediction_order = DEFAULT_MAX_PRED_ORDER; 
435 
if(avctx>max_prediction_order >= 0) { 
436 
if(avctx>max_prediction_order < MIN_LPC_ORDER 

437 
avctx>max_prediction_order > ALAC_MAX_LPC_ORDER) { 
438 
av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n", avctx>max_prediction_order);

439 
return 1; 
440 
} 
441  
442 
s>max_prediction_order = avctx>max_prediction_order; 
443 
} 
444  
445 
if(s>max_prediction_order < s>min_prediction_order) {

446 
av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",

447 
s>min_prediction_order, s>max_prediction_order); 
448 
return 1; 
449 
} 
450  
451 
avctx>extradata = alac_extradata; 
452 
avctx>extradata_size = ALAC_EXTRADATA_SIZE; 
453  
454 
avctx>coded_frame = avcodec_alloc_frame(); 
455 
avctx>coded_frame>key_frame = 1;

456  
457 
s>avctx = avctx; 
458 
dsputil_init(&s>dspctx, avctx); 
459  
460 
return 0; 
461 
} 
462  
463 
static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame, 
464 
int buf_size, void *data) 
465 
{ 
466 
AlacEncodeContext *s = avctx>priv_data; 
467 
PutBitContext *pb = &s>pbctx; 
468 
int i, out_bytes, verbatim_flag = 0; 
469  
470 
if(avctx>frame_size > DEFAULT_FRAME_SIZE) {

471 
av_log(avctx, AV_LOG_ERROR, "input frame size exceeded\n");

472 
return 1; 
473 
} 
474  
475 
if(buf_size < 2*s>max_coded_frame_size) { 
476 
av_log(avctx, AV_LOG_ERROR, "buffer size is too small\n");

477 
return 1; 
478 
} 
479  
480 
verbatim:

481 
init_put_bits(pb, frame, buf_size); 
482  
483 
if((s>compression_level == 0)  verbatim_flag) { 
484 
// Verbatim mode

485 
int16_t *samples = data; 
486 
write_frame_header(s, 1);

487 
for(i=0; i<avctx>frame_size*avctx>channels; i++) { 
488 
put_sbits(pb, 16, *samples++);

489 
} 
490 
} else {

491 
init_sample_buffers(s, data); 
492 
write_frame_header(s, 0);

493 
write_compressed_frame(s); 
494 
} 
495  
496 
put_bits(pb, 3, 7); 
497 
flush_put_bits(pb); 
498 
out_bytes = put_bits_count(pb) >> 3;

499  
500 
if(out_bytes > s>max_coded_frame_size) {

501 
/* frame too large. use verbatim mode */

502 
if(verbatim_flag  (s>compression_level == 0)) { 
503 
/* still too large. must be an error. */

504 
av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");

505 
return 1; 
506 
} 
507 
verbatim_flag = 1;

508 
goto verbatim;

509 
} 
510  
511 
return out_bytes;

512 
} 
513  
514 
static av_cold int alac_encode_close(AVCodecContext *avctx) 
515 
{ 
516 
av_freep(&avctx>extradata); 
517 
avctx>extradata_size = 0;

518 
av_freep(&avctx>coded_frame); 
519 
return 0; 
520 
} 
521  
522 
AVCodec alac_encoder = { 
523 
"alac",

524 
AVMEDIA_TYPE_AUDIO, 
525 
CODEC_ID_ALAC, 
526 
sizeof(AlacEncodeContext),

527 
alac_encode_init, 
528 
alac_encode_frame, 
529 
alac_encode_close, 
530 
.capabilities = CODEC_CAP_SMALL_LAST_FRAME, 
531 
.long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),

532 
}; 