ffmpeg / libavcodec / alacenc.c @ 43d7c611
History  View  Annotate  Download (15.3 KB)
1 
/**
 * ALAC audio encoder
 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

21  
22 
#include <errno.h>

#include "avcodec.h"
#include "get_bits.h"
#include "put_bits.h"
#include "dsputil.h"
#include "lpc.h"
#include "mathops.h"
28  
29 
/* Frame geometry and container constants. */
#define DEFAULT_FRAME_SIZE        4096  /* samples per channel per frame; also sizes the sample buffers */
#define DEFAULT_SAMPLE_SIZE       16    /* bits per input sample */
#define MAX_CHANNELS              8     /* first dimension of the per-channel buffers */
#define ALAC_EXTRADATA_SIZE       36    /* bytes of codec extradata written at init */
#define ALAC_FRAME_HEADER_SIZE    55    /* equals the 3+16+1+2+1+32 bits emitted by write_frame_header() */
#define ALAC_FRAME_FOOTER_SIZE    3     /* equals the 3-bit tag emitted after the frame payload */

/* Entropy coder / predictor limits. */
#define ALAC_ESCAPE_CODE          0x1FF /* nine 1-bits: marks a value stored verbatim */
#define ALAC_MAX_LPC_ORDER        30
#define DEFAULT_MAX_PRED_ORDER    6
#define DEFAULT_MIN_PRED_ORDER    4
#define ALAC_MAX_LPC_PRECISION    9
#define ALAC_MAX_LPC_SHIFT        9

/* Stereo decorrelation modes, as returned by estimate_stereo_mode(). */
#define ALAC_CHMODE_LEFT_RIGHT    0
#define ALAC_CHMODE_LEFT_SIDE     1
#define ALAC_CHMODE_RIGHT_SIDE    2
#define ALAC_CHMODE_MID_SIDE      3
47  
48 
/**
 * Parameters of the adaptive Rice-style entropy coder.
 * All four are set once in alac_encode_init().
 */
typedef struct RiceContext {
    int history_mult;    ///< weight applied to each coded value when updating the running history
    int initial_history; ///< history value at the start of every coded channel
    int k_modifier;      ///< upper bound applied to the Rice parameter k in encode_scalar()
    int rice_modifier;   ///< 3-bit value written per channel in the compressed frame header
} RiceContext;
54  
55 
typedef struct LPCContext { 
56 
int lpc_order;

57 
int lpc_coeff[ALAC_MAX_LPC_ORDER+1]; 
58 
int lpc_quant;

59 
} LPCContext; 
60  
61 
typedef struct AlacEncodeContext { 
62 
int compression_level;

63 
int min_prediction_order;

64 
int max_prediction_order;

65 
int max_coded_frame_size;

66 
int write_sample_size;

67 
int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE]; 
68 
int32_t predictor_buf[DEFAULT_FRAME_SIZE]; 
69 
int interlacing_shift;

70 
int interlacing_leftweight;

71 
PutBitContext pbctx; 
72 
RiceContext rc; 
73 
LPCContext lpc[MAX_CHANNELS]; 
74 
DSPContext dspctx; 
75 
AVCodecContext *avctx; 
76 
} AlacEncodeContext; 
77  
78  
79 
/**
 * De-interleave one frame of 16-bit input into the per-channel sample buffers.
 *
 * @param s             encoder context; s->sample_buf[ch][0..frame_size-1] is overwritten
 * @param input_samples interleaved samples, frame_size * channels entries
 */
static void init_sample_buffers(AlacEncodeContext *s, int16_t *input_samples)
{
    int ch, i;

    for (ch = 0; ch < s->avctx->channels; ch++) {
        /* start at this channel's first sample, then step one full
         * interleaved group at a time */
        int16_t *sptr = input_samples + ch;
        for (i = 0; i < s->avctx->frame_size; i++) {
            s->sample_buf[ch][i] = *sptr;
            sptr += s->avctx->channels;
        }
    }
}
91  
92 
/**
 * Write one non-negative value with the adaptive Rice-style code.
 *
 * The value is split into a quotient and remainder by (2^k - 1). Small
 * quotients are written in unary followed by the remainder; quotients above 8
 * are replaced by the escape code followed by the raw value.
 *
 * @param s                 encoder context (bit writer and k limit)
 * @param x                 value to encode
 * @param k                 Rice parameter; clamped to s->rc.k_modifier.
 *                          NOTE(review): k == 0 would make the divisor zero;
 *                          the callers in this file appear to always pass k >= 1.
 * @param write_sample_size bit width used when the value is escaped
 */
static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size)
{
    int divisor, q, r;

    k = FFMIN(k, s->rc.k_modifier);
    divisor = (1 << k) - 1;
    q = x / divisor;
    r = x % divisor;

    if (q > 8) {
        // write escape code and sample value directly
        put_bits(&s->pbctx, 9, ALAC_ESCAPE_CODE);
        put_bits(&s->pbctx, write_sample_size, x);
    } else {
        // unary quotient: q one-bits terminated by a zero-bit
        if (q)
            put_bits(&s->pbctx, q, (1 << q) - 1);
        put_bits(&s->pbctx, 1, 0);

        if (k != 1) {
            // a zero remainder takes one bit less than a non-zero one
            if (r > 0)
                put_bits(&s->pbctx, k, r + 1);
            else
                put_bits(&s->pbctx, k - 1, 0);
        }
    }
}
118  
119 
/**
 * Write the fixed 55-bit header that starts every frame.
 *
 * @param s           encoder context (bit writer, channel count, frame size)
 * @param is_verbatim 1 if the samples follow uncompressed, 0 if compressed
 */
static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
{
    put_bits(&s->pbctx, 3,  s->avctx->channels - 1);    // No. of channels - 1
    put_bits(&s->pbctx, 16, 0);                         // Seems to be zero
    put_bits(&s->pbctx, 1,  1);                         // Sample count is in the header
    put_bits(&s->pbctx, 2,  0);                         // FIXME: Wasted bytes field
    put_bits(&s->pbctx, 1,  is_verbatim);               // Audio block is verbatim
    put_bits32(&s->pbctx, s->avctx->frame_size);        // No. of samples in the frame
}
128  
129 
/**
 * Pick the LPC order, shift and coefficients for one channel and store them
 * in s->lpc[ch].
 *
 * @param s  encoder context; s->sample_buf[ch] must hold the current frame
 * @param ch channel index
 */
static void calc_predictor_params(AlacEncodeContext *s, int ch)
{
    int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
    int shift[MAX_LPC_ORDER];
    int opt_order, i;

    opt_order = ff_lpc_calc_coefs(&s->dspctx, s->sample_buf[ch], s->avctx->frame_size,
                                  s->min_prediction_order, s->max_prediction_order,
                                  ALAC_MAX_LPC_PRECISION, coefs, shift, 1,
                                  ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);

    s->lpc[ch].lpc_order = opt_order;
    s->lpc[ch].lpc_quant = shift[opt_order - 1];
    /* element-wise copy: the source is int32_t and the destination is int,
     * so this does not depend on the two types having the same size */
    for (i = 0; i < opt_order; i++)
        s->lpc[ch].lpc_coeff[i] = coefs[opt_order - 1][i];
}
142  
143 
/**
 * Estimate which stereo decorrelation mode will code most cheaply.
 *
 * For each candidate signal (left, right, mid, side) the absolute values of
 * its 2nd-order residual are summed; each mode is then scored as the sum of
 * the two signals it would transmit.
 *
 * @param left_ch  left channel samples
 * @param right_ch right channel samples
 * @param n        number of samples per channel
 * @return mode index 0..3 (left/right, left/side, right/side, mid/side);
 *         ties go to the lowest index, and 0 is returned when n < 3
 */
static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
{
    uint64_t sum[4] = { 0, 0, 0, 0 }; /* [0]=left [1]=right [2]=mid [3]=side */
    uint64_t score[4];
    int i, best;

    /* calculate sum of 2nd order residual for each channel */
    for (i = 2; i < n; i++) {
        int32_t lt   = left_ch[i]  - 2 * left_ch[i - 1]  + left_ch[i - 2];
        int32_t rt   = right_ch[i] - 2 * right_ch[i - 1] + right_ch[i - 2];
        int32_t mid  = (lt + rt) >> 1;
        int32_t side = lt - rt;

        sum[2] += (uint64_t)(mid  >= 0 ? mid  : -mid);
        sum[3] += (uint64_t)(side >= 0 ? side : -side);
        sum[0] += (uint64_t)(lt   >= 0 ? lt   : -lt);
        sum[1] += (uint64_t)(rt   >= 0 ? rt   : -rt);
    }

    /* calculate score for each mode */
    score[0] = sum[0] + sum[1];
    score[1] = sum[0] + sum[3];
    score[2] = sum[1] + sum[3];
    score[3] = sum[2] + sum[3];

    /* return mode with lowest score */
    best = 0;
    for (i = 1; i < 4; i++) {
        if (score[i] < score[best])
            best = i;
    }
    return best;
}
176  
177 
/**
 * Decorrelate the two channels of a stereo frame in place.
 *
 * The mode is chosen by estimate_stereo_mode(); s->sample_buf[0] and
 * s->sample_buf[1] are rewritten accordingly, and the matching
 * interlacing_leftweight / interlacing_shift pair is stored in the context
 * for write_compressed_frame() to emit.
 *
 * @param s encoder context holding the current frame in sample_buf[0..1]
 */
static void alac_stereo_decorrelation(AlacEncodeContext *s)
{
    int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
    int i, mode, n = s->avctx->frame_size;
    int32_t tmp;

    mode = estimate_stereo_mode(left, right, n);

    switch (mode) {
    case ALAC_CHMODE_LEFT_RIGHT:
        /* channels are stored unchanged */
        s->interlacing_leftweight = 0;
        s->interlacing_shift      = 0;
        break;

    case ALAC_CHMODE_LEFT_SIDE:
        for (i = 0; i < n; i++)
            right[i] = left[i] - right[i];
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 0;
        break;

    case ALAC_CHMODE_RIGHT_SIDE:
        for (i = 0; i < n; i++) {
            tmp      = right[i];
            right[i] = left[i] - right[i];
            /* right[i] >> 31 is 0 or -1 depending on the sign of the side value */
            left[i]  = tmp + (right[i] >> 31);
        }
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 31;
        break;

    default: /* ALAC_CHMODE_MID_SIDE */
        for (i = 0; i < n; i++) {
            tmp      = left[i];
            left[i]  = (tmp + right[i]) >> 1;
            right[i] = tmp - right[i];
        }
        s->interlacing_leftweight = 1;
        s->interlacing_shift      = 1;
        break;
    }
}
221  
222 
/**
 * Compute the prediction residual of one channel into s->predictor_buf.
 *
 * The predictor parameters are taken from s->lpc[ch] by value, so the
 * coefficient adaptation performed here does not modify the stored ones.
 *
 * @param s  encoder context
 * @param ch channel index
 */
static void alac_linear_predictor(AlacEncodeContext *s, int ch)
{
    int i;
    LPCContext lpc = s->lpc[ch];

    if (lpc.lpc_order == 31) {
        /* special order value: plain first-order difference */
        s->predictor_buf[0] = s->sample_buf[ch][0];

        for (i = 1; i < s->avctx->frame_size; i++)
            s->predictor_buf[i] = s->sample_buf[ch][i] - s->sample_buf[ch][i - 1];

        return;
    }

    // generalised linear predictor

    if (lpc.lpc_order > 0) {
        int32_t *samples  = s->sample_buf[ch];
        int32_t *residual = s->predictor_buf;

        // generate warm-up samples
        residual[0] = samples[0];
        for (i = 1; i <= lpc.lpc_order; i++)
            residual[i] = samples[i] - samples[i - 1];

        // perform lpc on remaining samples
        for (i = lpc.lpc_order + 1; i < s->avctx->frame_size; i++) {
            /* start from half an LSB so the shift below rounds.
             * NOTE(review): lpc_quant == 0 would shift by -1 here — confirm
             * that ff_lpc_calc_coefs() never yields a zero shift. */
            int sum = 1 << (lpc.lpc_quant - 1), res_val, j;

            /* predict relative to samples[0], the oldest sample in the window */
            for (j = 0; j < lpc.lpc_order; j++) {
                sum += (samples[lpc.lpc_order - j] - samples[0]) *
                       lpc.lpc_coeff[j];
            }

            sum >>= lpc.lpc_quant;
            sum  += samples[0];
            residual[i] = sign_extend(samples[lpc.lpc_order + 1] - sum,
                                      s->write_sample_size);
            res_val = residual[i];

            if (res_val) {
                /* nudge the coefficients in the direction that would have
                 * reduced this residual, newest tap last, until the
                 * remaining error changes sign or all taps are visited */
                int index = lpc.lpc_order - 1;
                int neg   = (res_val < 0);

                while (index >= 0 && (neg ? (res_val < 0) : (res_val > 0))) {
                    int val  = samples[0] - samples[lpc.lpc_order - index];
                    int sign = (val ? FFSIGN(val) : 0);

                    if (neg)
                        sign *= -1;

                    lpc.lpc_coeff[index] -= sign;
                    val *= sign;
                    res_val -= ((val >> lpc.lpc_quant) *
                                (lpc.lpc_order - index));
                    index--;
                }
            }
            samples++;
        }
    }
}
284  
285 
/**
 * Entropy-code the residual held in s->predictor_buf.
 *
 * Each residual is mapped to a non-negative integer and written with
 * encode_scalar(), using a Rice parameter derived from a running history.
 * When the history drops below 128, the following run of zero residuals is
 * coded as a single run length.
 *
 * @param s encoder context; reads predictor_buf[0..frame_size-1]
 */
static void alac_entropy_coder(AlacEncodeContext *s)
{
    unsigned int history = s->rc.initial_history;
    int sign_modifier = 0, i, k;
    int32_t *samples = s->predictor_buf;

    for (i = 0; i < s->avctx->frame_size;) {
        int x;

        k = av_log2((history >> 9) + 3);

        /* fold the sign into the low bit: v >= 0 -> 2v, v < 0 -> -2v - 1 */
        x  = -2 * (*samples) - 1;
        x ^= (x >> 31);

        samples++;
        i++;

        encode_scalar(s, x - sign_modifier, k, s->write_sample_size);

        history += x * s->rc.history_mult
                   - ((history * s->rc.history_mult) >> 9);

        sign_modifier = 0;
        if (x > 0xFFFF)
            history = 0xFFFF;

        if ((history < 128) && (i < s->avctx->frame_size)) {
            unsigned int block_size = 0;

            k = 7 - av_log2(history) + ((history + 16) >> 6);

            /* check the bound before dereferencing so the element just past
             * the frame is never read */
            while ((i < s->avctx->frame_size) && (*samples == 0)) {
                samples++;
                i++;
                block_size++;
            }
            encode_scalar(s, block_size, k, 16);

            /* after a run the next value is known to be non-zero, so it is
             * coded minus one */
            sign_modifier = (block_size <= 0xFFFF);

            history = 0;
        }
    }
}
330  
331 
/**
 * Write the compressed payload of a frame.
 *
 * Layout: stereo interlacing parameters, then one predictor description per
 * channel, then the entropy-coded residual of each channel in order.
 *
 * @param s encoder context; sample_buf must already hold the frame
 */
static void write_compressed_frame(AlacEncodeContext *s)
{
    int i, j;

    if (s->avctx->channels == 2)
        alac_stereo_decorrelation(s);
    put_bits(&s->pbctx, 8, s->interlacing_shift);
    put_bits(&s->pbctx, 8, s->interlacing_leftweight);

    for (i = 0; i < s->avctx->channels; i++) {
        calc_predictor_params(s, i);

        put_bits(&s->pbctx, 4, 0);  // prediction type : currently only type 0 has been RE'd
        put_bits(&s->pbctx, 4, s->lpc[i].lpc_quant);

        put_bits(&s->pbctx, 3, s->rc.rice_modifier);
        put_bits(&s->pbctx, 5, s->lpc[i].lpc_order);
        // predictor coeff. table
        for (j = 0; j < s->lpc[i].lpc_order; j++)
            put_sbits(&s->pbctx, 16, s->lpc[i].lpc_coeff[j]);
    }

    // apply lpc and entropy coding to audio samples
    for (i = 0; i < s->avctx->channels; i++) {
        alac_linear_predictor(s, i);
        alac_entropy_coder(s);
    }
}
362  
363 
/**
 * Initialise the encoder: validate the requested configuration, set up the
 * entropy coder defaults, build the codec extradata and allocate the coded
 * frame.
 *
 * All validation happens before any allocation, so no error path leaks.
 *
 * @param avctx codec context; frame_size, bits_per_coded_sample, extradata,
 *              extradata_size and coded_frame are set on success
 * @return 0 on success, -1 on invalid configuration,
 *         AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int alac_encode_init(AVCodecContext *avctx)
{
    AlacEncodeContext *s = avctx->priv_data;
    uint8_t *alac_extradata;

    avctx->frame_size            = DEFAULT_FRAME_SIZE;
    avctx->bits_per_coded_sample = DEFAULT_SAMPLE_SIZE;

    if (avctx->sample_fmt != SAMPLE_FMT_S16) {
        av_log(avctx, AV_LOG_ERROR, "only pcm_s16 input samples are supported\n");
        return -1;
    }

    /* sample_buf and lpc are sized by MAX_CHANNELS */
    if (avctx->channels < 1 || avctx->channels > MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "invalid number of channels: %d\n", avctx->channels);
        return -1;
    }

    // Set default compression level
    if (avctx->compression_level == FF_COMPRESSION_DEFAULT)
        s->compression_level = 1;
    else
        s->compression_level = av_clip(avctx->compression_level, 0, 1);

    // Initialize default Rice parameters
    s->rc.history_mult    = 40;
    s->rc.initial_history = 10;
    s->rc.k_modifier      = 14;
    s->rc.rice_modifier   = 4;

    s->max_coded_frame_size = 8 + (avctx->frame_size * avctx->channels * avctx->bits_per_coded_sample >> 3);

    s->write_sample_size = avctx->bits_per_coded_sample + avctx->channels - 1; // FIXME: consider wasted_bytes

    s->min_prediction_order = DEFAULT_MIN_PRED_ORDER;
    if (avctx->min_prediction_order >= 0) {
        if (avctx->min_prediction_order < MIN_LPC_ORDER ||
            avctx->min_prediction_order > ALAC_MAX_LPC_ORDER) {
            av_log(avctx, AV_LOG_ERROR, "invalid min prediction order: %d\n", avctx->min_prediction_order);
            return -1;
        }

        s->min_prediction_order = avctx->min_prediction_order;
    }

    s->max_prediction_order = DEFAULT_MAX_PRED_ORDER;
    if (avctx->max_prediction_order >= 0) {
        if (avctx->max_prediction_order < MIN_LPC_ORDER ||
            avctx->max_prediction_order > ALAC_MAX_LPC_ORDER) {
            av_log(avctx, AV_LOG_ERROR, "invalid max prediction order: %d\n", avctx->max_prediction_order);
            return -1;
        }

        s->max_prediction_order = avctx->max_prediction_order;
    }

    if (s->max_prediction_order < s->min_prediction_order) {
        av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
               s->min_prediction_order, s->max_prediction_order);
        return -1;
    }

    /* configuration is valid: allocate and fill the extradata */
    alac_extradata = av_mallocz(ALAC_EXTRADATA_SIZE + 1);
    if (!alac_extradata)
        return AVERROR(ENOMEM);

    AV_WB32(alac_extradata,    ALAC_EXTRADATA_SIZE);
    AV_WB32(alac_extradata+4,  MKBETAG('a','l','a','c'));
    AV_WB32(alac_extradata+12, avctx->frame_size);
    AV_WB8 (alac_extradata+17, avctx->bits_per_coded_sample);
    AV_WB8 (alac_extradata+21, avctx->channels);
    AV_WB32(alac_extradata+24, s->max_coded_frame_size);
    AV_WB32(alac_extradata+28, avctx->sample_rate * avctx->channels * avctx->bits_per_coded_sample); // average bitrate
    AV_WB32(alac_extradata+32, avctx->sample_rate);

    // Set relevant extradata fields
    if (s->compression_level > 0) {
        AV_WB8(alac_extradata+18, s->rc.history_mult);
        AV_WB8(alac_extradata+19, s->rc.initial_history);
        AV_WB8(alac_extradata+20, s->rc.k_modifier);
    }

    avctx->coded_frame = avcodec_alloc_frame();
    if (!avctx->coded_frame) {
        av_free(alac_extradata);
        return AVERROR(ENOMEM);
    }
    avctx->coded_frame->key_frame = 1;

    avctx->extradata      = alac_extradata;
    avctx->extradata_size = ALAC_EXTRADATA_SIZE;

    s->avctx = avctx;
    dsputil_init(&s->dspctx, avctx);

    return 0;
}
447  
448 
/**
 * Encode one frame of interleaved 16-bit samples.
 *
 * The frame is first coded according to the compression level. If the result
 * exceeds max_coded_frame_size, the output is restarted and the frame is
 * written verbatim instead.
 *
 * @param avctx    codec context
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes; must be at least
 *                 twice max_coded_frame_size
 * @param data     interleaved int16_t input, frame_size * channels samples
 * @return number of bytes written, or -1 on error
 */
static int alac_encode_frame(AVCodecContext *avctx, uint8_t *frame,
                             int buf_size, void *data)
{
    AlacEncodeContext *s = avctx->priv_data;
    PutBitContext *pb = &s->pbctx;
    int i, out_bytes, verbatim_flag = 0;

    if (avctx->frame_size > DEFAULT_FRAME_SIZE) {
        av_log(avctx, AV_LOG_ERROR, "input frame size exceeded\n");
        return -1;
    }

    if (buf_size < 2 * s->max_coded_frame_size) {
        av_log(avctx, AV_LOG_ERROR, "buffer size is too small\n");
        return -1;
    }

verbatim:
    /* (re)start writing at the beginning of the output buffer */
    init_put_bits(pb, frame, buf_size);

    if ((s->compression_level == 0) || verbatim_flag) {
        // Verbatim mode
        int16_t *samples = data;
        write_frame_header(s, 1);
        for (i = 0; i < avctx->frame_size * avctx->channels; i++)
            put_sbits(pb, 16, *samples++);
    } else {
        init_sample_buffers(s, data);
        write_frame_header(s, 0);
        write_compressed_frame(s);
    }

    put_bits(pb, 3, 7);     /* 3-bit tag closing the frame */
    flush_put_bits(pb);
    out_bytes = put_bits_count(pb) >> 3;

    if (out_bytes > s->max_coded_frame_size) {
        /* frame too large. use verbatim mode */
        if (verbatim_flag || (s->compression_level == 0)) {
            /* still too large. must be an error. */
            av_log(avctx, AV_LOG_ERROR, "error encoding frame\n");
            return -1;
        }
        verbatim_flag = 1;
        goto verbatim;
    }

    return out_bytes;
}
498  
499 
/**
 * Release what alac_encode_init() allocated on the codec context.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int alac_encode_close(AVCodecContext *avctx)
{
    av_freep(&avctx->extradata);
    avctx->extradata_size = 0;
    av_freep(&avctx->coded_frame);
    return 0;
}
506  
507 
AVCodec alac_encoder = { 
508 
"alac",

509 
CODEC_TYPE_AUDIO, 
510 
CODEC_ID_ALAC, 
511 
sizeof(AlacEncodeContext),

512 
alac_encode_init, 
513 
alac_encode_frame, 
514 
alac_encode_close, 
515 
.capabilities = CODEC_CAP_SMALL_LAST_FRAME, 
516 
.long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),

517 
}; 