/*
 * Atrac 3 compatible decoder
 * Copyright (c) 2006-2008 Maxim Poliakovski
 * Copyright (c) 2006-2008 Benjamin Larsson
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file libavcodec/atrac3.c
 * Atrac 3 compatible decoder.
 * This decoder handles Sony's ATRAC3 data.
 *
 * Container formats used to store atrac 3 data:
 * RealMedia (.rm), RIFF WAV (.wav, .at3), Sony OpenMG (.oma, .aa3).
 *
 * To use this decoder, a calling application must supply the extradata
 * bytes provided in the containers above.
 */

#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <assert.h> /* for the assert() call in reverseMatrixing() */

#include "avcodec.h"
#include "get_bits.h"
#include "dsputil.h"
#include "bytestream.h"

#include "atrac3data.h"

#define JOINT_STEREO 0x12
#define STEREO       0x2

/* These structures are needed to store the parsed gain control data. */
typedef struct {
    int   num_gain_data;
    int   levcode[8];
    int   loccode[8];
} gain_info;

typedef struct {
    gain_info gBlock[4];
} gain_block;

typedef struct {
    int   pos;
    int   numCoefs;
    float coef[8];
} tonal_component;

typedef struct {
    int             bandsCoded;
    int             numComponents;
    tonal_component components[64];
    float           prevFrame[1024];
    int             gcBlkSwitch;
    gain_block      gainBlock[2];

    DECLARE_ALIGNED_16(float, spectrum[1024]);
    DECLARE_ALIGNED_16(float, IMDCT_buf[1024]);

    float           delayBuf1[46]; ///<qmf delay buffers
    float           delayBuf2[46];
    float           delayBuf3[46];
} channel_unit;

typedef struct {
    GetBitContext gb;
    //@{
    /** stream data */
    int           channels;
    int           codingMode;
    int           bit_rate;
    int           sample_rate;
    int           samples_per_channel;
    int           samples_per_frame;

    int           bits_per_frame;
    int           bytes_per_frame;
    int           pBs;
    channel_unit* pUnits;
    //@}
    //@{
    /** joint-stereo related variables */
    int           matrix_coeff_index_prev[4];
    int           matrix_coeff_index_now[4];
    int           matrix_coeff_index_next[4];
    int           weighting_delay[6];
    //@}
    //@{
    /** data buffers */
    float         outSamples[2048];
    uint8_t*      decoded_bytes_buffer;
    float         tempBuf[1070];
    //@}
    //@{
    /** extradata */
    int           atrac3version;
    int           delay;
    int           scrambled_stream;
    int           frame_factor;
    //@}
} ATRAC3Context;

static DECLARE_ALIGNED_16(float, mdct_window[512]);
static float       qmf_window[48];
static VLC         spectral_coeff_tab[7];
static float       SFTable[64];
static float       gain_tab1[16];
static float       gain_tab2[31];
static MDCTContext mdct_ctx;
static DSPContext  dsp;

/* quadrature mirror synthesis filter */

/**
 * Quadrature mirror synthesis filter.
 *
 * @param inlo     lower part of spectrum
 * @param inhi     higher part of spectrum
 * @param nIn      size of spectrum buffer
 * @param pOut     out buffer
 * @param delayBuf delay buffer
 * @param temp     temp buffer
 */
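
/* Implementation note: this is a 48-tap polyphase QMF synthesis bank. The two
 * half-spectra are first combined into sum/difference pairs, which are then
 * convolved with the even and odd taps of qmf_window[], producing two
 * interleaved output samples per step; the last 46 input values are kept in
 * delayBuf for the next call. */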
static void iqmf (float *inlo, float *inhi, unsigned int nIn, float *pOut, float *delayBuf, float *temp)
{
    int i, j;
    float *p1, *p3;

    memcpy(temp, delayBuf, 46*sizeof(float));

    p3 = temp + 46;

    /* loop1 */
    for(i=0; i<nIn; i+=2){
        p3[2*i+0] = inlo[i  ] + inhi[i  ];
        p3[2*i+1] = inlo[i  ] - inhi[i  ];
        p3[2*i+2] = inlo[i+1] + inhi[i+1];
        p3[2*i+3] = inlo[i+1] - inhi[i+1];
    }

    /* loop2 */
    p1 = temp;
    for (j = nIn; j != 0; j--) {
        float s1 = 0.0;
        float s2 = 0.0;

        for (i = 0; i < 48; i += 2) {
            s1 += p1[i]   * qmf_window[i];
            s2 += p1[i+1] * qmf_window[i+1];
        }

        pOut[0] = s2;
        pOut[1] = s1;

        p1   += 2;
        pOut += 2;
    }

    /* Update the delay buffer. */
    memcpy(delayBuf, temp + nIn*2, 46*sizeof(float));
}

/**
 * Regular 512-point IMDCT without overlapping, with the exception of the swapping of odd bands
 * caused by the reverse spectra of the QMF.
 *
 * @param pInput   float input
 * @param pOutput  float output
 * @param odd_band 1 if the band is an odd band
 */
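
/* Note: IMLT maps the 256 spectral coefficients of one band to 512 windowed
 * time-domain samples; the overlap-add with the previous frame is done later
 * in gainCompensateAndOverlap(), so only the windowing is applied here. */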
static void IMLT(float *pInput, float *pOutput, int odd_band)
{
    int i;

    if (odd_band) {
        /**
         * Reverse the odd bands before IMDCT; this is an effect of the QMF transform,
         * or it gives better compression to do it this way.
         * FIXME: It should be possible to handle this in ff_imdct_calc;
         * for that to happen, a modification of the prerotation step of
         * all SIMD code and C code is needed.
         * Or fix the functions before so they generate a pre-reversed spectrum.
         */

        for (i=0; i<128; i++)
            FFSWAP(float, pInput[i], pInput[255-i]);
    }

    ff_imdct_calc(&mdct_ctx, pOutput, pInput);

    /* Perform windowing on the output. */
    dsp.vector_fmul(pOutput, mdct_window, 512);

}

/**
 * Atrac 3 indata descrambling, only used for data coming from the rm container
 *
 * @param inbuffer pointer to 8 bit array of indata
 * @param out      pointer to 8 bit array of outdata
 * @param bytes    amount of bytes
 */
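
/* Note: RealMedia streams are scrambled by XORing each 32-bit word with the
 * constant 0x537F6103. The key is rotated by the byte misalignment of the
 * input pointer so the XOR can be done on aligned 32-bit loads; a non-zero
 * offset is only reported via av_log below, not compensated in the output. */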
static int decode_bytes(const uint8_t* inbuffer, uint8_t* out, int bytes){
    int i, off;
    uint32_t c;
    const uint32_t* buf;
    uint32_t* obuf = (uint32_t*) out;

    off = (intptr_t)inbuffer & 3;
    buf = (const uint32_t*) (inbuffer - off);
    c = be2me_32((0x537F6103 >> (off*8)) | (0x537F6103 << (32-(off*8))));
    bytes += 3 + off;
    for (i = 0; i < bytes/4; i++)
        obuf[i] = c ^ buf[i];

    if (off)
        av_log(NULL,AV_LOG_DEBUG,"Offset of %d not handled, post sample on ffmpeg-dev.\n",off);

    return off;
}

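/* Note: the synthesis window computed below is derived from the encoder window
 * enc[i] = (sin(((i + 0.5) / 256 - 0.5) * PI) + 1) / 2 as
 * mdct_window[i] = enc[i] / (enc[i]^2 + enc[255-i]^2), mirrored to 512 taps,
 * which makes the product of analysis and synthesis windows sum to one across
 * the mirrored halves (the usual MDCT reconstruction condition). */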
static av_cold void init_atrac3_transforms(ATRAC3Context *q) {
    float enc_window[256];
    float s;
    int i;

    /* Generate the mdct window, for details see
     * http://wiki.multimedia.cx/index.php?title=RealAudio_atrc#Windows */
    for (i=0 ; i<256; i++)
        enc_window[i] = (sin(((i + 0.5) / 256.0 - 0.5) * M_PI) + 1.0) * 0.5;

    if (!mdct_window[0])
        for (i=0 ; i<256; i++) {
            mdct_window[i] = enc_window[i]/(enc_window[i]*enc_window[i] + enc_window[255-i]*enc_window[255-i]);
            mdct_window[511-i] = mdct_window[i];
        }

    /* Generate the QMF window. */
    for (i=0 ; i<24; i++) {
        s = qmf_48tap_half[i] * 2.0;
        qmf_window[i] = s;
        qmf_window[47 - i] = s;
    }

    /* Initialize the MDCT transform. */
    ff_mdct_init(&mdct_ctx, 9, 1);
}

/**
 * Atrac3 uninit, free all allocated memory
 */

static av_cold int atrac3_decode_close(AVCodecContext *avctx)
{
    ATRAC3Context *q = avctx->priv_data;

    av_free(q->pUnits);
    av_free(q->decoded_bytes_buffer);

    return 0;
}

/**
 * Mantissa decoding
 *
 * @param gb         the GetBit context
 * @param selector   which table the output values are coded with
 * @param codingFlag constant length coding or variable length coding
 * @param mantissas  mantissa output table
 * @param numCodes   amount of values to get
 */
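
/* Note on the two coding schemes handled below: selector 1 packs two mantissas
 * per code word, so numCodes is halved and each decoded symbol expands to a
 * value pair (via seTab_0[] for CLC or decTable1[] for VLC); selectors 2-7
 * decode one signed mantissa per code word. */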
static void readQuantSpectralCoeffs (GetBitContext *gb, int selector, int codingFlag, int* mantissas, int numCodes)
{
    int numBits, cnt, code, huffSymb;

    if (selector == 1)
        numCodes /= 2;

    if (codingFlag != 0) {
        /* constant length coding (CLC) */
        numBits = CLCLengthTab[selector];

        if (selector > 1) {
            for (cnt = 0; cnt < numCodes; cnt++) {
                if (numBits)
                    code = get_sbits(gb, numBits);
                else
                    code = 0;
                mantissas[cnt] = code;
            }
        } else {
            for (cnt = 0; cnt < numCodes; cnt++) {
                if (numBits)
                    code = get_bits(gb, numBits); //numBits is always 4 in this case
                else
                    code = 0;
                mantissas[cnt*2] = seTab_0[code >> 2];
                mantissas[cnt*2+1] = seTab_0[code & 3];
            }
        }
    } else {
        /* variable length coding (VLC) */
        if (selector != 1) {
            for (cnt = 0; cnt < numCodes; cnt++) {
                huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3);
                huffSymb += 1;
                code = huffSymb >> 1;
                if (huffSymb & 1)
                    code = -code;
                mantissas[cnt] = code;
            }
        } else {
            for (cnt = 0; cnt < numCodes; cnt++) {
                huffSymb = get_vlc2(gb, spectral_coeff_tab[selector-1].table, spectral_coeff_tab[selector-1].bits, 3);
                mantissas[cnt*2] = decTable1[huffSymb*2];
                mantissas[cnt*2+1] = decTable1[huffSymb*2+1];
            }
        }
    }
}

/**
 * Restore the quantized band spectrum coefficients
 *
 * @param gb   the GetBit context
 * @param pOut decoded band spectrum
 * @return     the number of coded subbands (used as a fix for broken specification/files)
 */
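
/* Note: subbandTab[] holds the first spectral line of each of the up to 32
 * subbands; a subband whose VLC selector is 0 carries no data and is simply
 * zeroed. Each coded coefficient is reconstructed as
 * mantissa * SFTable[sf_index] * iMaxQuant[selector]. */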
static int decodeSpectrum (GetBitContext *gb, float *pOut)
{
    int   numSubbands, codingMode, cnt, first, last, subbWidth, *pIn;
    int   subband_vlc_index[32], SF_idxs[32];
    int   mantissas[128];
    float SF;

    numSubbands = get_bits(gb, 5); // number of coded subbands
    codingMode = get_bits1(gb);    // coding Mode: 0 - VLC/ 1-CLC

    /* Get the VLC selector table for the subbands, 0 means not coded. */
    for (cnt = 0; cnt <= numSubbands; cnt++)
        subband_vlc_index[cnt] = get_bits(gb, 3);

    /* Read the scale factor indexes from the stream. */
    for (cnt = 0; cnt <= numSubbands; cnt++) {
        if (subband_vlc_index[cnt] != 0)
            SF_idxs[cnt] = get_bits(gb, 6);
    }

    for (cnt = 0; cnt <= numSubbands; cnt++) {
        first = subbandTab[cnt];
        last = subbandTab[cnt+1];

        subbWidth = last - first;

        if (subband_vlc_index[cnt] != 0) {
            /* Decode spectral coefficients for this subband. */
            /* TODO: This can be done faster if several blocks share the
             * same VLC selector (subband_vlc_index) */
            readQuantSpectralCoeffs (gb, subband_vlc_index[cnt], codingMode, mantissas, subbWidth);

            /* Decode the scale factor for this subband. */
            SF = SFTable[SF_idxs[cnt]] * iMaxQuant[subband_vlc_index[cnt]];

            /* Inverse quantize the coefficients. */
            for (pIn=mantissas ; first<last; first++, pIn++)
                pOut[first] = *pIn * SF;
        } else {
            /* This subband was not coded, so zero the entire subband. */
            memset(pOut+first, 0, subbWidth*sizeof(float));
        }
    }

    /* Clear the subbands that were not coded. */
    first = subbandTab[cnt];
    memset(pOut+first, 0, (1024 - first) * sizeof(float));
    return numSubbands;
}

/**
 * Restore the quantized tonal components
 *
 * @param gb         the GetBit context
 * @param pComponent tone component
 * @param numBands   amount of coded bands
 */
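
/* Note: tonal components are narrow groups of up to 8 spectral lines coded
 * separately from the band spectrum; each carries its own position, scale
 * factor index and quantization step, and is later added on top of the
 * regular spectrum by addTonalComponents(). */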
static int decodeTonalComponents (GetBitContext *gb, tonal_component *pComponent, int numBands)
{
    int i,j,k,cnt;
    int components, coding_mode_selector, coding_mode, coded_values_per_component;
    int sfIndx, coded_values, max_coded_values, quant_step_index, coded_components;
    int band_flags[4], mantissa[8];
    float *pCoef;
    float scalefactor;
    int component_count = 0;

    components = get_bits(gb,5);

    /* no tonal components */
    if (components == 0)
        return 0;

    coding_mode_selector = get_bits(gb,2);
    if (coding_mode_selector == 2)
        return -1;

    coding_mode = coding_mode_selector & 1;

    for (i = 0; i < components; i++) {
        for (cnt = 0; cnt <= numBands; cnt++)
            band_flags[cnt] = get_bits1(gb);

        coded_values_per_component = get_bits(gb,3);

        quant_step_index = get_bits(gb,3);
        if (quant_step_index <= 1)
            return -1;

        if (coding_mode_selector == 3)
            coding_mode = get_bits1(gb);

        for (j = 0; j < (numBands + 1) * 4; j++) {
            if (band_flags[j >> 2] == 0)
                continue;

            coded_components = get_bits(gb,3);

            for (k=0; k<coded_components; k++) {
                sfIndx = get_bits(gb,6);
                pComponent[component_count].pos = j * 64 + (get_bits(gb,6));
                max_coded_values = 1024 - pComponent[component_count].pos;
                coded_values = coded_values_per_component + 1;
                coded_values = FFMIN(max_coded_values,coded_values);

                scalefactor = SFTable[sfIndx] * iMaxQuant[quant_step_index];

                readQuantSpectralCoeffs(gb, quant_step_index, coding_mode, mantissa, coded_values);

                pComponent[component_count].numCoefs = coded_values;

                /* inverse quant */
                pCoef = pComponent[component_count].coef;
                for (cnt = 0; cnt < coded_values; cnt++)
                    pCoef[cnt] = mantissa[cnt] * scalefactor;

                component_count++;
            }
        }
    }

    return component_count;
}

/**
 * Decode gain parameters for the coded bands
 *
 * @param gb       the GetBit context
 * @param pGb      the gainblock for the current band
 * @param numBands amount of coded bands
 */
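
/* Note: for each coded QMF band the stream carries up to 7 (level, location)
 * pairs; locations are 5-bit positions (in units of 8 samples within the
 * 256-sample band) and must be strictly increasing, otherwise the frame is
 * rejected. */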
static int decodeGainControl (GetBitContext *gb, gain_block *pGb, int numBands)
{
    int i, cf, numData;
    int *pLevel, *pLoc;

    gain_info *pGain = pGb->gBlock;

    for (i=0 ; i<=numBands; i++)
    {
        numData = get_bits(gb,3);
        pGain[i].num_gain_data = numData;
        pLevel = pGain[i].levcode;
        pLoc   = pGain[i].loccode;

        for (cf = 0; cf < numData; cf++){
            pLevel[cf]= get_bits(gb,4);
            pLoc  [cf]= get_bits(gb,5);
            if(cf && pLoc[cf] <= pLoc[cf-1])
                return -1;
        }
    }

    /* Clear the unused blocks. */
    for (; i<4 ; i++)
        pGain[i].num_gain_data = 0;

    return 0;
}

/**
 * Apply gain parameters and perform the MDCT overlapping part
 *
 * @param pIn    input float buffer
 * @param pPrev  previous float buffer to perform overlap against
 * @param pOut   output float buffer
 * @param pGain1 current band gain info
 * @param pGain2 next band gain info
 */
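
/* Note: gain compensation works on 256-sample blocks. Gain levels index
 * gain_tab1[] (powers of two, 2^(4-level)) and the per-sample ramp comes from
 * gain_tab2[] (2^(-0.125*delta)), so each 8-sample interpolation segment moves
 * smoothly from one level to the next before the IMDCT halves are
 * overlap-added with the previous frame. */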
static void gainCompensateAndOverlap (float *pIn, float *pPrev, float *pOut, gain_info *pGain1, gain_info *pGain2)
{
    /* gain compensation function */
    float gain1, gain2, gain_inc;
    int   cnt, numdata, nsample, startLoc, endLoc;


    if (pGain2->num_gain_data == 0)
        gain1 = 1.0;
    else
        gain1 = gain_tab1[pGain2->levcode[0]];

    if (pGain1->num_gain_data == 0) {
        for (cnt = 0; cnt < 256; cnt++)
            pOut[cnt] = pIn[cnt] * gain1 + pPrev[cnt];
    } else {
        numdata = pGain1->num_gain_data;
        pGain1->loccode[numdata] = 32;
        pGain1->levcode[numdata] = 4;

        nsample = 0; // current sample = 0

        for (cnt = 0; cnt < numdata; cnt++) {
            startLoc = pGain1->loccode[cnt] * 8;
            endLoc = startLoc + 8;

            gain2 = gain_tab1[pGain1->levcode[cnt]];
            gain_inc = gain_tab2[(pGain1->levcode[cnt+1] - pGain1->levcode[cnt])+15];

            /* interpolate */
            for (; nsample < startLoc; nsample++)
                pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;

            /* interpolation is done over eight samples */
            for (; nsample < endLoc; nsample++) {
                pOut[nsample] = (pIn[nsample] * gain1 + pPrev[nsample]) * gain2;
                gain2 *= gain_inc;
            }
        }

        for (; nsample < 256; nsample++)
            pOut[nsample] = (pIn[nsample] * gain1) + pPrev[nsample];
    }

    /* Delay for the overlapping part. */
    memcpy(pPrev, &pIn[256], 256*sizeof(float));
}

/**
 * Combine the tonal band spectrum and regular band spectrum
 * Return position of the last tonal coefficient
 *
 * @param pSpectrum     output spectrum buffer
 * @param numComponents amount of tonal components
 * @param pComponent    tonal components for this band
 */

static int addTonalComponents (float *pSpectrum, int numComponents, tonal_component *pComponent)
{
    int cnt, i, lastPos = -1;
    float *pIn, *pOut;

    for (cnt = 0; cnt < numComponents; cnt++){
        lastPos = FFMAX(pComponent[cnt].pos + pComponent[cnt].numCoefs, lastPos);
        pIn = pComponent[cnt].coef;
        pOut = &(pSpectrum[pComponent[cnt].pos]);

        for (i=0 ; i<pComponent[cnt].numCoefs ; i++)
            pOut[i] += pIn[i];
    }

    return lastPos;
}


#define INTERPOLATE(old,new,nsample) ((old) + (nsample)*0.125*((new)-(old)))

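/* Note on joint stereo: a 2-bit matrix selector is transmitted per QMF band
 * and picks how the two sound units are recombined below: selectors 2 and 3
 * reduce to a plain sum/difference pair, while 0 and 1 use asymmetric scaled
 * combinations of the two units. When the selector changes from one frame to
 * the next, the first 8 samples of the band are cross-faded between the old
 * and new matrixCoeffs[] entries using INTERPOLATE(). */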
static void reverseMatrixing(float *su1, float *su2, int *pPrevCode, int *pCurrCode)
{
    int i, band, nsample, s1, s2;
    float c1, c2;
    float mc1_l, mc1_r, mc2_l, mc2_r;

    for (i=0,band = 0; band < 4*256; band+=256,i++) {
        s1 = pPrevCode[i];
        s2 = pCurrCode[i];
        nsample = 0;

        if (s1 != s2) {
            /* Selector value changed, interpolation needed. */
            mc1_l = matrixCoeffs[s1*2];
            mc1_r = matrixCoeffs[s1*2+1];
            mc2_l = matrixCoeffs[s2*2];
            mc2_r = matrixCoeffs[s2*2+1];

            /* Interpolation is done over the first eight samples. */
            for(; nsample < 8; nsample++) {
                c1 = su1[band+nsample];
                c2 = su2[band+nsample];
                c2 = c1 * INTERPOLATE(mc1_l,mc2_l,nsample) + c2 * INTERPOLATE(mc1_r,mc2_r,nsample);
                su1[band+nsample] = c2;
                su2[band+nsample] = c1 * 2.0 - c2;
            }
        }

        /* Apply the matrix without interpolation. */
        switch (s2) {
        case 0:     /* M/S decoding */
            for (; nsample < 256; nsample++) {
                c1 = su1[band+nsample];
                c2 = su2[band+nsample];
                su1[band+nsample] = c2 * 2.0;
                su2[band+nsample] = (c1 - c2) * 2.0;
            }
            break;

        case 1:
            for (; nsample < 256; nsample++) {
                c1 = su1[band+nsample];
                c2 = su2[band+nsample];
                su1[band+nsample] = (c1 + c2) * 2.0;
                su2[band+nsample] = c2 * -2.0;
            }
            break;
        case 2:
        case 3:
            for (; nsample < 256; nsample++) {
                c1 = su1[band+nsample];
                c2 = su2[band+nsample];
                su1[band+nsample] = c1 + c2;
                su2[band+nsample] = c1 - c2;
            }
            break;
        default:
            assert(0);
        }
    }
}

static void getChannelWeights (int indx, int flag, float ch[2]){

    if (indx == 7) {
        ch[0] = 1.0;
        ch[1] = 1.0;
    } else {
        ch[0] = (float)(indx & 7) / 7.0;
        ch[1] = sqrt(2 - ch[0]*ch[0]);
        if(flag)
            FFSWAP(float, ch[0], ch[1]);
    }
}

static void channelWeighting (float *su1, float *su2, int *p3)
{
    int   band, nsample;
    /* w[x][y] y=0 is left y=1 is right */
    float w[2][2];

    if (p3[1] != 7 || p3[3] != 7){
        getChannelWeights(p3[1], p3[0], w[0]);
        getChannelWeights(p3[3], p3[2], w[1]);

        for(band = 1; band < 4; band++) {
            /* scale the channels by the weights */
            for(nsample = 0; nsample < 8; nsample++) {
                su1[band*256+nsample] *= INTERPOLATE(w[0][0], w[0][1], nsample);
                su2[band*256+nsample] *= INTERPOLATE(w[1][0], w[1][1], nsample);
            }

            for(; nsample < 256; nsample++) {
                su1[band*256+nsample] *= w[1][0];
                su2[band*256+nsample] *= w[1][1];
            }
        }
    }
}


/**
 * Decode a Sound Unit
 *
 * @param gb         the GetBit context
 * @param pSnd       the channel unit to be used
 * @param pOut       the decoded samples before IQMF in float representation
 * @param channelNum channel number
 * @param codingMode the coding mode (JOINT_STEREO or regular stereo/mono)
 */
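
/* Note: a sound unit is decoded in this order: sound unit id, number of coded
 * QMF bands, gain control data, tonal components, band spectrum. The spectrum
 * and tonal components are merged, run through the per-band IMDCT, and then
 * gain-compensated and overlap-added into pOut. Two gain blocks are kept and
 * swapped each frame so the previous frame's gain data stays available. */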
static int decodeChannelSoundUnit (ATRAC3Context *q, GetBitContext *gb, channel_unit *pSnd, float *pOut, int channelNum, int codingMode)
{
    int band, result=0, numSubbands, lastTonal, numBands;

    if (codingMode == JOINT_STEREO && channelNum == 1) {
        if (get_bits(gb,2) != 3) {
            av_log(NULL,AV_LOG_ERROR,"JS mono Sound Unit id != 3.\n");
            return -1;
        }
    } else {
        if (get_bits(gb,6) != 0x28) {
            av_log(NULL,AV_LOG_ERROR,"Sound Unit id != 0x28.\n");
            return -1;
        }
    }

    /* number of coded QMF bands */
    pSnd->bandsCoded = get_bits(gb,2);

    result = decodeGainControl (gb, &(pSnd->gainBlock[pSnd->gcBlkSwitch]), pSnd->bandsCoded);
    if (result) return result;

    pSnd->numComponents = decodeTonalComponents (gb, pSnd->components, pSnd->bandsCoded);
    if (pSnd->numComponents == -1) return -1;

    numSubbands = decodeSpectrum (gb, pSnd->spectrum);

    /* Merge the decoded spectrum and tonal components. */
    lastTonal = addTonalComponents (pSnd->spectrum, pSnd->numComponents, pSnd->components);


    /* calculate number of used MLT/QMF bands according to the amount of coded spectral lines */
    numBands = (subbandTab[numSubbands] - 1) >> 8;
    if (lastTonal >= 0)
        numBands = FFMAX((lastTonal + 256) >> 8, numBands);


    /* Reconstruct time domain samples. */
    for (band=0; band<4; band++) {
        /* Perform the IMDCT step without overlapping. */
        if (band <= numBands) {
            IMLT(&(pSnd->spectrum[band*256]), pSnd->IMDCT_buf, band&1);
        } else
            memset(pSnd->IMDCT_buf, 0, 512 * sizeof(float));

        /* gain compensation and overlapping */
        gainCompensateAndOverlap (pSnd->IMDCT_buf, &(pSnd->prevFrame[band*256]), &(pOut[band*256]),
                                  &((pSnd->gainBlock[1 - (pSnd->gcBlkSwitch)]).gBlock[band]),
                                  &((pSnd->gainBlock[pSnd->gcBlkSwitch]).gBlock[band]));
    }

    /* Swap the gain control buffers for the next frame. */
    pSnd->gcBlkSwitch ^= 1;

    return 0;
}

/**
 * Frame handling
 *
 * @param q       Atrac3 private context
 * @param databuf the input data
 */
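
/* Note: in joint-stereo streams the second sound unit is stored at the end of
 * the frame in reversed byte order. The code below reverses the frame bytes
 * into decoded_bytes_buffer, skips the leading 0xF8 sync bytes, then reads the
 * weighting and matrix selectors before decoding sound unit 2. */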
static int decodeFrame(ATRAC3Context *q, const uint8_t* databuf)
{
    int result, i;
    float *p1, *p2, *p3, *p4;
    uint8_t *ptr1;

    if (q->codingMode == JOINT_STEREO) {

        /* channel coupling mode */
        /* decode Sound Unit 1 */
        init_get_bits(&q->gb,databuf,q->bits_per_frame);

        result = decodeChannelSoundUnit(q,&q->gb, q->pUnits, q->outSamples, 0, JOINT_STEREO);
        if (result != 0)
            return (result);

        /* Framedata of the su2 in the joint-stereo mode is encoded in
         * reverse byte order so we need to swap it first. */
        if (databuf == q->decoded_bytes_buffer) {
            uint8_t *ptr2 = q->decoded_bytes_buffer+q->bytes_per_frame-1;
            ptr1 = q->decoded_bytes_buffer;
            for (i = 0; i < (q->bytes_per_frame/2); i++, ptr1++, ptr2--) {
                FFSWAP(uint8_t,*ptr1,*ptr2);
            }
        } else {
            const uint8_t *ptr2 = databuf+q->bytes_per_frame-1;
            for (i = 0; i < q->bytes_per_frame; i++)
                q->decoded_bytes_buffer[i] = *ptr2--;
        }

        /* Skip the sync codes (0xF8). */
        ptr1 = q->decoded_bytes_buffer;
        for (i = 4; *ptr1 == 0xF8; i++, ptr1++) {
            if (i >= q->bytes_per_frame)
                return -1;
        }


        /* set the bitstream reader at the start of the second Sound Unit*/
        init_get_bits(&q->gb,ptr1,q->bits_per_frame);

        /* Fill the Weighting coeffs delay buffer */
        memmove(q->weighting_delay,&(q->weighting_delay[2]),4*sizeof(int));
        q->weighting_delay[4] = get_bits1(&q->gb);
        q->weighting_delay[5] = get_bits(&q->gb,3);

        for (i = 0; i < 4; i++) {
            q->matrix_coeff_index_prev[i] = q->matrix_coeff_index_now[i];
            q->matrix_coeff_index_now[i] = q->matrix_coeff_index_next[i];
            q->matrix_coeff_index_next[i] = get_bits(&q->gb,2);
        }

        /* Decode Sound Unit 2. */
        result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[1], &q->outSamples[1024], 1, JOINT_STEREO);
        if (result != 0)
            return (result);

        /* Reconstruct the channel coefficients. */
        reverseMatrixing(q->outSamples, &q->outSamples[1024], q->matrix_coeff_index_prev, q->matrix_coeff_index_now);

        channelWeighting(q->outSamples, &q->outSamples[1024], q->weighting_delay);

    } else {
        /* normal stereo mode or mono */
        /* Decode the channel sound units. */
        for (i=0 ; i<q->channels ; i++) {

            /* Set the bitstream reader at the start of a channel sound unit. */
            init_get_bits(&q->gb, databuf+((i*q->bytes_per_frame)/q->channels), (q->bits_per_frame)/q->channels);

            result = decodeChannelSoundUnit(q,&q->gb, &q->pUnits[i], &q->outSamples[i*1024], i, q->codingMode);
            if (result != 0)
                return (result);
        }
    }

    /* Apply the iQMF synthesis filter. */
    p1= q->outSamples;
    for (i=0 ; i<q->channels ; i++) {
        p2= p1+256;
        p3= p2+256;
        p4= p3+256;
        iqmf (p1, p2, 256, p1, q->pUnits[i].delayBuf1, q->tempBuf);
        iqmf (p4, p3, 256, p3, q->pUnits[i].delayBuf2, q->tempBuf);
        iqmf (p1, p3, 512, p1, q->pUnits[i].delayBuf3, q->tempBuf);
        p1 +=1024;
    }

    return 0;
}


/**
 * Atrac frame decoding
 *
 * @param avctx pointer to the AVCodecContext
 */

static int atrac3_decode_frame(AVCodecContext *avctx,
                               void *data, int *data_size,
                               AVPacket *avpkt) {
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    ATRAC3Context *q = avctx->priv_data;
    int result = 0, i;
    const uint8_t* databuf;
    int16_t* samples = data;

    if (buf_size < avctx->block_align)
        return buf_size;

    /* Check if we need to descramble and what buffer to pass on. */
    if (q->scrambled_stream) {
        decode_bytes(buf, q->decoded_bytes_buffer, avctx->block_align);
        databuf = q->decoded_bytes_buffer;
    } else {
        databuf = buf;
    }

    result = decodeFrame(q, databuf);

    if (result != 0) {
        av_log(NULL,AV_LOG_ERROR,"Frame decoding error!\n");
        return -1;
    }

    if (q->channels == 1) {
        /* mono */
        for (i = 0; i<1024; i++)
            samples[i] = av_clip_int16(round(q->outSamples[i]));
        *data_size = 1024 * sizeof(int16_t);
    } else {
        /* stereo */
        for (i = 0; i < 1024; i++) {
            samples[i*2] = av_clip_int16(round(q->outSamples[i]));
            samples[i*2+1] = av_clip_int16(round(q->outSamples[1024+i]));
        }
        *data_size = 2048 * sizeof(int16_t);
    }

    return avctx->block_align;
}


/**
 * Atrac3 initialization
 *
 * @param avctx pointer to the AVCodecContext
 */
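
/* Note on the extradata layouts handled below:
 *  - 14 bytes (WAV style): le16 unknown (always 1), le32 samples per channel,
 *    le16 coding mode, le16 coding mode copy, le16 frame factor,
 *    le16 unknown (always 0);
 *  - 10 bytes (RealMedia style): be32 version, be16 samples per frame,
 *    be16 delay, be16 coding mode; these streams are also scrambled. */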
static av_cold int atrac3_decode_init(AVCodecContext *avctx)
{
    int i;
    const uint8_t *edata_ptr = avctx->extradata;
    ATRAC3Context *q = avctx->priv_data;

    /* Take data from the AVCodecContext (RM container). */
    q->sample_rate = avctx->sample_rate;
    q->channels = avctx->channels;
    q->bit_rate = avctx->bit_rate;
    q->bits_per_frame = avctx->block_align * 8;
    q->bytes_per_frame = avctx->block_align;

    /* Take care of the codec-specific extradata. */
    if (avctx->extradata_size == 14) {
        /* Parse the extradata, WAV format */
        av_log(avctx,AV_LOG_DEBUG,"[0-1] %d\n",bytestream_get_le16(&edata_ptr));  //Unknown value always 1
        q->samples_per_channel = bytestream_get_le32(&edata_ptr);
        q->codingMode = bytestream_get_le16(&edata_ptr);
        av_log(avctx,AV_LOG_DEBUG,"[8-9] %d\n",bytestream_get_le16(&edata_ptr));  //Dupe of coding mode
        q->frame_factor = bytestream_get_le16(&edata_ptr);  //Unknown always 1
        av_log(avctx,AV_LOG_DEBUG,"[12-13] %d\n",bytestream_get_le16(&edata_ptr));  //Unknown always 0

        /* setup */
        q->samples_per_frame = 1024 * q->channels;
        q->atrac3version = 4;
        q->delay = 0x88E;
        if (q->codingMode)
            q->codingMode = JOINT_STEREO;
        else
            q->codingMode = STEREO;

        q->scrambled_stream = 0;

        if ((q->bytes_per_frame == 96*q->channels*q->frame_factor) || (q->bytes_per_frame == 152*q->channels*q->frame_factor) || (q->bytes_per_frame == 192*q->channels*q->frame_factor)) {
        } else {
            av_log(avctx,AV_LOG_ERROR,"Unknown frame/channel/frame_factor configuration %d/%d/%d\n", q->bytes_per_frame, q->channels, q->frame_factor);
            return -1;
        }

    } else if (avctx->extradata_size == 10) {
        /* Parse the extradata, RM format. */
        q->atrac3version = bytestream_get_be32(&edata_ptr);
        q->samples_per_frame = bytestream_get_be16(&edata_ptr);
        q->delay = bytestream_get_be16(&edata_ptr);
        q->codingMode = bytestream_get_be16(&edata_ptr);

        q->samples_per_channel = q->samples_per_frame / q->channels;
        q->scrambled_stream = 1;

    } else {
        av_log(NULL,AV_LOG_ERROR,"Unknown extradata size %d.\n",avctx->extradata_size);
    }
    /* Check the extradata. */

    if (q->atrac3version != 4) {
        av_log(avctx,AV_LOG_ERROR,"Version %d != 4.\n",q->atrac3version);
        return -1;
    }

    if (q->samples_per_frame != 1024 && q->samples_per_frame != 2048) {
        av_log(avctx,AV_LOG_ERROR,"Unknown amount of samples per frame %d.\n",q->samples_per_frame);
        return -1;
    }

    if (q->delay != 0x88E) {
        av_log(avctx,AV_LOG_ERROR,"Unknown amount of delay %x != 0x88E.\n",q->delay);
        return -1;
    }

    if (q->codingMode == STEREO) {
        av_log(avctx,AV_LOG_DEBUG,"Normal stereo detected.\n");
    } else if (q->codingMode == JOINT_STEREO) {
        av_log(avctx,AV_LOG_DEBUG,"Joint stereo detected.\n");
    } else {
        av_log(avctx,AV_LOG_ERROR,"Unknown channel coding mode %x!\n",q->codingMode);
        return -1;
    }

    if (avctx->channels <= 0 || avctx->channels > 2 /*|| ((avctx->channels * 1024) != q->samples_per_frame)*/) {
        av_log(avctx,AV_LOG_ERROR,"Channel configuration error!\n");
        return -1;
    }


    if(avctx->block_align >= UINT_MAX/2)
        return -1;

    /* Pad the data buffer with FF_INPUT_BUFFER_PADDING_SIZE,
     * this is for the bitstream reader. */
    if ((q->decoded_bytes_buffer = av_mallocz((avctx->block_align+(4-avctx->block_align%4) + FF_INPUT_BUFFER_PADDING_SIZE))) == NULL)
        return AVERROR(ENOMEM);


    /* Initialize the VLC tables. */
    for (i=0 ; i<7 ; i++) {
        init_vlc (&spectral_coeff_tab[i], 9, huff_tab_sizes[i],
                  huff_bits[i], 1, 1,
                  huff_codes[i], 1, 1, INIT_VLC_USE_STATIC);
    }

    init_atrac3_transforms(q);

    /* Generate the scale factors. */
    for (i=0 ; i<64 ; i++)
        SFTable[i] = pow(2.0, (i - 15) / 3.0);

    /* Generate gain tables. */
    for (i=0 ; i<16 ; i++)
        gain_tab1[i] = powf (2.0, (4 - i));

    for (i=-15 ; i<16 ; i++)
        gain_tab2[i+15] = powf (2.0, i * -0.125);

    /* init the joint-stereo decoding data */
    q->weighting_delay[0] = 0;
    q->weighting_delay[1] = 7;
    q->weighting_delay[2] = 0;
    q->weighting_delay[3] = 7;
    q->weighting_delay[4] = 0;
    q->weighting_delay[5] = 7;

    for (i=0; i<4; i++) {
        q->matrix_coeff_index_prev[i] = 3;
        q->matrix_coeff_index_now[i] = 3;
        q->matrix_coeff_index_next[i] = 3;
    }

    dsputil_init(&dsp, avctx);

    q->pUnits = av_mallocz(sizeof(channel_unit)*q->channels);
    if (!q->pUnits) {
        av_free(q->decoded_bytes_buffer);
        return AVERROR(ENOMEM);
    }

    avctx->sample_fmt = SAMPLE_FMT_S16;
    return 0;
}


AVCodec atrac3_decoder =
{
    .name = "atrac3",
    .type = CODEC_TYPE_AUDIO,
    .id = CODEC_ID_ATRAC3,
    .priv_data_size = sizeof(ATRAC3Context),
    .init = atrac3_decode_init,
    .close = atrac3_decode_close,
    .decode = atrac3_decode_frame,
    .long_name = NULL_IF_CONFIG_SMALL("Atrac 3 (Adaptive TRansform Acoustic Coding 3)"),
};