/*


* Windows Media Audio Voice decoder.

* Copyright (c) 2009 Ronald S. Bultje

*

* This file is part of FFmpeg.

*

* FFmpeg is free software; you can redistribute it and/or

* modify it under the terms of the GNU Lesser General Public

* License as published by the Free Software Foundation; either

* version 2.1 of the License, or (at your option) any later version.

*

* FFmpeg is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

* Lesser General Public License for more details.

*

* You should have received a copy of the GNU Lesser General Public

* License along with FFmpeg; if not, write to the Free Software

* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

*/

/**

* @file libavcodec/wmavoice.c

* @brief Windows Media Audio Voice compatible decoder

* @author Ronald S. Bultje <rsbultje@gmail.com>

*/

#include <math.h> 
#include "avcodec.h" 
#include "get_bits.h" 
#include "put_bits.h" 
#include "wmavoice_data.h" 
#include "celp_math.h" 
#include "celp_filters.h" 
#include "acelp_vectors.h" 
#include "acelp_filters.h" 
#include "lsp.h" 
#include "libavutil/lzo.h" 
#define MAX_BLOCKS 8 ///< maximum number of blocks per frame 
#define MAX_LSPS 16 ///< maximum filter order 
#define MAX_FRAMES 3 ///< maximum number of frames per superframe 
#define MAX_FRAMESIZE 160 ///< maximum number of samples per frame 
#define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history 
#define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)

///< maximum number of samples per superframe

#define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that 
///< was split over two packets

#define VLC_NBITS 6 ///< number of bits to read per VLC iteration 
/**

* Frame type VLC coding.

*/

static VLC frame_type_vlc;

/**

* Adaptive codebook types.

*/

enum {
    ACB_TYPE_NONE = 0, ///< no adaptive codebook (only hardcoded fixed)
    ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
    ///< we interpolate to get a per-sample pitch.
    ///< Signal is generated using an asymmetric sinc
    ///< window function
    ///< @note see #wmavoice_ipol1_coeffs
    ACB_TYPE_HAMMING = 2 ///< Per-block pitch with signal generation using
    ///< a Hamming sinc window function
    ///< @note see #wmavoice_ipol2_coeffs
};
/**

* Fixed codebook types.

*/

enum {
    FCB_TYPE_SILENCE = 0, ///< comfort noise during silence
    ///< generated from a hardcoded (fixed) codebook
    ///< with per-frame (low) gain values
    FCB_TYPE_HARDCODED = 1, ///< hardcoded (fixed) codebook with per-block
    ///< gain values
    FCB_TYPE_AW_PULSES = 2, ///< Pitch-adaptive window (AW) pulse signals,
    ///< used in particular for low-bitrate streams
    FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
    ///< combinations of either single pulses or
    ///< pulse pairs
};
/**

* Description of frame types.

*/

static const struct frame_type_desc {
    uint8_t n_blocks; ///< number of blocks per frame (each block
    ///< contains 160/#n_blocks samples)
    uint8_t log_n_blocks; ///< log2(#n_blocks)
    uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)
    uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)
    uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs
    ///< (rather than just one single pulse)
    ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
    uint16_t frame_size; ///< the number of bits that make up the block
    ///< data (per frame)
} frame_descs[17] = {
    /* one row per frame type; column order matches the fields above:
     * n_blocks, log_n_blocks, acb_type, fcb_type, dbl_pulses, frame_size */
    { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },
    { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },
    { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
    { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },
    { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },
    { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },
    { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },
    { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },
    { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },
    { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },
    { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },
    { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }
};
/**

* WMA Voice decoding context.

*/

typedef struct { 
/**

* @defgroup struct_global Global values

* Global values, specified in the stream header / extradata or used

* all over.

* @{

*/

GetBitContext gb; ///< packet bitreader. During decoder init,

///< it contains the extradata from the

///< demuxer. During decoding, it contains

///< packet data.

int8_t vbm_tree[25]; ///< converts VLC codes to frame type 
int spillover_bitsize; ///< number of bits used to specify 
///< #spillover_nbits in the packet header

///< = ceil(log2(ctx->block_align << 3))

int history_nsamples; ///< number of samples in history for signal 
///< prediction (through ACB)

143 
144 
145  
int lsps; ///< number of LSPs per frame [10 or 16] 
int lsp_q_mode; ///< defines quantizer defaults [0, 1] 
int lsp_def_mode; ///< defines different sets of LSP defaults 
///< [0, 1]

int frame_lsp_bitsize; ///< size (in bits) of LSPs, when encoded 
///< perframe (independent coding)

int sframe_lsp_bitsize; ///< size (in bits) of LSPs, when encoded 
///< per superframe (residual coding)

155 
156 
157 
158 
159 
160 
161 
162 
163 
164 
165 
166 
167 
168 
169 
170  
/**

* @}

* @defgroup struct_packet Packet values

* Packet values, specified in the packet header or related to a packet.

* A packet is considered to be a single unit of data provided to this

* decoder by the demuxer.

* @{

*/

int spillover_nbits; ///< number of bits of the previous packet's 
///< last superframe preceding this

///< packet's first full superframe (useful

///< for resynchronization also)

int has_residual_lsps; ///< if set, superframes contain one set of 
///< LSPs that cover all frames, encoded as

///< independent and residual LSPs; if not

///< set, each frame contains its own, fully

///< independent, LSPs

int skip_bits_next; ///< number of bits to skip at the next call 
///< to #wmavoice_decode_packet() (since

///< they're part of the previous superframe)

192 
193 
194 
195 
196 
197 
198 
199 
200 
201  
/**

* @}

* @defgroup struct_frame Frame and superframe values

* Superframe and frame data - these can change from frame to frame,

* although some of them do in that case serve as a cache / history for

* the next frame or superframe.

* @{

*/

double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous 
///< superframe

int last_pitch_val; ///< pitch value of the previous frame 
int last_acb_type; ///< frame type [02] of the previous frame 
int pitch_diff_sh16; ///< ((cur_pitch_val  #last_pitch_val) 
///< << 16) / #MAX_FRAMESIZE

float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE 
218 
219 
220 
221 
222 
223 
224 
225 
226 
227 
228 
229 
230 
231 
232 
233 
234 
235 
236  
int frame_cntr; ///< current frame index [0  0xFFFE]; is 
///< only used for comfort noise in #pRNG()

float gain_pred_err[6]; ///< cache for gain prediction 
float excitation_history[MAX_SIGNAL_HISTORY];

///< cache of the signal of previous

///< superframes, used as a history for

///< signal generation

float synth_history[MAX_LSPS]; ///< see #excitation_history 
/**

* @}

*/

} WMAVoiceContext; 
/**
 * Sets up the variable bit mode (VBM) tree from container extradata.
 * @param gb bit I/O context.
 *           The bit context (s->gb) should be loaded with byte 23-46 of the
 *           container extradata (i.e. the ones containing the VBM tree).
 * @param vbm_tree pointer to array to which the decoded VBM tree will be
 *                 written; entries that are not assigned a frame type
 *                 are left at -1.
 * @return 0 on success, <0 on error.
 */
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
{
    static const uint8_t bits[] = {
         2,  2,  2,  4,  4,  4,
         6,  6,  6,  8,  8,  8,
        10, 10, 10, 12, 12, 12,
        14, 14, 14, 14
    };
    static const uint16_t codes[] = {
        0x0000, 0x0001, 0x0002,        //              00/01/10
        0x000c, 0x000d, 0x000e,        //           11+00/01/10
        0x003c, 0x003d, 0x003e,        //         1111+00/01/10
        0x00fc, 0x00fd, 0x00fe,        //       111111+00/01/10
        0x03fc, 0x03fd, 0x03fe,        //     11111111+00/01/10
        0x0ffc, 0x0ffd, 0x0ffe,        //   1111111111+00/01/10
        0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
    };
    int cntr[8], n, res;

    /* An array parameter is really a pointer, so sizeof(vbm_tree) would only
     * cover pointer-size bytes. Spell out the element count so that all 25
     * entries get the "unused" (-1) marker. */
    memset(vbm_tree, 0xff, 25 * sizeof(vbm_tree[0]));
    memset(cntr, 0, sizeof(cntr));

    /* 17 frame types, each described by a 3-bit group selector */
    for (n = 0; n < 17; n++) {
        res = get_bits(gb, 3);
        if (cntr[res] > 3) // should be >= 3 + (res == 7))
            return -1;
        vbm_tree[res * 3 + cntr[res]++] = n;
    }

    INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
                    bits, 1, 1, codes, 2, 2, 132);
    return 0;
}
291 
292 
293 
294 
295 
296 
297 
298  
/**
 * Extradata layout:
 * - byte  0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),
 * - byte 19-22: flags field (annoyingly in LE; see below for known
 *               values),
 * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,
 *               rest is 0).
 */

if (ctx>extradata_size != 46) { 
av_log(ctx, AV_LOG_ERROR, 
"Invalid extradata size %d (should be 46)\n",

ctx>extradata_size); 
return 1; 
} 
flags = AV_RL32(ctx>extradata + 18);

s>spillover_bitsize = 3 + av_ceil_log2(ctx>block_align);

s>do_apf = flags & 0x1;

s>lsp_q_mode = !!(flags & 0x2000);

s>lsp_def_mode = !!(flags & 0x4000);

lsp16_flag = flags & 0x1000;

if (lsp16_flag) {

s>lsps = 16;

s>frame_lsp_bitsize = 34;

s>sframe_lsp_bitsize = 60;

} else {

s>lsps = 10;

s>frame_lsp_bitsize = 24;

s>sframe_lsp_bitsize = 48;

} 
for (n = 0; n < s>lsps; n++) 
s>prev_lsps[n] = M_PI * (n + 1.0) / (s>lsps + 1.0); 
331 
332 
333 
334 
335 
336  
s>min_pitch_val = ((ctx>sample_rate << 8) / 400 + 50) >> 8; 
s>max_pitch_val = ((ctx>sample_rate << 8) * 37 / 2000 + 50) >> 8; 
pitch_range = s>max_pitch_val  s>min_pitch_val; 
s>pitch_nbits = av_ceil_log2(pitch_range); 
s>last_pitch_val = 40;

s>last_acb_type = ACB_TYPE_NONE; 
s>history_nsamples = s>max_pitch_val + 8;

345 
346 
347 
348  
av_log(ctx, AV_LOG_ERROR, 
"Unsupported samplerate %d (min=%d, max=%d)\n",

ctx>sample_rate, min_sr, max_sr); // 32222097 Hz

353 
354 
355  
s>block_conv_table[0] = s>min_pitch_val;

s>block_conv_table[1] = (pitch_range * 25) >> 6; 
s>block_conv_table[2] = (pitch_range * 44) >> 6; 
s>block_conv_table[3] = s>max_pitch_val  1; 
s>block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF; 
s>block_delta_pitch_nbits = 1 + av_ceil_log2(s>block_delta_pitch_hrange);

s>block_pitch_range = s>block_conv_table[2] +

s>block_conv_table[3] + 1 + 
2 * (s>block_conv_table[1]  2 * s>min_pitch_val); 
s>block_pitch_nbits = av_ceil_log2(s>block_pitch_range); 
367 
368  
return 0; 
} 
372 
373 
374 
375 
376 
377 
378 
379 
380 
381 
382 
383 
384 
385 
386 
387 
388 
389 
390 
391  
memset(lsps, 0, num * sizeof(*lsps)); 
for (n = 0; n < n_stages; n++) { 
const uint8_t *t_off = &table[values[n] * num];

double base = base_q[n], mul = mul_q[n];

397 
398 
399  
table += sizes[n] * num; 
} 
} 
404 
405 
406 
407 
408 
409 
410 
411 
412 
413 
414 
415 
416 
417 
418 
419 
420 
421 
422 
423 
424 
425 
426 
427  
v[0] = get_bits(gb, 8); 
v[1] = get_bits(gb, 6); 
v[2] = get_bits(gb, 5); 
v[3] = get_bits(gb, 5); 
433 
434 
435 
436  
/**
 * Parse 10 independently-coded LSPs, and then derive the tables to
 * generate LSPs for the other frames from them (residual coding).
 */

static void dequant_lsp10r(GetBitContext *gb, 
double *i_lsps, const double *old, 
double *a1, double *a2, int q_mode) 
{ 
static const uint16_t vec_sizes[3] = { 128, 64, 64 }; 
static const double mul_lsf[3] = { 
2.5807601174e3, 1.2354460219e3, 1.1763821673e3 
}; 
static const double base_lsf[3] = { 
M_PI * 1.07448e1, M_PI * 5.2706e2, M_PI * 5.1634e2 
}; 
const float (*ipol_tab)[2][10] = q_mode ? 
wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a; 
uint16_t interpol, v[3];

int n;

457 
458  
interpol = get_bits(gb, 5);

v[0] = get_bits(gb, 7); 
v[1] = get_bits(gb, 6); 
v[2] = get_bits(gb, 6); 
464 
465 
466 
467 
468 
469  
dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r, 
mul_lsf, base_lsf); 
} 
474 
475 
476 
477 
478 
479 
480 
481 
482 
483 
484 
485 
486 
487 
488 
489 
490 
491  
v[0] = get_bits(gb, 8); 
v[1] = get_bits(gb, 6); 
v[2] = get_bits(gb, 7); 
v[3] = get_bits(gb, 6); 
v[4] = get_bits(gb, 7); 
498 
499 
500 
501 
502 
503 
504 
505  
/**
 * Parse 16 independently-coded LSPs, and then derive the tables to
 * generate LSPs for the other frames from them (residual coding).
 */

static void dequant_lsp16r(GetBitContext *gb, 
double *i_lsps, const double *old, 
double *a1, double *a2, int q_mode) 
{ 
static const uint16_t vec_sizes[3] = { 128, 128, 128 }; 
static const double mul_lsf[3] = { 
1.2232979501e3, 1.4062241527e3, 1.6114744851e3 
}; 
static const double base_lsf[3] = { 
M_PI * 5.5830e2, M_PI * 5.2908e2, M_PI * 5.4776e2 
}; 
const float (*ipol_tab)[2][16] = q_mode ? 
wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a; 
uint16_t interpol, v[3];

int n;

526 
527  
interpol = get_bits(gb, 5);

v[0] = get_bits(gb, 7); 
v[1] = get_bits(gb, 7); 
v[2] = get_bits(gb, 7); 
533 
534 
535 
536 
537 
538  
dequant_lsps( a2, 10, v, vec_sizes, 1, 
wmavoice_dq_lsp16r1, mul_lsf, base_lsf); 
dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1, 
wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]); 
dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1, 
wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]); 
} 
547 
548 
549 
550 
551 
552 
553 
554 
555 
556 
557 
558 
559 
560 
561 
562 
563 
564 
565 
566 
567 
568 
569 
570 
571 
572 
573 
574  
/* position of pulse */

s>aw_idx_is_ext = 0;

if ((bits = get_bits(gb, 6)) >= 54) { 
s>aw_idx_is_ext = 1;

bits += (bits  54) * 3 + get_bits(gb, 2); 
} 
582 
583 
584 
585 
586 
587 
588 
589 
590 
591  
/* if continuing from a position before the block, reset position to

* start of block (when corrected for the range over which it can be

* spread in aw_pulse_set1()). */

if (start_offset[bits] < MAX_FRAMESIZE / 2) { 
while (s>aw_first_pulse_off[1]  pitch[1] + s>aw_pulse_range > 0) 
s>aw_first_pulse_off[1] = pitch[1]; 
if (start_offset[bits] < 0) 
while (s>aw_first_pulse_off[0]  pitch[0] + s>aw_pulse_range > 0) 
s>aw_first_pulse_off[0] = pitch[0]; 
} 
} 
604 
605 
606 
607 
608 
609 
610 
611 
612 
613 
614 
615 
616 
617 
618 
619 
620 
621 
622 
623 
624  
/* set offset of first pulse to within this block */

if (s>aw_n_pulses[block_idx] > 0) 
while (pulse_off + s>aw_pulse_range < 1) 
pulse_off += fcb>pitch_lag; 
630 
631 
632 
633 
634 
635 
636 
637 
638 
639 
640 
641 
642  
/* aw_pulse_set1() already applies pulses around pulse_off (to be exact,
 * in the range of [pulse_off, pulse_off + s->aw_pulse_range]), and thus
 * we exclude that range from being pulsed again in this function. */

646 
memset( use_mask, 1, 5 * sizeof(use_mask[0])); 
647 
memset(&use_mask[5], 0, 2 * sizeof(use_mask[0])); 
648 
if (s>aw_n_pulses[block_idx] > 0) 
649 
for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb>pitch_lag) { 
650 
int excl_range = s>aw_pulse_range; // always 16 or 24 
651 
uint16_t *use_mask_ptr = &use_mask[idx >> 4];

652 
int first_sh = 16  (idx & 15); 
653 
*use_mask_ptr++ &= 0xFFFF << first_sh;

654 
excl_range = first_sh; 
655 
if (excl_range >= 16) { 
656 
*use_mask_ptr++ = 0;

657 
*use_mask_ptr &= 0xFFFF >> (excl_range  16); 
658 
} else

659 
*use_mask_ptr &= 0xFFFF >> excl_range;

660 
} 
661  
662 
/* find the 'aidx'th offset that is not excluded */

663 
aidx = get_bits(gb, s>aw_n_pulses[0] > 0 ? 5  2 * block_idx : 4); 
664 
for (n = 0; n <= aidx; pulse_start++) { 
665 
for (idx = pulse_start; idx < 0; idx += fcb>pitch_lag) ; 
666 
if (idx >= MAX_FRAMESIZE / 2) { // find from zero 
667 
if (use_mask[0]) idx = 0x0F; 
668 
else if (use_mask[1]) idx = 0x1F; 
669 
else if (use_mask[2]) idx = 0x2F; 
670 
else if (use_mask[3]) idx = 0x3F; 
671 
else if (use_mask[4]) idx = 0x4F; 
672 
else return; 
673 
idx = av_log2_16bit(use_mask[idx >> 4]);

674 
} 
675 
if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) { 
676 
use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15)); 
677 
n++; 
678 
start_off = idx; 
679 
} 
680 
} 
681  
682 
fcb>x[fcb>n] = start_off; 
683 
fcb>y[fcb>n] = get_bits1(gb) ? 1.0 : 1.0; 
684 
fcb>n++; 
685  
686 
/* set offset for next block, relative to start of that block */

687 
n = (MAX_FRAMESIZE / 2  start_off) % fcb>pitch_lag;

688 
s>aw_next_pulse_off_cache = n ? fcb>pitch_lag  n : 0;

689 
} 
690  
691 
/**

692 
* Apply first set of pitchadaptive window pulses.

693 
* @param s WMA Voice decoding context private data

694 
* @param gb bit I/O context

695 
* @param block_idx block index in frame [0, 1]

696 
* @param fcb storage location for fixed codebook pulse info

697 
*/

698 
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, 
699 
int block_idx, AMRFixed *fcb)

700 
{ 
701 
int val = get_bits(gb, 12  2 * (s>aw_idx_is_ext && !block_idx)); 
702 
float v;

703  
704 
if (s>aw_n_pulses[block_idx] > 0) { 
705 
int n, v_mask, i_mask, sh, n_pulses;

706  
707 
if (s>aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each 
708 
n_pulses = 3;

709 
v_mask = 8;

710 
i_mask = 7;

711 
sh = 4;

712 
} else { // 4 pulses, 1:sign + 2:index each 
713 
n_pulses = 4;

714 
v_mask = 4;

715 
i_mask = 3;

716 
sh = 3;

717 
} 
718  
719 
for (n = n_pulses  1; n >= 0; n, val >>= sh) { 
720 
fcb>y[fcb>n] = (val & v_mask) ? 1.0 : 1.0; 
721 
fcb>x[fcb>n] = (val & i_mask) * n_pulses + n + 
722 
s>aw_first_pulse_off[block_idx]; 
723 
while (fcb>x[fcb>n] < 0) 
724 
fcb>x[fcb>n] += fcb>pitch_lag; 
725 
if (fcb>x[fcb>n] < MAX_FRAMESIZE / 2) 
726 
fcb>n++; 
727 
} 
728 
} else {

729 
int num2 = (val & 0x1FF) >> 1, delta, idx; 
730  
731 
if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; } 
732 
else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1  1 * 77; } 
733 
else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1  2 * 76; } 
734 
else { delta = 7; idx = num2 + 1  3 * 75; } 
735 
v = (val & 0x200) ? 1.0 : 1.0; 
736  
737 
fcb>no_repeat_mask = 3 << fcb>n;

738 
fcb>x[fcb>n] = idx  delta; 
739 
fcb>y[fcb>n] = v; 
740 
fcb>x[fcb>n + 1] = idx;

741 
fcb>y[fcb>n + 1] = (val & 1) ? v : v; 
742 
fcb>n += 2;

743 
} 
744 
} 
/**
 * @}
 *
 * Generate a random number from frame_cntr and block_idx, which will lie
 * in the range [0, 1000 - block_size] (so it can be used as an index in a
 * table of size 1000 of which you want to read block_size entries).
 *
 * @param frame_cntr current frame number
 * @param block_num current block index
 * @param block_size amount of entries we want to read from a table
 *                   that has 1000 entries
 * @return a (non-)random number in the [0, 1000 - block_size] range.
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* array to simplify the calculation of z:
     * y = (x % 9) * 5 + 6;
     * z = (49995 * x) / y;
     * Since y only has 9 values, we can remove the division by using a
     * LUT and using FASTDIV-style divisions. For each of the 9 values
     * of y, we can rewrite z as:
     * z = x * (49995 / y) + x * ((49995 % y) / y)
     * In this table, each col represents one possible value of y, the
     * first number is 49995 / y, and the second is the FASTDIV variant
     * of 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;

    /* NOTE(review): the "-=" here and the "-" in the two expressions below
     * are restored operators; confirm against the reference decoder. */
    if (x >= 0xFFFF) x -= 0xFFFF;   // max value of x is 8*1877+0xFFFE=0x13AA6,
                                    // so this is effectively a modulo (%)
    y = x - 9 * MULH(477218589, x); // x % 9
    z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
                                    // z = x * 49995 / (y * 5 + 6)
    return z % (1000 - block_size);
}
790  
791 
/**

792 
* Parse hardcoded signal for a single block.

793 
* @note see #synth_block().

794 
*/

795 
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, 
796 
int block_idx, int size, 
797 
const struct frame_type_desc *frame_desc, 
798 
float *excitation)

799 
{ 
800 
float gain;

801 
int n, r_idx;

802  
803 
assert(size <= MAX_FRAMESIZE); 
804  
805 
/* Set the offset from which we start reading wmavoice_std_codebook */

806 
if (frame_desc>fcb_type == FCB_TYPE_SILENCE) {

807 
r_idx = pRNG(s>frame_cntr, block_idx, size); 
808 
gain = s>silence_gain; 
809 
} else /* FCB_TYPE_HARDCODED */ { 
810 
r_idx = get_bits(gb, 8);

811 
gain = wmavoice_gain_universal[get_bits(gb, 6)];

812 
} 
813  
814 
/* Clear gain prediction parameters */

815 
memset(s>gain_pred_err, 0, sizeof(s>gain_pred_err)); 
816  
817 
/* Apply gain to hardcoded codebook and use that as excitation signal */

818 
for (n = 0; n < size; n++) 
819 
excitation[n] = wmavoice_std_codebook[r_idx + n] * gain; 
820 
} 
821  
822 
/**

823 
* Parse FCB/ACB signal for a single block.

824 
* @note see #synth_block().

825 
*/

826 
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, 
827 
int block_idx, int size, 
828 
int block_pitch_sh2,

829 
const struct frame_type_desc *frame_desc, 
830 
float *excitation)

831 
{ 
832 
static const float gain_coeff[6] = { 
833 
0.8169, 0.06545, 0.1726, 0.0185, 0.0359, 0.0458 
834 
}; 
835 
float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain; 
836 
int n, idx, gain_weight;

837 
AMRFixed fcb; 
838  
839 
assert(size <= MAX_FRAMESIZE / 2);

840 
memset(pulses, 0, sizeof(*pulses) * size); 
841  
842 
fcb.pitch_lag = block_pitch_sh2 >> 2;

843 
fcb.pitch_fac = 1.0; 
844 
fcb.no_repeat_mask = 0;

845 
fcb.n = 0;

846  
847 
/* For the other frame types, this is where we apply the innovation

848 
* (fixed) codebook pulses of the speech signal. */

849 
if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) {

850 
aw_pulse_set1(s, gb, block_idx, &fcb); 
851 
aw_pulse_set2(s, gb, block_idx, &fcb); 
852 
} else /* FCB_TYPE_EXC_PULSES */ { 
853 
int offset_nbits = 5  frame_desc>log_n_blocks; 
854  
855 
fcb.no_repeat_mask = 1;

856 
/* similar to ff_decode_10_pulses_35bits(), but with single pulses

857 
* (instead of double) for a subset of pulses */

858 
for (n = 0; n < 5; n++) { 
859 
float sign;

860 
int pos1, pos2;

861  
862 
sign = get_bits1(gb) ? 1.0 : 1.0; 
863 
pos1 = get_bits(gb, offset_nbits); 
864 
fcb.x[fcb.n] = n + 5 * pos1;

865 
fcb.y[fcb.n++] = sign; 
866 
if (n < frame_desc>dbl_pulses) {

867 
pos2 = get_bits(gb, offset_nbits); 
868 
fcb.x[fcb.n] = n + 5 * pos2;

869 
fcb.y[fcb.n++] = (pos1 < pos2) ? sign : sign; 
870 
} 
871 
} 
872 
} 
873 
ff_set_fixed_vector(pulses, &fcb, 1.0, size); 
874  
875 
/* Calculate gain for adaptive & fixed codebook signal.

876 
* see ff_amr_set_fixed_gain(). */

877 
idx = get_bits(gb, 7);

878 
fcb_gain = expf(ff_dot_productf(s>gain_pred_err, gain_coeff, 6) 

879 
5.2409161640 + wmavoice_gain_codebook_fcb[idx]); 
880 
acb_gain = wmavoice_gain_codebook_acb[idx]; 
881 
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx], 
882 
2.9957322736 /* log(0.05) */, 
883 
1.6094379124 /* log(5.0) */); 
884  
885 
gain_weight = 8 >> frame_desc>log_n_blocks;

886 
memmove(&s>gain_pred_err[gain_weight], s>gain_pred_err, 
887 
sizeof(*s>gain_pred_err) * (6  gain_weight)); 
888 
for (n = 0; n < gain_weight; n++) 
889 
s>gain_pred_err[n] = pred_err; 
890  
891 
/* Calculation of adaptive codebook */

892 
if (frame_desc>acb_type == ACB_TYPE_ASYMMETRIC) {

893 
int len;

894 
for (n = 0; n < size; n += len) { 
895 
int next_idx_sh16;

896 
int abs_idx = block_idx * size + n;

897 
int pitch_sh16 = (s>last_pitch_val << 16) + 
898 
s>pitch_diff_sh16 * abs_idx; 
899 
int pitch = (pitch_sh16 + 0x6FFF) >> 16; 
900 
int idx_sh16 = ((pitch << 16)  pitch_sh16) * 8 + 0x58000; 
901 
idx = idx_sh16 >> 16;

902 
if (s>pitch_diff_sh16) {

903 
if (s>pitch_diff_sh16 > 0) { 
904 
next_idx_sh16 = (idx_sh16) &~ 0xFFFF;

905 
} else

906 
next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF; 
907 
len = av_clip((idx_sh16  next_idx_sh16) / s>pitch_diff_sh16 / 8,

908 
1, size  n);

909 
} else

910 
len = size; 
911  
912 
ff_acelp_interpolatef(&excitation[n], &excitation[n  pitch], 
913 
wmavoice_ipol1_coeffs, 17,

914 
idx, 9, len);

915 
} 
916 
} else /* ACB_TYPE_HAMMING */ { 
917 
int block_pitch = block_pitch_sh2 >> 2; 
918 
idx = block_pitch_sh2 & 3;

919 
if (idx) {

920 
ff_acelp_interpolatef(excitation, &excitation[block_pitch], 
921 
wmavoice_ipol2_coeffs, 4,

922 
idx, 8, size);

923 
} else

924 
av_memcpy_backptr(excitation, sizeof(float) * block_pitch, 
925 
sizeof(float) * size); 
926 
} 
927  
928 
/* Interpolate ACB/FCB and use as excitation signal */

929 
ff_weighted_vector_sumf(excitation, excitation, pulses, 
930 
acb_gain, fcb_gain, size); 
931 
} 
932  
933 
/**

934 
* Parse data in a single block.

935 
* @note we assume enough bits are available, caller should check.

936 
*

937 
* @param s WMA Voice decoding context private data

938 
* @param gb bit I/O context

939 
* @param block_idx index of the toberead block

940 
* @param size amount of samples to be read in this block

941 
* @param block_pitch_sh2 pitch for this block << 2

942 
* @param lsps LSPs for (the end of) this frame

943 
* @param prev_lsps LSPs for the last frame

944 
* @param frame_desc frame type descriptor

945 
* @param excitation target memory for the ACB+FCB interpolated signal

946 
* @param synth target memory for the speech synthesis filter output

947 
* @return 0 on success, <0 on error.

948 
*/

949 
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, 
950 
int block_idx, int size, 
951 
int block_pitch_sh2,

952 
const double *lsps, const double *prev_lsps, 
953 
const struct frame_type_desc *frame_desc, 
954 
float *excitation, float *synth) 
955 
{ 
956 
double i_lsps[MAX_LSPS];

957 
float lpcs[MAX_LSPS];

958 
float fac;

959 
int n;

960  
961 
if (frame_desc>acb_type == ACB_TYPE_NONE)

962 
synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation); 
963 
else

964 
synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2, 
965 
frame_desc, excitation); 
966  
967 
/* convert interpolated LSPs to LPCs */

968 
fac = (block_idx + 0.5) / frame_desc>n_blocks; 
969 
for (n = 0; n < s>lsps; n++) // LSF > LSP 
970 
i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n]  prev_lsps[n])); 
971 
ff_acelp_lspd2lpc(i_lsps, lpcs, s>lsps >> 1);

972  
973 
/* Speech synthesis */

974 
ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s>lsps); 
975 
} 
976  
977 
/**

978 
* Synthesize output samples for a single frame.

979 
* @note we assume enough bits are available, caller should check.

980 
*

981 
* @param ctx WMA Voice decoder context

982 
* @param gb bit I/O context (s>gb or one for crosspacket superframes)

983 
* @param samples pointer to output sample buffer, has space for at least 160

984 
* samples

985 
* @param lsps LSP array

986 
* @param prev_lsps array of previous frame's LSPs

987 
* @param excitation target buffer for excitation signal

988 
* @param synth target buffer for synthesized speech data

989 
* @return 0 on success, <0 on error.

990 
*/

991 
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, 
992 
float *samples,

993 
const double *lsps, const double *prev_lsps, 
994 
float *excitation, float *synth) 
995 
{ 
996 
WMAVoiceContext *s = ctx>priv_data; 
997 
int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;

998 
int pitch[MAX_BLOCKS], last_block_pitch;

999  
1000 
/* Parse frame type ("frame header"), see frame_descs */

1001 
int bd_idx = s>vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], 
1002 
block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks; 
1003  
1004 
if (bd_idx < 0) { 
1005 
av_log(ctx, AV_LOG_ERROR, 
1006 
"Invalid frame type VLC code, skipping\n");

1007 
return 1; 
1008 
} 
1009  
1010 
/* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitchperframe") */

1011 
if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {

1012 
/* Pitch is provided per frame, which is interpreted as the pitch of

1013 
* the last sample of the last block of this frame. We can interpolate

1014 
* the pitch of other blocks (and even pitchpersample) by gradually

1015 
* incrementing/decrementing prev_frame_pitch to cur_pitch_val. */

1016 
n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;

1017 
log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;

1018 
cur_pitch_val = s>min_pitch_val + get_bits(gb, s>pitch_nbits); 
1019 
cur_pitch_val = FFMIN(cur_pitch_val, s>max_pitch_val  1);

1020 
if (s>last_acb_type == ACB_TYPE_NONE 

1021 
20 * abs(cur_pitch_val  s>last_pitch_val) >

1022 
(cur_pitch_val + s>last_pitch_val)) 
1023 
s>last_pitch_val = cur_pitch_val; 
1024  
1025 
/* pitch per block */

1026 
for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) { 
1027 
int fac = n * 2 + 1; 
1028  
1029 
pitch[n] = (MUL16(fac, cur_pitch_val) + 
1030 
MUL16((n_blocks_x2  fac), s>last_pitch_val) + 
1031 
frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2; 
1032 
} 
1033  
1034 
/* "pitchdiffpersample" for calculation of pitch per sample */

1035 
s>pitch_diff_sh16 = 
1036 
((cur_pitch_val  s>last_pitch_val) << 16) / MAX_FRAMESIZE;

1037 
} 
1038  
1039 
/* Global gain (if silence) and pitchadaptive window coordinates */

1040 
switch (frame_descs[bd_idx].fcb_type) {

1041 
case FCB_TYPE_SILENCE:

1042 
s>silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];

1043 
break;

1044 
case FCB_TYPE_AW_PULSES:

1045 
aw_parse_coords(s, gb, pitch); 
1046 
break;

1047 
} 
1048  
1049 
for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) { 
1050 
int bl_pitch_sh2;

1051  
1052 
/* Pitch calculation for ACB_TYPE_HAMMING ("pitchperblock") */

1053 
switch (frame_descs[bd_idx].acb_type) {

1054 
case ACB_TYPE_HAMMING: {

1055 
/* Pitch is given per block. Perblock pitches are encoded as an

1056 
* absolute value for the first block, and then delta values

1057 
* relative to this value) for all subsequent blocks. The scale of

1058 
* this pitch value is semilogaritmic compared to its use in the

1059 
* decoder, so we convert it to normal scale also. */

1060 
int block_pitch,

1061 
t1 = (s>block_conv_table[1]  s>block_conv_table[0]) << 2, 
1062 
t2 = (s>block_conv_table[2]  s>block_conv_table[1]) << 1, 
1063 
t3 = s>block_conv_table[3]  s>block_conv_table[2] + 1; 
1064  
1065 
if (n == 0) { 
1066 
block_pitch = get_bits(gb, s>block_pitch_nbits); 
1067 
} else

1068 
block_pitch = last_block_pitch  s>block_delta_pitch_hrange + 
1069 
get_bits(gb, s>block_delta_pitch_nbits); 
1070 
/* Convert last_ so that any next delta is within _range */

1071 
last_block_pitch = av_clip(block_pitch, 
1072 
s>block_delta_pitch_hrange, 
1073 
s>block_pitch_range  
1074 
s>block_delta_pitch_hrange); 
1075  
1076 
/* Convert semilogstyle scale back to normal scale */

1077 
if (block_pitch < t1) {

1078 
bl_pitch_sh2 = (s>block_conv_table[0] << 2) + block_pitch; 
1079 
} else {

1080 
block_pitch = t1; 
1081 
if (block_pitch < t2) {

1082 
bl_pitch_sh2 = 
1083 
(s>block_conv_table[1] << 2) + (block_pitch << 1); 
1084 
} else {

1085 
block_pitch = t2; 
1086 
if (block_pitch < t3) {

1087 
bl_pitch_sh2 = 
1088 
(s>block_conv_table[2] + block_pitch) << 2; 
1089 
} else

1090 
bl_pitch_sh2 = s>block_conv_table[3] << 2; 
1091 
} 
1092 
} 
1093 
pitch[n] = bl_pitch_sh2 >> 2;

1094 
break;

1095 
} 
1096  
1097 
case ACB_TYPE_ASYMMETRIC: {

1098 
bl_pitch_sh2 = pitch[n] << 2;

1099 
break;

1100 
} 
1101  
1102 
default: // ACB_TYPE_NONE has no pitch 
1103 
bl_pitch_sh2 = 0;

1104 
break;

1105 
} 
1106  
1107 
synth_block(s, gb, n, block_nsamples, bl_pitch_sh2, 
1108 
lsps, prev_lsps, &frame_descs[bd_idx], 
1109 
&excitation[n * block_nsamples], 
1110 
&synth[n * block_nsamples]); 
1111 
} 
1112  
1113 
/* Averaging projection filter, if applicable. Else, just copy samples

1114 
* from synthesis buffer */

1115 
if (s>do_apf) {

1116 
// FIXME this is where APF would take place, currently not implemented

1117 
av_log_missing_feature(ctx, "APF", 0); 
1118 
s>do_apf = 0;

1119 
} //else

1120 
for (n = 0; n < 160; n++) 
1121 
samples[n] = av_clipf(synth[n], 1.0, 1.0); 
1122  
1123 
/* Cache values for next frame */

1124 
s>frame_cntr++; 
1125 
if (s>frame_cntr >= 0xFFFF) s>frame_cntr = 0xFFFF; // i.e. modulo (%) 
1126 
s>last_acb_type = frame_descs[bd_idx].acb_type; 
1127 
switch (frame_descs[bd_idx].acb_type) {

1128 
case ACB_TYPE_NONE:

1129 
s>last_pitch_val = 0;

1130 
break;

1131 
case ACB_TYPE_ASYMMETRIC:

1132 
s>last_pitch_val = cur_pitch_val; 
1133 
break;

1134 
case ACB_TYPE_HAMMING:

1135 
s>last_pitch_val = pitch[frame_descs[bd_idx].n_blocks  1];

1136 
break;

1137 
} 
1138  
1139 
return 0; 
1140 
} 
1141  
/**
 * Ensure minimum value for first item, maximum value for last value,
 * proper spacing between each value and proper ordering.
 *
 * The array is modified in place in two passes:
 *  -# clamp: lsps[0] is raised to at least 0.0015 * PI, every following
 *     entry is raised to at least its predecessor + 0.0125 * PI, and the
 *     last entry is capped at 0.9985 * PI;
 *  -# reorder: if (and only if) any adjacent pair is out of order after
 *     clamping, the whole array is insertion-sorted once.
 *
 * @param lsps array of LSPs, modified in place
 * @param num  size of LSP array; must be >= 1
 *
 * @note basically a double version of #ff_acelp_reorder_lsf(), might be
 *       useful to put in a generic location later on. Parts are also
 *       present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
 *       which is in float.
 */
static void stabilize_lsps(double *lsps, int num)
{
    int n, m, l;

    /* set minimum value for first, maximum value for last and minimum
     * spacing between LSF values.
     * Very similar to ff_set_min_dist_lsf(), but in double. */
    lsps[0]       = FFMAX(lsps[0],       0.0015 * M_PI);
    for (n = 1; n < num; n++)
        lsps[n]   = FFMAX(lsps[n],       lsps[n - 1] + 0.0125 * M_PI);
    lsps[num - 1] = FFMIN(lsps[num - 1], 0.9985 * M_PI);

    /* reorder (looks like one-time / non-recursed bubblesort).
     * Very similar to ff_sort_nearly_sorted_floats(), but in double.
     * The outer loop only detects the first inversion; the inner loops
     * then sort the complete array and we are done. */
    for (n = 1; n < num; n++) {
        if (lsps[n] < lsps[n - 1]) {
            for (m = 1; m < num; m++) {
                double tmp = lsps[m];

                /* shift larger entries one slot up, then drop tmp into
                 * the gap that was opened */
                for (l = m - 1; l >= 0; l--) {
                    if (lsps[l] <= tmp)
                        break;
                    lsps[l + 1] = lsps[l];
                }
                lsps[l + 1] = tmp;
            }
            break;
        }
    }
}
1182  
1183 
/**

1184 
* Test if there's enough bits to read 1 superframe.

1185 
*

1186 
* @param orig_gb bit I/O context used for reading. This function

1187 
* does not modify the state of the bitreader; it

1188 
* only uses it to copy the current stream position

1189 
* @param s WMA Voice decoding context private data

1190 
* @return 1 if unsupported, 1 on not enough bits or 0 if OK.

1191 
*/

1192 
static int check_bits_for_superframe(GetBitContext *orig_gb, 
1193 
WMAVoiceContext *s) 
1194 
{ 
1195 
GetBitContext s_gb, *gb = &s_gb; 
1196 
int n, need_bits, bd_idx;

1197 
const struct frame_type_desc *frame_desc; 
1198  
1199 
/* initialize a copy */

1200 
init_get_bits(gb, orig_gb>buffer, orig_gb>size_in_bits); 
1201 
skip_bits_long(gb, get_bits_count(orig_gb)); 
1202 
assert(get_bits_left(gb) == get_bits_left(orig_gb)); 
1203  
1204 
/* superframe header */

1205 
if (get_bits_left(gb) < 14) 
1206 
return 1; 
1207 
if (!get_bits1(gb))

1208 
return 1; // WMAProinWMAVoice superframe 
1209 
if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe 
1210 
if (s>has_residual_lsps) { // residual LSPs (for all frames) 
1211 
if (get_bits_left(gb) < s>sframe_lsp_bitsize)

1212 
return 1; 
1213 
skip_bits_long(gb, s>sframe_lsp_bitsize); 
1214 
} 
1215  
1216 
/* frames */

1217 
for (n = 0; n < MAX_FRAMES; n++) { 
1218 
int aw_idx_is_ext = 0; 
1219  
1220 
if (!s>has_residual_lsps) { // independent LSPs (perframe) 
1221 
if (get_bits_left(gb) < s>frame_lsp_bitsize) return 1; 
1222 
skip_bits_long(gb, s>frame_lsp_bitsize); 
1223 
} 
1224 
bd_idx = s>vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)]; 
1225 
if (bd_idx < 0) 
1226 
return 1; // invalid frame type VLC code 
1227 
frame_desc = &frame_descs[bd_idx]; 
1228 
if (frame_desc>acb_type == ACB_TYPE_ASYMMETRIC) {

1229 
if (get_bits_left(gb) < s>pitch_nbits)

1230 
return 1; 
1231 
skip_bits_long(gb, s>pitch_nbits); 
1232 
} 
1233 
if (frame_desc>fcb_type == FCB_TYPE_SILENCE) {

1234 
skip_bits(gb, 8);

1235 
} else if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) { 
1236 
int tmp = get_bits(gb, 6); 
1237 
if (tmp >= 0x36) { 
1238 
skip_bits(gb, 2);

1239 
aw_idx_is_ext = 1;

1240 
} 
1241 
} 
1242  
1243 
/* blocks */

1244 
if (frame_desc>acb_type == ACB_TYPE_HAMMING) {

1245 
need_bits = s>block_pitch_nbits + 
1246 
(frame_desc>n_blocks  1) * s>block_delta_pitch_nbits;

1247 
} else if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) { 
1248 
need_bits = 2 * !aw_idx_is_ext;

1249 
} else

1250 
need_bits = 0;

1251 
need_bits += frame_desc>frame_size; 
1252 
if (get_bits_left(gb) < need_bits)

1253 
return 1; 
1254 
skip_bits_long(gb, need_bits); 
1255 
} 
1256  
1257 
return 0; 
1258 
} 
1259  
1260 
/**

1261 
* Synthesize output samples for a single superframe. If we have any data

1262 
* cached in s>sframe_cache, that will be used instead of whatever is loaded

1263 
* in s>gb.

1264 
*

1265 
* WMA Voice superframes contain 3 frames, each containing 160 audio samples,

1266 
* to give a total of 480 samples per frame. See #synth_frame() for frame

1267 
* parsing. In addition to 3 frames, superframes can also contain the LSPs

1268 
* (if these are globally specified for all frames (residually); they can

1269 
* also be specified individually perframe. See the s>has_residual_lsps

1270 
* option), and can specify the number of samples encoded in this superframe

1271 
* (if less than 480), usually used to prevent blanks at track boundaries.

1272 
*

1273 
* @param ctx WMA Voice decoder context

1274 
* @param samples pointer to output buffer for voice samples

1275 
* @param data_size pointer containing the size of #samples on input, and the

1276 
* amount of #samples filled on output

1277 
* @return 0 on success, <0 on error or 1 if there was not enough data to

1278 
* fully parse the superframe

1279 
*/

1280 
static int synth_superframe(AVCodecContext *ctx, 
1281 
float *samples, int *data_size) 
1282 
{ 
1283 
WMAVoiceContext *s = ctx>priv_data; 
1284 
GetBitContext *gb = &s>gb, s_gb; 
1285 
int n, res, n_samples = 480; 
1286 
double lsps[MAX_FRAMES][MAX_LSPS];

1287 
const double *mean_lsf = s>lsps == 16 ? 
1288 
wmavoice_mean_lsf16[s>lsp_def_mode] : wmavoice_mean_lsf10[s>lsp_def_mode]; 
1289 
float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12]; 
1290 
float synth[MAX_LSPS + MAX_SFRAMESIZE];

1291  
1292 
memcpy(synth, s>synth_history, 
1293 
s>lsps * sizeof(*synth));

1294 
memcpy(excitation, s>excitation_history, 
1295 
s>history_nsamples * sizeof(*excitation));

1296  
1297 
if (s>sframe_cache_size > 0) { 
1298 
gb = &s_gb; 
1299 
init_get_bits(gb, s>sframe_cache, s>sframe_cache_size); 
1300 
s>sframe_cache_size = 0;

1301 
} 
1302  
1303 
if ((res = check_bits_for_superframe(gb, s)) == 1) return 1; 
1304  
1305 
/* First bit is speech/music bit, it differentiates between WMAVoice

1306 
* speech samples (the actual codec) and WMAVoice music samples, which

1307 
* are really WMAProinWMAVoicesuperframes. I've never seen those in

1308 
* the wild yet. */

1309 
if (!get_bits1(gb)) {

1310 
av_log_missing_feature(ctx, "WMAProinWMAVoice support", 1); 
1311 
return 1; 
1312 
} 
1313  
1314 
/* (optional) nr. of samples in superframe; always <= 480 and >= 0 */

1315 
if (get_bits1(gb)) {

1316 
if ((n_samples = get_bits(gb, 12)) > 480) { 
1317 
av_log(ctx, AV_LOG_ERROR, 
1318 
"Superframe encodes >480 samples (%d), not allowed\n",

1319 
n_samples); 
1320 
return 1; 
1321 
} 
1322 
} 
1323 
/* Parse LSPs, if global for the superframe (can also be perframe). */

1324 
if (s>has_residual_lsps) {

1325 
double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2]; 
1326  
1327 
for (n = 0; n < s>lsps; n++) 
1328 
prev_lsps[n] = s>prev_lsps[n]  mean_lsf[n]; 
1329  
1330 
if (s>lsps == 10) { 
1331 
dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s>lsp_q_mode);

1332 
} else /* s>lsps == 16 */ 
1333 
dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s>lsp_q_mode);

1334  
1335 
for (n = 0; n < s>lsps; n++) { 
1336 
lsps[0][n] = mean_lsf[n] + (a1[n]  a2[n * 2]); 
1337 
lsps[1][n] = mean_lsf[n] + (a1[s>lsps + n]  a2[n * 2 + 1]); 
1338 
lsps[2][n] += mean_lsf[n];

1339 
} 
1340 
for (n = 0; n < 3; n++) 
1341 
stabilize_lsps(lsps[n], s>lsps); 
1342 
} 
1343  
1344 
/* Parse frames, optionally preceeded by perframe (independent) LSPs. */

1345 
for (n = 0; n < 3; n++) { 
1346 
if (!s>has_residual_lsps) {

1347 
int m;

1348  
1349 
if (s>lsps == 10) { 
1350 
dequant_lsp10i(gb, lsps[n]); 
1351 
} else /* s>lsps == 16 */ 
1352 
dequant_lsp16i(gb, lsps[n]); 
1353  
1354 
for (m = 0; m < s>lsps; m++) 
1355 
lsps[n][m] += mean_lsf[m]; 
1356 
stabilize_lsps(lsps[n], s>lsps); 
1357 
} 
1358  
1359 
if ((res = synth_frame(ctx, gb,

1360 
&samples[n * MAX_FRAMESIZE], 
1361 
lsps[n], n == 0 ? s>prev_lsps : lsps[n  1], 
1362 
&excitation[s>history_nsamples + n * MAX_FRAMESIZE], 
1363 
&synth[s>lsps + n * MAX_FRAMESIZE]))) 
1364 
return res;

1365 
} 
1366  
1367 
/* Statistics? FIXME  we don't check for length, a slight overrun

1368 
* will be caught by internal buffer padding, and anything else

1369 
* will be skipped, not read. */

1370 
if (get_bits1(gb)) {

1371 
res = get_bits(gb, 4);

1372 
skip_bits(gb, 10 * (res + 1)); 
1373 
} 
1374  
1375 
/* Specify nr. of output samples */

1376 
*data_size = n_samples * sizeof(float); 
1377  
1378 
/* Update history */

1379 
memcpy(s>prev_lsps, lsps[2],

1380 
s>lsps * sizeof(*s>prev_lsps));

1381 
memcpy(s>synth_history, &synth[MAX_SFRAMESIZE], 
1382 
s>lsps * sizeof(*synth));

1383 
memcpy(s>excitation_history, &excitation[MAX_SFRAMESIZE], 
1384 
s>history_nsamples * sizeof(*excitation));

1385  
1386 
return 0; 
1387 
} 
1388  
1389 
/**

1390 
* Parse the packet header at the start of each packet (input data to this

1391 
* decoder).

1392 
*

1393 
* @param s WMA Voice decoding context private data

1394 
* @return 1 if not enough bits were available, or 0 on success.

1395 
*/

1396 
static int parse_packet_header(WMAVoiceContext *s) 
1397 
{ 
1398 
GetBitContext *gb = &s>gb; 
1399 
unsigned int res; 
1400  
1401 
if (get_bits_left(gb) < 11) 
1402 
return 1; 
1403 
skip_bits(gb, 4); // packet sequence number 
1404 
s>has_residual_lsps = get_bits1(gb); 
1405 
do {

1406 
res = get_bits(gb, 6); // number of superframes per packet 
1407 
// (minus first one if there is spillover)

1408 
if (get_bits_left(gb) < 6 * (res == 0x3F) + s>spillover_bitsize) 
1409 
return 1; 
1410 
} while (res == 0x3F); 
1411 
s>spillover_nbits = get_bits(gb, s>spillover_bitsize); 
1412  
1413 
return 0; 
1414 
} 
1415  
1416 
/**

1417 
* Copy (unaligned) bits from gb/data/size to pb.

1418 
*

1419 
* @param pb target buffer to copy bits into

1420 
* @param data source buffer to copy bits from

1421 
* @param size size of the source data, in bytes

1422 
* @param gb bit I/O context specifying the current position in the source.

1423 
* data. This function might use this to align the bit position to

1424 
* a wholebyte boundary before calling #ff_copy_bits() on aligned

1425 
* source data

1426 
* @param nbits the amount of bits to copy from source to target

1427 
*

1428 
* @note after calling this function, the current position in the input bit

1429 
* I/O context is undefined.

1430 
*/

1431 
static void copy_bits(PutBitContext *pb, 
1432 
const uint8_t *data, int size, 
1433 
GetBitContext *gb, int nbits)

1434 
{ 
1435 
int rmn_bytes, rmn_bits;

1436  
1437 
rmn_bits = rmn_bytes = get_bits_left(gb); 
1438 
if (rmn_bits < nbits)

1439 
return;

1440 
rmn_bits &= 7; rmn_bytes >>= 3; 
1441 
if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0) 
1442 
put_bits(pb, rmn_bits, get_bits(gb, rmn_bits)); 
1443 
ff_copy_bits(pb, data + size  rmn_bytes, 
1444 
FFMIN(nbits  rmn_bits, rmn_bytes << 3));

1445 
} 
1446  
1447 
/**

1448 
* Packet decoding: a packet is anything that the (ASF) demuxer contains,

1449 
* and we expect that the demuxer / application provides it to us as such

1450 
* (else you'll probably get garbage as output). Every packet has a size of

1451 
* ctx>block_align bytes, starts with a packet header (see

1452 
* #parse_packet_header()), and then a series of superframes. Superframe

1453 
* boundaries may exceed packets, i.e. superframes can split data over

1454 
* multiple (two) packets.

1455 
*

1456 
* For more information about frames, see #synth_superframe().

1457 
*/

1458 
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, 
1459 
int *data_size, AVPacket *avpkt)

1460 
{ 
1461 
WMAVoiceContext *s = ctx>priv_data; 
1462 
GetBitContext *gb = &s>gb; 
1463 
int size, res, pos;

1464  
1465 
if (*data_size < 480 * sizeof(float)) { 
1466 
av_log(ctx, AV_LOG_ERROR, 
1467 
"Output buffer too small (%d given  %lu needed)\n",

1468 
*data_size, 480 * sizeof(float)); 
1469 
return 1; 
1470 
} 
1471 
*data_size = 0;

1472  
1473 
/* Packets are sometimes a multiple of ctx>block_align, with a packet

1474 
* header at each ctx>block_align bytes. However, FFmpeg's ASF demuxer

1475 
* feeds us ASF packets, which may concatenate multiple "codec" packets

1476 
* in a single "muxer" packet, so we artificially emulate that by

1477 
* capping the packet size at ctx>block_align. */

1478 
for (size = avpkt>size; size > ctx>block_align; size = ctx>block_align);

1479 
if (!size)

1480 
return 0; 
1481 
init_get_bits(&s>gb, avpkt>data, size << 3);

1482  
1483 
/* size == ctx>block_align is used to indicate whether we are dealing with

1484 
* a new packet or a packet of which we already read the packet header

1485 
* previously. */

1486 
if (size == ctx>block_align) { // new packet header 
1487 
if ((res = parse_packet_header(s)) < 0) 
1488 
return res;

1489  
1490 
/* If the packet header specifies a s>spillover_nbits, then we want

1491 
* to push out all data of the previous packet (+ spillover) before

1492 
* continuing to parse new superframes in the current packet. */

1493 
if (s>spillover_nbits > 0) { 
1494 
if (s>sframe_cache_size > 0) { 
1495 
int cnt = get_bits_count(gb);

1496 
copy_bits(&s>pb, avpkt>data, size, gb, s>spillover_nbits); 
1497 
flush_put_bits(&s>pb); 
1498 
s>sframe_cache_size += s>spillover_nbits; 
1499 
if ((res = synth_superframe(ctx, data, data_size)) == 0 && 
1500 
*data_size > 0) {

1501 
cnt += s>spillover_nbits; 
1502 
s>skip_bits_next = cnt & 7;

1503 
return cnt >> 3; 
1504 
} else

1505 
skip_bits_long (gb, s>spillover_nbits  cnt + 
1506 
get_bits_count(gb)); // resync

1507 
} else

1508 
skip_bits_long(gb, s>spillover_nbits); // resync

1509 
} 
1510 
} else if (s>skip_bits_next) 
1511 
skip_bits(gb, s>skip_bits_next); 
1512  
1513 
/* Try parsing superframes in current packet */

1514 
s>sframe_cache_size = 0;

1515 
s>skip_bits_next = 0;

1516 
pos = get_bits_left(gb); 
1517 
if ((res = synth_superframe(ctx, data, data_size)) < 0) { 
1518 
return res;

1519 
} else if (*data_size > 0) { 
1520 
int cnt = get_bits_count(gb);

1521 
s>skip_bits_next = cnt & 7;

1522 
return cnt >> 3; 
1523 
} else if ((s>sframe_cache_size = pos) > 0) { 
1524 
/* rewind bit reader to start of last (incomplete) superframe... */

1525 
init_get_bits(gb, avpkt>data, size << 3);

1526 
skip_bits_long(gb, (size << 3)  pos);

1527 
assert(get_bits_left(gb) == pos); 
1528  
1529 
/* ...and cache it for spillover in next packet */

1530 
init_put_bits(&s>pb, s>sframe_cache, SFRAME_CACHE_MAXSIZE); 
1531 
copy_bits(&s>pb, avpkt>data, size, gb, s>sframe_cache_size); 
1532 
// FIXME bad  just copy bytes as whole and add use the

1533 
// skip_bits_next field

1534 
} 
1535  
1536 
return size;

1537 
} 
1538  
1539 
static av_cold void wmavoice_flush(AVCodecContext *ctx) 
1540 
{ 
1541 
WMAVoiceContext *s = ctx>priv_data; 
1542 
int n;

1543  
1544 
s>sframe_cache_size = 0;

1545 
s>skip_bits_next = 0;

1546 
for (n = 0; n < s>lsps; n++) 
1547 
s>prev_lsps[n] = M_PI * (n + 1.0) / (s>lsps + 1.0); 
1548 
memset(s>excitation_history, 0,

1549 
sizeof(*s>excitation_history) * MAX_SIGNAL_HISTORY);

1550 
memset(s>synth_history, 0,

1551 
sizeof(*s>synth_history) * MAX_LSPS);

1552 
memset(s>gain_pred_err, 0,

1553 
sizeof(s>gain_pred_err));

1554 
} 
1555  
1556 
AVCodec wmavoice_decoder = { 
1557 
"wmavoice",

1558 
AVMEDIA_TYPE_AUDIO, 
1559 
CODEC_ID_WMAVOICE, 
1560 
sizeof(WMAVoiceContext),

1561 
wmavoice_decode_init, 
1562 
NULL,

1563 
NULL,

1564 
wmavoice_decode_packet, 
1565 
CODEC_CAP_SUBFRAMES, 
1566 
.flush = wmavoice_flush, 
1567 
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),

1568 
}; 