ffmpeg / libavcodec / wmavoice.c @ 72415b2a
History  View  Annotate  Download (61.6 KB)
1 
/*


2 
* Windows Media Audio Voice decoder.

3 
* Copyright (c) 2009 Ronald S. Bultje

4 
*

5 
* This file is part of FFmpeg.

6 
*

7 
* FFmpeg is free software; you can redistribute it and/or

8 
* modify it under the terms of the GNU Lesser General Public

9 
* License as published by the Free Software Foundation; either

10 
* version 2.1 of the License, or (at your option) any later version.

11 
*

12 
* FFmpeg is distributed in the hope that it will be useful,

13 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

14 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

15 
* Lesser General Public License for more details.

16 
*

17 
* You should have received a copy of the GNU Lesser General Public

18 
* License along with FFmpeg; if not, write to the Free Software

19 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

20 
*/

21  
22 
/**

23 
* @file libavcodec/wmavoice.c

24 
* @brief Windows Media Audio Voice compatible decoder

25 
* @author Ronald S. Bultje <rsbultje@gmail.com>

26 
*/

27  
28 
#include <math.h> 
29 
#include "avcodec.h" 
30 
#include "get_bits.h" 
31 
#include "put_bits.h" 
32 
#include "wmavoice_data.h" 
33 
#include "celp_math.h" 
34 
#include "celp_filters.h" 
35 
#include "acelp_vectors.h" 
36 
#include "acelp_filters.h" 
37 
#include "lsp.h" 
38 
#include "libavutil/lzo.h" 
39  
/* Compile-time limits used to size the decoder's static buffers. */
#define MAX_BLOCKS           8   ///< maximum number of blocks per frame
#define MAX_LSPS             16  ///< maximum filter order
#define MAX_FRAMES           3   ///< maximum number of frames per superframe
#define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
#define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
#define MAX_SFRAMESIZE       (MAX_FRAMESIZE * MAX_FRAMES)
                                 ///< maximum number of samples per superframe
#define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that
                                 ///< was split over two packets
#define VLC_NBITS            6   ///< number of bits to read per VLC iteration
50  
51 
/**

52 
* Frame type VLC coding.

53 
*/

54 
static VLC frame_type_vlc;

55  
/**
 * Adaptive codebook types.
 */
enum {
    ACB_TYPE_NONE       = 0, ///< no adaptive codebook (only hardcoded fixed)
    ACB_TYPE_ASYMMETRIC = 1, ///< adaptive codebook with per-frame pitch, which
                             ///< we interpolate to get a per-sample pitch.
                             ///< Signal is generated using an asymmetric sinc
                             ///< window function
                             ///< @note see #wmavoice_ipol1_coeffs
    ACB_TYPE_HAMMING    = 2  ///< Per-block pitch with signal generation using
                             ///< a Hamming sinc window function
                             ///< @note see #wmavoice_ipol2_coeffs
};
70  
/**
 * Fixed codebook types.
 */
enum {
    FCB_TYPE_SILENCE    = 0, ///< comfort noise during silence
                             ///< generated from a hardcoded (fixed) codebook
                             ///< with per-frame (low) gain values
    FCB_TYPE_HARDCODED  = 1, ///< hardcoded (fixed) codebook with per-block
                             ///< gain values
    FCB_TYPE_AW_PULSES  = 2, ///< Pitch-adaptive window (AW) pulse signals,
                             ///< used in particular for low-bitrate streams
    FCB_TYPE_EXC_PULSES = 3, ///< Innovation (fixed) codebook pulse sets in
                             ///< combinations of either single pulses or
                             ///< pulse pairs
};
86  
87 
/**

88 
* Description of frame types.

89 
*/

90 
static const struct frame_type_desc { 
91 
uint8_t n_blocks; ///< amount of blocks per frame (each block

92 
///< (contains 160/#n_blocks samples)

93 
uint8_t log_n_blocks; ///< log2(#n_blocks)

94 
uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)

95 
uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)

96 
uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs

97 
///< (rather than just one single pulse)

98 
///< only if #fcb_type == #FCB_TYPE_EXC_PULSES

99 
uint16_t frame_size; ///< the amount of bits that make up the block

100 
///< data (per frame)

101 
} frame_descs[17] = {

102 
{ 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 }, 
103 
{ 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 }, 
104 
{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 }, 
105 
{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 }, 
106 
{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 }, 
107 
{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 }, 
108 
{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 }, 
109 
{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 }, 
110 
{ 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 }, 
111 
{ 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 }, 
112 
{ 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 }, 
113 
{ 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 }, 
114 
{ 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 }, 
115 
{ 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 }, 
116 
{ 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 }, 
117 
{ 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 }, 
118 
{ 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 } 
119 
}; 
120  
121 
/**

122 
* WMA Voice decoding context.

123 
*/

124 
typedef struct { 
125 
/**

126 
* @defgroup struct_global Global values

127 
* Global values, specified in the stream header / extradata or used

128 
* all over.

129 
* @{

130 
*/

131 
GetBitContext gb; ///< packet bitreader. During decoder init,

132 
///< it contains the extradata from the

133 
///< demuxer. During decoding, it contains

134 
///< packet data.

135 
int8_t vbm_tree[25]; ///< converts VLC codes to frame type 
136  
137 
int spillover_bitsize; ///< number of bits used to specify 
138 
///< #spillover_nbits in the packet header

139 
///< = ceil(log2(ctx>block_align << 3))

140 
int history_nsamples; ///< number of samples in history for signal 
141 
///< prediction (through ACB)

142  
143 
int do_apf; ///< whether to apply the averaged 
144 
///< projection filter (APF)

145  
146 
int lsps; ///< number of LSPs per frame [10 or 16] 
147 
int lsp_q_mode; ///< defines quantizer defaults [0, 1] 
148 
int lsp_def_mode; ///< defines different sets of LSP defaults 
149 
///< [0, 1]

150 
int frame_lsp_bitsize; ///< size (in bits) of LSPs, when encoded 
151 
///< perframe (independent coding)

152 
int sframe_lsp_bitsize; ///< size (in bits) of LSPs, when encoded 
153 
///< per superframe (residual coding)

154  
155 
int min_pitch_val; ///< base value for pitch parsing code 
156 
int max_pitch_val; ///< max value + 1 for pitch parsing 
157 
int pitch_nbits; ///< number of bits used to specify the 
158 
///< pitch value in the frame header

159 
int block_pitch_nbits; ///< number of bits used to specify the 
160 
///< first block's pitch value

161 
int block_pitch_range; ///< range of the block pitch 
162 
int block_delta_pitch_nbits; ///< number of bits used to specify the 
163 
///< delta pitch between this and the last

164 
///< block's pitch value, used in all but

165 
///< first block

166 
int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is 
167 
///< from this to +this1)

168 
uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale 
169 
///< conversion

170  
171 
/**

172 
* @}

173 
* @defgroup struct_packet Packet values

174 
* Packet values, specified in the packet header or related to a packet.

175 
* A packet is considered to be a single unit of data provided to this

176 
* decoder by the demuxer.

177 
* @{

178 
*/

179 
int spillover_nbits; ///< number of bits of the previous packet's 
180 
///< last superframe preceeding this

181 
///< packet's first full superframe (useful

182 
///< for resynchronization also)

183 
int has_residual_lsps; ///< if set, superframes contain one set of 
184 
///< LSPs that cover all frames, encoded as

185 
///< independent and residual LSPs; if not

186 
///< set, each frame contains its own, fully

187 
///< independent, LSPs

188 
int skip_bits_next; ///< number of bits to skip at the next call 
189 
///< to #wmavoice_decode_packet() (since

190 
///< they're part of the previous superframe)

191  
192 
uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE]; 
193 
///< cache for superframe data split over

194 
///< multiple packets

195 
int sframe_cache_size; ///< set to >0 if we have data from an 
196 
///< (incomplete) superframe from a previous

197 
///< packet that spilled over in the current

198 
///< packet; specifies the amount of bits in

199 
///< #sframe_cache

200 
PutBitContext pb; ///< bitstream writer for #sframe_cache

201  
202 
/**

203 
* @}

204 
* @defgroup struct_frame Frame and superframe values

205 
* Superframe and frame data  these can change from frame to frame,

206 
* although some of them do in that case serve as a cache / history for

207 
* the next frame or superframe.

208 
* @{

209 
*/

210 
double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous 
211 
///< superframe

212 
int last_pitch_val; ///< pitch value of the previous frame 
213 
int last_acb_type; ///< frame type [02] of the previous frame 
214 
int pitch_diff_sh16; ///< ((cur_pitch_val  #last_pitch_val) 
215 
///< << 16) / #MAX_FRAMESIZE

216 
float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE 
217  
218 
int aw_idx_is_ext; ///< whether the AW index was encoded in 
219 
///< 8 bits (instead of 6)

220 
int aw_pulse_range; ///< the range over which #aw_pulse_set1() 
221 
///< can apply the pulse, relative to the

222 
///< value in aw_first_pulse_off. The exact

223 
///< position of the first AWpulse is within

224 
///< [pulse_off, pulse_off + this], and

225 
///< depends on bitstream values; [16 or 24]

226 
int aw_n_pulses[2]; ///< number of AWpulses in each block; note 
227 
///< that this number can be negative (in

228 
///< which case it basically means "zero")

229 
int aw_first_pulse_off[2]; ///< index of first sample to which to 
230 
///< apply AWpulses, or 0xff if unset

231 
int aw_next_pulse_off_cache; ///< the position (relative to start of the 
232 
///< second block) at which pulses should

233 
///< start to be positioned, serves as a

234 
///< cache for pitchadaptive window pulses

235 
///< between blocks

236  
237 
int frame_cntr; ///< current frame index [0  0xFFFE]; is 
238 
///< only used for comfort noise in #pRNG()

239 
float gain_pred_err[6]; ///< cache for gain prediction 
240 
float excitation_history[MAX_SIGNAL_HISTORY];

241 
///< cache of the signal of previous

242 
///< superframes, used as a history for

243 
///< signal generation

244 
float synth_history[MAX_LSPS]; ///< see #excitation_history 
245 
/**

246 
* @}

247 
*/

248 
} WMAVoiceContext; 
249  
250 
/**

251 
* Sets up the variable bit mode (VBM) tree from container extradata.

252 
* @param gb bit I/O context.

253 
* The bit context (s>gb) should be loaded with byte 2346 of the

254 
* container extradata (i.e. the ones containing the VBM tree).

255 
* @param vbm_tree pointer to array to which the decoded VBM tree will be

256 
* written.

257 
* @return 0 on success, <0 on error.

258 
*/

259 
static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25]) 
260 
{ 
261 
static const uint8_t bits[] = { 
262 
2, 2, 2, 4, 4, 4, 
263 
6, 6, 6, 8, 8, 8, 
264 
10, 10, 10, 12, 12, 12, 
265 
14, 14, 14, 14 
266 
}; 
267 
static const uint16_t codes[] = { 
268 
0x0000, 0x0001, 0x0002, // 00/01/10 
269 
0x000c, 0x000d, 0x000e, // 11+00/01/10 
270 
0x003c, 0x003d, 0x003e, // 1111+00/01/10 
271 
0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10 
272 
0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10 
273 
0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10 
274 
0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx 
275 
}; 
276 
int cntr[8], n, res; 
277  
278 
memset(vbm_tree, 0xff, sizeof(vbm_tree)); 
279 
memset(cntr, 0, sizeof(cntr)); 
280 
for (n = 0; n < 17; n++) { 
281 
res = get_bits(gb, 3);

282 
if (cntr[res] > 3) // should be >= 3 + (res == 7)) 
283 
return 1; 
284 
vbm_tree[res * 3 + cntr[res]++] = n;

285 
} 
286 
INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),

287 
bits, 1, 1, codes, 2, 2, 132); 
288 
return 0; 
289 
} 
290  
291 
/**

292 
* Set up decoder with parameters from demuxer (extradata etc.).

293 
*/

294 
static av_cold int wmavoice_decode_init(AVCodecContext *ctx) 
295 
{ 
296 
int n, flags, pitch_range, lsp16_flag;

297 
WMAVoiceContext *s = ctx>priv_data; 
298  
299 
/**

300 
* Extradata layout:

301 
*  byte 018: WMAProinWMAVoice extradata (see wmaprodec.c),

302 
*  byte 1922: flags field (annoyingly in LE; see below for known

303 
* values),

304 
*  byte 2346: variable bitmode tree (really just 17 * 3 bits,

305 
* rest is 0).

306 
*/

307 
if (ctx>extradata_size != 46) { 
308 
av_log(ctx, AV_LOG_ERROR, 
309 
"Invalid extradata size %d (should be 46)\n",

310 
ctx>extradata_size); 
311 
return 1; 
312 
} 
313 
flags = AV_RL32(ctx>extradata + 18);

314 
s>spillover_bitsize = 3 + av_ceil_log2(ctx>block_align);

315 
s>do_apf = flags & 0x1;

316 
s>lsp_q_mode = !!(flags & 0x2000);

317 
s>lsp_def_mode = !!(flags & 0x4000);

318 
lsp16_flag = flags & 0x1000;

319 
if (lsp16_flag) {

320 
s>lsps = 16;

321 
s>frame_lsp_bitsize = 34;

322 
s>sframe_lsp_bitsize = 60;

323 
} else {

324 
s>lsps = 10;

325 
s>frame_lsp_bitsize = 24;

326 
s>sframe_lsp_bitsize = 48;

327 
} 
328 
for (n = 0; n < s>lsps; n++) 
329 
s>prev_lsps[n] = M_PI * (n + 1.0) / (s>lsps + 1.0); 
330  
331 
init_get_bits(&s>gb, ctx>extradata + 22, (ctx>extradata_size  22) << 3); 
332 
if (decode_vbmtree(&s>gb, s>vbm_tree) < 0) { 
333 
av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");

334 
return 1; 
335 
} 
336  
337 
s>min_pitch_val = ((ctx>sample_rate << 8) / 400 + 50) >> 8; 
338 
s>max_pitch_val = ((ctx>sample_rate << 8) * 37 / 2000 + 50) >> 8; 
339 
pitch_range = s>max_pitch_val  s>min_pitch_val; 
340 
s>pitch_nbits = av_ceil_log2(pitch_range); 
341 
s>last_pitch_val = 40;

342 
s>last_acb_type = ACB_TYPE_NONE; 
343 
s>history_nsamples = s>max_pitch_val + 8;

344  
345 
if (s>min_pitch_val < 1  s>history_nsamples > MAX_SIGNAL_HISTORY) { 
346 
int min_sr = ((((1 << 8)  50) * 400) + 0xFF) >> 8, 
347 
max_sr = ((((MAX_SIGNAL_HISTORY  8) << 8) + 205) * 2000 / 37) >> 8; 
348  
349 
av_log(ctx, AV_LOG_ERROR, 
350 
"Unsupported samplerate %d (min=%d, max=%d)\n",

351 
ctx>sample_rate, min_sr, max_sr); // 32222097 Hz

352  
353 
return 1; 
354 
} 
355  
356 
s>block_conv_table[0] = s>min_pitch_val;

357 
s>block_conv_table[1] = (pitch_range * 25) >> 6; 
358 
s>block_conv_table[2] = (pitch_range * 44) >> 6; 
359 
s>block_conv_table[3] = s>max_pitch_val  1; 
360 
s>block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF; 
361 
s>block_delta_pitch_nbits = 1 + av_ceil_log2(s>block_delta_pitch_hrange);

362 
s>block_pitch_range = s>block_conv_table[2] +

363 
s>block_conv_table[3] + 1 + 
364 
2 * (s>block_conv_table[1]  2 * s>min_pitch_val); 
365 
s>block_pitch_nbits = av_ceil_log2(s>block_pitch_range); 
366  
367 
ctx>sample_fmt = SAMPLE_FMT_FLT; 
368  
369 
return 0; 
370 
} 
371  
/**
 * Dequantize LSPs
 *
 * Each stage n adds (base_q[n] + mul_q[n] * entry) to every output value,
 * where the entries come from row values[n] of that stage's sub-table.
 * The sub-tables are stored back to back in @p table; stage n's sub-table
 * holds sizes[n] rows of @p num bytes each.
 *
 * @param lsps output pointer to the array that will hold the LSPs
 * @param num number of LSPs to be dequantized
 * @param values quantized values, contains n_stages values
 * @param sizes range (i.e. max value) of each quantized value
 * @param n_stages number of dequantization runs
 * @param table dequantization table to be used
 * @param mul_q LSF multiplier
 * @param base_q base (lowest) LSF values
 */
static void dequant_lsps(double *lsps, int num,
                         const uint16_t *values,
                         const uint16_t *sizes,
                         int n_stages, const uint8_t *table,
                         const double *mul_q,
                         const double *base_q)
{
    int n, m;

    memset(lsps, 0, num * sizeof(*lsps));
    for (n = 0; n < n_stages; n++) {
        const uint8_t *t_off = &table[values[n] * num];
        double base = base_q[n], mul = mul_q[n];

        for (m = 0; m < num; m++)
            lsps[m] += base + mul * t_off[m];

        /* advance to the next stage's sub-table */
        table += sizes[n] * num;
    }
}
403  
404 
/**

405 
* @defgroup lsp_dequant LSP dequantization routines

406 
* LSP dequantization routines, for 10/16LSPs and independent/residual coding.

407 
* @note we assume enough bits are available, caller should check.

408 
* lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;

409 
* lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.

410 
* @{

411 
*/

412 
/**

413 
* Parse 10 independentlycoded LSPs.

414 
*/

415 
static void dequant_lsp10i(GetBitContext *gb, double *lsps) 
416 
{ 
417 
static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 }; 
418 
static const double mul_lsf[4] = { 
419 
5.2187144800e3, 1.4626986422e3, 
420 
9.6179549166e4, 1.1325736225e3 
421 
}; 
422 
static const double base_lsf[4] = { 
423 
M_PI * 2.15522e1, M_PI * 6.1646e2, 
424 
M_PI * 3.3486e2, M_PI * 5.7408e2 
425 
}; 
426 
uint16_t v[4];

427  
428 
v[0] = get_bits(gb, 8); 
429 
v[1] = get_bits(gb, 6); 
430 
v[2] = get_bits(gb, 5); 
431 
v[3] = get_bits(gb, 5); 
432  
433 
dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i, 
434 
mul_lsf, base_lsf); 
435 
} 
436  
437 
/**

438 
* Parse 10 independentlycoded LSPs, and then derive the tables to

439 
* generate LSPs for the other frames from them (residual coding).

440 
*/

441 
static void dequant_lsp10r(GetBitContext *gb, 
442 
double *i_lsps, const double *old, 
443 
double *a1, double *a2, int q_mode) 
444 
{ 
445 
static const uint16_t vec_sizes[3] = { 128, 64, 64 }; 
446 
static const double mul_lsf[3] = { 
447 
2.5807601174e3, 1.2354460219e3, 1.1763821673e3 
448 
}; 
449 
static const double base_lsf[3] = { 
450 
M_PI * 1.07448e1, M_PI * 5.2706e2, M_PI * 5.1634e2 
451 
}; 
452 
const float (*ipol_tab)[2][10] = q_mode ? 
453 
wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a; 
454 
uint16_t interpol, v[3];

455 
int n;

456  
457 
dequant_lsp10i(gb, i_lsps); 
458  
459 
interpol = get_bits(gb, 5);

460 
v[0] = get_bits(gb, 7); 
461 
v[1] = get_bits(gb, 6); 
462 
v[2] = get_bits(gb, 6); 
463  
464 
for (n = 0; n < 10; n++) { 
465 
double delta = old[n]  i_lsps[n];

466 
a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

467 
a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n]; 
468 
} 
469  
470 
dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r, 
471 
mul_lsf, base_lsf); 
472 
} 
473  
474 
/**

475 
* Parse 16 independentlycoded LSPs.

476 
*/

477 
static void dequant_lsp16i(GetBitContext *gb, double *lsps) 
478 
{ 
479 
static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 }; 
480 
static const double mul_lsf[5] = { 
481 
3.3439586280e3, 6.9908173703e4, 
482 
3.3216608306e3, 1.0334960326e3, 
483 
3.1899104283e3 
484 
}; 
485 
static const double base_lsf[5] = { 
486 
M_PI * 1.27576e1, M_PI * 2.4292e2, 
487 
M_PI * 1.28094e1, M_PI * 3.2128e2, 
488 
M_PI * 1.29816e1 
489 
}; 
490 
uint16_t v[5];

491  
492 
v[0] = get_bits(gb, 8); 
493 
v[1] = get_bits(gb, 6); 
494 
v[2] = get_bits(gb, 7); 
495 
v[3] = get_bits(gb, 6); 
496 
v[4] = get_bits(gb, 7); 
497  
498 
dequant_lsps( lsps, 5, v, vec_sizes, 2, 
499 
wmavoice_dq_lsp16i1, mul_lsf, base_lsf); 
500 
dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2, 
501 
wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]); 
502 
dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1, 
503 
wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]); 
504 
} 
505  
506 
/**

507 
* Parse 16 independentlycoded LSPs, and then derive the tables to

508 
* generate LSPs for the other frames from them (residual coding).

509 
*/

510 
static void dequant_lsp16r(GetBitContext *gb, 
511 
double *i_lsps, const double *old, 
512 
double *a1, double *a2, int q_mode) 
513 
{ 
514 
static const uint16_t vec_sizes[3] = { 128, 128, 128 }; 
515 
static const double mul_lsf[3] = { 
516 
1.2232979501e3, 1.4062241527e3, 1.6114744851e3 
517 
}; 
518 
static const double base_lsf[3] = { 
519 
M_PI * 5.5830e2, M_PI * 5.2908e2, M_PI * 5.4776e2 
520 
}; 
521 
const float (*ipol_tab)[2][16] = q_mode ? 
522 
wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a; 
523 
uint16_t interpol, v[3];

524 
int n;

525  
526 
dequant_lsp16i(gb, i_lsps); 
527  
528 
interpol = get_bits(gb, 5);

529 
v[0] = get_bits(gb, 7); 
530 
v[1] = get_bits(gb, 7); 
531 
v[2] = get_bits(gb, 7); 
532  
533 
for (n = 0; n < 16; n++) { 
534 
double delta = old[n]  i_lsps[n];

535 
a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

536 
a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n]; 
537 
} 
538  
539 
dequant_lsps( a2, 10, v, vec_sizes, 1, 
540 
wmavoice_dq_lsp16r1, mul_lsf, base_lsf); 
541 
dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1, 
542 
wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]); 
543 
dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1, 
544 
wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]); 
545 
} 
546  
547 
/**

548 
* @}

549 
* @defgroup aw Pitchadaptive window coding functions

550 
* The next few functions are for pitchadaptive window coding.

551 
* @{

552 
*/

553 
/**

554 
* Parse the offset of the first pitchadaptive window pulses, and

555 
* the distribution of pulses between the two blocks in this frame.

556 
* @param s WMA Voice decoding context private data

557 
* @param gb bit I/O context

558 
* @param pitch pitch for each block in this frame

559 
*/

560 
static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, 
561 
const int *pitch) 
562 
{ 
563 
static const int16_t start_offset[94] = { 
564 
11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 
565 
13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26, 
566 
27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43, 
567 
45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 
568 
69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91, 
569 
93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115, 
570 
117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 
571 
141, 143, 145, 147, 149, 151, 153, 155, 157, 159 
572 
}; 
573 
int bits, offset;

574  
575 
/* position of pulse */

576 
s>aw_idx_is_ext = 0;

577 
if ((bits = get_bits(gb, 6)) >= 54) { 
578 
s>aw_idx_is_ext = 1;

579 
bits += (bits  54) * 3 + get_bits(gb, 2); 
580 
} 
581  
582 
/* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count

583 
* the distribution of the pulses in each block contained in this frame. */

584 
s>aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16; 
585 
for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ; 
586 
s>aw_n_pulses[0] = (pitch[0]  1 + MAX_FRAMESIZE / 2  offset) / pitch[0]; 
587 
s>aw_first_pulse_off[0] = offset  s>aw_pulse_range / 2; 
588 
offset += s>aw_n_pulses[0] * pitch[0]; 
589 
s>aw_n_pulses[1] = (pitch[1]  1 + MAX_FRAMESIZE  offset) / pitch[1]; 
590 
s>aw_first_pulse_off[1] = offset  (MAX_FRAMESIZE + s>aw_pulse_range) / 2; 
591  
592 
/* if continuing from a position before the block, reset position to

593 
* start of block (when corrected for the range over which it can be

594 
* spread in aw_pulse_set1()). */

595 
if (start_offset[bits] < MAX_FRAMESIZE / 2) { 
596 
while (s>aw_first_pulse_off[1]  pitch[1] + s>aw_pulse_range > 0) 
597 
s>aw_first_pulse_off[1] = pitch[1]; 
598 
if (start_offset[bits] < 0) 
599 
while (s>aw_first_pulse_off[0]  pitch[0] + s>aw_pulse_range > 0) 
600 
s>aw_first_pulse_off[0] = pitch[0]; 
601 
} 
602 
} 
603  
604 
/**

605 
* Apply second set of pitchadaptive window pulses.

606 
* @param s WMA Voice decoding context private data

607 
* @param gb bit I/O context

608 
* @param block_idx block index in frame [0, 1]

609 
* @param fcb structure containing fixed codebook vector info

610 
*/

611 
static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, 
612 
int block_idx, AMRFixed *fcb)

613 
{ 
614 
uint16_t use_mask[7]; // only 5 are used, rest is padding 
615 
/* in this function, idx is the index in the 80bit (+ padding) use_mask

616 
* bitarray. Since use_mask consists of 16bit values, the lower 4 bits

617 
* of idx are the position of the bit within a particular item in the

618 
* array (0 being the most significant bit, and 15 being the least

619 
* significant bit), and the remainder (>> 4) is the index in the

620 
* use_mask[]array. This is faster and uses less memory than using a

621 
* 80byte/80int array. */

622 
int pulse_off = s>aw_first_pulse_off[block_idx],

623 
pulse_start, n, idx, range, aidx, start_off = 0;

624  
625 
/* set offset of first pulse to within this block */

626 
if (s>aw_n_pulses[block_idx] > 0) 
627 
while (pulse_off + s>aw_pulse_range < 1) 
628 
pulse_off += fcb>pitch_lag; 
629  
630 
/* find range per pulse */

631 
if (s>aw_n_pulses[0] > 0) { 
632 
if (block_idx == 0) { 
633 
range = 32;

634 
} else /* block_idx = 1 */ { 
635 
range = 8;

636 
if (s>aw_n_pulses[block_idx] > 0) 
637 
pulse_off = s>aw_next_pulse_off_cache; 
638 
} 
639 
} else

640 
range = 16;

641 
pulse_start = s>aw_n_pulses[block_idx] > 0 ? pulse_off  range / 2 : 0; 
642  
643 
/* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,

644 
* in the range of [pulse_off, pulse_off + s>aw_pulse_range], and thus

645 
* we exclude that range from being pulsed again in this function. */

646 
memset( use_mask, 1, 5 * sizeof(use_mask[0])); 
647 
memset(&use_mask[5], 0, 2 * sizeof(use_mask[0])); 
648 
if (s>aw_n_pulses[block_idx] > 0) 
649 
for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb>pitch_lag) { 
650 
int excl_range = s>aw_pulse_range; // always 16 or 24 
651 
uint16_t *use_mask_ptr = &use_mask[idx >> 4];

652 
int first_sh = 16  (idx & 15); 
653 
*use_mask_ptr++ &= 0xFFFF << first_sh;

654 
excl_range = first_sh; 
655 
if (excl_range >= 16) { 
656 
*use_mask_ptr++ = 0;

657 
*use_mask_ptr &= 0xFFFF >> (excl_range  16); 
658 
} else

659 
*use_mask_ptr &= 0xFFFF >> excl_range;

660 
} 
661  
662 
/* find the 'aidx'th offset that is not excluded */

663 
aidx = get_bits(gb, s>aw_n_pulses[0] > 0 ? 5  2 * block_idx : 4); 
664 
for (n = 0; n <= aidx; pulse_start++) { 
665 
for (idx = pulse_start; idx < 0; idx += fcb>pitch_lag) ; 
666 
if (idx >= MAX_FRAMESIZE / 2) { // find from zero 
667 
if (use_mask[0]) idx = 0x0F; 
668 
else if (use_mask[1]) idx = 0x1F; 
669 
else if (use_mask[2]) idx = 0x2F; 
670 
else if (use_mask[3]) idx = 0x3F; 
671 
else if (use_mask[4]) idx = 0x4F; 
672 
else return; 
673 
idx = av_log2_16bit(use_mask[idx >> 4]);

674 
} 
675 
if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) { 
676 
use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15)); 
677 
n++; 
678 
start_off = idx; 
679 
} 
680 
} 
681  
682 
fcb>x[fcb>n] = start_off; 
683 
fcb>y[fcb>n] = get_bits1(gb) ? 1.0 : 1.0; 
684 
fcb>n++; 
685  
686 
/* set offset for next block, relative to start of that block */

687 
n = (MAX_FRAMESIZE / 2  start_off) % fcb>pitch_lag;

688 
s>aw_next_pulse_off_cache = n ? fcb>pitch_lag  n : 0;

689 
} 
690  
691 
/**

692 
* Apply first set of pitchadaptive window pulses.

693 
* @param s WMA Voice decoding context private data

694 
* @param gb bit I/O context

695 
* @param block_idx block index in frame [0, 1]

696 
* @param fcb storage location for fixed codebook pulse info

697 
*/

698 
static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb, 
699 
int block_idx, AMRFixed *fcb)

700 
{ 
701 
int val = get_bits(gb, 12  2 * (s>aw_idx_is_ext && !block_idx)); 
702 
float v;

703  
704 
if (s>aw_n_pulses[block_idx] > 0) { 
705 
int n, v_mask, i_mask, sh, n_pulses;

706  
707 
if (s>aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each 
708 
n_pulses = 3;

709 
v_mask = 8;

710 
i_mask = 7;

711 
sh = 4;

712 
} else { // 4 pulses, 1:sign + 2:index each 
713 
n_pulses = 4;

714 
v_mask = 4;

715 
i_mask = 3;

716 
sh = 3;

717 
} 
718  
719 
for (n = n_pulses  1; n >= 0; n, val >>= sh) { 
720 
fcb>y[fcb>n] = (val & v_mask) ? 1.0 : 1.0; 
721 
fcb>x[fcb>n] = (val & i_mask) * n_pulses + n + 
722 
s>aw_first_pulse_off[block_idx]; 
723 
while (fcb>x[fcb>n] < 0) 
724 
fcb>x[fcb>n] += fcb>pitch_lag; 
725 
if (fcb>x[fcb>n] < MAX_FRAMESIZE / 2) 
726 
fcb>n++; 
727 
} 
728 
} else {

729 
int num2 = (val & 0x1FF) >> 1, delta, idx; 
730  
731 
if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; } 
732 
else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1  1 * 77; } 
733 
else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1  2 * 76; } 
734 
else { delta = 7; idx = num2 + 1  3 * 75; } 
735 
v = (val & 0x200) ? 1.0 : 1.0; 
736  
737 
fcb>no_repeat_mask = 3 << fcb>n;

738 
fcb>x[fcb>n] = idx  delta; 
739 
fcb>y[fcb>n] = v; 
740 
fcb>x[fcb>n + 1] = idx;

741 
fcb>y[fcb>n + 1] = (val & 1) ? v : v; 
742 
fcb>n += 2;

743 
} 
744 
} 
745  
/**
 * @}
 *
 * Generate a random number from frame_cntr and block_idx, which will lie
 * in the range [0, 1000 - block_size) (so it can be used as an index in a
 * table of size 1000 of which you want to read block_size entries).
 *
 * @param frame_cntr current frame number
 * @param block_num current block index
 * @param block_size amount of entries we want to read from a table
 *                   that has 1000 entries
 * @return a (non-)random number in the [0, 1000 - block_size) range.
 */
static int pRNG(int frame_cntr, int block_num, int block_size)
{
    /* array to simplify the calculation of z:
     * y = (x % 9) * 5 + 6;
     * z = (49995 * x) / y;
     * Since y only has 9 values, we can remove the division by using a
     * LUT and using FASTDIV-style divisions. For each of the 9 values
     * of y, we can rewrite z as:
     * z = x * (49995 / y) + x * ((49995 % y) / y)
     * In this table, each col represents one possible value of y, the
     * first number is 49995 / y, and the second is the FASTDIV variant
     * of 49995 % y / y. */
    static const unsigned int div_tbl[9][2] = {
        { 8332,  3 * 715827883U }, // y =  6
        { 4545,  0 * 390451573U }, // y = 11
        { 3124, 11 * 268435456U }, // y = 16
        { 2380, 15 * 204522253U }, // y = 21
        { 1922, 23 * 165191050U }, // y = 26
        { 1612, 23 * 138547333U }, // y = 31
        { 1388, 27 * 119304648U }, // y = 36
        { 1219, 16 * 104755300U }, // y = 41
        { 1086, 39 *  93368855U }  // y = 46
    };
    unsigned int z, y, x = MUL16(block_num, 1877) + frame_cntr;
    if (x >= 0xFFFF) x -= 0xFFFF;   // max value of x is 8*1877+0xFFFE=0x13AA6,
                                    // so this is effectively a modulo (%)
    y = x - 9 * MULH(477218589, x); // x % 9
    z = (uint16_t) (x * div_tbl[y][0] + UMULH(x, div_tbl[y][1]));
                                    // z = x * 49995 / (y * 5 + 6)
    return z % (1000 - block_size);
}
790  
791 
/**

792 
* Parse hardcoded signal for a single block.

793 
* @note see #synth_block().

794 
*/

795 
static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb, 
796 
int block_idx, int size, 
797 
const struct frame_type_desc *frame_desc, 
798 
float *excitation)

799 
{ 
800 
float gain;

801 
int n, r_idx;

802  
803 
assert(size <= MAX_FRAMESIZE); 
804  
805 
/* Set the offset from which we start reading wmavoice_std_codebook */

806 
if (frame_desc>fcb_type == FCB_TYPE_SILENCE) {

807 
r_idx = pRNG(s>frame_cntr, block_idx, size); 
808 
gain = s>silence_gain; 
809 
} else /* FCB_TYPE_HARDCODED */ { 
810 
r_idx = get_bits(gb, 8);

811 
gain = wmavoice_gain_universal[get_bits(gb, 6)];

812 
} 
813  
814 
/* Clear gain prediction parameters */

815 
memset(s>gain_pred_err, 0, sizeof(s>gain_pred_err)); 
816  
817 
/* Apply gain to hardcoded codebook and use that as excitation signal */

818 
for (n = 0; n < size; n++) 
819 
excitation[n] = wmavoice_std_codebook[r_idx + n] * gain; 
820 
} 
821  
822 
/**

823 
* Parse FCB/ACB signal for a single block.

824 
* @note see #synth_block().

825 
*/

826 
static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, 
827 
int block_idx, int size, 
828 
int block_pitch_sh2,

829 
const struct frame_type_desc *frame_desc, 
830 
float *excitation)

831 
{ 
832 
static const float gain_coeff[6] = { 
833 
0.8169, 0.06545, 0.1726, 0.0185, 0.0359, 0.0458 
834 
}; 
835 
float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain; 
836 
int n, idx, gain_weight;

837 
AMRFixed fcb; 
838  
839 
assert(size <= MAX_FRAMESIZE / 2);

840 
memset(pulses, 0, sizeof(*pulses) * size); 
841  
842 
fcb.pitch_lag = block_pitch_sh2 >> 2;

843 
fcb.pitch_fac = 1.0; 
844 
fcb.no_repeat_mask = 0;

845 
fcb.n = 0;

846  
847 
/* For the other frame types, this is where we apply the innovation

848 
* (fixed) codebook pulses of the speech signal. */

849 
if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) {

850 
aw_pulse_set1(s, gb, block_idx, &fcb); 
851 
aw_pulse_set2(s, gb, block_idx, &fcb); 
852 
} else /* FCB_TYPE_EXC_PULSES */ { 
853 
int offset_nbits = 5  frame_desc>log_n_blocks; 
854  
855 
fcb.no_repeat_mask = 1;

856 
/* similar to ff_decode_10_pulses_35bits(), but with single pulses

857 
* (instead of double) for a subset of pulses */

858 
for (n = 0; n < 5; n++) { 
859 
float sign;

860 
int pos1, pos2;

861  
862 
sign = get_bits1(gb) ? 1.0 : 1.0; 
863 
pos1 = get_bits(gb, offset_nbits); 
864 
fcb.x[fcb.n] = n + 5 * pos1;

865 
fcb.y[fcb.n++] = sign; 
866 
if (n < frame_desc>dbl_pulses) {

867 
pos2 = get_bits(gb, offset_nbits); 
868 
fcb.x[fcb.n] = n + 5 * pos2;

869 
fcb.y[fcb.n++] = (pos1 < pos2) ? sign : sign; 
870 
} 
871 
} 
872 
} 
873 
ff_set_fixed_vector(pulses, &fcb, 1.0, size); 
874  
875 
/* Calculate gain for adaptive & fixed codebook signal.

876 
* see ff_amr_set_fixed_gain(). */

877 
idx = get_bits(gb, 7);

878 
fcb_gain = expf(ff_dot_productf(s>gain_pred_err, gain_coeff, 6) 

879 
5.2409161640 + wmavoice_gain_codebook_fcb[idx]); 
880 
acb_gain = wmavoice_gain_codebook_acb[idx]; 
881 
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx], 
882 
2.9957322736 /* log(0.05) */, 
883 
1.6094379124 /* log(5.0) */); 
884  
885 
gain_weight = 8 >> frame_desc>log_n_blocks;

886 
memmove(&s>gain_pred_err[gain_weight], s>gain_pred_err, 
887 
sizeof(*s>gain_pred_err) * (6  gain_weight)); 
888 
for (n = 0; n < gain_weight; n++) 
889 
s>gain_pred_err[n] = pred_err; 
890  
891 
/* Calculation of adaptive codebook */

892 
if (frame_desc>acb_type == ACB_TYPE_ASYMMETRIC) {

893 
int len;

894 
for (n = 0; n < size; n += len) { 
895 
int next_idx_sh16;

896 
int abs_idx = block_idx * size + n;

897 
int pitch_sh16 = (s>last_pitch_val << 16) + 
898 
s>pitch_diff_sh16 * abs_idx; 
899 
int pitch = (pitch_sh16 + 0x6FFF) >> 16; 
900 
int idx_sh16 = ((pitch << 16)  pitch_sh16) * 8 + 0x58000; 
901 
idx = idx_sh16 >> 16;

902 
if (s>pitch_diff_sh16) {

903 
if (s>pitch_diff_sh16 > 0) { 
904 
next_idx_sh16 = (idx_sh16) &~ 0xFFFF;

905 
} else

906 
next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF; 
907 
len = av_clip((idx_sh16  next_idx_sh16) / s>pitch_diff_sh16 / 8,

908 
1, size  n);

909 
} else

910 
len = size; 
911  
912 
ff_acelp_interpolatef(&excitation[n], &excitation[n  pitch], 
913 
wmavoice_ipol1_coeffs, 17,

914 
idx, 9, len);

915 
} 
916 
} else /* ACB_TYPE_HAMMING */ { 
917 
int block_pitch = block_pitch_sh2 >> 2; 
918 
idx = block_pitch_sh2 & 3;

919 
if (idx) {

920 
ff_acelp_interpolatef(excitation, &excitation[block_pitch], 
921 
wmavoice_ipol2_coeffs, 4,

922 
idx, 8, size);

923 
} else

924 
av_memcpy_backptr(excitation, sizeof(float) * block_pitch, 
925 
sizeof(float) * size); 
926 
} 
927  
928 
/* Interpolate ACB/FCB and use as excitation signal */

929 
ff_weighted_vector_sumf(excitation, excitation, pulses, 
930 
acb_gain, fcb_gain, size); 
931 
} 
932  
933 
/**

934 
* Parse data in a single block.

935 
* @note we assume enough bits are available, caller should check.

936 
*

937 
* @param s WMA Voice decoding context private data

938 
* @param gb bit I/O context

939 
* @param block_idx index of the toberead block

940 
* @param size amount of samples to be read in this block

941 
* @param block_pitch_sh2 pitch for this block << 2

942 
* @param lsps LSPs for (the end of) this frame

943 
* @param prev_lsps LSPs for the last frame

944 
* @param frame_desc frame type descriptor

945 
* @param excitation target memory for the ACB+FCB interpolated signal

946 
* @param synth target memory for the speech synthesis filter output

947 
* @return 0 on success, <0 on error.

948 
*/

949 
static void synth_block(WMAVoiceContext *s, GetBitContext *gb, 
950 
int block_idx, int size, 
951 
int block_pitch_sh2,

952 
const double *lsps, const double *prev_lsps, 
953 
const struct frame_type_desc *frame_desc, 
954 
float *excitation, float *synth) 
955 
{ 
956 
double i_lsps[MAX_LSPS];

957 
float lpcs[MAX_LSPS];

958 
float fac;

959 
int n;

960  
961 
if (frame_desc>acb_type == ACB_TYPE_NONE)

962 
synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation); 
963 
else

964 
synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2, 
965 
frame_desc, excitation); 
966  
967 
/* convert interpolated LSPs to LPCs */

968 
fac = (block_idx + 0.5) / frame_desc>n_blocks; 
969 
for (n = 0; n < s>lsps; n++) // LSF > LSP 
970 
i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n]  prev_lsps[n])); 
971 
ff_acelp_lspd2lpc(i_lsps, lpcs, s>lsps >> 1);

972  
973 
/* Speech synthesis */

974 
ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s>lsps); 
975 
} 
976  
977 
/**

978 
* Synthesize output samples for a single frame.

979 
* @note we assume enough bits are available, caller should check.

980 
*

981 
* @param ctx WMA Voice decoder context

982 
* @param gb bit I/O context (s>gb or one for crosspacket superframes)

983 
* @param samples pointer to output sample buffer, has space for at least 160

984 
* samples

985 
* @param lsps LSP array

986 
* @param prev_lsps array of previous frame's LSPs

987 
* @param excitation target buffer for excitation signal

988 
* @param synth target buffer for synthesized speech data

989 
* @return 0 on success, <0 on error.

990 
*/

991 
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, 
992 
float *samples,

993 
const double *lsps, const double *prev_lsps, 
994 
float *excitation, float *synth) 
995 
{ 
996 
WMAVoiceContext *s = ctx>priv_data; 
997 
int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;

998 
int pitch[MAX_BLOCKS], last_block_pitch;

999  
1000 
/* Parse frame type ("frame header"), see frame_descs */

1001 
int bd_idx = s>vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], 
1002 
block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks; 
1003  
1004 
if (bd_idx < 0) { 
1005 
av_log(ctx, AV_LOG_ERROR, 
1006 
"Invalid frame type VLC code, skipping\n");

1007 
return 1; 
1008 
} 
1009  
1010 
/* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitchperframe") */

1011 
if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {

1012 
/* Pitch is provided per frame, which is interpreted as the pitch of

1013 
* the last sample of the last block of this frame. We can interpolate

1014 
* the pitch of other blocks (and even pitchpersample) by gradually

1015 
* incrementing/decrementing prev_frame_pitch to cur_pitch_val. */

1016 
n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;

1017 
log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;

1018 
cur_pitch_val = s>min_pitch_val + get_bits(gb, s>pitch_nbits); 
1019 
cur_pitch_val = FFMIN(cur_pitch_val, s>max_pitch_val  1);

1020 
if (s>last_acb_type == ACB_TYPE_NONE 

1021 
20 * abs(cur_pitch_val  s>last_pitch_val) >

1022 
(cur_pitch_val + s>last_pitch_val)) 
1023 
s>last_pitch_val = cur_pitch_val; 
1024  
1025 
/* pitch per block */

1026 
for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) { 
1027 
int fac = n * 2 + 1; 
1028  
1029 
pitch[n] = (MUL16(fac, cur_pitch_val) + 
1030 
MUL16((n_blocks_x2  fac), s>last_pitch_val) + 
1031 
frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2; 
1032 
} 
1033  
1034 
/* "pitchdiffpersample" for calculation of pitch per sample */

1035 
s>pitch_diff_sh16 = 
1036 
((cur_pitch_val  s>last_pitch_val) << 16) / MAX_FRAMESIZE;

1037 
} 
1038  
1039 
/* Global gain (if silence) and pitchadaptive window coordinates */

1040 
switch (frame_descs[bd_idx].fcb_type) {

1041 
case FCB_TYPE_SILENCE:

1042 
s>silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];

1043 
break;

1044 
case FCB_TYPE_AW_PULSES:

1045 
aw_parse_coords(s, gb, pitch); 
1046 
break;

1047 
} 
1048  
1049 
for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) { 
1050 
int bl_pitch_sh2;

1051  
1052 
/* Pitch calculation for ACB_TYPE_HAMMING ("pitchperblock") */

1053 
switch (frame_descs[bd_idx].acb_type) {

1054 
case ACB_TYPE_HAMMING: {

1055 
/* Pitch is given per block. Perblock pitches are encoded as an

1056 
* absolute value for the first block, and then delta values

1057 
* relative to this value) for all subsequent blocks. The scale of

1058 
* this pitch value is semilogaritmic compared to its use in the

1059 
* decoder, so we convert it to normal scale also. */

1060 
int block_pitch,

1061 
t1 = (s>block_conv_table[1]  s>block_conv_table[0]) << 2, 
1062 
t2 = (s>block_conv_table[2]  s>block_conv_table[1]) << 1, 
1063 
t3 = s>block_conv_table[3]  s>block_conv_table[2] + 1; 
1064  
1065 
if (n == 0) { 
1066 
block_pitch = get_bits(gb, s>block_pitch_nbits); 
1067 
} else

1068 
block_pitch = last_block_pitch  s>block_delta_pitch_hrange + 
1069 
get_bits(gb, s>block_delta_pitch_nbits); 
1070 
/* Convert last_ so that any next delta is within _range */

1071 
last_block_pitch = av_clip(block_pitch, 
1072 
s>block_delta_pitch_hrange, 
1073 
s>block_pitch_range  
1074 
s>block_delta_pitch_hrange); 
1075  
1076 
/* Convert semilogstyle scale back to normal scale */

1077 
if (block_pitch < t1) {

1078 
bl_pitch_sh2 = (s>block_conv_table[0] << 2) + block_pitch; 
1079 
} else {

1080 
block_pitch = t1; 
1081 
if (block_pitch < t2) {

1082 
bl_pitch_sh2 = 
1083 
(s>block_conv_table[1] << 2) + (block_pitch << 1); 
1084 
} else {

1085 
block_pitch = t2; 
1086 
if (block_pitch < t3) {

1087 
bl_pitch_sh2 = 
1088 
(s>block_conv_table[2] + block_pitch) << 2; 
1089 
} else

1090 
bl_pitch_sh2 = s>block_conv_table[3] << 2; 
1091 
} 
1092 
} 
1093 
pitch[n] = bl_pitch_sh2 >> 2;

1094 
break;

1095 
} 
1096  
1097 
case ACB_TYPE_ASYMMETRIC: {

1098 
bl_pitch_sh2 = pitch[n] << 2;

1099 
break;

1100 
} 
1101  
1102 
default: // ACB_TYPE_NONE has no pitch 
1103 
bl_pitch_sh2 = 0;

1104 
break;

1105 
} 
1106  
1107 
synth_block(s, gb, n, block_nsamples, bl_pitch_sh2, 
1108 
lsps, prev_lsps, &frame_descs[bd_idx], 
1109 
&excitation[n * block_nsamples], 
1110 
&synth[n * block_nsamples]); 
1111 
} 
1112  
1113 
/* Averaging projection filter, if applicable. Else, just copy samples

1114 
* from synthesis buffer */

1115 
if (s>do_apf) {

1116 
// FIXME this is where APF would take place, currently not implemented

1117 
av_log_missing_feature(ctx, "APF", 0); 
1118 
s>do_apf = 0;

1119 
} //else

1120 
for (n = 0; n < 160; n++) 
1121 
samples[n] = av_clipf(synth[n], 1.0, 1.0); 
1122  
1123 
/* Cache values for next frame */

1124 
s>frame_cntr++; 
1125 
if (s>frame_cntr >= 0xFFFF) s>frame_cntr = 0xFFFF; // i.e. modulo (%) 
1126 
s>last_acb_type = frame_descs[bd_idx].acb_type; 
1127 
switch (frame_descs[bd_idx].acb_type) {

1128 
case ACB_TYPE_NONE:

1129 
s>last_pitch_val = 0;

1130 
break;

1131 
case ACB_TYPE_ASYMMETRIC:

1132 
s>last_pitch_val = cur_pitch_val; 
1133 
break;

1134 
case ACB_TYPE_HAMMING:

1135 
s>last_pitch_val = pitch[frame_descs[bd_idx].n_blocks  1];

1136 
break;

1137 
} 
1138  
1139 
return 0; 
1140 
} 
1141  
1142 
/**

1143 
* Ensure minimum value for first item, maximum value for last value,

1144 
* proper spacing between each value and proper ordering.

1145 
*

1146 
* @param lsps array of LSPs

1147 
* @param num size of LSP array

1148 
*

1149 
* @note basically a double version of #ff_acelp_reorder_lsf(), might be

1150 
* useful to put in a generic location later on. Parts are also

1151 
* present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),

1152 
* which is in float.

1153 
*/

1154 
static void stabilize_lsps(double *lsps, int num) 
1155 
{ 
1156 
int n, m, l;

1157  
1158 
/* set minimum value for first, maximum value for last and minimum

1159 
* spacing between LSF values.

1160 
* Very similar to ff_set_min_dist_lsf(), but in double. */

1161 
lsps[0] = FFMAX(lsps[0], 0.0015 * M_PI); 
1162 
for (n = 1; n < num; n++) 
1163 
lsps[n] = FFMAX(lsps[n], lsps[n  1] + 0.0125 * M_PI); 
1164 
lsps[num  1] = FFMIN(lsps[num  1], 0.9985 * M_PI); 
1165  
1166 
/* reorder (looks like onetime / nonrecursed bubblesort).

1167 
* Very similar to ff_sort_nearly_sorted_floats(), but in double. */

1168 
for (n = 1; n < num; n++) { 
1169 
if (lsps[n] < lsps[n  1]) { 
1170 
for (m = 1; m < num; m++) { 
1171 
double tmp = lsps[m];

1172 
for (l = m  1; l >= 0; l) { 
1173 
if (lsps[l] <= tmp) break; 
1174 
lsps[l + 1] = lsps[l];

1175 
} 
1176 
lsps[l + 1] = tmp;

1177 
} 
1178 
break;

1179 
} 
1180 
} 
1181 
} 
1182  
1183 
/**

1184 
* Test if there's enough bits to read 1 superframe.

1185 
*

1186 
* @param orig_gb bit I/O context used for reading. This function

1187 
* does not modify the state of the bitreader; it

1188 
* only uses it to copy the current stream position

1189 
* @param s WMA Voice decoding context private data

1190 
* @return 1 if unsupported, 1 on not enough bits or 0 if OK.

1191 
*/

1192 
static int check_bits_for_superframe(GetBitContext *orig_gb, 
1193 
WMAVoiceContext *s) 
1194 
{ 
1195 
GetBitContext s_gb, *gb = &s_gb; 
1196 
int n, need_bits, bd_idx;

1197 
const struct frame_type_desc *frame_desc; 
1198  
1199 
/* initialize a copy */

1200 
init_get_bits(gb, orig_gb>buffer, orig_gb>size_in_bits); 
1201 
skip_bits_long(gb, get_bits_count(orig_gb)); 
1202 
assert(get_bits_left(gb) == get_bits_left(orig_gb)); 
1203  
1204 
/* superframe header */

1205 
if (get_bits_left(gb) < 14) 
1206 
return 1; 
1207 
if (!get_bits1(gb))

1208 
return 1; // WMAProinWMAVoice superframe 
1209 
if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe 
1210 
if (s>has_residual_lsps) { // residual LSPs (for all frames) 
1211 
if (get_bits_left(gb) < s>sframe_lsp_bitsize)

1212 
return 1; 
1213 
skip_bits_long(gb, s>sframe_lsp_bitsize); 
1214 
} 
1215  
1216 
/* frames */

1217 
for (n = 0; n < MAX_FRAMES; n++) { 
1218 
int aw_idx_is_ext = 0; 
1219  
1220 
if (!s>has_residual_lsps) { // independent LSPs (perframe) 
1221 
if (get_bits_left(gb) < s>frame_lsp_bitsize) return 1; 
1222 
skip_bits_long(gb, s>frame_lsp_bitsize); 
1223 
} 
1224 
bd_idx = s>vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)]; 
1225 
if (bd_idx < 0) 
1226 
return 1; // invalid frame type VLC code 
1227 
frame_desc = &frame_descs[bd_idx]; 
1228 
if (frame_desc>acb_type == ACB_TYPE_ASYMMETRIC) {

1229 
if (get_bits_left(gb) < s>pitch_nbits)

1230 
return 1; 
1231 
skip_bits_long(gb, s>pitch_nbits); 
1232 
} 
1233 
if (frame_desc>fcb_type == FCB_TYPE_SILENCE) {

1234 
skip_bits(gb, 8);

1235 
} else if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) { 
1236 
int tmp = get_bits(gb, 6); 
1237 
if (tmp >= 0x36) { 
1238 
skip_bits(gb, 2);

1239 
aw_idx_is_ext = 1;

1240 
} 
1241 
} 
1242  
1243 
/* blocks */

1244 
if (frame_desc>acb_type == ACB_TYPE_HAMMING) {

1245 
need_bits = s>block_pitch_nbits + 
1246 
(frame_desc>n_blocks  1) * s>block_delta_pitch_nbits;

1247 
} else if (frame_desc>fcb_type == FCB_TYPE_AW_PULSES) { 
1248 
need_bits = 2 * !aw_idx_is_ext;

1249 
} else

1250 
need_bits = 0;

1251 
need_bits += frame_desc>frame_size; 
1252 
if (get_bits_left(gb) < need_bits)

1253 
return 1; 
1254 
skip_bits_long(gb, need_bits); 
1255 
} 
1256  
1257 
return 0; 
1258 
} 
1259  
1260 
/**

1261 
* Synthesize output samples for a single superframe. If we have any data

1262 
* cached in s>sframe_cache, that will be used instead of whatever is loaded

1263 
* in s>gb.

1264 
*

1265 
* WMA Voice superframes contain 3 frames, each containing 160 audio samples,

1266 
* to give a total of 480 samples per frame. See #synth_frame() for frame

1267 
* parsing. In addition to 3 frames, superframes can also contain the LSPs

1268 
* (if these are globally specified for all frames (residually); they can

1269 
* also be specified individually perframe. See the s>has_residual_lsps

1270 
* option), and can specify the number of samples encoded in this superframe

1271 
* (if less than 480), usually used to prevent blanks at track boundaries.

1272 
*

1273 
* @param ctx WMA Voice decoder context

1274 
* @param samples pointer to output buffer for voice samples

1275 
* @param data_size pointer containing the size of #samples on input, and the

1276 
* amount of #samples filled on output

1277 
* @return 0 on success, <0 on error or 1 if there was not enough data to

1278 
* fully parse the superframe

1279 
*/

1280 
static int synth_superframe(AVCodecContext *ctx, 
1281 
float *samples, int *data_size) 
1282 
{ 
1283 
WMAVoiceContext *s = ctx>priv_data; 
1284 
GetBitContext *gb = &s>gb, s_gb; 
1285 
int n, res, n_samples = 480; 
1286 
double lsps[MAX_FRAMES][MAX_LSPS];

1287 
const double *mean_lsf = s>lsps == 16 ? 
1288 
wmavoice_mean_lsf16[s>lsp_def_mode] : wmavoice_mean_lsf10[s>lsp_def_mode]; 
1289 
float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12]; 
1290 
float synth[MAX_LSPS + MAX_SFRAMESIZE];

1291  
1292 
memcpy(synth, s>synth_history, 
1293 
s>lsps * sizeof(*synth));

1294 
memcpy(excitation, s>excitation_history, 
1295 
s>history_nsamples * sizeof(*excitation));

1296  
1297 
if (s>sframe_cache_size > 0) { 
1298 
gb = &s_gb; 
1299 
init_get_bits(gb, s>sframe_cache, s>sframe_cache_size); 
1300 
s>sframe_cache_size = 0;

1301 
} 
1302  
1303 
if ((res = check_bits_for_superframe(gb, s)) == 1) return 1; 
1304  
1305 
/* First bit is speech/music bit, it differentiates between WMAVoice

1306 
* speech samples (the actual codec) and WMAVoice music samples, which

1307 
* are really WMAProinWMAVoicesuperframes. I've never seen those in

1308 
* the wild yet. */

1309 
if (!get_bits1(gb)) {

1310 
av_log_missing_feature(ctx, "WMAProinWMAVoice support", 1); 
1311 
return 1; 
1312 
} 
1313  
1314 
/* (optional) nr. of samples in superframe; always <= 480 and >= 0 */

1315 
if (get_bits1(gb)) {

1316 
if ((n_samples = get_bits(gb, 12)) > 480) { 
1317 
av_log(ctx, AV_LOG_ERROR, 
1318 
"Superframe encodes >480 samples (%d), not allowed\n",

1319 
n_samples); 
1320 
return 1; 
1321 
} 
1322 
} 
1323 
/* Parse LSPs, if global for the superframe (can also be perframe). */

1324 
if (s>has_residual_lsps) {

1325 
double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2]; 
1326  
1327 
for (n = 0; n < s>lsps; n++) 
1328 
prev_lsps[n] = s>prev_lsps[n]  mean_lsf[n]; 
1329  
1330 
if (s>lsps == 10) { 
1331 
dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s>lsp_q_mode);

1332 
} else /* s>lsps == 16 */ 
1333 
dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s>lsp_q_mode);

1334  
1335 
for (n = 0; n < s>lsps; n++) { 
1336 
lsps[0][n] = mean_lsf[n] + (a1[n]  a2[n * 2]); 
1337 
lsps[1][n] = mean_lsf[n] + (a1[s>lsps + n]  a2[n * 2 + 1]); 
1338 
lsps[2][n] += mean_lsf[n];

1339 
} 
1340 
for (n = 0; n < 3; n++) 
1341 
stabilize_lsps(lsps[n], s>lsps); 
1342 
} 
1343  
1344 
/* Parse frames, optionally preceeded by perframe (independent) LSPs. */

1345 
for (n = 0; n < 3; n++) { 
1346 
if (!s>has_residual_lsps) {

1347 
int m;

1348  
1349 
if (s>lsps == 10) { 
1350 
dequant_lsp10i(gb, lsps[n]); 
1351 
} else /* s>lsps == 16 */ 
1352 
dequant_lsp16i(gb, lsps[n]); 
1353  
1354 
for (m = 0; m < s>lsps; m++) 
1355 
lsps[n][m] += mean_lsf[m]; 
1356 
stabilize_lsps(lsps[n], s>lsps); 
1357 
} 
1358  
1359 
if ((res = synth_frame(ctx, gb,

1360 
&samples[n * MAX_FRAMESIZE], 
1361 
lsps[n], n == 0 ? s>prev_lsps : lsps[n  1], 
1362 
&excitation[s>history_nsamples + n * MAX_FRAMESIZE], 
1363 
&synth[s>lsps + n * MAX_FRAMESIZE]))) 
1364 
return res;

1365 
} 
1366  
1367 
/* Statistics? FIXME  we don't check for length, a slight overrun

1368 
* will be caught by internal buffer padding, and anything else

1369 
* will be skipped, not read. */

1370 
if (get_bits1(gb)) {

1371 
res = get_bits(gb, 4);

1372 
skip_bits(gb, 10 * (res + 1)); 
1373 
} 
1374  
1375 
/* Specify nr. of output samples */

1376 
*data_size = n_samples * sizeof(float); 
1377  
1378 
/* Update history */

1379 
memcpy(s>prev_lsps, lsps[2],

1380 
s>lsps * sizeof(*s>prev_lsps));

1381 
memcpy(s>synth_history, &synth[MAX_SFRAMESIZE], 
1382 
s>lsps * sizeof(*synth));

1383 
memcpy(s>excitation_history, &excitation[MAX_SFRAMESIZE], 
1384 
s>history_nsamples * sizeof(*excitation));

1385  
1386 
return 0; 
1387 
} 
1388  
1389 
/**

1390 
* Parse the packet header at the start of each packet (input data to this

1391 
* decoder).

1392 
*

1393 
* @param s WMA Voice decoding context private data

1394 
* @return 1 if not enough bits were available, or 0 on success.

1395 
*/

1396 
static int parse_packet_header(WMAVoiceContext *s) 
1397 
{ 
1398 
GetBitContext *gb = &s>gb; 
1399 
unsigned int res; 
1400  
1401 
if (get_bits_left(gb) < 11) 
1402 
return 1; 
1403 
skip_bits(gb, 4); // packet sequence number 
1404 
s>has_residual_lsps = get_bits1(gb); 
1405 
do {

1406 
res = get_bits(gb, 6); // number of superframes per packet 
1407 
// (minus first one if there is spillover)

1408 
if (get_bits_left(gb) < 6 * (res == 0x3F) + s>spillover_bitsize) 
1409 
return 1; 
1410 
} while (res == 0x3F); 
1411 
s>spillover_nbits = get_bits(gb, s>spillover_bitsize); 
1412  
1413 
return 0; 
1414 
} 
1415  
1416 
/**

1417 
* Copy (unaligned) bits from gb/data/size to pb.

1418 
*

1419 
* @param pb target buffer to copy bits into

1420 
* @param data source buffer to copy bits from

1421 
* @param size size of the source data, in bytes

1422 
* @param gb bit I/O context specifying the current position in the source.

1423 
* data. This function might use this to align the bit position to

1424 
* a wholebyte boundary before calling #ff_copy_bits() on aligned

1425 
* source data

1426 
* @param nbits the amount of bits to copy from source to target

1427 
*

1428 
* @note after calling this function, the current position in the input bit

1429 
* I/O context is undefined.

1430 
*/

1431 
static void copy_bits(PutBitContext *pb, 
1432 
const uint8_t *data, int size, 
1433 
GetBitContext *gb, int nbits)

1434 
{ 
1435 
int rmn_bytes, rmn_bits;

1436  
1437 
rmn_bits = rmn_bytes = get_bits_left(gb); 
1438 
if (rmn_bits < nbits)

1439 
return;

1440 
rmn_bits &= 7; rmn_bytes >>= 3; 
1441 
if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0) 
1442 
put_bits(pb, rmn_bits, get_bits(gb, rmn_bits)); 
1443 
ff_copy_bits(pb, data + size  rmn_bytes, 
1444 
FFMIN(nbits  rmn_bits, rmn_bytes << 3));

1445 
} 
1446  
1447 
/**

1448 
* Packet decoding: a packet is anything that the (ASF) demuxer contains,

1449 
* and we expect that the demuxer / application provides it to us as such

1450 
* (else you'll probably get garbage as output). Every packet has a size of

1451 
* ctx>block_align bytes, starts with a packet header (see

1452 
* #parse_packet_header()), and then a series of superframes. Superframe

1453 
* boundaries may exceed packets, i.e. superframes can split data over

1454 
* multiple (two) packets.

1455 
*

1456 
* For more information about frames, see #synth_superframe().

1457 
*/

1458 
static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, 
1459 
int *data_size, AVPacket *avpkt)

1460 
{ 
1461 
WMAVoiceContext *s = ctx>priv_data; 
1462 
GetBitContext *gb = &s>gb; 
1463 
int size, res, pos;

1464  
1465 
if (*data_size < 480 * sizeof(float)) { 
1466 
av_log(ctx, AV_LOG_ERROR, 
1467 
"Output buffer too small (%d given  %lu needed)\n",

1468 
*data_size, 480 * sizeof(float)); 
1469 
return 1; 
1470 
} 
1471 
*data_size = 0;

1472  
1473 
/* Packets are sometimes a multiple of ctx>block_align, with a packet

1474 
* header at each ctx>block_align bytes. However, FFmpeg's ASF demuxer

1475 
* feeds us ASF packets, which may concatenate multiple "codec" packets

1476 
* in a single "muxer" packet, so we artificially emulate that by

1477 
* capping the packet size at ctx>block_align. */

1478 
for (size = avpkt>size; size > ctx>block_align; size = ctx>block_align);

1479 
if (!size)

1480 
return 0; 
1481 
init_get_bits(&s>gb, avpkt>data, size << 3);

1482  
1483 
/* size == ctx>block_align is used to indicate whether we are dealing with

1484 
* a new packet or a packet of which we already read the packet header

1485 
* previously. */

1486 
if (size == ctx>block_align) { // new packet header 
1487 
if ((res = parse_packet_header(s)) < 0) 
1488 
return res;

1489  
1490 
/* If the packet header specifies a s>spillover_nbits, then we want

1491 
* to push out all data of the previous packet (+ spillover) before

1492 
* continuing to parse new superframes in the current packet. */

1493 
if (s>spillover_nbits > 0) { 
1494 
if (s>sframe_cache_size > 0) { 
1495 
int cnt = get_bits_count(gb);

1496 
copy_bits(&s>pb, avpkt>data, size, gb, s>spillover_nbits); 
1497 
flush_put_bits(&s>pb); 
1498 
s>sframe_cache_size += s>spillover_nbits; 
1499 
if ((res = synth_superframe(ctx, data, data_size)) == 0 && 
1500 
*data_size > 0) {

1501 
cnt += s>spillover_nbits; 
1502 
s>skip_bits_next = cnt & 7;

1503 
return cnt >> 3; 
1504 
} else

1505 
skip_bits_long (gb, s>spillover_nbits  cnt + 
1506 
get_bits_count(gb)); // resync

1507 
} else

1508 
skip_bits_long(gb, s>spillover_nbits); // resync

1509 
} 
1510 
} else if (s>skip_bits_next) 
1511 
skip_bits(gb, s>skip_bits_next); 
1512  
1513 
/* Try parsing superframes in current packet */

1514 
s>sframe_cache_size = 0;

1515 
s>skip_bits_next = 0;

1516 
pos = get_bits_left(gb); 
1517 
if ((res = synth_superframe(ctx, data, data_size)) < 0) { 
1518 
return res;

1519 
} else if (*data_size > 0) { 
1520 
int cnt = get_bits_count(gb);

1521 
s>skip_bits_next = cnt & 7;

1522 
return cnt >> 3; 
1523 
} else if ((s>sframe_cache_size = pos) > 0) { 
1524 
/* rewind bit reader to start of last (incomplete) superframe... */

1525 
init_get_bits(gb, avpkt>data, size << 3);

1526 
skip_bits_long(gb, (size << 3)  pos);

1527 
assert(get_bits_left(gb) == pos); 
1528  
1529 
/* ...and cache it for spillover in next packet */

1530 
init_put_bits(&s>pb, s>sframe_cache, SFRAME_CACHE_MAXSIZE); 
1531 
copy_bits(&s>pb, avpkt>data, size, gb, s>sframe_cache_size); 
1532 
// FIXME bad  just copy bytes as whole and add use the

1533 
// skip_bits_next field

1534 
} 
1535  
1536 
return size;

1537 
} 
1538  
1539 
static av_cold void wmavoice_flush(AVCodecContext *ctx) 
1540 
{ 
1541 
WMAVoiceContext *s = ctx>priv_data; 
1542 
int n;

1543  
1544 
s>sframe_cache_size = 0;

1545 
s>skip_bits_next = 0;

1546 
for (n = 0; n < s>lsps; n++) 
1547 
s>prev_lsps[n] = M_PI * (n + 1.0) / (s>lsps + 1.0); 
1548 
memset(s>excitation_history, 0,

1549 
sizeof(*s>excitation_history) * MAX_SIGNAL_HISTORY);

1550 
memset(s>synth_history, 0,

1551 
sizeof(*s>synth_history) * MAX_LSPS);

1552 
memset(s>gain_pred_err, 0,

1553 
sizeof(s>gain_pred_err));

1554 
} 
1555  
1556 
AVCodec wmavoice_decoder = { 
1557 
"wmavoice",

1558 
AVMEDIA_TYPE_AUDIO, 
1559 
CODEC_ID_WMAVOICE, 
1560 
sizeof(WMAVoiceContext),

1561 
wmavoice_decode_init, 
1562 
NULL,

1563 
NULL,

1564 
wmavoice_decode_packet, 
1565 
CODEC_CAP_SUBFRAMES, 
1566 
.flush = wmavoice_flush, 
1567 
.long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),

1568 
}; 