Revision 9a32573b libavcodec/wmavoice.c
libavcodec/wmavoice.c  

36  36 
#include "acelp_filters.h" 
37  37 
#include "lsp.h" 
38  38 
#include "libavutil/lzo.h" 
39 
#include "avfft.h" 

40 
#include "fft.h" 

39  41  
40  42 
#define MAX_BLOCKS 8 ///< maximum number of blocks per frame 
41  43 
#define MAX_LSPS 16 ///< maximum filter order 
44 
#define MAX_LSPS_ALIGN16 16 ///< same as #MAX_LSPS; needs to be multiple 

45 
///< of 16 for ASM input buffer alignment 

42  46 
#define MAX_FRAMES 3 ///< maximum number of frames per superframe 
43  47 
#define MAX_FRAMESIZE 160 ///< maximum number of samples per frame 
44  48 
#define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history 
...  ...  
140  144 
int history_nsamples; ///< number of samples in history for signal 
141  145 
///< prediction (through ACB) 
142  146  
147 
/* postfilter specific values */ 

143  148 
int do_apf; ///< whether to apply the averaged 
144  149 
///< projection filter (APF) 
150 
int denoise_strength; ///< strength of denoising in Wiener filter 

151 
///< [0-11]

152 
int denoise_tilt_corr; ///< Whether to apply tilt correction to the 

153 
///< Wiener filter coefficients (postfilter) 

154 
int dc_level; ///< Predicted amount of DC noise, based 

155 
///< on which a DC removal filter is used 

145  156  
146  157 
int lsps; ///< number of LSPs per frame [10 or 16] 
147  158 
int lsp_q_mode; ///< defines quantizer defaults [0, 1] 
...  ...  
244  255 
float synth_history[MAX_LSPS]; ///< see #excitation_history 
245  256 
/** 
246  257 
* @} 
258 
* @defgroup post_filter Postfilter values 

259 
* Variables used for postfilter implementation, mostly history for

260 
* smoothing and so on, and context variables for FFT/iFFT. 

261 
* @{ 

262 
*/ 

263 
RDFTContext rdft, irdft; ///< contexts for FFTcalculation in the 

264 
///< postfilter (for denoise filter) 

265 
DCTContext dct, dst; ///< contexts for phase shift (in Hilbert 

266 
///< transform, part of postfilter) 

267 
float sin[511], cos[511]; ///< 8bit cosine/sine windows over [pi,pi] 

268 
///< range 

269 
float postfilter_agc; ///< gain control memory, used in 

270 
///< #adaptive_gain_control() 

271 
float dcf_mem[2]; ///< DC filter history 

272 
float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE]; 

273 
///< zero filter output (i.e. excitation) 

274 
///< by postfilter 

275 
float denoise_filter_cache[MAX_FRAMESIZE]; 

276 
int denoise_filter_cache_size; ///< samples in #denoise_filter_cache 

277 
DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80]; 

278 
///< aligned buffer for LPC tilting 

279 
DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80]; 

280 
///< aligned buffer for denoise coefficients 

281 
DECLARE_ALIGNED(16, float, synth_filter_out_buf)[80 + MAX_LSPS_ALIGN16]; 

282 
///< aligned buffer for postfilter speech 

283 
///< synthesis 

284 
/** 

285 
* @} 

247  286 
*/ 
248  287 
} WMAVoiceContext; 
249  288  
...  ...  
313  352 
flags = AV_RL32(ctx>extradata + 18); 
314  353 
s>spillover_bitsize = 3 + av_ceil_log2(ctx>block_align); 
315  354 
s>do_apf = flags & 0x1; 
355 
if (s>do_apf) { 

356 
ff_rdft_init(&s>rdft, 7, DFT_R2C); 

357 
ff_rdft_init(&s>irdft, 7, IDFT_C2R); 

358 
ff_dct_init(&s>dct, 6, DCT_I); 

359 
ff_dct_init(&s>dst, 6, DST_I); 

360  
361 
ff_sine_window_init(s>cos, 256); 

362 
memcpy(&s>sin[255], s>cos, 256 * sizeof(s>cos[0])); 

363 
for (n = 0; n < 255; n++) { 

364 
s>sin[n] = s>sin[510  n]; 

365 
s>cos[510  n] = s>cos[n]; 

366 
} 

367 
} 

368 
s>denoise_strength = (flags >> 2) & 0xF; 

369 
if (s>denoise_strength >= 12) { 

370 
av_log(ctx, AV_LOG_ERROR, 

371 
"Invalid denoise filter strength %d (max=11)\n", 

372 
s>denoise_strength); 

373 
return 1; 

374 
} 

375 
s>denoise_tilt_corr = !!(flags & 0x40); 

376 
s>dc_level = (flags >> 7) & 0xF; 

316  377 
s>lsp_q_mode = !!(flags & 0x2000); 
317  378 
s>lsp_def_mode = !!(flags & 0x4000); 
318  379 
lsp16_flag = flags & 0x1000; 
...  ...  
370  431 
} 
371  432  
372  433 
/** 
434 
* @defgroup postfilter Postfilter functions 

435 
* Postfilter functions (gain control, wiener denoise filter, DC filter, 

436 
* kalman smoothening, plus surrounding code to wrap it) 

437 
* @{ 

438 
*/ 

439 
/**
 * Adaptive gain control (as used in postfilter).
 *
 * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
 * that the energy here is calculated using sum(abs(...)), whereas the
 * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
 *
 * The per-sample gain is an exponentially smoothed version of
 * (speech energy / postfilter energy); the smoothing state is carried
 * between calls through @p gain_mem.
 *
 * @param out output buffer for filtered samples
 * @param in input buffer containing the samples as they are after the
 *           postfilter steps so far
 * @param speech_synth input buffer containing speech synth before postfilter
 * @param size input buffer size
 * @param alpha exponential filter factor
 * @param gain_mem pointer to filter memory (single float), read on entry
 *                 and updated on return
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* An all-zero postfilter output would otherwise yield Inf/NaN, which
     * would then be stored in *gain_mem and poison every later frame. */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem    = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}

475  
476 
/** 

477 
* Kalman smoothing function. 

478 
* 

479 
* This function looks back pitch +/ 3 samples back into history to find 

480 
* the best fitting curve (that one giving the optimal gain of the two 

481 
* signals, i.e. the highest dot product between the two), and then 

482 
* uses that signal history to smoothen the output of the speech synthesis 

483 
* filter. 

484 
* 

485 
* @param s WMA Voice decoding context 

486 
* @param pitch pitch of the speech signal 

487 
* @param in input speech signal 

488 
* @param out output pointer for smoothened signal 

489 
* @param size input/output buffer size 

490 
* 

491 
* @returns 1 if no smoothening took place, e.g. because no optimal 

492 
* fit could be found, or 0 on success. 

493 
*/ 

494 
static int kalman_smoothen(WMAVoiceContext *s, int pitch, 

495 
const float *in, float *out, int size) 

496 
{ 

497 
int n; 

498 
float optimal_gain = 0, dot; 

499 
const float *ptr = &in[FFMAX(s>min_pitch_val, pitch  3)], 

500 
*end = &in[FFMIN(s>max_pitch_val, pitch + 3)], 

501 
*best_hist_ptr; 

502  
503 
/* find best fitting point in history */ 

504 
do { 

505 
dot = ff_dot_productf(in, ptr, size); 

506 
if (dot > optimal_gain) { 

507 
optimal_gain = dot; 

508 
best_hist_ptr = ptr; 

509 
} 

510 
} while (ptr >= end); 

511  
512 
if (optimal_gain <= 0) 

513 
return 1; 

514 
dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size); 

515 
if (dot <= 0) // would be 1.0 

516 
return 1; 

517  
518 
if (optimal_gain <= dot) { 

519 
dot = dot / (dot + 0.6 * optimal_gain); // 0.6251.000 

520 
} else 

521 
dot = 0.625; 

522  
523 
/* actual smoothing */ 

524 
for (n = 0; n < size; n++) 

525 
out[n] = best_hist_ptr[n] + dot * (in[n]  best_hist_ptr[n]); 

526  
527 
return 0; 

528 
} 

529  
530 
/**
 * Get the tilt factor of a formant filter from its transfer function
 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
 *      but somehow (??) it does a speech synthesis filter in the
 *      middle, which is missing here
 *
 * The result is rh1 / rh0, where rh0 = 1 + sum(lpcs[i]^2) and
 * rh1 = lpcs[0] + sum(lpcs[i] * lpcs[i + 1]). The denominator is formed
 * by adding a sum of squares to 1.0.
 *
 * @param lpcs LPC coefficients
 * @param n_lpcs Size of LPC buffer
 * @returns the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    float rh0, rh1;

    rh0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
    rh1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return rh1 / rh0;
}

549  
550 
/** 

551 
* Derive denoise filter coefficients (in real domain) from the LPCs. 

552 
*/ 

553 
static void calc_input_response(WMAVoiceContext *s, float *lpcs, 

554 
int fcb_type, float *coeffs, int remainder) 

555 
{ 

556 
float last_coeff, min = 15.0, max = 15.0; 

557 
float irange, angle_mul, gain_mul, range, sq; 

558 
int n, idx; 

559  
560 
/* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */ 

561 
ff_rdft_calc(&s>rdft, lpcs); 

562 
#define log_range(var, assign) do { \ 

563 
float tmp = log10f(assign); var = tmp; \ 

564 
max = FFMAX(max, tmp); min = FFMIN(min, tmp); \ 

565 
} while (0) 

566 
log_range(last_coeff, lpcs[1] * lpcs[1]); 

567 
for (n = 1; n < 64; n++) 

568 
log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] + 

569 
lpcs[n * 2 + 1] * lpcs[n * 2 + 1]); 

570 
log_range(lpcs[0], lpcs[0] * lpcs[0]); 

571 
#undef log_range 

572 
range = max  min; 

573 
lpcs[64] = last_coeff; 

574  
575 
/* Now, use this spectrum to pick out these frequencies with higher 

576 
* (relative) power/energy (which we then take to be "not noise"), 

577 
* and set up a table (still in lpc[]) of (relative) gains per frequency. 

578 
* These frequencies will be maintained, while others ("noise") will be 

579 
* decreased in the filter output. */ 

580 
irange = 64.0 / range; // so irange*(maxvalue) is in the range [0, 63] 

581 
gain_mul = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) : 

582 
(5.0 / 14.7)); 

583 
angle_mul = gain_mul * (8.0 * M_LN10 / M_PI); 

584 
for (n = 0; n <= 64; n++) { 

585 
float pow; 

586  
587 
idx = FFMAX(0, lrint((max  lpcs[n]) * irange)  1); 

588 
pow = wmavoice_denoise_power_table[s>denoise_strength][idx]; 

589 
lpcs[n] = angle_mul * pow; 

590  
591 
/* 70.57 =~ 1/log10(1.0331663) */ 

592 
idx = (pow * gain_mul  0.0295) * 70.570526123; 

593 
if (idx > 127) { // fallback if index falls outside table range 

594 
coeffs[n] = wmavoice_energy_table[127] * 

595 
powf(1.0331663, idx  127); 

596 
} else 

597 
coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)]; 

598 
} 

599  
600 
/* calculate the Hilbert transform of the gains, which we do (since this 

601 
* is a sinus input) by doing a phase shift (in theory, H(sin())=cos()). 

602 
* Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the 

603 
* "moment" of the LPCs in this filter. */ 

604 
ff_dct_calc(&s>dct, lpcs); 

605 
ff_dct_calc(&s>dst, lpcs); 

606  
607 
/* Split out the coefficient indexes into phase/magnitude pairs */ 

608 
idx = 255 + av_clip(lpcs[64], 255, 255); 

609 
coeffs[0] = coeffs[0] * s>cos[idx]; 

610 
idx = 255 + av_clip(lpcs[64]  2 * lpcs[63], 255, 255); 

611 
last_coeff = coeffs[64] * s>cos[idx]; 

612 
for (n = 63;; n) { 

613 
idx = 255 + av_clip(lpcs[64]  2 * lpcs[n  1], 255, 255); 

614 
coeffs[n * 2 + 1] = coeffs[n] * s>sin[idx]; 

615 
coeffs[n * 2] = coeffs[n] * s>cos[idx]; 

616  
617 
if (!n) break; 

618  
619 
idx = 255 + av_clip( lpcs[64]  2 * lpcs[n  1], 255, 255); 

620 
coeffs[n * 2 + 1] = coeffs[n] * s>sin[idx]; 

621 
coeffs[n * 2] = coeffs[n] * s>cos[idx]; 

622 
} 

623 
coeffs[1] = last_coeff; 

624  
625 
/* move into real domain */ 

626 
ff_rdft_calc(&s>irdft, coeffs); 

627  
628 
/* tilt correction and normalize scale */ 

629 
memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128  remainder)); 

630 
if (s>denoise_tilt_corr) { 

631 
float tilt_mem = 0; 

632  
633 
coeffs[remainder  1] = 0; 

634 
ff_tilt_compensation(&tilt_mem, 

635 
1.8 * tilt_factor(coeffs, remainder  1), 

636 
coeffs, remainder); 

637 
} 

638 
sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder)); 

639 
for (n = 0; n < remainder; n++) 

640 
coeffs[n] *= sq; 

641 
} 

642  
643 
/** 

644 
* This function applies a Wiener filter on the (noisy) speech signal as 

645 
* a means to denoise it. 

646 
* 

647 
*  take RDFT of LPCs to get the power spectrum of the noise + speech; 

648 
*  using this power spectrum, calculate (for each frequency) the Wiener 

649 
* filter gain, which depends on the frequency power and desired level 

650 
* of noise subtraction (when set too high, this leads to artifacts) 

651 
* We can do this symmetrically over the Xaxis (so 04kHz is the inverse 

652 
* of 48kHz); 

653 
*  by doing a phase shift, calculate the Hilbert transform of this array 

654 
* of perfrequency filtergains to get the filtering coefficients; 

655 
*  smoothen/normalize/detilt these filter coefficients as desired; 

656 
*  take RDFT of noisy sound, apply the coefficients and take its IRDFT 

657 
* to get the denoised speech signal; 

658 
*  the leftover (i.e. output of the IRDFT on denoised speech data beyond 

659 
* the frame boundary) are saved and applied to subsequent frames by an 

660 
* overlapadd method (otherwise you get clickingartifacts). 

661 
* 

662 
* @param s WMA Voice decoding context 

663 
* @param s fcb_type Frame (codebook) type 

664 
* @param synth_pf input: the noisy speech signal, output: denoised speech 

665 
* data; should be 16byte aligned (for ASM purposes) 

666 
* @param size size of the speech data 

667 
* @param lpcs LPCs used to synthesize this frame's speech data 

668 
*/ 

669 
static void wiener_denoise(WMAVoiceContext *s, int fcb_type, 

670 
float *synth_pf, int size, 

671 
const float *lpcs) 

672 
{ 

673 
int remainder, lim, n; 

674  
675 
if (fcb_type != FCB_TYPE_SILENCE) { 

676 
float *tilted_lpcs = s>tilted_lpcs_pf, 

677 
*coeffs = s>denoise_coeffs_pf, tilt_mem = 0; 

678  
679 
tilted_lpcs[0] = 1.0; 

680 
memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s>lsps); 

681 
memset(&tilted_lpcs[s>lsps + 1], 0, 

682 
sizeof(tilted_lpcs[0]) * (128  s>lsps  1)); 

683 
ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s>lsps), 

684 
tilted_lpcs, s>lsps + 2); 

685  
686 
/* The IRDFT output (127 samples for 7bit filter) beyond the frame 

687 
* size is applied to the next frame. All input beyond this is zero, 

688 
* and thus all output beyond this will go towards zero, hence we can 

689 
* limit to min(size1, 127size) as a performance consideration. */ 

690 
remainder = FFMIN(127  size, size  1); 

691 
calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder); 

692  
693 
/* apply coefficients (in frequency spectrum domain), i.e. complex 

694 
* number multiplication */ 

695 
memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128  size)); 

696 
ff_rdft_calc(&s>rdft, synth_pf); 

697 
ff_rdft_calc(&s>rdft, coeffs); 

698 
synth_pf[0] *= coeffs[0]; 

699 
synth_pf[1] *= coeffs[1]; 

700 
for (n = 1; n < 128; n++) { 

701 
float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1]; 

702 
synth_pf[n * 2] = v1 * coeffs[n * 2]  v2 * coeffs[n * 2 + 1]; 

703 
synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1]; 

704 
} 

705 
ff_rdft_calc(&s>irdft, synth_pf); 

706 
} 

707  
708 
/* merge filter output with the history of previous runs */ 

709 
if (s>denoise_filter_cache_size) { 

710 
lim = FFMIN(s>denoise_filter_cache_size, size); 

711 
for (n = 0; n < lim; n++) 

712 
synth_pf[n] += s>denoise_filter_cache[n]; 

713 
s>denoise_filter_cache_size = lim; 

714 
memmove(s>denoise_filter_cache, &s>denoise_filter_cache[size], 

715 
sizeof(s>denoise_filter_cache[0]) * s>denoise_filter_cache_size); 

716 
} 

717  
718 
/* move remainder of filter output into a cache for future runs */ 

719 
if (fcb_type != FCB_TYPE_SILENCE) { 

720 
lim = FFMIN(remainder, s>denoise_filter_cache_size); 

721 
for (n = 0; n < lim; n++) 

722 
s>denoise_filter_cache[n] += synth_pf[size + n]; 

723 
if (lim < remainder) { 

724 
memcpy(&s>denoise_filter_cache[lim], &synth_pf[size + lim], 

725 
sizeof(s>denoise_filter_cache[0]) * (remainder  lim)); 

726 
s>denoise_filter_cache_size = remainder; 

727 
} 

728 
} 

729 
} 

730  
731 
/** 

732 
* Averaging projection filter, the postfilter used in WMAVoice. 

733 
* 

734 
* This uses the following steps: 

735 
*  A zerosynthesis filter (generate excitation from synth signal) 

736 
*  Kalman smoothing on excitation, based on pitch 

737 
*  Resynthesized smoothened output 

738 
*  Iterative Wiener denoise filter 

739 
*  Adaptive gain filter 

740 
*  DC filter 

741 
* 

742 
* @param s WMAVoice decoding context 

743 
* @param synth Speech synthesis output (before postfilter) 

744 
* @param samples Output buffer for filtered samples 

745 
* @param size Buffer size of synth & samples 

746 
* @param lpcs Generated LPCs used for speech synthesis 

747 
* @param fcb_type Frame type (silence, hardcoded, AWpulses or FCBpulses) 

748 
* @param pitch Pitch of the input signal 

749 
*/ 

750 
static void postfilter(WMAVoiceContext *s, const float *synth, 

751 
float *samples, int size, 

752 
const float *lpcs, float *zero_exc_pf, 

753 
int fcb_type, int pitch) 

754 
{ 

755 
float synth_filter_in_buf[MAX_FRAMESIZE / 2], 

756 
*synth_pf = &s>synth_filter_out_buf[MAX_LSPS_ALIGN16], 

757 
*synth_filter_in = zero_exc_pf; 

758  
759 
assert(size <= MAX_FRAMESIZE / 2); 

760  
761 
/* generate excitation from input signal */ 

762 
ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s>lsps); 

763  
764 
if (fcb_type >= FCB_TYPE_AW_PULSES && 

765 
!kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size)) 

766 
synth_filter_in = synth_filter_in_buf; 

767  
768 
/* resynthesize speech after smoothening, and keep history */ 

769 
ff_celp_lp_synthesis_filterf(synth_pf, lpcs, 

770 
synth_filter_in, size, s>lsps); 

771 
memcpy(&synth_pf[s>lsps], &synth_pf[size  s>lsps], 

772 
sizeof(synth_pf[0]) * s>lsps); 

773  
774 
wiener_denoise(s, fcb_type, synth_pf, size, lpcs); 

775  
776 
adaptive_gain_control(samples, synth_pf, synth, size, 0.99, 

777 
&s>postfilter_agc); 

778  
779 
if (s>dc_level > 8) { 

780 
/* remove ultralow frequency DC noise / highpass filter; 

781 
* coefficients are identical to those used in SIPR decoding, 

782 
* and very closely resemble those used in AMRNB decoding. */ 

783 
ff_acelp_apply_order_2_transfer_function(samples, samples, 

784 
(const float[2]) { 1.99997, 1.0 }, 

785 
(const float[2]) { 1.9330735188, 0.93589198496 }, 

786 
0.93980580475, s>dcf_mem, size); 

787 
} 

788 
} 

789 
/** 

790 
* @} 

791 
*/ 

792  
793 
/** 

373  794 
* Dequantize LSPs 
374  795 
* @param lsps output pointer to the array that will hold the LSPs 
375  796 
* @param num number of LSPs to be dequantized 
...  ...  
980  1401 
* 
981  1402 
* @param ctx WMA Voice decoder context 
982  1403 
* @param gb bit I/O context (s>gb or one for crosspacket superframes) 
1404 
* @param frame_idx Frame number within superframe [0-2]

983  1405 
* @param samples pointer to output sample buffer, has space for at least 160 
984  1406 
* samples 
985  1407 
* @param lsps LSP array 
...  ...  
988  1410 
* @param synth target buffer for synthesized speech data 
989  1411 
* @return 0 on success, <0 on error. 
990  1412 
*/ 
991 
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, 

1413 
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,


992  1414 
float *samples, 
993  1415 
const double *lsps, const double *prev_lsps, 
994  1416 
float *excitation, float *synth) 
...  ...  
1113  1535 
/* Averaging projection filter, if applicable. Else, just copy samples 
1114  1536 
* from synthesis buffer */ 
1115  1537 
if (s>do_apf) { 
1116 
// FIXME this is where APF would take place, currently not implemented 

1117 
av_log_missing_feature(ctx, "APF", 0); 

1118 
s>do_apf = 0; 

1119 
} //else 

1538 
double i_lsps[MAX_LSPS]; 

1539 
float lpcs[MAX_LSPS]; 

1540  
1541 
for (n = 0; n < s>lsps; n++) // LSF > LSP 

1542 
i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n])); 

1543 
ff_acelp_lspd2lpc(i_lsps, lpcs, s>lsps >> 1); 

1544 
postfilter(s, synth, samples, 80, lpcs, 

1545 
&s>zero_exc_pf[s>history_nsamples + MAX_FRAMESIZE * frame_idx], 

1546 
frame_descs[bd_idx].fcb_type, pitch[0]); 

1547  
1548 
for (n = 0; n < s>lsps; n++) // LSF > LSP 

1549 
i_lsps[n] = cos(lsps[n]); 

1550 
ff_acelp_lspd2lpc(i_lsps, lpcs, s>lsps >> 1); 

1551 
postfilter(s, &synth[80], &samples[80], 80, lpcs, 

1552 
&s>zero_exc_pf[s>history_nsamples + MAX_FRAMESIZE * frame_idx + 80], 

1553 
frame_descs[bd_idx].fcb_type, pitch[0]); 

1554 
} else 

1120  1555 
memcpy(samples, synth, 160 * sizeof(synth[0])); 
1121  1556  
1122  1557 
/* Cache values for next frame */ 
...  ...  
1355  1790 
stabilize_lsps(lsps[n], s>lsps); 
1356  1791 
} 
1357  1792  
1358 
if ((res = synth_frame(ctx, gb, 

1793 
if ((res = synth_frame(ctx, gb, n,


1359  1794 
&samples[n * MAX_FRAMESIZE], 
1360  1795 
lsps[n], n == 0 ? s>prev_lsps : lsps[n  1], 
1361  1796 
&excitation[s>history_nsamples + n * MAX_FRAMESIZE], 
...  ...  
1381  1816 
s>lsps * sizeof(*synth)); 
1382  1817 
memcpy(s>excitation_history, &excitation[MAX_SFRAMESIZE], 
1383  1818 
s>history_nsamples * sizeof(*excitation)); 
1819 
if (s>do_apf) 

1820 
memmove(s>zero_exc_pf, &s>zero_exc_pf[MAX_SFRAMESIZE], 

1821 
s>history_nsamples * sizeof(*s>zero_exc_pf)); 

1384  1822  
1385  1823 
return 0; 
1386  1824 
} 
...  ...  
1535  1973 
return size; 
1536  1974 
} 
1537  1975  
1976 
/**
 * Free the transform contexts used by the postfilter.
 *
 * The RDFT/IRDFT/DCT/DST contexts are only released when do_apf is set.
 *
 * @param ctx codec context whose priv_data is the WMAVoiceContext
 * @return always 0
 */
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
{
    WMAVoiceContext *s = ctx->priv_data;

    if (s->do_apf) {
        ff_rdft_end(&s->rdft);
        ff_rdft_end(&s->irdft);
        ff_dct_end(&s->dct);
        ff_dct_end(&s->dst);
    }

    return 0;
}

1989  
1538  1990 
static av_cold void wmavoice_flush(AVCodecContext *ctx) 
1539  1991 
{ 
1540  1992 
WMAVoiceContext *s = ctx>priv_data; 
1541  1993 
int n; 
1542  1994  
1995 
s>postfilter_agc = 0; 

1543  1996 
s>sframe_cache_size = 0; 
1544  1997 
s>skip_bits_next = 0; 
1545  1998 
for (n = 0; n < s>lsps; n++) 
...  ...  
1550  2003 
sizeof(*s>synth_history) * MAX_LSPS); 
1551  2004 
memset(s>gain_pred_err, 0, 
1552  2005 
sizeof(s>gain_pred_err)); 
2006  
2007 
if (s>do_apf) { 

2008 
memset(&s>synth_filter_out_buf[MAX_LSPS_ALIGN16  s>lsps], 0, 

2009 
sizeof(*s>synth_filter_out_buf) * s>lsps); 

2010 
memset(s>dcf_mem, 0, 

2011 
sizeof(*s>dcf_mem) * 2); 

2012 
memset(s>zero_exc_pf, 0, 

2013 
sizeof(*s>zero_exc_pf) * s>history_nsamples); 

2014 
memset(s>denoise_filter_cache, 0, sizeof(s>denoise_filter_cache)); 

2015 
} 

1553  2016 
} 
1554  2017  
1555  2018 
AVCodec wmavoice_decoder = { 
...  ...  
1559  2022 
sizeof(WMAVoiceContext), 
1560  2023 
wmavoice_decode_init, 
1561  2024 
NULL, 
1562 
NULL,


2025 
wmavoice_decode_end,


1563  2026 
wmavoice_decode_packet, 
1564  2027 
CODEC_CAP_SUBFRAMES, 
1565  2028 
.flush = wmavoice_flush, 
Also available in: Unified diff