Revision 9a32573b

View differences:

libavcodec/wmavoice.c
36 36
#include "acelp_filters.h"
37 37
#include "lsp.h"
38 38
#include "libavutil/lzo.h"
39
#include "avfft.h"
40
#include "fft.h"
39 41

  
40 42
#define MAX_BLOCKS           8   ///< maximum number of blocks per frame
41 43
#define MAX_LSPS             16  ///< maximum filter order
44
#define MAX_LSPS_ALIGN16     16  ///< same as #MAX_LSPS; needs to be multiple
45
                                 ///< of 16 for ASM input buffer alignment
42 46
#define MAX_FRAMES           3   ///< maximum number of frames per superframe
43 47
#define MAX_FRAMESIZE        160 ///< maximum number of samples per frame
44 48
#define MAX_SIGNAL_HISTORY   416 ///< maximum excitation signal history
......
140 144
    int history_nsamples;         ///< number of samples in history for signal
141 145
                                  ///< prediction (through ACB)
142 146

  
147
    /* postfilter specific values */
143 148
    int do_apf;                   ///< whether to apply the averaged
144 149
                                  ///< projection filter (APF)
150
    int denoise_strength;         ///< strength of denoising in Wiener filter
151
                                  ///< [0-11]
152
    int denoise_tilt_corr;        ///< Whether to apply tilt correction to the
153
                                  ///< Wiener filter coefficients (postfilter)
154
    int dc_level;                 ///< Predicted amount of DC noise, based
155
                                  ///< on which a DC removal filter is used
145 156

  
146 157
    int lsps;                     ///< number of LSPs per frame [10 or 16]
147 158
    int lsp_q_mode;               ///< defines quantizer defaults [0, 1]
......
244 255
    float synth_history[MAX_LSPS]; ///< see #excitation_history
245 256
    /**
246 257
     * @}
258
     * @defgroup post_filter Postfilter values
259
     * Variables used for postfilter implementation, mostly history for
260
     * smoothing and so on, and context variables for FFT/iFFT.
261
     * @{
262
     */
263
    RDFTContext rdft, irdft;      ///< contexts for FFT-calculation in the
264
                                  ///< postfilter (for denoise filter)
265
    DCTContext dct, dst;          ///< contexts for phase shift (in Hilbert
266
                                  ///< transform, part of postfilter)
267
    float sin[511], cos[511];     ///< 8-bit cosine/sine windows over [-pi,pi]
268
                                  ///< range
269
    float postfilter_agc;         ///< gain control memory, used in
270
                                  ///< #adaptive_gain_control()
271
    float dcf_mem[2];             ///< DC filter history
272
    float zero_exc_pf[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE];
273
                                  ///< zero filter output (i.e. excitation)
274
                                  ///< by postfilter
275
    float denoise_filter_cache[MAX_FRAMESIZE];
276
    int   denoise_filter_cache_size; ///< samples in #denoise_filter_cache
277
    DECLARE_ALIGNED(16, float, tilted_lpcs_pf)[0x80];
278
                                  ///< aligned buffer for LPC tilting
279
    DECLARE_ALIGNED(16, float, denoise_coeffs_pf)[0x80];
280
                                  ///< aligned buffer for denoise coefficients
281
    DECLARE_ALIGNED(16, float, synth_filter_out_buf)[80 + MAX_LSPS_ALIGN16];
282
                                  ///< aligned buffer for postfilter speech
283
                                  ///< synthesis
284
    /**
285
     * @}
247 286
     */
248 287
} WMAVoiceContext;
249 288

  
......
313 352
    flags                = AV_RL32(ctx->extradata + 18);
314 353
    s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
315 354
    s->do_apf            =    flags & 0x1;
355
    if (s->do_apf) {
356
        ff_rdft_init(&s->rdft,  7, DFT_R2C);
357
        ff_rdft_init(&s->irdft, 7, IDFT_C2R);
358
        ff_dct_init(&s->dct,  6, DCT_I);
359
        ff_dct_init(&s->dst,  6, DST_I);
360

  
361
        ff_sine_window_init(s->cos, 256);
362
        memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
363
        for (n = 0; n < 255; n++) {
364
            s->sin[n]       = -s->sin[510 - n];
365
            s->cos[510 - n] =  s->cos[n];
366
        }
367
    }
368
    s->denoise_strength  =   (flags >> 2) & 0xF;
369
    if (s->denoise_strength >= 12) {
370
        av_log(ctx, AV_LOG_ERROR,
371
               "Invalid denoise filter strength %d (max=11)\n",
372
               s->denoise_strength);
373
        return -1;
374
    }
375
    s->denoise_tilt_corr = !!(flags & 0x40);
376
    s->dc_level          =   (flags >> 7) & 0xF;
316 377
    s->lsp_q_mode        = !!(flags & 0x2000);
317 378
    s->lsp_def_mode      = !!(flags & 0x4000);
318 379
    lsp16_flag           =    flags & 0x1000;
......
370 431
}
371 432

  
372 433
/**
434
 * @defgroup postfilter Postfilter functions
435
 * Postfilter functions (gain control, wiener denoise filter, DC filter,
436
 * kalman smoothening, plus surrounding code to wrap it)
437
 * @{
438
 */
439
/**
 * Adaptive gain control (as used in postfilter).
 *
 * Identical to #ff_adaptive_gain_control() in acelp_vectors.c, except
 * that the energy here is calculated using sum(abs(...)), whereas the
 * other codecs (e.g. AMR-NB, SIPRO) use sqrt(dotproduct(...)).
 *
 * The applied gain is an exponentially smoothed version of
 * (1 - alpha) * speech_energy / postfilter_energy. When the postfilter
 * output carries no energy at all, the target gain is taken to be zero
 * instead of evaluating 0/0, so that the filter memory never turns into
 * NaN (which would otherwise leak into all following calls).
 *
 * @param out output buffer for filtered samples
 * @param in input buffer containing the samples as they are after the
 *           postfilter steps so far
 * @param speech_synth input buffer containing speech synth before postfilter
 * @param size input buffer size
 * @param alpha exponential filter factor
 * @param gain_mem pointer to filter memory (single float)
 */
static void adaptive_gain_control(float *out, const float *in,
                                  const float *speech_synth,
                                  int size, float alpha, float *gain_mem)
{
    int i;
    float speech_energy = 0.0, postfilter_energy = 0.0, gain_scale_factor;
    float mem = *gain_mem;

    for (i = 0; i < size; i++) {
        speech_energy     += fabsf(speech_synth[i]);
        postfilter_energy += fabsf(in[i]);
    }

    /* guard against silent postfilter output (division by zero) */
    gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
                        (1.0 - alpha) * speech_energy / postfilter_energy;

    for (i = 0; i < size; i++) {
        mem = alpha * mem + gain_scale_factor;
        out[i] = in[i] * mem;
    }

    *gain_mem = mem;
}
475

  
476
/**
477
 * Kalman smoothing function.
478
 *
479
 * This function looks back pitch +/- 3 samples back into history to find
480
 * the best fitting curve (that one giving the optimal gain of the two
481
 * signals, i.e. the highest dot product between the two), and then
482
 * uses that signal history to smoothen the output of the speech synthesis
483
 * filter.
484
 *
485
 * @param s WMA Voice decoding context
486
 * @param pitch pitch of the speech signal
487
 * @param in input speech signal
488
 * @param out output pointer for smoothened signal
489
 * @param size input/output buffer size
490
 *
491
 * @returns -1 if no smoothening took place, e.g. because no optimal
492
 *          fit could be found, or 0 on success.
493
 */
494
static int kalman_smoothen(WMAVoiceContext *s, int pitch,
495
                           const float *in, float *out, int size)
496
{
497
    int n;
498
    float optimal_gain = 0, dot;
499
    const float *ptr = &in[-FFMAX(s->min_pitch_val, pitch - 3)],
500
                *end = &in[-FFMIN(s->max_pitch_val, pitch + 3)],
501
                *best_hist_ptr;
502

  
503
    /* find best fitting point in history */
504
    do {
505
        dot = ff_dot_productf(in, ptr, size);
506
        if (dot > optimal_gain) {
507
            optimal_gain  = dot;
508
            best_hist_ptr = ptr;
509
        }
510
    } while (--ptr >= end);
511

  
512
    if (optimal_gain <= 0)
513
        return -1;
514
    dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
515
    if (dot <= 0) // would be 1.0
516
        return -1;
517

  
518
    if (optimal_gain <= dot) {
519
        dot = dot / (dot + 0.6 * optimal_gain); // 0.625-1.000
520
    } else
521
        dot = 0.625;
522

  
523
    /* actual smoothing */
524
    for (n = 0; n < size; n++)
525
        out[n] = best_hist_ptr[n] + dot * (in[n] - best_hist_ptr[n]);
526

  
527
    return 0;
528
}
529

  
530
/**
 * Get the tilt factor of a formant filter from its transfer function.
 *
 * The result is the ratio between the lag-1 and the lag-0 autocorrelation
 * of the sequence { 1.0, lpcs[0], ..., lpcs[n_lpcs - 1] }.
 *
 * @see #tilt_factor() in amrnbdec.c, which does essentially the same,
 *      but somehow (??) it does a speech synthesis filter in the
 *      middle, which is missing here
 *
 * @param lpcs LPC coefficients
 * @param n_lpcs Size of LPC buffer
 * @returns the tilt factor
 */
static float tilt_factor(const float *lpcs, int n_lpcs)
{
    /* lag 0: the implicit leading 1.0 plus the energy of the LPCs */
    const float autocorr0 = 1.0     + ff_dot_productf(lpcs,  lpcs,    n_lpcs);
    /* lag 1: 1.0 * lpcs[0] plus the products of neighbouring LPCs */
    const float autocorr1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);

    return autocorr1 / autocorr0;
}
549

  
550
/**
551
 * Derive denoise filter coefficients (in real domain) from the LPCs.
552
 */
553
static void calc_input_response(WMAVoiceContext *s, float *lpcs,
554
                                int fcb_type, float *coeffs, int remainder)
555
{
556
    float last_coeff, min = 15.0, max = -15.0;
557
    float irange, angle_mul, gain_mul, range, sq;
558
    int n, idx;
559

  
560
    /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
561
    ff_rdft_calc(&s->rdft, lpcs);
562
#define log_range(var, assign) do { \
563
        float tmp = log10f(assign);  var = tmp; \
564
        max       = FFMAX(max, tmp); min = FFMIN(min, tmp); \
565
    } while (0)
566
    log_range(last_coeff,  lpcs[1]         * lpcs[1]);
567
    for (n = 1; n < 64; n++)
568
        log_range(lpcs[n], lpcs[n * 2]     * lpcs[n * 2] +
569
                           lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
570
    log_range(lpcs[0],     lpcs[0]         * lpcs[0]);
571
#undef log_range
572
    range    = max - min;
573
    lpcs[64] = last_coeff;
574

  
575
    /* Now, use this spectrum to pick out these frequencies with higher
576
     * (relative) power/energy (which we then take to be "not noise"),
577
     * and set up a table (still in lpc[]) of (relative) gains per frequency.
578
     * These frequencies will be maintained, while others ("noise") will be
579
     * decreased in the filter output. */
580
    irange    = 64.0 / range; // so irange*(max-value) is in the range [0, 63]
581
    gain_mul  = range * (fcb_type == FCB_TYPE_HARDCODED ? (5.0 / 13.0) :
582
                                                          (5.0 / 14.7));
583
    angle_mul = gain_mul * (8.0 * M_LN10 / M_PI);
584
    for (n = 0; n <= 64; n++) {
585
        float pow;
586

  
587
        idx = FFMAX(0, lrint((max - lpcs[n]) * irange) - 1);
588
        pow = wmavoice_denoise_power_table[s->denoise_strength][idx];
589
        lpcs[n] = angle_mul * pow;
590

  
591
        /* 70.57 =~ 1/log10(1.0331663) */
592
        idx = (pow * gain_mul - 0.0295) * 70.570526123;
593
        if (idx > 127) { // fallback if index falls outside table range
594
            coeffs[n] = wmavoice_energy_table[127] *
595
                        powf(1.0331663, idx - 127);
596
        } else
597
            coeffs[n] = wmavoice_energy_table[FFMAX(0, idx)];
598
    }
599

  
600
    /* calculate the Hilbert transform of the gains, which we do (since this
601
     * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
602
     * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
603
     * "moment" of the LPCs in this filter. */
604
    ff_dct_calc(&s->dct, lpcs);
605
    ff_dct_calc(&s->dst, lpcs);
606

  
607
    /* Split out the coefficient indexes into phase/magnitude pairs */
608
    idx = 255 + av_clip(lpcs[64],               -255, 255);
609
    coeffs[0]  = coeffs[0]  * s->cos[idx];
610
    idx = 255 + av_clip(lpcs[64] - 2 * lpcs[63], -255, 255);
611
    last_coeff = coeffs[64] * s->cos[idx];
612
    for (n = 63;; n--) {
613
        idx = 255 + av_clip(-lpcs[64] - 2 * lpcs[n - 1], -255, 255);
614
        coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
615
        coeffs[n * 2]     = coeffs[n] * s->cos[idx];
616

  
617
        if (!--n) break;
618

  
619
        idx = 255 + av_clip( lpcs[64] - 2 * lpcs[n - 1], -255, 255);
620
        coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
621
        coeffs[n * 2]     = coeffs[n] * s->cos[idx];
622
    }
623
    coeffs[1] = last_coeff;
624

  
625
    /* move into real domain */
626
    ff_rdft_calc(&s->irdft, coeffs);
627

  
628
    /* tilt correction and normalize scale */
629
    memset(&coeffs[remainder], 0, sizeof(coeffs[0]) * (128 - remainder));
630
    if (s->denoise_tilt_corr) {
631
        float tilt_mem = 0;
632

  
633
        coeffs[remainder - 1] = 0;
634
        ff_tilt_compensation(&tilt_mem,
635
                             -1.8 * tilt_factor(coeffs, remainder - 1),
636
                             coeffs, remainder);
637
    }
638
    sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
639
    for (n = 0; n < remainder; n++)
640
        coeffs[n] *= sq;
641
}
642

  
643
/**
644
 * This function applies a Wiener filter on the (noisy) speech signal as
645
 * a means to denoise it.
646
 *
647
 * - take RDFT of LPCs to get the power spectrum of the noise + speech;
648
 * - using this power spectrum, calculate (for each frequency) the Wiener
649
 *    filter gain, which depends on the frequency power and desired level
650
 *    of noise subtraction (when set too high, this leads to artifacts)
651
 *    We can do this symmetrically over the X-axis (so 0-4kHz is the inverse
652
 *    of 4-8kHz);
653
 * - by doing a phase shift, calculate the Hilbert transform of this array
654
 *    of per-frequency filter-gains to get the filtering coefficients;
655
 * - smoothen/normalize/de-tilt these filter coefficients as desired;
656
 * - take RDFT of noisy sound, apply the coefficients and take its IRDFT
657
 *    to get the denoised speech signal;
658
 * - the leftover (i.e. output of the IRDFT on denoised speech data beyond
659
 *    the frame boundary) are saved and applied to subsequent frames by an
660
 *    overlap-add method (otherwise you get clicking-artifacts).
661
 *
662
 * @param s WMA Voice decoding context
663
 * @param s fcb_type Frame (codebook) type
664
 * @param synth_pf input: the noisy speech signal, output: denoised speech
665
 *                 data; should be 16-byte aligned (for ASM purposes)
666
 * @param size size of the speech data
667
 * @param lpcs LPCs used to synthesize this frame's speech data
668
 */
669
static void wiener_denoise(WMAVoiceContext *s, int fcb_type,
670
                           float *synth_pf, int size,
671
                           const float *lpcs)
672
{
673
    int remainder, lim, n;
674

  
675
    if (fcb_type != FCB_TYPE_SILENCE) {
676
        float *tilted_lpcs = s->tilted_lpcs_pf,
677
              *coeffs = s->denoise_coeffs_pf, tilt_mem = 0;
678

  
679
        tilted_lpcs[0]           = 1.0;
680
        memcpy(&tilted_lpcs[1], lpcs, sizeof(lpcs[0]) * s->lsps);
681
        memset(&tilted_lpcs[s->lsps + 1], 0,
682
               sizeof(tilted_lpcs[0]) * (128 - s->lsps - 1));
683
        ff_tilt_compensation(&tilt_mem, 0.7 * tilt_factor(lpcs, s->lsps),
684
                             tilted_lpcs, s->lsps + 2);
685

  
686
        /* The IRDFT output (127 samples for 7-bit filter) beyond the frame
687
         * size is applied to the next frame. All input beyond this is zero,
688
         * and thus all output beyond this will go towards zero, hence we can
689
         * limit to min(size-1, 127-size) as a performance consideration. */
690
        remainder = FFMIN(127 - size, size - 1);
691
        calc_input_response(s, tilted_lpcs, fcb_type, coeffs, remainder);
692

  
693
        /* apply coefficients (in frequency spectrum domain), i.e. complex
694
         * number multiplication */
695
        memset(&synth_pf[size], 0, sizeof(synth_pf[0]) * (128 - size));
696
        ff_rdft_calc(&s->rdft, synth_pf);
697
        ff_rdft_calc(&s->rdft, coeffs);
698
        synth_pf[0] *= coeffs[0];
699
        synth_pf[1] *= coeffs[1];
700
        for (n = 1; n < 128; n++) {
701
            float v1 = synth_pf[n * 2], v2 = synth_pf[n * 2 + 1];
702
            synth_pf[n * 2]     = v1 * coeffs[n * 2] - v2 * coeffs[n * 2 + 1];
703
            synth_pf[n * 2 + 1] = v2 * coeffs[n * 2] + v1 * coeffs[n * 2 + 1];
704
        }
705
        ff_rdft_calc(&s->irdft, synth_pf);
706
    }
707

  
708
    /* merge filter output with the history of previous runs */
709
    if (s->denoise_filter_cache_size) {
710
        lim = FFMIN(s->denoise_filter_cache_size, size);
711
        for (n = 0; n < lim; n++)
712
            synth_pf[n] += s->denoise_filter_cache[n];
713
        s->denoise_filter_cache_size -= lim;
714
        memmove(s->denoise_filter_cache, &s->denoise_filter_cache[size],
715
                sizeof(s->denoise_filter_cache[0]) * s->denoise_filter_cache_size);
716
    }
717

  
718
    /* move remainder of filter output into a cache for future runs */
719
    if (fcb_type != FCB_TYPE_SILENCE) {
720
        lim = FFMIN(remainder, s->denoise_filter_cache_size);
721
        for (n = 0; n < lim; n++)
722
            s->denoise_filter_cache[n] += synth_pf[size + n];
723
        if (lim < remainder) {
724
            memcpy(&s->denoise_filter_cache[lim], &synth_pf[size + lim],
725
                   sizeof(s->denoise_filter_cache[0]) * (remainder - lim));
726
            s->denoise_filter_cache_size = remainder;
727
        }
728
    }
729
}
730

  
731
/**
732
 * Averaging projection filter, the postfilter used in WMAVoice.
733
 *
734
 * This uses the following steps:
735
 * - A zero-synthesis filter (generate excitation from synth signal)
736
 * - Kalman smoothing on excitation, based on pitch
737
 * - Re-synthesized smoothened output
738
 * - Iterative Wiener denoise filter
739
 * - Adaptive gain filter
740
 * - DC filter
741
 *
742
 * @param s WMAVoice decoding context
743
 * @param synth Speech synthesis output (before postfilter)
744
 * @param samples Output buffer for filtered samples
745
 * @param size Buffer size of synth & samples
746
 * @param lpcs Generated LPCs used for speech synthesis
747
 * @param fcb_type Frame type (silence, hardcoded, AW-pulses or FCB-pulses)
748
 * @param pitch Pitch of the input signal
749
 */
750
static void postfilter(WMAVoiceContext *s, const float *synth,
751
                       float *samples,    int size,
752
                       const float *lpcs, float *zero_exc_pf,
753
                       int fcb_type,      int pitch)
754
{
755
    float synth_filter_in_buf[MAX_FRAMESIZE / 2],
756
          *synth_pf = &s->synth_filter_out_buf[MAX_LSPS_ALIGN16],
757
          *synth_filter_in = zero_exc_pf;
758

  
759
    assert(size <= MAX_FRAMESIZE / 2);
760

  
761
    /* generate excitation from input signal */
762
    ff_celp_lp_zero_synthesis_filterf(zero_exc_pf, lpcs, synth, size, s->lsps);
763

  
764
    if (fcb_type >= FCB_TYPE_AW_PULSES &&
765
        !kalman_smoothen(s, pitch, zero_exc_pf, synth_filter_in_buf, size))
766
        synth_filter_in = synth_filter_in_buf;
767

  
768
    /* re-synthesize speech after smoothening, and keep history */
769
    ff_celp_lp_synthesis_filterf(synth_pf, lpcs,
770
                                 synth_filter_in, size, s->lsps);
771
    memcpy(&synth_pf[-s->lsps], &synth_pf[size - s->lsps],
772
           sizeof(synth_pf[0]) * s->lsps);
773

  
774
    wiener_denoise(s, fcb_type, synth_pf, size, lpcs);
775

  
776
    adaptive_gain_control(samples, synth_pf, synth, size, 0.99,
777
                          &s->postfilter_agc);
778

  
779
    if (s->dc_level > 8) {
780
        /* remove ultra-low frequency DC noise / highpass filter;
781
         * coefficients are identical to those used in SIPR decoding,
782
         * and very closely resemble those used in AMR-NB decoding. */
783
        ff_acelp_apply_order_2_transfer_function(samples, samples,
784
            (const float[2]) { -1.99997,      1.0 },
785
            (const float[2]) { -1.9330735188, 0.93589198496 },
786
            0.93980580475, s->dcf_mem, size);
787
    }
788
}
789
/**
790
 * @}
791
 */
792

  
793
/**
373 794
 * Dequantize LSPs
374 795
 * @param lsps output pointer to the array that will hold the LSPs
375 796
 * @param num number of LSPs to be dequantized
......
980 1401
 *
981 1402
 * @param ctx WMA Voice decoder context
982 1403
 * @param gb bit I/O context (s->gb or one for cross-packet superframes)
1404
 * @param frame_idx Frame number within superframe [0-2]
983 1405
 * @param samples pointer to output sample buffer, has space for at least 160
984 1406
 *                samples
985 1407
 * @param lsps LSP array
......
988 1410
 * @param synth target buffer for synthesized speech data
989 1411
 * @return 0 on success, <0 on error.
990 1412
 */
991
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb,
1413
static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
992 1414
                       float *samples,
993 1415
                       const double *lsps, const double *prev_lsps,
994 1416
                       float *excitation, float *synth)
......
1113 1535
    /* Averaging projection filter, if applicable. Else, just copy samples
1114 1536
     * from synthesis buffer */
1115 1537
    if (s->do_apf) {
1116
        // FIXME this is where APF would take place, currently not implemented
1117
        av_log_missing_feature(ctx, "APF", 0);
1118
        s->do_apf = 0;
1119
    } //else
1538
        double i_lsps[MAX_LSPS];
1539
        float lpcs[MAX_LSPS];
1540

  
1541
        for (n = 0; n < s->lsps; n++) // LSF -> LSP
1542
            i_lsps[n] = cos(0.5 * (prev_lsps[n] + lsps[n]));
1543
        ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1544
        postfilter(s, synth, samples, 80, lpcs,
1545
                   &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx],
1546
                   frame_descs[bd_idx].fcb_type, pitch[0]);
1547

  
1548
        for (n = 0; n < s->lsps; n++) // LSF -> LSP
1549
            i_lsps[n] = cos(lsps[n]);
1550
        ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);
1551
        postfilter(s, &synth[80], &samples[80], 80, lpcs,
1552
                   &s->zero_exc_pf[s->history_nsamples + MAX_FRAMESIZE * frame_idx + 80],
1553
                   frame_descs[bd_idx].fcb_type, pitch[0]);
1554
    } else
1120 1555
        memcpy(samples, synth, 160 * sizeof(synth[0]));
1121 1556

  
1122 1557
    /* Cache values for next frame */
......
1355 1790
            stabilize_lsps(lsps[n], s->lsps);
1356 1791
        }
1357 1792

  
1358
        if ((res = synth_frame(ctx, gb,
1793
        if ((res = synth_frame(ctx, gb, n,
1359 1794
                               &samples[n * MAX_FRAMESIZE],
1360 1795
                               lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
1361 1796
                               &excitation[s->history_nsamples + n * MAX_FRAMESIZE],
......
1381 1816
           s->lsps             * sizeof(*synth));
1382 1817
    memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],
1383 1818
           s->history_nsamples * sizeof(*excitation));
1819
    if (s->do_apf)
1820
        memmove(s->zero_exc_pf,       &s->zero_exc_pf[MAX_SFRAMESIZE],
1821
                s->history_nsamples * sizeof(*s->zero_exc_pf));
1384 1822

  
1385 1823
    return 0;
1386 1824
}
......
1535 1973
    return size;
1536 1974
}
1537 1975

  
1976
/**
 * Free the decoder's postfilter transform contexts.
 *
 * The RDFT/IRDFT and DCT/DST contexts are only set up by the init code
 * when the averaged projection filter (APF) is enabled, so they are only
 * released in that case.
 *
 * @param ctx codec context whose priv_data is the WMAVoiceContext
 * @return always 0
 */
static av_cold int wmavoice_decode_end(AVCodecContext *ctx)
{
    WMAVoiceContext *s = ctx->priv_data;

    /* nothing was allocated if the postfilter is not in use */
    if (!s->do_apf)
        return 0;

    ff_rdft_end(&s->rdft);
    ff_rdft_end(&s->irdft);
    ff_dct_end(&s->dct);
    ff_dct_end(&s->dst);

    return 0;
}
1989

  
1538 1990
static av_cold void wmavoice_flush(AVCodecContext *ctx)
1539 1991
{
1540 1992
    WMAVoiceContext *s = ctx->priv_data;
1541 1993
    int n;
1542 1994

  
1995
    s->postfilter_agc    = 0;
1543 1996
    s->sframe_cache_size = 0;
1544 1997
    s->skip_bits_next    = 0;
1545 1998
    for (n = 0; n < s->lsps; n++)
......
1550 2003
           sizeof(*s->synth_history)      * MAX_LSPS);
1551 2004
    memset(s->gain_pred_err,      0,
1552 2005
           sizeof(s->gain_pred_err));
2006

  
2007
    if (s->do_apf) {
2008
        memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
2009
               sizeof(*s->synth_filter_out_buf) * s->lsps);
2010
        memset(s->dcf_mem,              0,
2011
               sizeof(*s->dcf_mem)              * 2);
2012
        memset(s->zero_exc_pf,          0,
2013
               sizeof(*s->zero_exc_pf)          * s->history_nsamples);
2014
        memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
2015
    }
1553 2016
}
1554 2017

  
1555 2018
AVCodec wmavoice_decoder = {
......
1559 2022
    sizeof(WMAVoiceContext),
1560 2023
    wmavoice_decode_init,
1561 2024
    NULL,
1562
    NULL,
2025
    wmavoice_decode_end,
1563 2026
    wmavoice_decode_packet,
1564 2027
    CODEC_CAP_SUBFRAMES,
1565 2028
    .flush     = wmavoice_flush,
libavcodec/wmavoice_data.h
3020 3020
    0, -0.0273968070, -0.0392575669, -0.0276240534
3021 3021
};
3022 3022

  
3023
/**
3024
 * LUT for 1.071575641632 * pow(1.0331663, n - 127)
3025
 */
3026
static const float wmavoice_energy_table[128] = {
3027
    0.0169982178, 0.0175619858, 0.0181444519, 0.0187462362,
3028
    0.0193679795, 0.0200103437, 0.0206740128, 0.0213596933,
3029
    0.0220681153, 0.0228000330, 0.0235562258, 0.0243374986,
3030
    0.0251446834, 0.0259786395, 0.0268402549, 0.0277304468,
3031
    0.0286501631, 0.0296003830, 0.0305821182, 0.0315964139,
3032
    0.0326443501, 0.0337270424, 0.0348456436, 0.0360013446,
3033
    0.0371953760, 0.0384290090, 0.0397035571, 0.0410203772,
3034
    0.0423808713, 0.0437864880, 0.0452387238, 0.0467391249,
3035
    0.0482892887, 0.0498908657, 0.0515455612, 0.0532551367,
3036
    0.0550214125, 0.0568462692, 0.0587316496, 0.0606795611,
3037
    0.0626920777, 0.0647713419, 0.0669195677, 0.0691390421,
3038
    0.0714321284, 0.0738012678, 0.0762489827, 0.0787778794,
3039
    0.0813906502, 0.0840900769, 0.0868790336, 0.0897604897,
3040
    0.0927375130, 0.0958132732, 0.0989910450, 0.1022742117,
3041
    0.1056662688, 0.1091708280, 0.1127916204, 0.1165325012,
3042
    0.1203974531, 0.1243905911, 0.1285161668, 0.1327785725,
3043
    0.1371823465, 0.1417321773, 0.1464329093, 0.1512895470,
3044
    0.1563072616, 0.1614913951, 0.1668474671, 0.1723811803,
3045
    0.1780984262, 0.1840052921, 0.1901080668, 0.1964132480,
3046
    0.2029275487, 0.2096579046, 0.2166114816, 0.2237956830,
3047
    0.2312181577, 0.2388868085, 0.2468098001, 0.2549955679,
3048
    0.2634528274, 0.2721905830, 0.2812181375, 0.2905451026,
3049
    0.3001814086, 0.3101373153, 0.3204234225, 0.3310506819,
3050
    0.3420304081, 0.3533742912, 0.3650944090, 0.3772032397,
3051
    0.3897136755, 0.4026390362, 0.4159930832, 0.4297900346,
3052
    0.4440445799, 0.4587718956, 0.4739876619, 0.4897080789,
3053
    0.5059498840, 0.5227303696, 0.5400674019, 0.5579794393,
3054
    0.5764855528, 0.5956054456, 0.6153594745, 0.6357686714,
3055
    0.6568547659, 0.6786402082, 0.7011481929, 0.7244026842,
3056
    0.7484284410, 0.7732510432, 0.7988969192, 0.8253933741,
3057
    0.8527686184, 0.8810517982, 0.9102730265, 0.9404634147,
3058
    0.9716551065, 1.0038813113, 1.0371763400, 1.0715756416
3059
};
3060

  
3061
/**
3062
 * LUT for f(x,y) = pow((y + 6.9) / 64, 0.025 * (x + 1)).
3063
 */
3064
static const float wmavoice_denoise_power_table[12][64] = {
3065
    { 0.9458379339, 0.9490436287, 0.9518757236, 0.9544130754,
3066
      0.9567118717, 0.9588135761, 0.9607496688, 0.9625446194,
3067
      0.9642178285, 0.9657849396, 0.9672587526, 0.9686498743,
3068
      0.9699671937, 0.9712182343, 0.9724094211, 0.9735462842,
3069
      0.9746336187, 0.9756756090, 0.9766759291, 0.9776378218,
3070
      0.9785641645, 0.9794575217, 0.9803201890, 0.9811542296,
3071
      0.9819615045, 0.9827436985, 0.9835023412, 0.9842388263,
3072
      0.9849544265, 0.9856503078, 0.9863275406, 0.9869871101,
3073
      0.9876299254, 0.9882568267, 0.9888685922, 0.9894659445,
3074
      0.9900495551, 0.9906200497, 0.9911780119, 0.9917239872,
3075
      0.9922584859, 0.9927819864, 0.9932949377, 0.9937977618,
3076
      0.9942908555, 0.9947745929, 0.9952493267, 0.9957153901,
3077
      0.9961730980, 0.9966227482, 0.9970646231, 0.9974989903,
3078
      0.9979261037, 0.9983462046, 0.9987595223, 0.9991662752,
3079
      0.9995666709, 0.9999609077, 1.0003491745, 1.0007316515,
3080
      1.0011085110, 1.0014799178, 1.0018460292, 1.0022069960 },
3081
    { 0.8946093973, 0.9006838092, 0.9060673931, 0.9109043185,
3082
      0.9152976055, 0.9193234737, 0.9230399260, 0.9264921443,
3083
      0.9297160207, 0.9327405496, 0.9355894944, 0.9382825789,
3084
      0.9408363568, 0.9432648587, 0.9455800822, 0.9477923675,
3085
      0.9499106907, 0.9519428941, 0.9538958704, 0.9557757107,
3086
      0.9575878241, 0.9593370368, 0.9610276730, 0.9626636222,
3087
      0.9642483964, 0.9657851769, 0.9672768552, 0.9687260672,
3088
      0.9701352224, 0.9715065293, 0.9728420173, 0.9741435556,
3089
      0.9754128696, 0.9766515555, 0.9778610927, 0.9790428553,
3090
      0.9801981216, 0.9813280829, 0.9824338513, 0.9835164667,
3091
      0.9845769028, 0.9856160726, 0.9866348334, 0.9876339913,
3092
      0.9886143053, 0.9895764906, 0.9905212223, 0.9914491381,
3093
      0.9923608411, 0.9932569022, 0.9941378627, 0.9950042356,
3094
      0.9958565084, 0.9966951442, 0.9975205834, 0.9983332454,
3095
      0.9991335296, 0.9999218170, 1.0006984708, 1.0014638383,
3096
      1.0022182509, 1.0029620257, 1.0036954662, 1.0044188628 },
3097
    { 0.8461555040, 0.8547882305, 0.8624635555, 0.8693789920,
3098
      0.8756760853, 0.8814598273, 0.8868103032, 0.8917900284,
3099
      0.8964487626, 0.9008267754, 0.9049571273, 0.9088673021,
3100
      0.9125804007, 0.9161160306, 0.9194909803, 0.9227197376,
3101
      0.9258148939, 0.9287874629, 0.9316471355, 0.9344024839,
3102
      0.9370611291, 0.9396298766, 0.9421148300, 0.9445214846,
3103
      0.9468548060, 0.9491192967, 0.9513190517, 0.9534578074,
3104
      0.9555389816, 0.9575657096, 0.9595408742, 0.9614671327,
3105
      0.9633469396, 0.9651825670, 0.9669761222, 0.9687295635,
3106
      0.9704447142, 0.9721232742, 0.9737668316, 0.9753768718,
3107
      0.9769547868, 0.9785018824, 0.9800193854, 0.9815084500,
3108
      0.9829701633, 0.9844055505, 0.9858155796, 0.9872011653,
3109
      0.9885631734, 0.9899024236, 0.9912196934, 0.9925157203,
3110
      0.9937912053, 0.9950468143, 0.9962831814, 0.9975009102,
3111
      0.9987005760, 0.9998827277, 1.0010478892, 1.0021965608,
3112
      1.0033292209, 1.0044463270, 1.0055483173, 1.0066356112 },
3113
    { 0.8003259737, 0.8112313241, 0.8209581209, 0.8297466775,
3114
      0.8377697066, 0.8451556492, 0.8520027051, 0.8583876935,
3115
      0.8643718792, 0.8700049328, 0.8753277020, 0.8803741979,
3116
      0.8851730502, 0.8897485937, 0.8941216918, 0.8983103719,
3117
      0.9023303202, 0.9061952736, 0.9099173316, 0.9135072091,
3118
      0.9169744409, 0.9203275502, 0.9235741882, 0.9267212496,
3119
      0.9297749699, 0.9327410079, 0.9356245146, 0.9384301933,
3120
      0.9411623497, 0.9438249364, 0.9464215906, 0.9489556668,
3121
      0.9514302661, 0.9538482608, 0.9562123167, 0.9585249126,
3122
      0.9607883576, 0.9630048062, 0.9651762722, 0.9673046403,
3123
      0.9693916775, 0.9714390425, 0.9734482944, 0.9754209007,
3124
      0.9773582446, 0.9792616307, 0.9811322918, 0.9829713934,
3125
      0.9847800389, 0.9865592739, 0.9883100900, 0.9900334289,
3126
      0.9917301853, 0.9934012104, 0.9950473143, 0.9966692689,
3127
      0.9982678100, 0.9998436400, 1.0013974295, 1.0029298194,
3128
      1.0044414224, 1.0059328250, 1.0074045889, 1.0088572520 },
3129
    { 0.7569786654, 0.7698939195, 0.7814501054, 0.7919210783,
3130
      0.8015042240, 0.8103467104, 0.8185613167, 0.8262364557,
3131
      0.8334427763, 0.8402376615, 0.8466683811, 0.8527743561,
3132
      0.8585888194, 0.8641400582, 0.8694523567, 0.8745467247,
3133
      0.8794414652, 0.8841526254, 0.8886943552, 0.8930791981,
3134
      0.8973183276, 0.9014217415, 0.9053984227, 0.9092564737,
3135
      0.9130032283, 0.9166453478, 0.9201889007, 0.9236394320,
3136
      0.9270020224, 0.9302813390, 0.9334816797, 0.9366070112,
3137
      0.9396610028, 0.9426470554, 0.9455683275, 0.9484277579,
3138
      0.9512280860, 0.9539718690, 0.9566614986, 0.9592992147,
3139
      0.9618871182, 0.9644271823, 0.9669212630, 0.9693711079,
3140
      0.9717783651, 0.9741445900, 0.9764712529, 0.9787597445,
3141
      0.9810113822, 0.9832274148, 0.9854090274, 0.9875573457,
3142
      0.9896734398, 0.9917583281, 0.9938129803, 0.9958383209,
3143
      0.9978352315, 0.9998045539, 1.0017470919, 1.0036636145,
3144
      1.0055548568, 1.0074215229, 1.0092642871, 1.0110837959 },
3145
    { 0.7159791370, 0.7306629191, 0.7438433845, 0.7558198318,
3146
      0.7668086064, 0.7769714272, 0.7864325139, 0.7952894548,
3147
      0.8036203840, 0.8114888792, 0.8189474022, 0.8260397728,
3148
      0.8328029877, 0.8392685815, 0.8454636629, 0.8514117142,
3149
      0.8571332177, 0.8626461513, 0.8679663850, 0.8731080020,
3150
      0.8780835596, 0.8829043049, 0.8875803529, 0.8921208349,
3151
      0.8965340237, 0.9008274393, 0.9050079382, 0.9090817905,
3152
      0.9130547454, 0.9169320882, 0.9207186893, 0.9244190474,
3153
      0.9280373261, 0.9315773876, 0.9350428208, 0.9384369673,
3154
      0.9417629433, 0.9450236603, 0.9482218422, 0.9513600421,
3155
      0.9544406555, 0.9574659338, 0.9604379957, 0.9633588374,
3156
      0.9662303420, 0.9690542879, 0.9718323569, 0.9745661408,
3157
      0.9772571477, 0.9799068082, 0.9825164805, 0.9850874551,
3158
      0.9876209597, 0.9901181627, 0.9925801775, 0.9950080658,
3159
      0.9974028405, 0.9997654692, 1.0020968764, 1.0043979464,
3160
      1.0066695255, 1.0089124239, 1.0111274185, 1.0133152537 },
3161
    { 0.6772002277, 0.6934309881, 0.7080464599, 0.7213643301,
3162
      0.7336148970, 0.7449707526, 0.7555647772, 0.7655015856,
3163
      0.7748651015, 0.7837237382, 0.7921340426, 0.8001433220,
3164
      0.8077915768, 0.8151129499, 0.8221368310, 0.8288887107,
3165
      0.8353908496, 0.8416628090, 0.8477218755, 0.8535834053,
3166
      0.8592611049, 0.8647672624, 0.8701129393, 0.8753081305,
3167
      0.8803618988, 0.8852824894, 0.8900774261, 0.8947535945,
3168
      0.8993173131, 0.9037743949, 0.9081302004, 0.9123896841,
3169
      0.9165574352, 0.9206377129, 0.9246344779, 0.9285514202,
3170
      0.9323919830, 0.9361593853, 0.9398566405, 0.9434865742,
3171
      0.9470518396, 0.9505549317, 0.9539981992, 0.9573838564,
3172
      0.9607139933, 0.9639905847, 0.9672154989, 0.9703905051,
3173
      0.9735172803, 0.9765974162, 0.9796324243, 0.9826237418,
3174
      0.9855727362, 0.9884807098, 0.9913489039, 0.9941785028,
3175
      0.9969706369, 0.9997263861, 1.0024467831, 1.0051328157,
3176
      1.0077854297, 1.0104055314, 1.0129939892, 1.0155516364 },
3177
    { 0.6405216642, 0.6580962612, 0.6739722363, 0.6884795488,
3178
      0.7018580813, 0.7142880714, 0.7259086094, 0.7368294324,
3179
      0.7471387455, 0.7569085832, 0.7661985859, 0.7750587283,
3180
      0.7835313288, 0.7916525600, 0.7994535998, 0.8069615243,
3181
      0.8142000068, 0.8211898738, 0.8279495504, 0.8344954211,
3182
      0.8408421252, 0.8470027997, 0.8529892811, 0.8588122744,
3183
      0.8644814947, 0.8700057878, 0.8753932324, 0.8806512276,
3184
      0.8857865684, 0.8908055105, 0.8957138271, 0.9005168576,
3185
      0.9052195513, 0.9098265046, 0.9143419945, 0.9187700080,
3186
      0.9231142680, 0.9273782568, 0.9315652364, 0.9356782672,
3187
      0.9397202245, 0.9436938133, 0.9476015819, 0.9514459336,
3188
      0.9552291382, 0.9589533414, 0.9626205741, 0.9662327603,
3189
      0.9697917251, 0.9732992008, 0.9767568340, 0.9801661903,
3190
      0.9835287605, 0.9868459649, 0.9901191578, 0.9933496315,
3191
      0.9965386205, 0.9996873045, 1.0027968119, 1.0058682226,
3192
      1.0089025710, 1.0119008485, 1.0148640056, 1.0177929548 },
3193
    { 0.6058296875, 0.6245620637, 0.6415378101, 0.6570938835,
3194
      0.6714759586, 0.6848691001, 0.6974164561, 0.7092312055,
3195
      0.7204044988, 0.7310109103, 0.7411122884, 0.7507605397,
3196
      0.7599996842, 0.7688674015, 0.7773962122, 0.7856143935,
3197
      0.7935466990, 0.8012149303, 0.8086383963, 0.8158342858,
3198
      0.8228179717, 0.8296032631, 0.8362026133, 0.8426272954,
3199
      0.8488875492, 0.8549927056, 0.8609512936, 0.8667711307,
3200
      0.8724594015, 0.8780227256, 0.8834672161, 0.8887985309,
3201
      0.8940219180, 0.8991422543, 0.9041640810, 0.9090916337,
3202
      0.9139288704, 0.9186794948, 0.9233469789, 0.9279345818,
3203
      0.9324453671, 0.9368822185, 0.9412478543, 0.9455448393,
3204
      0.9497755970, 0.9539424198, 0.9580474782, 0.9620928299,
3205
      0.9660804271, 0.9700121244, 0.9738896845, 0.9777147851,
3206
      0.9814890239, 0.9852139236, 0.9888909370, 0.9925214512,
3207
      0.9961067913, 0.9996482244, 1.0031469629, 1.0066041676,
3208
      1.0100209506, 1.0133983785, 1.0167374742, 1.0200392198 },
3209
    { 0.5730166999, 0.5927366473, 0.6106642672, 0.6271389942,
3210
      0.6424090212, 0.6566617910, 0.6700426292, 0.6826666808,
3211
      0.6946268614, 0.7059993279, 0.7168473476, 0.7272241023,
3212
      0.7371747608, 0.7467380401, 0.7559474006, 0.7648319736,
3213
      0.7734172908, 0.7817258650, 0.7897776570, 0.7975904541,
3214
      0.8051801811, 0.8125611560, 0.8197463039, 0.8267473349,
3215
      0.8335748949, 0.8402386937, 0.8467476129, 0.8531098003,
3216
      0.8593327495, 0.8654233698, 0.8713880464, 0.8772326935,
3217
      0.8829628002, 0.8885834710, 0.8940994619, 0.8995152120,
3218
      0.9048348715, 0.9100623268, 0.9152012229, 0.9202549833,
3219
      0.9252268281, 0.9301197899, 0.9349367288, 0.9396803449,
3220
      0.9443531909, 0.9489576823, 0.9534961076, 0.9579706374,
3221
      0.9623833320, 0.9667361492, 0.9710309512, 0.9752695109,
3222
      0.9794535174, 0.9835845813, 0.9876642399, 0.9916939614,
3223
      0.9956751493, 0.9996091459, 1.0034972362, 1.0073406510,
3224
      1.0111405700, 1.0148981248, 1.0186144013, 1.0222904422 },
3225
    { 0.5419809316, 0.5625329386, 0.5812764912, 0.5985496562,
3226
      0.6146003370, 0.6296162401, 0.6437432340, 0.6570971404,
3227
      0.6697716039, 0.6818435182, 0.6933768712, 0.7044255353,
3228
      0.7150353340, 0.7252456009, 0.7350903742, 0.7445993259,
3229
      0.7537984929, 0.7627108595, 0.7713568269, 0.7797545943,
3230
      0.7879204712, 0.7958691361, 0.8036138516, 0.8111666444,
3231
      0.8185384580, 0.8257392814, 0.8327782597, 0.8396637886,
3232
      0.8464035955, 0.8530048108, 0.8594740287, 0.8658173611,
3233
      0.8720404845, 0.8781486812, 0.8841468762, 0.8900396688,
3234
      0.8958313620, 0.9015259874, 0.9071273286, 0.9126389413,
3235
      0.9180641715, 0.9234061727, 0.9286679198, 0.9338522236,
3236
      0.9389617420, 0.9439989920, 0.9489663591, 0.9538661069,
3237
      0.9587003852, 0.9634712378, 0.9681806094, 0.9728303524,
3238
      0.9774222323, 0.9819579336, 0.9864390644, 0.9908671615,
3239
      0.9952436943, 0.9995700689, 1.0038476318, 1.0080776733,
3240
      1.0122614305, 1.0164000906, 1.0204947932, 1.0245466331 },
3241
    { 0.5126261246, 0.5338683013, 0.5533029807, 0.5712636181,
3242
      0.5879954388, 0.6036845987, 0.6184760989, 0.6324853169,
3243
      0.6458057215, 0.6585142011, 0.6706748475, 0.6823417062,
3244
      0.6935608163, 0.7043717519, 0.7148088052, 0.7249019070,
3245
      0.7346773529, 0.7441583823, 0.7533656456, 0.7623175831,
3246
      0.7710307376, 0.7795200117, 0.7877988829, 0.7958795841,
3247
      0.8037732557, 0.8114900754, 0.8190393682, 0.8264297018,
3248
      0.8336689680, 0.8407644543, 0.8477229049, 0.8545505751,
3249
      0.8612532786, 0.8678364291, 0.8743050768, 0.8806639416,
3250
      0.8869174414, 0.8930697184, 0.8991246621, 0.9050859297,
3251
      0.9109569648, 0.9167410144, 0.9224411436, 0.9280602496,
3252
      0.9336010737, 0.9390662129, 0.9444581300, 0.9497791628,
3253
      0.9550315328, 0.9602173528, 0.9653386345, 0.9703972943,
3254
      0.9753951600, 0.9803339761, 0.9852154088, 0.9900410510,
3255
      0.9948124263, 0.9995309934, 1.0041981497, 1.0088152348,
3256
      1.0133835335, 1.0179042791, 1.0223786564, 1.0268078035 },
3257
};
3258

  
3023 3259
#endif /* AVCODEC_WMAVOICE_DATA_H */

Also available in: Unified diff