ffmpeg / libavcodec / amrwbdec.c @ b2ed95ec
History | View | Annotate | Download (44.8 KB)
1 |
/*
|
---|---|
2 |
* AMR wideband decoder
|
3 |
* Copyright (c) 2010 Marcelo Galvao Povoa
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A particular PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
/**
|
23 |
* @file
|
24 |
* AMR wideband decoder
|
25 |
*/
|
26 |
|
27 |
#include "libavutil/lfg.h" |
28 |
|
29 |
#include "avcodec.h" |
30 |
#include "get_bits.h" |
31 |
#include "lsp.h" |
32 |
#include "celp_math.h" |
33 |
#include "celp_filters.h" |
34 |
#include "acelp_filters.h" |
35 |
#include "acelp_vectors.h" |
36 |
#include "acelp_pitch_delay.h" |
37 |
|
38 |
#define AMR_USE_16BIT_TABLES
|
39 |
#include "amr.h" |
40 |
|
41 |
#include "amrwbdata.h" |
42 |
|
43 |
typedef struct { |
44 |
AMRWBFrame frame; ///< AMRWB parameters decoded from bitstream
|
45 |
enum Mode fr_cur_mode; ///< mode index of current frame |
46 |
uint8_t fr_quality; ///< frame quality index (FQI)
|
47 |
float isf_cur[LP_ORDER]; ///< working ISF vector from current frame |
48 |
float isf_q_past[LP_ORDER]; ///< quantized ISF vector of the previous frame |
49 |
float isf_past_final[LP_ORDER]; ///< final processed ISF vector of the previous frame |
50 |
double isp[4][LP_ORDER]; ///< ISP vectors from current frame |
51 |
double isp_sub4_past[LP_ORDER]; ///< ISP vector for the 4th subframe of the previous frame |
52 |
|
53 |
float lp_coef[4][LP_ORDER]; ///< Linear Prediction Coefficients from ISP vector |
54 |
|
55 |
uint8_t base_pitch_lag; ///< integer part of pitch lag for the next relative subframe
|
56 |
uint8_t pitch_lag_int; ///< integer part of pitch lag of the previous subframe
|
57 |
|
58 |
float excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 2 + AMRWB_SFR_SIZE]; ///< current excitation and all necessary excitation history |
59 |
float *excitation; ///< points to current excitation in excitation_buf[] |
60 |
|
61 |
float pitch_vector[AMRWB_SFR_SIZE]; ///< adaptive codebook (pitch) vector for current subframe |
62 |
float fixed_vector[AMRWB_SFR_SIZE]; ///< algebraic codebook (fixed) vector for current subframe |
63 |
|
64 |
float prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes |
65 |
float pitch_gain[6]; ///< quantified pitch gains for the current and previous five subframes |
66 |
float fixed_gain[2]; ///< quantified fixed gains for the current and previous subframes |
67 |
|
68 |
float tilt_coef; ///< {beta_1} related to the voicing of the previous subframe |
69 |
|
70 |
float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness to determine "onset" |
71 |
uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none
|
72 |
float prev_tr_gain; ///< previous initial gain used by noise enhancer for threshold |
73 |
|
74 |
float samples_az[LP_ORDER + AMRWB_SFR_SIZE]; ///< low-band samples and memory from synthesis at 12.8kHz |
75 |
float samples_up[UPS_MEM_SIZE + AMRWB_SFR_SIZE]; ///< low-band samples and memory processed for upsampling |
76 |
float samples_hb[LP_ORDER_16k + AMRWB_SFR_SIZE_16k]; ///< high-band samples and memory from synthesis at 16kHz |
77 |
|
78 |
float hpf_31_mem[2], hpf_400_mem[2]; ///< previous values in the high pass filters |
79 |
float demph_mem[1]; ///< previous value in the de-emphasis filter |
80 |
float bpf_6_7_mem[HB_FIR_SIZE]; ///< previous values in the high-band band pass filter |
81 |
float lpf_7_mem[HB_FIR_SIZE]; ///< previous values in the high-band low pass filter |
82 |
|
83 |
AVLFG prng; ///< random number generator for white noise excitation
|
84 |
uint8_t first_frame; ///< flag active during decoding of the first frame
|
85 |
} AMRWBContext; |
86 |
|
87 |
static av_cold int amrwb_decode_init(AVCodecContext *avctx) |
88 |
{ |
89 |
AMRWBContext *ctx = avctx->priv_data; |
90 |
int i;
|
91 |
|
92 |
avctx->sample_fmt = SAMPLE_FMT_FLT; |
93 |
|
94 |
av_lfg_init(&ctx->prng, 1);
|
95 |
|
96 |
ctx->excitation = &ctx->excitation_buf[AMRWB_P_DELAY_MAX + LP_ORDER + 1];
|
97 |
ctx->first_frame = 1;
|
98 |
|
99 |
for (i = 0; i < LP_ORDER; i++) |
100 |
ctx->isf_past_final[i] = isf_init[i] * (1.0f / (1 << 15)); |
101 |
|
102 |
for (i = 0; i < 4; i++) |
103 |
ctx->prediction_error[i] = MIN_ENERGY; |
104 |
|
105 |
return 0; |
106 |
} |
107 |
|
108 |
/**
|
109 |
* Decode the frame header in the "MIME/storage" format. This format
|
110 |
* is simpler and does not carry the auxiliary information of the frame
|
111 |
*
|
112 |
* @param[in] ctx The Context
|
113 |
* @param[in] buf Pointer to the input buffer
|
114 |
*
|
115 |
* @return The decoded header length in bytes
|
116 |
*/
|
117 |
static int decode_mime_header(AMRWBContext *ctx, const uint8_t *buf) |
118 |
{ |
119 |
GetBitContext gb; |
120 |
init_get_bits(&gb, buf, 8);
|
121 |
|
122 |
/* Decode frame header (1st octet) */
|
123 |
skip_bits(&gb, 1); // padding bit |
124 |
ctx->fr_cur_mode = get_bits(&gb, 4);
|
125 |
ctx->fr_quality = get_bits1(&gb); |
126 |
skip_bits(&gb, 2); // padding bits |
127 |
|
128 |
return 1; |
129 |
} |
130 |
|
131 |
/**
|
132 |
* Decodes quantized ISF vectors using 36-bit indexes (6K60 mode only)
|
133 |
*
|
134 |
* @param[in] ind Array of 5 indexes
|
135 |
* @param[out] isf_q Buffer for isf_q[LP_ORDER]
|
136 |
*
|
137 |
*/
|
138 |
static void decode_isf_indices_36b(uint16_t *ind, float *isf_q) |
139 |
{ |
140 |
int i;
|
141 |
|
142 |
for (i = 0; i < 9; i++) |
143 |
isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15)); |
144 |
|
145 |
for (i = 0; i < 7; i++) |
146 |
isf_q[i + 9] = dico2_isf[ind[1]][i] * (1.0f / (1 << 15)); |
147 |
|
148 |
for (i = 0; i < 5; i++) |
149 |
isf_q[i] += dico21_isf_36b[ind[2]][i] * (1.0f / (1 << 15)); |
150 |
|
151 |
for (i = 0; i < 4; i++) |
152 |
isf_q[i + 5] += dico22_isf_36b[ind[3]][i] * (1.0f / (1 << 15)); |
153 |
|
154 |
for (i = 0; i < 7; i++) |
155 |
isf_q[i + 9] += dico23_isf_36b[ind[4]][i] * (1.0f / (1 << 15)); |
156 |
} |
157 |
|
158 |
/**
|
159 |
* Decodes quantized ISF vectors using 46-bit indexes (except 6K60 mode)
|
160 |
*
|
161 |
* @param[in] ind Array of 7 indexes
|
162 |
* @param[out] isf_q Buffer for isf_q[LP_ORDER]
|
163 |
*
|
164 |
*/
|
165 |
static void decode_isf_indices_46b(uint16_t *ind, float *isf_q) |
166 |
{ |
167 |
int i;
|
168 |
|
169 |
for (i = 0; i < 9; i++) |
170 |
isf_q[i] = dico1_isf[ind[0]][i] * (1.0f / (1 << 15)); |
171 |
|
172 |
for (i = 0; i < 7; i++) |
173 |
isf_q[i + 9] = dico2_isf[ind[1]][i] * (1.0f / (1 << 15)); |
174 |
|
175 |
for (i = 0; i < 3; i++) |
176 |
isf_q[i] += dico21_isf[ind[2]][i] * (1.0f / (1 << 15)); |
177 |
|
178 |
for (i = 0; i < 3; i++) |
179 |
isf_q[i + 3] += dico22_isf[ind[3]][i] * (1.0f / (1 << 15)); |
180 |
|
181 |
for (i = 0; i < 3; i++) |
182 |
isf_q[i + 6] += dico23_isf[ind[4]][i] * (1.0f / (1 << 15)); |
183 |
|
184 |
for (i = 0; i < 3; i++) |
185 |
isf_q[i + 9] += dico24_isf[ind[5]][i] * (1.0f / (1 << 15)); |
186 |
|
187 |
for (i = 0; i < 4; i++) |
188 |
isf_q[i + 12] += dico25_isf[ind[6]][i] * (1.0f / (1 << 15)); |
189 |
} |
190 |
|
191 |
/**
|
192 |
* Apply mean and past ISF values using the prediction factor
|
193 |
* Updates past ISF vector
|
194 |
*
|
195 |
* @param[in,out] isf_q Current quantized ISF
|
196 |
* @param[in,out] isf_past Past quantized ISF
|
197 |
*
|
198 |
*/
|
199 |
static void isf_add_mean_and_past(float *isf_q, float *isf_past) |
200 |
{ |
201 |
int i;
|
202 |
float tmp;
|
203 |
|
204 |
for (i = 0; i < LP_ORDER; i++) { |
205 |
tmp = isf_q[i]; |
206 |
isf_q[i] += isf_mean[i] * (1.0f / (1 << 15)); |
207 |
isf_q[i] += PRED_FACTOR * isf_past[i]; |
208 |
isf_past[i] = tmp; |
209 |
} |
210 |
} |
211 |
|
212 |
/**
|
213 |
* Interpolate the fourth ISP vector from current and past frames
|
214 |
* to obtain a ISP vector for each subframe
|
215 |
*
|
216 |
* @param[in,out] isp_q ISPs for each subframe
|
217 |
* @param[in] isp4_past Past ISP for subframe 4
|
218 |
*/
|
219 |
static void interpolate_isp(double isp_q[4][LP_ORDER], const double *isp4_past) |
220 |
{ |
221 |
int i, k;
|
222 |
|
223 |
for (k = 0; k < 3; k++) { |
224 |
float c = isfp_inter[k];
|
225 |
for (i = 0; i < LP_ORDER; i++) |
226 |
isp_q[k][i] = (1.0 - c) * isp4_past[i] + c * isp_q[3][i]; |
227 |
} |
228 |
} |
229 |
|
230 |
/**
|
231 |
* Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes)
|
232 |
* Calculate integer lag and fractional lag always using 1/4 resolution
|
233 |
* In 1st and 3rd subframes the index is relative to last subframe integer lag
|
234 |
*
|
235 |
* @param[out] lag_int Decoded integer pitch lag
|
236 |
* @param[out] lag_frac Decoded fractional pitch lag
|
237 |
* @param[in] pitch_index Adaptive codebook pitch index
|
238 |
* @param[in,out] base_lag_int Base integer lag used in relative subframes
|
239 |
* @param[in] subframe Current subframe index (0 to 3)
|
240 |
*/
|
241 |
static void decode_pitch_lag_high(int *lag_int, int *lag_frac, int pitch_index, |
242 |
uint8_t *base_lag_int, int subframe)
|
243 |
{ |
244 |
if (subframe == 0 || subframe == 2) { |
245 |
if (pitch_index < 376) { |
246 |
*lag_int = (pitch_index + 137) >> 2; |
247 |
*lag_frac = pitch_index - (*lag_int << 2) + 136; |
248 |
} else if (pitch_index < 440) { |
249 |
*lag_int = (pitch_index + 257 - 376) >> 1; |
250 |
*lag_frac = (pitch_index - (*lag_int << 1) + 256 - 376) << 1; |
251 |
/* the actual resolution is 1/2 but expressed as 1/4 */
|
252 |
} else {
|
253 |
*lag_int = pitch_index - 280;
|
254 |
*lag_frac = 0;
|
255 |
} |
256 |
/* minimum lag for next subframe */
|
257 |
*base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0), |
258 |
AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15);
|
259 |
// XXX: the spec states clearly that *base_lag_int should be
|
260 |
// the nearest integer to *lag_int (minus 8), but the ref code
|
261 |
// actually always uses its floor, I'm following the latter
|
262 |
} else {
|
263 |
*lag_int = (pitch_index + 1) >> 2; |
264 |
*lag_frac = pitch_index - (*lag_int << 2);
|
265 |
*lag_int += *base_lag_int; |
266 |
} |
267 |
} |
268 |
|
269 |
/**
|
270 |
* Decode a adaptive codebook index into pitch lag for 8k85 and 6k60 modes
|
271 |
* Description is analogous to decode_pitch_lag_high, but in 6k60 relative
|
272 |
* index is used for all subframes except the first
|
273 |
*/
|
274 |
static void decode_pitch_lag_low(int *lag_int, int *lag_frac, int pitch_index, |
275 |
uint8_t *base_lag_int, int subframe, enum Mode mode) |
276 |
{ |
277 |
if (subframe == 0 || (subframe == 2 && mode != MODE_6k60)) { |
278 |
if (pitch_index < 116) { |
279 |
*lag_int = (pitch_index + 69) >> 1; |
280 |
*lag_frac = (pitch_index - (*lag_int << 1) + 68) << 1; |
281 |
} else {
|
282 |
*lag_int = pitch_index - 24;
|
283 |
*lag_frac = 0;
|
284 |
} |
285 |
// XXX: same problem as before
|
286 |
*base_lag_int = av_clip(*lag_int - 8 - (*lag_frac < 0), |
287 |
AMRWB_P_DELAY_MIN, AMRWB_P_DELAY_MAX - 15);
|
288 |
} else {
|
289 |
*lag_int = (pitch_index + 1) >> 1; |
290 |
*lag_frac = (pitch_index - (*lag_int << 1)) << 1; |
291 |
*lag_int += *base_lag_int; |
292 |
} |
293 |
} |
294 |
|
295 |
/**
|
296 |
* Find the pitch vector by interpolating the past excitation at the
|
297 |
* pitch delay, which is obtained in this function
|
298 |
*
|
299 |
* @param[in,out] ctx The context
|
300 |
* @param[in] amr_subframe Current subframe data
|
301 |
* @param[in] subframe Current subframe index (0 to 3)
|
302 |
*/
|
303 |
static void decode_pitch_vector(AMRWBContext *ctx, |
304 |
const AMRWBSubFrame *amr_subframe,
|
305 |
const int subframe) |
306 |
{ |
307 |
int pitch_lag_int, pitch_lag_frac;
|
308 |
int i;
|
309 |
float *exc = ctx->excitation;
|
310 |
enum Mode mode = ctx->fr_cur_mode;
|
311 |
|
312 |
if (mode <= MODE_8k85) {
|
313 |
decode_pitch_lag_low(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap, |
314 |
&ctx->base_pitch_lag, subframe, mode); |
315 |
} else
|
316 |
decode_pitch_lag_high(&pitch_lag_int, &pitch_lag_frac, amr_subframe->adap, |
317 |
&ctx->base_pitch_lag, subframe); |
318 |
|
319 |
ctx->pitch_lag_int = pitch_lag_int; |
320 |
pitch_lag_int += pitch_lag_frac > 0;
|
321 |
|
322 |
/* Calculate the pitch vector by interpolating the past excitation at the
|
323 |
pitch lag using a hamming windowed sinc function */
|
324 |
ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int,
|
325 |
ac_inter, 4,
|
326 |
pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4), |
327 |
LP_ORDER, AMRWB_SFR_SIZE + 1);
|
328 |
|
329 |
/* Check which pitch signal path should be used
|
330 |
* 6k60 and 8k85 modes have the ltp flag set to 0 */
|
331 |
if (amr_subframe->ltp) {
|
332 |
memcpy(ctx->pitch_vector, exc, AMRWB_SFR_SIZE * sizeof(float)); |
333 |
} else {
|
334 |
for (i = 0; i < AMRWB_SFR_SIZE; i++) |
335 |
ctx->pitch_vector[i] = 0.18 * exc[i - 1] + 0.64 * exc[i] + |
336 |
0.18 * exc[i + 1]; |
337 |
memcpy(exc, ctx->pitch_vector, AMRWB_SFR_SIZE * sizeof(float)); |
338 |
} |
339 |
} |
340 |
|
341 |
/** Get x bits in the index interval [lsb,lsb+len-1] inclusive */
|
342 |
#define BIT_STR(x,lsb,len) (((x) >> (lsb)) & ((1 << (len)) - 1)) |
343 |
|
344 |
/** Get the bit at specified position */
|
345 |
#define BIT_POS(x, p) (((x) >> (p)) & 1) |
346 |
|
347 |
/**
|
348 |
* The next six functions decode_[i]p_track decode exactly i pulses
|
349 |
* positions and amplitudes (-1 or 1) in a subframe track using
|
350 |
* an encoded pulse indexing (TS 26.190 section 5.8.2)
|
351 |
*
|
352 |
* The results are given in out[], in which a negative number means
|
353 |
* amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) )
|
354 |
*
|
355 |
* @param[out] out Output buffer (writes i elements)
|
356 |
* @param[in] code Pulse index (no. of bits varies, see below)
|
357 |
* @param[in] m (log2) Number of potential positions
|
358 |
* @param[in] off Offset for decoded positions
|
359 |
*/
|
360 |
static inline void decode_1p_track(int *out, int code, int m, int off) |
361 |
{ |
362 |
int pos = BIT_STR(code, 0, m) + off; ///code: m+1 bits |
363 |
|
364 |
out[0] = BIT_POS(code, m) ? -pos : pos;
|
365 |
} |
366 |
|
367 |
static inline void decode_2p_track(int *out, int code, int m, int off) ///code: 2m+1 bits |
368 |
{ |
369 |
int pos0 = BIT_STR(code, m, m) + off;
|
370 |
int pos1 = BIT_STR(code, 0, m) + off; |
371 |
|
372 |
out[0] = BIT_POS(code, 2*m) ? -pos0 : pos0; |
373 |
out[1] = BIT_POS(code, 2*m) ? -pos1 : pos1; |
374 |
out[1] = pos0 > pos1 ? -out[1] : out[1]; |
375 |
} |
376 |
|
377 |
static void decode_3p_track(int *out, int code, int m, int off) ///code: 3m+1 bits |
378 |
{ |
379 |
int half_2p = BIT_POS(code, 2*m - 1) << (m - 1); |
380 |
|
381 |
decode_2p_track(out, BIT_STR(code, 0, 2*m - 1), |
382 |
m - 1, off + half_2p);
|
383 |
decode_1p_track(out + 2, BIT_STR(code, 2*m, m + 1), m, off); |
384 |
} |
385 |
|
386 |
static void decode_4p_track(int *out, int code, int m, int off) ///code: 4m bits |
387 |
{ |
388 |
int half_4p, subhalf_2p;
|
389 |
int b_offset = 1 << (m - 1); |
390 |
|
391 |
switch (BIT_STR(code, 4*m - 2, 2)) { /* case ID (2 bits) */ |
392 |
case 0: /* 0 pulses in A, 4 pulses in B or vice versa */ |
393 |
half_4p = BIT_POS(code, 4*m - 3) << (m - 1); // which has 4 pulses |
394 |
subhalf_2p = BIT_POS(code, 2*m - 3) << (m - 2); |
395 |
|
396 |
decode_2p_track(out, BIT_STR(code, 0, 2*m - 3), |
397 |
m - 2, off + half_4p + subhalf_2p);
|
398 |
decode_2p_track(out + 2, BIT_STR(code, 2*m - 2, 2*m - 1), |
399 |
m - 1, off + half_4p);
|
400 |
break;
|
401 |
case 1: /* 1 pulse in A, 3 pulses in B */ |
402 |
decode_1p_track(out, BIT_STR(code, 3*m - 2, m), |
403 |
m - 1, off);
|
404 |
decode_3p_track(out + 1, BIT_STR(code, 0, 3*m - 2), |
405 |
m - 1, off + b_offset);
|
406 |
break;
|
407 |
case 2: /* 2 pulses in each half */ |
408 |
decode_2p_track(out, BIT_STR(code, 2*m - 1, 2*m - 1), |
409 |
m - 1, off);
|
410 |
decode_2p_track(out + 2, BIT_STR(code, 0, 2*m - 1), |
411 |
m - 1, off + b_offset);
|
412 |
break;
|
413 |
case 3: /* 3 pulses in A, 1 pulse in B */ |
414 |
decode_3p_track(out, BIT_STR(code, m, 3*m - 2), |
415 |
m - 1, off);
|
416 |
decode_1p_track(out + 3, BIT_STR(code, 0, m), |
417 |
m - 1, off + b_offset);
|
418 |
break;
|
419 |
} |
420 |
} |
421 |
|
422 |
static void decode_5p_track(int *out, int code, int m, int off) ///code: 5m bits |
423 |
{ |
424 |
int half_3p = BIT_POS(code, 5*m - 1) << (m - 1); |
425 |
|
426 |
decode_3p_track(out, BIT_STR(code, 2*m + 1, 3*m - 2), |
427 |
m - 1, off + half_3p);
|
428 |
|
429 |
decode_2p_track(out + 3, BIT_STR(code, 0, 2*m + 1), m, off); |
430 |
} |
431 |
|
432 |
static void decode_6p_track(int *out, int code, int m, int off) ///code: 6m-2 bits |
433 |
{ |
434 |
int b_offset = 1 << (m - 1); |
435 |
/* which half has more pulses in cases 0 to 2 */
|
436 |
int half_more = BIT_POS(code, 6*m - 5) << (m - 1); |
437 |
int half_other = b_offset - half_more;
|
438 |
|
439 |
switch (BIT_STR(code, 6*m - 4, 2)) { /* case ID (2 bits) */ |
440 |
case 0: /* 0 pulses in A, 6 pulses in B or vice versa */ |
441 |
decode_1p_track(out, BIT_STR(code, 0, m),
|
442 |
m - 1, off + half_more);
|
443 |
decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5), |
444 |
m - 1, off + half_more);
|
445 |
break;
|
446 |
case 1: /* 1 pulse in A, 5 pulses in B or vice versa */ |
447 |
decode_1p_track(out, BIT_STR(code, 0, m),
|
448 |
m - 1, off + half_other);
|
449 |
decode_5p_track(out + 1, BIT_STR(code, m, 5*m - 5), |
450 |
m - 1, off + half_more);
|
451 |
break;
|
452 |
case 2: /* 2 pulses in A, 4 pulses in B or vice versa */ |
453 |
decode_2p_track(out, BIT_STR(code, 0, 2*m - 1), |
454 |
m - 1, off + half_other);
|
455 |
decode_4p_track(out + 2, BIT_STR(code, 2*m - 1, 4*m - 4), |
456 |
m - 1, off + half_more);
|
457 |
break;
|
458 |
case 3: /* 3 pulses in A, 3 pulses in B */ |
459 |
decode_3p_track(out, BIT_STR(code, 3*m - 2, 3*m - 2), |
460 |
m - 1, off);
|
461 |
decode_3p_track(out + 3, BIT_STR(code, 0, 3*m - 2), |
462 |
m - 1, off + b_offset);
|
463 |
break;
|
464 |
} |
465 |
} |
466 |
|
467 |
/**
|
468 |
* Decode the algebraic codebook index to pulse positions and signs,
|
469 |
* then construct the algebraic codebook vector
|
470 |
*
|
471 |
* @param[out] fixed_vector Buffer for the fixed codebook excitation
|
472 |
* @param[in] pulse_hi MSBs part of the pulse index array (higher modes only)
|
473 |
* @param[in] pulse_lo LSBs part of the pulse index array
|
474 |
* @param[in] mode Mode of the current frame
|
475 |
*/
|
476 |
static void decode_fixed_vector(float *fixed_vector, const uint16_t *pulse_hi, |
477 |
const uint16_t *pulse_lo, const enum Mode mode) |
478 |
{ |
479 |
/* sig_pos stores for each track the decoded pulse position indexes
|
480 |
* (1-based) multiplied by its corresponding amplitude (+1 or -1) */
|
481 |
int sig_pos[4][6]; |
482 |
int spacing = (mode == MODE_6k60) ? 2 : 4; |
483 |
int i, j;
|
484 |
|
485 |
switch (mode) {
|
486 |
case MODE_6k60:
|
487 |
for (i = 0; i < 2; i++) |
488 |
decode_1p_track(sig_pos[i], pulse_lo[i], 5, 1); |
489 |
break;
|
490 |
case MODE_8k85:
|
491 |
for (i = 0; i < 4; i++) |
492 |
decode_1p_track(sig_pos[i], pulse_lo[i], 4, 1); |
493 |
break;
|
494 |
case MODE_12k65:
|
495 |
for (i = 0; i < 4; i++) |
496 |
decode_2p_track(sig_pos[i], pulse_lo[i], 4, 1); |
497 |
break;
|
498 |
case MODE_14k25:
|
499 |
for (i = 0; i < 2; i++) |
500 |
decode_3p_track(sig_pos[i], pulse_lo[i], 4, 1); |
501 |
for (i = 2; i < 4; i++) |
502 |
decode_2p_track(sig_pos[i], pulse_lo[i], 4, 1); |
503 |
break;
|
504 |
case MODE_15k85:
|
505 |
for (i = 0; i < 4; i++) |
506 |
decode_3p_track(sig_pos[i], pulse_lo[i], 4, 1); |
507 |
break;
|
508 |
case MODE_18k25:
|
509 |
for (i = 0; i < 4; i++) |
510 |
decode_4p_track(sig_pos[i], (int) pulse_lo[i] +
|
511 |
((int) pulse_hi[i] << 14), 4, 1); |
512 |
break;
|
513 |
case MODE_19k85:
|
514 |
for (i = 0; i < 2; i++) |
515 |
decode_5p_track(sig_pos[i], (int) pulse_lo[i] +
|
516 |
((int) pulse_hi[i] << 10), 4, 1); |
517 |
for (i = 2; i < 4; i++) |
518 |
decode_4p_track(sig_pos[i], (int) pulse_lo[i] +
|
519 |
((int) pulse_hi[i] << 14), 4, 1); |
520 |
break;
|
521 |
case MODE_23k05:
|
522 |
case MODE_23k85:
|
523 |
for (i = 0; i < 4; i++) |
524 |
decode_6p_track(sig_pos[i], (int) pulse_lo[i] +
|
525 |
((int) pulse_hi[i] << 11), 4, 1); |
526 |
break;
|
527 |
} |
528 |
|
529 |
memset(fixed_vector, 0, sizeof(float) * AMRWB_SFR_SIZE); |
530 |
|
531 |
for (i = 0; i < 4; i++) |
532 |
for (j = 0; j < pulses_nb_per_mode_tr[mode][i]; j++) { |
533 |
int pos = (FFABS(sig_pos[i][j]) - 1) * spacing + i; |
534 |
|
535 |
fixed_vector[pos] += sig_pos[i][j] < 0 ? -1.0 : 1.0; |
536 |
} |
537 |
} |
538 |
|
539 |
/**
|
540 |
* Decode pitch gain and fixed gain correction factor
|
541 |
*
|
542 |
* @param[in] vq_gain Vector-quantized index for gains
|
543 |
* @param[in] mode Mode of the current frame
|
544 |
* @param[out] fixed_gain_factor Decoded fixed gain correction factor
|
545 |
* @param[out] pitch_gain Decoded pitch gain
|
546 |
*/
|
547 |
static void decode_gains(const uint8_t vq_gain, const enum Mode mode, |
548 |
float *fixed_gain_factor, float *pitch_gain) |
549 |
{ |
550 |
const int16_t *gains = (mode <= MODE_8k85 ? qua_gain_6b[vq_gain] :
|
551 |
qua_gain_7b[vq_gain]); |
552 |
|
553 |
*pitch_gain = gains[0] * (1.0f / (1 << 14)); |
554 |
*fixed_gain_factor = gains[1] * (1.0f / (1 << 11)); |
555 |
} |
556 |
|
557 |
/**
|
558 |
* Apply pitch sharpening filters to the fixed codebook vector
|
559 |
*
|
560 |
* @param[in] ctx The context
|
561 |
* @param[in,out] fixed_vector Fixed codebook excitation
|
562 |
*/
|
563 |
// XXX: Spec states this procedure should be applied when the pitch
|
564 |
// lag is less than 64, but this checking seems absent in reference and AMR-NB
|
565 |
static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) |
566 |
{ |
567 |
int i;
|
568 |
|
569 |
/* Tilt part */
|
570 |
for (i = AMRWB_SFR_SIZE - 1; i != 0; i--) |
571 |
fixed_vector[i] -= fixed_vector[i - 1] * ctx->tilt_coef;
|
572 |
|
573 |
/* Periodicity enhancement part */
|
574 |
for (i = ctx->pitch_lag_int; i < AMRWB_SFR_SIZE; i++)
|
575 |
fixed_vector[i] += fixed_vector[i - ctx->pitch_lag_int] * 0.85; |
576 |
} |
577 |
|
578 |
/**
|
579 |
* Calculate the voicing factor (-1.0 = unvoiced to 1.0 = voiced)
|
580 |
*
|
581 |
* @param[in] p_vector, f_vector Pitch and fixed excitation vectors
|
582 |
* @param[in] p_gain, f_gain Pitch and fixed gains
|
583 |
*/
|
584 |
// XXX: There is something wrong with the precision here! The magnitudes
|
585 |
// of the energies are not correct. Please check the reference code carefully
|
586 |
static float voice_factor(float *p_vector, float p_gain, |
587 |
float *f_vector, float f_gain) |
588 |
{ |
589 |
double p_ener = (double) ff_dot_productf(p_vector, p_vector, |
590 |
AMRWB_SFR_SIZE) * p_gain * p_gain; |
591 |
double f_ener = (double) ff_dot_productf(f_vector, f_vector, |
592 |
AMRWB_SFR_SIZE) * f_gain * f_gain; |
593 |
|
594 |
return (p_ener - f_ener) / (p_ener + f_ener);
|
595 |
} |
596 |
|
597 |
/**
|
598 |
* Reduce fixed vector sparseness by smoothing with one of three IR filters
|
599 |
* Also known as "adaptive phase dispersion"
|
600 |
*
|
601 |
* @param[in] ctx The context
|
602 |
* @param[in,out] fixed_vector Unfiltered fixed vector
|
603 |
* @param[out] buf Space for modified vector if necessary
|
604 |
*
|
605 |
* @return The potentially overwritten filtered fixed vector address
|
606 |
*/
|
607 |
static float *anti_sparseness(AMRWBContext *ctx, |
608 |
float *fixed_vector, float *buf) |
609 |
{ |
610 |
int ir_filter_nr;
|
611 |
|
612 |
if (ctx->fr_cur_mode > MODE_8k85) // no filtering in higher modes |
613 |
return fixed_vector;
|
614 |
|
615 |
if (ctx->pitch_gain[0] < 0.6) { |
616 |
ir_filter_nr = 0; // strong filtering |
617 |
} else if (ctx->pitch_gain[0] < 0.9) { |
618 |
ir_filter_nr = 1; // medium filtering |
619 |
} else
|
620 |
ir_filter_nr = 2; // no filtering |
621 |
|
622 |
/* detect 'onset' */
|
623 |
if (ctx->fixed_gain[0] > 3.0 * ctx->fixed_gain[1]) { |
624 |
if (ir_filter_nr < 2) |
625 |
ir_filter_nr++; |
626 |
} else {
|
627 |
int i, count = 0; |
628 |
|
629 |
for (i = 0; i < 6; i++) |
630 |
if (ctx->pitch_gain[i] < 0.6) |
631 |
count++; |
632 |
|
633 |
if (count > 2) |
634 |
ir_filter_nr = 0;
|
635 |
|
636 |
if (ir_filter_nr > ctx->prev_ir_filter_nr + 1) |
637 |
ir_filter_nr--; |
638 |
} |
639 |
|
640 |
/* update ir filter strength history */
|
641 |
ctx->prev_ir_filter_nr = ir_filter_nr; |
642 |
|
643 |
ir_filter_nr += (ctx->fr_cur_mode == MODE_8k85); |
644 |
|
645 |
if (ir_filter_nr < 2) { |
646 |
int i;
|
647 |
const float *coef = ir_filters_lookup[ir_filter_nr]; |
648 |
|
649 |
/* Circular convolution code in the reference
|
650 |
* decoder was modified to avoid using one
|
651 |
* extra array. The filtered vector is given by:
|
652 |
*
|
653 |
* c2(n) = sum(i,0,len-1){ c(i) * coef( (n - i + len) % len ) }
|
654 |
*/
|
655 |
|
656 |
memset(buf, 0, sizeof(float) * AMRWB_SFR_SIZE); |
657 |
for (i = 0; i < AMRWB_SFR_SIZE; i++) |
658 |
if (fixed_vector[i])
|
659 |
ff_celp_circ_addf(buf, buf, coef, i, fixed_vector[i], |
660 |
AMRWB_SFR_SIZE); |
661 |
fixed_vector = buf; |
662 |
} |
663 |
|
664 |
return fixed_vector;
|
665 |
} |
666 |
|
667 |
/**
|
668 |
* Calculate a stability factor {teta} based on distance between
|
669 |
* current and past isf. A value of 1 shows maximum signal stability
|
670 |
*/
|
671 |
static float stability_factor(const float *isf, const float *isf_past) |
672 |
{ |
673 |
int i;
|
674 |
float acc = 0.0; |
675 |
|
676 |
for (i = 0; i < LP_ORDER - 1; i++) |
677 |
acc += (isf[i] - isf_past[i]) * (isf[i] - isf_past[i]); |
678 |
|
679 |
// XXX: This part is not so clear from the reference code
|
680 |
// the result is more accurate changing the "/ 256" to "* 512"
|
681 |
return FFMAX(0.0, 1.25 - acc * 0.8 * 512); |
682 |
} |
683 |
|
684 |
/**
|
685 |
* Apply a non-linear fixed gain smoothing in order to reduce
|
686 |
* fluctuation in the energy of excitation
|
687 |
*
|
688 |
* @param[in] fixed_gain Unsmoothed fixed gain
|
689 |
* @param[in,out] prev_tr_gain Previous threshold gain (updated)
|
690 |
* @param[in] voice_fac Frame voicing factor
|
691 |
* @param[in] stab_fac Frame stability factor
|
692 |
*
|
693 |
* @return The smoothed gain
|
694 |
*/
|
695 |
static float noise_enhancer(float fixed_gain, float *prev_tr_gain, |
696 |
float voice_fac, float stab_fac) |
697 |
{ |
698 |
float sm_fac = 0.5 * (1 - voice_fac) * stab_fac; |
699 |
float g0;
|
700 |
|
701 |
// XXX: the following fixed-point constants used to in(de)crement
|
702 |
// gain by 1.5dB were taken from the reference code, maybe it could
|
703 |
// be simpler
|
704 |
if (fixed_gain < *prev_tr_gain) {
|
705 |
g0 = FFMIN(*prev_tr_gain, fixed_gain + fixed_gain * |
706 |
(6226 * (1.0f / (1 << 15)))); // +1.5 dB |
707 |
} else
|
708 |
g0 = FFMAX(*prev_tr_gain, fixed_gain * |
709 |
(27536 * (1.0f / (1 << 15)))); // -1.5 dB |
710 |
|
711 |
*prev_tr_gain = g0; // update next frame threshold
|
712 |
|
713 |
return sm_fac * g0 + (1 - sm_fac) * fixed_gain; |
714 |
} |
715 |
|
716 |
/**
|
717 |
* Filter the fixed_vector to emphasize the higher frequencies
|
718 |
*
|
719 |
* @param[in,out] fixed_vector Fixed codebook vector
|
720 |
* @param[in] voice_fac Frame voicing factor
|
721 |
*/
|
722 |
static void pitch_enhancer(float *fixed_vector, float voice_fac) |
723 |
{ |
724 |
int i;
|
725 |
float cpe = 0.125 * (1 + voice_fac); |
726 |
float last = fixed_vector[0]; // holds c(i - 1) |
727 |
|
728 |
fixed_vector[0] -= cpe * fixed_vector[1]; |
729 |
|
730 |
for (i = 1; i < AMRWB_SFR_SIZE - 1; i++) { |
731 |
float cur = fixed_vector[i];
|
732 |
|
733 |
fixed_vector[i] -= cpe * (last + fixed_vector[i + 1]);
|
734 |
last = cur; |
735 |
} |
736 |
|
737 |
fixed_vector[AMRWB_SFR_SIZE - 1] -= cpe * last;
|
738 |
} |
739 |
|
740 |
/**
|
741 |
* Conduct 16th order linear predictive coding synthesis from excitation
|
742 |
*
|
743 |
* @param[in] ctx Pointer to the AMRWBContext
|
744 |
* @param[in] lpc Pointer to the LPC coefficients
|
745 |
* @param[out] excitation Buffer for synthesis final excitation
|
746 |
* @param[in] fixed_gain Fixed codebook gain for synthesis
|
747 |
* @param[in] fixed_vector Algebraic codebook vector
|
748 |
* @param[in,out] samples Pointer to the output samples and memory
|
749 |
*/
|
750 |
static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, |
751 |
float fixed_gain, const float *fixed_vector, |
752 |
float *samples)
|
753 |
{ |
754 |
ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, |
755 |
ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE);
|
756 |
|
757 |
/* emphasize pitch vector contribution in low bitrate modes */
|
758 |
if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { |
759 |
int i;
|
760 |
float energy = ff_dot_productf(excitation, excitation,
|
761 |
AMRWB_SFR_SIZE); |
762 |
|
763 |
// XXX: Weird part in both ref code and spec. A unknown parameter
|
764 |
// {beta} seems to be identical to the current pitch gain
|
765 |
float pitch_factor = 0.25 * ctx->pitch_gain[0] * ctx->pitch_gain[0]; |
766 |
|
767 |
for (i = 0; i < AMRWB_SFR_SIZE; i++) |
768 |
excitation[i] += pitch_factor * ctx->pitch_vector[i]; |
769 |
|
770 |
ff_scale_vector_to_given_sum_of_squares(excitation, excitation, |
771 |
energy, AMRWB_SFR_SIZE); |
772 |
} |
773 |
|
774 |
ff_celp_lp_synthesis_filterf(samples, lpc, excitation, |
775 |
AMRWB_SFR_SIZE, LP_ORDER); |
776 |
} |
777 |
|
778 |
/**
|
779 |
* Apply to synthesis a de-emphasis filter of the form:
|
780 |
* H(z) = 1 / (1 - m * z^-1)
|
781 |
*
|
782 |
* @param[out] out Output buffer
|
783 |
* @param[in] in Input samples array with in[-1]
|
784 |
* @param[in] m Filter coefficient
|
785 |
* @param[in,out] mem State from last filtering
|
786 |
*/
|
787 |
static void de_emphasis(float *out, float *in, float m, float mem[1]) |
788 |
{ |
789 |
int i;
|
790 |
|
791 |
out[0] = in[0] + m * mem[0]; |
792 |
|
793 |
for (i = 1; i < AMRWB_SFR_SIZE; i++) |
794 |
out[i] = in[i] + out[i - 1] * m;
|
795 |
|
796 |
mem[0] = out[AMRWB_SFR_SIZE - 1]; |
797 |
} |
798 |
|
799 |
/**
|
800 |
* Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
|
801 |
* a FIR interpolation filter. Uses past data from before *in address
|
802 |
*
|
803 |
* @param[out] out Buffer for interpolated signal
|
804 |
* @param[in] in Current signal data (length 0.8*o_size)
|
805 |
* @param[in] o_size Output signal length
|
806 |
*/
|
807 |
static void upsample_5_4(float *out, const float *in, int o_size) |
808 |
{ |
809 |
const float *in0 = in - UPS_FIR_SIZE + 1; |
810 |
int i, j, k;
|
811 |
int int_part = 0, frac_part; |
812 |
|
813 |
i = 0;
|
814 |
for (j = 0; j < o_size / 5; j++) { |
815 |
out[i] = in[int_part]; |
816 |
frac_part = 4;
|
817 |
i++; |
818 |
|
819 |
for (k = 1; k < 5; k++) { |
820 |
out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part],
|
821 |
UPS_MEM_SIZE); |
822 |
int_part++; |
823 |
frac_part--; |
824 |
i++; |
825 |
} |
826 |
} |
827 |
} |
828 |
|
829 |
/**
|
830 |
* Calculate the high-band gain based on encoded index (23k85 mode) or
|
831 |
* on the low-band speech signal and the Voice Activity Detection flag
|
832 |
*
|
833 |
* @param[in] ctx The context
|
834 |
* @param[in] synth LB speech synthesis at 12.8k
|
835 |
* @param[in] hb_idx Gain index for mode 23k85 only
|
836 |
* @param[in] vad VAD flag for the frame
|
837 |
*/
|
838 |
static float find_hb_gain(AMRWBContext *ctx, const float *synth, |
839 |
uint16_t hb_idx, uint8_t vad) |
840 |
{ |
841 |
int wsp = (vad > 0); |
842 |
float tilt;
|
843 |
|
844 |
if (ctx->fr_cur_mode == MODE_23k85)
|
845 |
return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); |
846 |
|
847 |
tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / |
848 |
ff_dot_productf(synth, synth, AMRWB_SFR_SIZE); |
849 |
|
850 |
/* return gain bounded by [0.1, 1.0] */
|
851 |
return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); |
852 |
} |
853 |
|
854 |
/**
|
855 |
* Generate the high-band excitation with the same energy from the lower
|
856 |
* one and scaled by the given gain
|
857 |
*
|
858 |
* @param[in] ctx The context
|
859 |
* @param[out] hb_exc Buffer for the excitation
|
860 |
* @param[in] synth_exc Low-band excitation used for synthesis
|
861 |
* @param[in] hb_gain Wanted excitation gain
|
862 |
*/
|
863 |
static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, |
864 |
const float *synth_exc, float hb_gain) |
865 |
{ |
866 |
int i;
|
867 |
float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
|
868 |
|
869 |
/* Generate a white-noise excitation */
|
870 |
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) |
871 |
hb_exc[i] = 32768.0 - (uint16_t) av_lfg_get(&ctx->prng); |
872 |
|
873 |
ff_scale_vector_to_given_sum_of_squares(hb_exc, hb_exc, |
874 |
energy * hb_gain * hb_gain, |
875 |
AMRWB_SFR_SIZE_16k); |
876 |
} |
877 |
|
878 |
/**
|
879 |
* Calculate the auto-correlation for the ISF difference vector
|
880 |
*/
|
881 |
static float auto_correlation(float *diff_isf, float mean, int lag) |
882 |
{ |
883 |
int i;
|
884 |
float sum = 0.0; |
885 |
|
886 |
for (i = 7; i < LP_ORDER - 2; i++) { |
887 |
float prod = (diff_isf[i] - mean) * (diff_isf[i - lag] - mean);
|
888 |
sum += prod * prod; |
889 |
} |
890 |
return sum;
|
891 |
} |
892 |
|
893 |
/**
|
894 |
* Extrapolate a ISF vector to the 16kHz range (20th order LP)
|
895 |
* used at mode 6k60 LP filter for the high frequency band
|
896 |
*
|
897 |
* @param[out] out Buffer for extrapolated isf
|
898 |
* @param[in] isf Input isf vector
|
899 |
*/
|
900 |
static void extrapolate_isf(float out[LP_ORDER_16k], float isf[LP_ORDER]) |
901 |
{ |
902 |
float diff_isf[LP_ORDER - 2], diff_mean; |
903 |
float *diff_hi = diff_isf - LP_ORDER + 1; // diff array for extrapolated indexes |
904 |
float corr_lag[3]; |
905 |
float est, scale;
|
906 |
int i, i_max_corr;
|
907 |
|
908 |
memcpy(out, isf, (LP_ORDER - 1) * sizeof(float)); |
909 |
out[LP_ORDER_16k - 1] = isf[LP_ORDER - 1]; |
910 |
|
911 |
/* Calculate the difference vector */
|
912 |
for (i = 0; i < LP_ORDER - 2; i++) |
913 |
diff_isf[i] = isf[i + 1] - isf[i];
|
914 |
|
915 |
diff_mean = 0.0; |
916 |
for (i = 2; i < LP_ORDER - 2; i++) |
917 |
diff_mean += diff_isf[i] * (1.0f / (LP_ORDER - 4)); |
918 |
|
919 |
/* Find which is the maximum autocorrelation */
|
920 |
i_max_corr = 0;
|
921 |
for (i = 0; i < 3; i++) { |
922 |
corr_lag[i] = auto_correlation(diff_isf, diff_mean, i + 2);
|
923 |
|
924 |
if (corr_lag[i] > corr_lag[i_max_corr])
|
925 |
i_max_corr = i; |
926 |
} |
927 |
i_max_corr++; |
928 |
|
929 |
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++) |
930 |
out[i] = isf[i - 1] + isf[i - 1 - i_max_corr] |
931 |
- isf[i - 2 - i_max_corr];
|
932 |
|
933 |
/* Calculate an estimate for ISF(18) and scale ISF based on the error */
|
934 |
est = 7965 + (out[2] - out[3] - out[4]) / 6.0; |
935 |
scale = 0.5 * (FFMIN(est, 7600) - out[LP_ORDER - 2]) / |
936 |
(out[LP_ORDER_16k - 2] - out[LP_ORDER - 2]); |
937 |
|
938 |
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++) |
939 |
diff_hi[i] = scale * (out[i] - out[i - 1]);
|
940 |
|
941 |
/* Stability insurance */
|
942 |
for (i = LP_ORDER; i < LP_ORDER_16k - 1; i++) |
943 |
if (diff_hi[i] + diff_hi[i - 1] < 5.0) { |
944 |
if (diff_hi[i] > diff_hi[i - 1]) { |
945 |
diff_hi[i - 1] = 5.0 - diff_hi[i]; |
946 |
} else
|
947 |
diff_hi[i] = 5.0 - diff_hi[i - 1]; |
948 |
} |
949 |
|
950 |
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++) |
951 |
out[i] = out[i - 1] + diff_hi[i] * (1.0f / (1 << 15)); |
952 |
|
953 |
/* Scale the ISF vector for 16000 Hz */
|
954 |
for (i = 0; i < LP_ORDER_16k - 1; i++) |
955 |
out[i] *= 0.8; |
956 |
} |
957 |
|
958 |
/**
|
959 |
* Spectral expand the LP coefficients using the equation:
|
960 |
* y[i] = x[i] * (gamma ** i)
|
961 |
*
|
962 |
* @param[out] out Output buffer (may use input array)
|
963 |
* @param[in] lpc LP coefficients array
|
964 |
* @param[in] gamma Weighting factor
|
965 |
* @param[in] size LP array size
|
966 |
*/
|
967 |
static void lpc_weighting(float *out, const float *lpc, float gamma, int size) |
968 |
{ |
969 |
int i;
|
970 |
float fac = gamma;
|
971 |
|
972 |
for (i = 0; i < size; i++) { |
973 |
out[i] = lpc[i] * fac; |
974 |
fac *= gamma; |
975 |
} |
976 |
} |
977 |
|
978 |
/**
|
979 |
* Conduct 20th order linear predictive coding synthesis for the high
|
980 |
* frequency band excitation at 16kHz
|
981 |
*
|
982 |
* @param[in] ctx The context
|
983 |
* @param[in] subframe Current subframe index (0 to 3)
|
984 |
* @param[in,out] samples Pointer to the output speech samples
|
985 |
* @param[in] exc Generated white-noise scaled excitation
|
986 |
* @param[in] isf Current frame isf vector
|
987 |
* @param[in] isf_past Past frame final isf vector
|
988 |
*/
|
989 |
static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, |
990 |
const float *exc, const float *isf, const float *isf_past) |
991 |
{ |
992 |
float hb_lpc[LP_ORDER_16k];
|
993 |
enum Mode mode = ctx->fr_cur_mode;
|
994 |
|
995 |
if (mode == MODE_6k60) {
|
996 |
float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation |
997 |
double e_isp[LP_ORDER_16k];
|
998 |
|
999 |
ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], |
1000 |
1.0 - isfp_inter[subframe], LP_ORDER); |
1001 |
|
1002 |
extrapolate_isf(e_isf, e_isf); |
1003 |
|
1004 |
e_isf[LP_ORDER_16k - 1] *= 2.0; |
1005 |
ff_acelp_lsf2lspd(e_isp, e_isf, LP_ORDER_16k); |
1006 |
ff_amrwb_lsp2lpc(e_isp, hb_lpc, LP_ORDER_16k); |
1007 |
|
1008 |
lpc_weighting(hb_lpc, hb_lpc, 0.9, LP_ORDER_16k); |
1009 |
} else {
|
1010 |
lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER); |
1011 |
} |
1012 |
|
1013 |
ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, |
1014 |
(mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER); |
1015 |
} |
1016 |
|
1017 |
/**
|
1018 |
* Apply to high-band samples a 15th order filter
|
1019 |
* The filter characteristic depends on the given coefficients
|
1020 |
*
|
1021 |
* @param[out] out Buffer for filtered output
|
1022 |
* @param[in] fir_coef Filter coefficients
|
1023 |
* @param[in,out] mem State from last filtering (updated)
|
1024 |
* @param[in] in Input speech data (high-band)
|
1025 |
*
|
1026 |
* @remark It is safe to pass the same array in in and out parameters
|
1027 |
*/
|
1028 |
static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], |
1029 |
float mem[HB_FIR_SIZE], const float *in) |
1030 |
{ |
1031 |
int i, j;
|
1032 |
float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples |
1033 |
|
1034 |
memcpy(data, mem, HB_FIR_SIZE * sizeof(float)); |
1035 |
memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float)); |
1036 |
|
1037 |
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) { |
1038 |
out[i] = 0.0; |
1039 |
for (j = 0; j <= HB_FIR_SIZE; j++) |
1040 |
out[i] += data[i + j] * fir_coef[j]; |
1041 |
} |
1042 |
|
1043 |
memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); |
1044 |
} |
1045 |
|
1046 |
/**
|
1047 |
* Update context state before the next subframe
|
1048 |
*/
|
1049 |
static void update_sub_state(AMRWBContext *ctx) |
1050 |
{ |
1051 |
memmove(&ctx->excitation_buf[0], &ctx->excitation_buf[AMRWB_SFR_SIZE],
|
1052 |
(AMRWB_P_DELAY_MAX + LP_ORDER + 1) * sizeof(float)); |
1053 |
|
1054 |
memmove(&ctx->pitch_gain[1], &ctx->pitch_gain[0], 5 * sizeof(float)); |
1055 |
memmove(&ctx->fixed_gain[1], &ctx->fixed_gain[0], 1 * sizeof(float)); |
1056 |
|
1057 |
memmove(&ctx->samples_az[0], &ctx->samples_az[AMRWB_SFR_SIZE],
|
1058 |
LP_ORDER * sizeof(float)); |
1059 |
memmove(&ctx->samples_up[0], &ctx->samples_up[AMRWB_SFR_SIZE],
|
1060 |
UPS_MEM_SIZE * sizeof(float)); |
1061 |
memmove(&ctx->samples_hb[0], &ctx->samples_hb[AMRWB_SFR_SIZE_16k],
|
1062 |
LP_ORDER_16k * sizeof(float)); |
1063 |
} |
1064 |
|
1065 |
static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size, |
1066 |
AVPacket *avpkt) |
1067 |
{ |
1068 |
AMRWBContext *ctx = avctx->priv_data; |
1069 |
AMRWBFrame *cf = &ctx->frame; |
1070 |
const uint8_t *buf = avpkt->data;
|
1071 |
int buf_size = avpkt->size;
|
1072 |
int expected_fr_size, header_size;
|
1073 |
float *buf_out = data;
|
1074 |
float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing |
1075 |
float fixed_gain_factor; // fixed gain correction factor (gamma) |
1076 |
float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use |
1077 |
float synth_fixed_gain; // the fixed gain that synthesis should use |
1078 |
float voice_fac, stab_fac; // parameters used for gain smoothing |
1079 |
float synth_exc[AMRWB_SFR_SIZE]; // post-processed excitation for synthesis |
1080 |
float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band |
1081 |
float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis |
1082 |
float hb_gain;
|
1083 |
int sub, i;
|
1084 |
|
1085 |
header_size = decode_mime_header(ctx, buf); |
1086 |
expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1; |
1087 |
|
1088 |
if (buf_size < expected_fr_size) {
|
1089 |
av_log(avctx, AV_LOG_ERROR, |
1090 |
"Frame too small (%d bytes). Truncated file?\n", buf_size);
|
1091 |
*data_size = 0;
|
1092 |
return buf_size;
|
1093 |
} |
1094 |
|
1095 |
if (!ctx->fr_quality || ctx->fr_cur_mode > MODE_SID)
|
1096 |
av_log(avctx, AV_LOG_ERROR, "Encountered a bad or corrupted frame\n");
|
1097 |
|
1098 |
if (ctx->fr_cur_mode == MODE_SID) /* Comfort noise frame */ |
1099 |
av_log_missing_feature(avctx, "SID mode", 1); |
1100 |
|
1101 |
if (ctx->fr_cur_mode >= MODE_SID)
|
1102 |
return -1; |
1103 |
|
1104 |
ff_amr_bit_reorder((uint16_t *) &ctx->frame, sizeof(AMRWBFrame),
|
1105 |
buf + header_size, amr_bit_orderings_by_mode[ctx->fr_cur_mode]); |
1106 |
|
1107 |
/* Decode the quantized ISF vector */
|
1108 |
if (ctx->fr_cur_mode == MODE_6k60) {
|
1109 |
decode_isf_indices_36b(cf->isp_id, ctx->isf_cur); |
1110 |
} else {
|
1111 |
decode_isf_indices_46b(cf->isp_id, ctx->isf_cur); |
1112 |
} |
1113 |
|
1114 |
isf_add_mean_and_past(ctx->isf_cur, ctx->isf_q_past); |
1115 |
ff_set_min_dist_lsf(ctx->isf_cur, MIN_ISF_SPACING, LP_ORDER - 1);
|
1116 |
|
1117 |
stab_fac = stability_factor(ctx->isf_cur, ctx->isf_past_final); |
1118 |
|
1119 |
ctx->isf_cur[LP_ORDER - 1] *= 2.0; |
1120 |
ff_acelp_lsf2lspd(ctx->isp[3], ctx->isf_cur, LP_ORDER);
|
1121 |
|
1122 |
/* Generate a ISP vector for each subframe */
|
1123 |
if (ctx->first_frame) {
|
1124 |
ctx->first_frame = 0;
|
1125 |
memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(double)); |
1126 |
} |
1127 |
interpolate_isp(ctx->isp, ctx->isp_sub4_past); |
1128 |
|
1129 |
for (sub = 0; sub < 4; sub++) |
1130 |
ff_amrwb_lsp2lpc(ctx->isp[sub], ctx->lp_coef[sub], LP_ORDER); |
1131 |
|
1132 |
for (sub = 0; sub < 4; sub++) { |
1133 |
const AMRWBSubFrame *cur_subframe = &cf->subframe[sub];
|
1134 |
float *sub_buf = buf_out + sub * AMRWB_SFR_SIZE_16k;
|
1135 |
|
1136 |
/* Decode adaptive codebook (pitch vector) */
|
1137 |
decode_pitch_vector(ctx, cur_subframe, sub); |
1138 |
/* Decode innovative codebook (fixed vector) */
|
1139 |
decode_fixed_vector(ctx->fixed_vector, cur_subframe->pul_ih, |
1140 |
cur_subframe->pul_il, ctx->fr_cur_mode); |
1141 |
|
1142 |
pitch_sharpening(ctx, ctx->fixed_vector); |
1143 |
|
1144 |
decode_gains(cur_subframe->vq_gain, ctx->fr_cur_mode, |
1145 |
&fixed_gain_factor, &ctx->pitch_gain[0]);
|
1146 |
|
1147 |
ctx->fixed_gain[0] =
|
1148 |
ff_amr_set_fixed_gain(fixed_gain_factor, |
1149 |
ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector, |
1150 |
AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE, |
1151 |
ctx->prediction_error, |
1152 |
ENERGY_MEAN, energy_pred_fac); |
1153 |
|
1154 |
/* Calculate voice factor and store tilt for next subframe */
|
1155 |
voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0],
|
1156 |
ctx->fixed_vector, ctx->fixed_gain[0]);
|
1157 |
ctx->tilt_coef = voice_fac * 0.25 + 0.25; |
1158 |
|
1159 |
/* Construct current excitation */
|
1160 |
for (i = 0; i < AMRWB_SFR_SIZE; i++) { |
1161 |
ctx->excitation[i] *= ctx->pitch_gain[0];
|
1162 |
ctx->excitation[i] += ctx->fixed_gain[0] * ctx->fixed_vector[i];
|
1163 |
ctx->excitation[i] = truncf(ctx->excitation[i]); |
1164 |
} |
1165 |
|
1166 |
/* Post-processing of excitation elements */
|
1167 |
synth_fixed_gain = noise_enhancer(ctx->fixed_gain[0], &ctx->prev_tr_gain,
|
1168 |
voice_fac, stab_fac); |
1169 |
|
1170 |
synth_fixed_vector = anti_sparseness(ctx, ctx->fixed_vector, |
1171 |
spare_vector); |
1172 |
|
1173 |
pitch_enhancer(synth_fixed_vector, voice_fac); |
1174 |
|
1175 |
synthesis(ctx, ctx->lp_coef[sub], synth_exc, synth_fixed_gain, |
1176 |
synth_fixed_vector, &ctx->samples_az[LP_ORDER]); |
1177 |
|
1178 |
/* Synthesis speech post-processing */
|
1179 |
de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], |
1180 |
&ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); |
1181 |
|
1182 |
ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], |
1183 |
&ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, |
1184 |
hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); |
1185 |
|
1186 |
upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], |
1187 |
AMRWB_SFR_SIZE_16k); |
1188 |
|
1189 |
/* High frequency band (6.4 - 7.0 kHz) generation part */
|
1190 |
ff_acelp_apply_order_2_transfer_function(hb_samples, |
1191 |
&ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, |
1192 |
hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); |
1193 |
|
1194 |
hb_gain = find_hb_gain(ctx, hb_samples, |
1195 |
cur_subframe->hb_gain, cf->vad); |
1196 |
|
1197 |
scaled_hb_excitation(ctx, hb_exc, synth_exc, hb_gain); |
1198 |
|
1199 |
hb_synthesis(ctx, sub, &ctx->samples_hb[LP_ORDER_16k], |
1200 |
hb_exc, ctx->isf_cur, ctx->isf_past_final); |
1201 |
|
1202 |
/* High-band post-processing filters */
|
1203 |
hb_fir_filter(hb_samples, bpf_6_7_coef, ctx->bpf_6_7_mem, |
1204 |
&ctx->samples_hb[LP_ORDER_16k]); |
1205 |
|
1206 |
if (ctx->fr_cur_mode == MODE_23k85)
|
1207 |
hb_fir_filter(hb_samples, lpf_7_coef, ctx->lpf_7_mem, |
1208 |
hb_samples); |
1209 |
|
1210 |
/* Add the low and high frequency bands */
|
1211 |
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) |
1212 |
sub_buf[i] = (sub_buf[i] + hb_samples[i]) * (1.0f / (1 << 15)); |
1213 |
|
1214 |
/* Update buffers and history */
|
1215 |
update_sub_state(ctx); |
1216 |
} |
1217 |
|
1218 |
/* update state for next frame */
|
1219 |
memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0])); |
1220 |
memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float)); |
1221 |
|
1222 |
/* report how many samples we got */
|
1223 |
*data_size = 4 * AMRWB_SFR_SIZE_16k * sizeof(float); |
1224 |
|
1225 |
return expected_fr_size;
|
1226 |
} |
1227 |
|
1228 |
AVCodec ff_amrwb_decoder = { |
1229 |
.name = "amrwb",
|
1230 |
.type = AVMEDIA_TYPE_AUDIO, |
1231 |
.id = CODEC_ID_AMR_WB, |
1232 |
.priv_data_size = sizeof(AMRWBContext),
|
1233 |
.init = amrwb_decode_init, |
1234 |
.decode = amrwb_decode_frame, |
1235 |
.long_name = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate WideBand"),
|
1236 |
.sample_fmts = (enum AVSampleFormat[]){SAMPLE_FMT_FLT,SAMPLE_FMT_NONE},
|
1237 |
}; |