Statistics
| Branch: | Revision:

ffmpeg / libavcodec / nellymoserenc.c @ f0a41afd

History | View | Annotate | Download (13.3 KB)

1
/*
2
 * Nellymoser encoder
3
 * This code is developed as part of Google Summer of Code 2008 Program.
4
 *
5
 * Copyright (c) 2008 Bartlomiej Wolowiec
6
 *
7
 * This file is part of Libav.
8
 *
9
 * Libav is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * Libav is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with Libav; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23

    
24
/**
25
 * @file
26
 * Nellymoser encoder
27
 * by Bartlomiej Wolowiec
28
 *
29
 * Generic codec information: libavcodec/nellymoserdec.c
30
 *
31
 * Some information also from: http://samples.libav.org/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32
 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33
 *
34
 * for more information about nellymoser format, visit:
35
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36
 */
37

    
38
#include "nellymoser.h"
39
#include "avcodec.h"
40
#include "dsputil.h"
41
#include "fft.h"
42
#include "sinewin.h"
43

    
44
#define BITSTREAM_WRITER_LE
45
#include "put_bits.h"
46

    
47
/* Number of entries in pow_table; encode_init() fills one entry per 1/2048
 * step of the exponent. */
#define POW_TABLE_SIZE (1<<11)
/* Bias added to the exponent when pow_table is generated and added again to
 * the shift count when the table is read in encode_block(). */
#define POW_TABLE_OFFSET 3
/* Number of entries per band in the trellis tables (opt / path). */
#define OPT_SIZE ((1<<15) + 3000)
50

    
51
typedef struct NellyMoserEncodeContext {
52
    AVCodecContext  *avctx;
53
    int             last_frame;
54
    int             bufsel;
55
    int             have_saved;
56
    DSPContext      dsp;
57
    FFTContext      mdct_ctx;
58
    DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
59
    DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
60
    DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
61
    float           (*opt )[NELLY_BANDS];
62
    uint8_t         (*path)[NELLY_BANDS];
63
} NellyMoserEncodeContext;
64

    
65
static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled by encode_init()
66

    
67
/**
 * First-guess lookup for the base exponent.
 * get_exponent_greedy() indexes it with clip((lrintf(val) >> 8) - 20, 0, 95)
 * and uses the result as a starting index into ff_nelly_init_table.
 */
static const uint8_t sf_lut[96] = {
    /*  0 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16 */  5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
    /* 32 */ 15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
    /* 48 */ 27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
    /* 64 */ 41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
    /* 80 */ 54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
};
75

    
76
/**
 * First-guess lookup for the per-band exponent deltas.
 * get_exponent_greedy() indexes it with clip((lrintf(val) >> 8) + 37, 0, 77)
 * and uses the result as a starting index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
    /*  0 */  0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
    /* 16 */  4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
    /* 32 */ 13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
    /* 48 */ 23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
    /* 64 */ 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
};
83

    
84
/**
 * First-guess lookup for coefficient quantization.
 * The table is the concatenation of one sub-table per bit count; encode_block()
 * restricts its index to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: 1 entry */
     0,

    /* 2 bits: 3 entries */
     0,  1,  2,

    /* 3 bits: 7 entries */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: 21 entries */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: 49 entries */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: 149 entries */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
110

    
111
/* Per-bit-count parameters used by encode_block() to index quant_lut:
 *   index = clip(coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 *                quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1)
 * Entry 0 of each array is never read, since only bits > 0 are quantized. */
static const float   quant_lut_mul[7]    = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
static const float   quant_lut_add[7]    = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
114

    
115
/**
 * Window the buffered samples and run the two MDCTs of one frame.
 *
 * The first transform reads the first 2 * NELLY_BUF_LEN samples of the
 * selected buffer through in_buff; the second one is windowed in place and
 * reads the second NELLY_BUF_LEN samples of the selected buffer followed by
 * the first NELLY_BUF_LEN samples of the other buffer. Results are stored in
 * mdct_out and mdct_out + NELLY_BUF_LEN.
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    float *cur  = s->buf[s->bufsel];        /* buffer being encoded    */
    float *next = s->buf[1 - s->bufsel];    /* the other sample buffer */
    float *mid  = cur + NELLY_BUF_LEN;      /* second part of cur      */

    /* first transform, windowed into in_buff */
    s->dsp.vector_fmul(s->in_buff, cur, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, mid, ff_sine_128,
                               NELLY_BUF_LEN);
    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);

    /* second transform, windowed in place; must run after the first one has
     * read the unmodified samples at mid */
    s->dsp.vector_fmul(mid, mid, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(mid + NELLY_BUF_LEN, next, ff_sine_128,
                               NELLY_BUF_LEN);
    s->mdct_ctx.mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, mid);
}
128

    
129
/**
 * Validate the stream parameters and set up the encoder state.
 *
 * @param avctx codec context; frame_size is set to NELLY_SAMPLES
 * @return 0 on success, a negative value on unsupported parameters or when
 *         the MDCT or the trellis tables cannot be set up
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int i, ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
        return -1;
    }

    /* non-standard sample rates are only accepted below normal compliance */
    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
        avctx->sample_rate != 11025 &&
        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
        return -1;
    }

    avctx->frame_size = NELLY_SAMPLES;
    s->avctx = avctx;
    ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "MDCT initialization failed\n");
        return ret;
    }
    dsputil_init(&s->dsp, avctx);

    /* Generate overlap window */
    ff_sine_window_init(ff_sine_128, 128);
    for (i = 0; i < POW_TABLE_SIZE; i++)
        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);

    if (s->avctx->trellis) {
        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
        if (!s->opt || !s->path) {
            /* release whatever was set up so a failed init leaks nothing */
            av_log(avctx, AV_LOG_ERROR, "Cannot allocate trellis tables\n");
            av_freep(&s->opt);
            av_freep(&s->path);
            ff_mdct_end(&s->mdct_ctx);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
164

    
165
/**
 * Release everything encode_init() set up.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct_ctx);

    /* Free unconditionally: the pointers are the source of truth, not the
     * current value of the trellis option, and s->avctx may not have been
     * set if initialization bailed out early. av_freep() accepts NULL and
     * resets the pointers. */
    av_freep(&s->opt);
    av_freep(&s->path);

    return 0;
}
178

    
179
/**
 * Pick the entry of table closest to val and store its index in the
 * caller's variable best_idx.
 *
 * LUT supplies a first guess, indexed by (lrintf(val) >> 8) + LUT_add clipped
 * to [0, LUT_size - 1]; the guess is bumped by one if the next table entry is
 * closer to val. table must therefore have an entry after every index LUT
 * can return.
 *
 * val is evaluated more than once and must be free of side effects. The macro
 * expands to a single statement, so it is safe in unbraced if/else bodies.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
184

    
185
/**
 * Choose the exponent indices band by band, each time taking the table entry
 * closest to the target given the level reached so far.
 *
 * @param s         encoder context (unused)
 * @param cand      target exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen index per band: into
 *                  ff_nelly_init_table for band 0, into ff_nelly_delta_table
 *                  for the others
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;   /* written by find_best() */
    int level;      /* exponent reached after the bands handled so far */
    int b;

    /* base exponent */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0] = best_idx;
    level        = ff_nelly_init_table[best_idx];

    /* every further band is coded as a delta against the running level */
    for (b = 1; b < NELLY_BANDS; b++) {
        float residual = cand[b] - level;

        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[b] = best_idx;
        level       += ff_nelly_delta_table[best_idx];
    }
}
202

    
203
/**
 * Squared difference between x and y.
 *
 * @param band unused
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
209

    
210
/**
 * Choose the exponent indices with a dynamic-programming (trellis) search
 * that minimizes the summed squared error against cand over all bands.
 *
 * opt[band][v] holds the lowest cost found for reaching exponent value v at
 * band, path[band][v] the table index that was used to get there.
 *
 * Falls back to get_exponent_greedy() if the tables are not allocated or if
 * no state is reachable in the last band.
 *
 * @param s         encoder context providing the opt / path tables
 * @param cand      target exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen index per band: into
 *                  ff_nelly_init_table for band 0, into ff_nelly_delta_table
 *                  for the others
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    /* The tables are allocated as NELLY_BANDS rows of OPT_SIZE entries, so
     * they must be indexed with OPT_SIZE-wide rows. Using the declared
     * NELLY_BANDS-wide rows makes the rows of different bands overlap and
     * corrupts the search. */
    float  (*opt )[OPT_SIZE] = (void *)s->opt;
    uint8_t(*path)[OPT_SIZE] = (void *)s->path;

    if (!opt || !path) {
        get_exponent_greedy(s, cand, idx_table);
        return;
    }

    for (band = 0; band < NELLY_BANDS; band++) {
        for (i = 0; i < OPT_SIZE; i++) {
            opt[band][i] = INFINITY;
        }
    }

    for (i = 0; i < 64; i++) {
        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        /* widen the search window until at least one state gets updated */
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            idx_min = FFMAX(0, cand[band] - q);
            /* clamp to the last valid index so idx never leaves the row */
            /* NOTE(review): the upper bound is derived from the previous
             * band's target while the lower bound uses the current one;
             * kept as found - confirm whether cand[band] was intended. */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if ( isinf(opt[band - 1][i]) )
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx] = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        assert(c); //FIXME
    }

    /* cheapest end state in the last band */
    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }

    /* only possible when the assert above is compiled out */
    if (best_idx < 0) {
        get_exponent_greedy(s, cand, idx_table);
        return;
    }

    /* walk the recorded decisions back to band 0 */
    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
272

    
273
/**
274
 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
275
 *  @param s               encoder context
276
 *  @param output          output buffer
277
 *  @param output_size     size of output buffer
278
 */
279
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
280
{
281
    PutBitContext pb;
282
    int i, j, band, block, best_idx, power_idx = 0;
283
    float power_val, coeff, coeff_sum;
284
    float pows[NELLY_FILL_LEN];
285
    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
286
    float cand[NELLY_BANDS];
287

    
288
    apply_mdct(s);
289

    
290
    init_put_bits(&pb, output, output_size * 8);
291

    
292
    i = 0;
293
    for (band = 0; band < NELLY_BANDS; band++) {
294
        coeff_sum = 0;
295
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
296
            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
297
                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
298
        }
299
        cand[band] =
300
            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
301
    }
302

    
303
    if (s->avctx->trellis) {
304
        get_exponent_dynamic(s, cand, idx_table);
305
    } else {
306
        get_exponent_greedy(s, cand, idx_table);
307
    }
308

    
309
    i = 0;
310
    for (band = 0; band < NELLY_BANDS; band++) {
311
        if (band) {
312
            power_idx += ff_nelly_delta_table[idx_table[band]];
313
            put_bits(&pb, 5, idx_table[band]);
314
        } else {
315
            power_idx = ff_nelly_init_table[idx_table[0]];
316
            put_bits(&pb, 6, idx_table[0]);
317
        }
318
        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
319
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
320
            s->mdct_out[i] *= power_val;
321
            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
322
            pows[i] = power_idx;
323
        }
324
    }
325

    
326
    ff_nelly_get_sample_bits(pows, bits);
327

    
328
    for (block = 0; block < 2; block++) {
329
        for (i = 0; i < NELLY_FILL_LEN; i++) {
330
            if (bits[i] > 0) {
331
                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
332
                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
333
                best_idx =
334
                    quant_lut[av_clip (
335
                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
336
                            quant_lut_offset[bits[i]],
337
                            quant_lut_offset[bits[i]+1] - 1
338
                            )];
339
                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
340
                    best_idx++;
341

    
342
                put_bits(&pb, bits[i], best_idx);
343
            }
344
        }
345
        if (!block)
346
            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
347
    }
348

    
349
    flush_put_bits(&pb);
350
}
351

    
352
/**
 * Buffer one frame of input and encode the previously buffered one.
 *
 * Output lags the input by one frame: the first call only stores its
 * samples, and a final call with data == NULL flushes the last stored frame.
 *
 * @param avctx    codec context
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes
 * @param data     avctx->frame_size signed 16-bit samples, or NULL to flush
 * @return NELLY_BLOCK_LEN when a block was written, 0 when nothing was
 *         output, -1 if the output buffer is too small
 */
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    const int16_t *samples = data;
    int i;

    if (s->last_frame)
        return 0;

    if (data) {
        /* never copy more than one frame, whatever frame_size was set to */
        const int nb_samples = FFMIN(avctx->frame_size, NELLY_SAMPLES);

        for (i = 0; i < nb_samples; i++) {
            s->buf[s->bufsel][i] = samples[i];
        }
        /* zero-pad a short (last) frame */
        for (; i < NELLY_SAMPLES; i++) {
            s->buf[s->bufsel][i] = 0;
        }
        s->bufsel = 1 - s->bufsel;
        if (!s->have_saved) {
            s->have_saved = 1;
            return 0;
        }
    } else {
        /* flush: only the first NELLY_BUF_LEN samples of the following
         * buffer are read by apply_mdct(), so only those are cleared */
        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
        s->bufsel = 1 - s->bufsel;
        s->last_frame = 1;
    }

    if (s->have_saved) {
        /* encode_block() writes NELLY_BLOCK_LEN bytes into frame */
        if (buf_size < NELLY_BLOCK_LEN) {
            av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
            return -1;
        }
        encode_block(s, frame, buf_size);
        return NELLY_BLOCK_LEN;
    }
    return 0;
}
385

    
386
AVCodec ff_nellymoser_encoder = {
387
    .name = "nellymoser",
388
    .type = AVMEDIA_TYPE_AUDIO,
389
    .id = CODEC_ID_NELLYMOSER,
390
    .priv_data_size = sizeof(NellyMoserEncodeContext),
391
    .init = encode_init,
392
    .encode = encode_frame,
393
    .close = encode_end,
394
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
395
    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
396
    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
397
};