Statistics
| Branch: | Revision:

ffmpeg / libavcodec / nellymoserenc.c @ 015f9f1a

History | View | Annotate | Download (13.3 KB)

1
/*
2
 * Nellymoser encoder
3
 * This code is developed as part of Google Summer of Code 2008 Program.
4
 *
5
 * Copyright (c) 2008 Bartlomiej Wolowiec
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23

    
24
/**
25
 * @file
26
 * Nellymoser encoder
27
 * by Bartlomiej Wolowiec
28
 *
29
 * Generic codec information: libavcodec/nellymoserdec.c
30
 *
31
 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32
 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33
 *
34
 * for more information about nellymoser format, visit:
35
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36
 */
37

    
38
#include "nellymoser.h"
39
#include "avcodec.h"
40
#include "dsputil.h"
41
#include "fft.h"
42

    
43
#define BITSTREAM_WRITER_LE
44
#include "put_bits.h"
45

    
46
/* Number of entries in pow_table. */
#define POW_TABLE_SIZE   (1 << 11)
/* Exponent bias added when pow_table is built and added again to the
 * shift count when the table is read in encode_block(). */
#define POW_TABLE_OFFSET 3
/* Number of power-index states kept per band by the trellis search. */
#define OPT_SIZE         ((1 << 15) + 3000)
49

    
50
typedef struct NellyMoserEncodeContext {
51
    AVCodecContext  *avctx;
52
    int             last_frame;
53
    int             bufsel;
54
    int             have_saved;
55
    DSPContext      dsp;
56
    FFTContext      mdct_ctx;
57
    DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
58
    DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
59
    DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
60
    float           (*opt )[NELLY_BANDS];
61
    uint8_t         (*path)[NELLY_BANDS];
62
} NellyMoserEncodeContext;
63

    
64
/** pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET); filled in by encode_init() */
static float pow_table[POW_TABLE_SIZE];
65

    
66
/**
 * First-guess lookup for the base exponent: get_exponent_greedy() indexes it
 * with (lrintf(value) >> 8) - 20 clipped to 0..95, and uses the entry as a
 * starting index into ff_nelly_init_table.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4, /*  0..15 */
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14, /* 16..31 */
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26, /* 32..47 */
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, /* 48..63 */
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53, /* 64..79 */
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62, /* 80..95 */
};
74

    
75
/**
 * First-guess lookup for the per-band exponent deltas: get_exponent_greedy()
 * indexes it with (lrintf(value) >> 8) + 37 clipped to 0..77, and uses the
 * entry as a starting index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4, /*  0..15 */
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12, /* 16..31 */
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23, /* 32..47 */
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28, /* 48..63 */
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,         /* 64..77 */
};
82

    
83
/**
 * First-guess quantizer lookup used by encode_block().
 *
 * The table is a concatenation of one sub-table per bit count; the sub-table
 * for a coefficient coded with b bits (b > 0) spans the indices
 * quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1.
 */
static const uint8_t quant_lut[230] = {
    /* 1 bit: index 0 */
     0,

    /* 2 bits: indices 1..3 */
     0,  1,  2,

    /* 3 bits: indices 4..10 */
     0,  1,  2,  3,  4,  5,  6,

    /* 4 bits: indices 11..31 */
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
    12, 13, 13, 13, 14,

    /* 5 bits: indices 32..80 */
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
    30,

    /* 6 bits: indices 81..229 */
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
    61, 61, 61, 61, 62,
};
109

    
110
/*
 * Per-bit-count parameters for the quant_lut lookup in encode_block(): a
 * coefficient coded with b bits is mapped to the position
 * coeff * quant_lut_mul[b] + quant_lut_add[b], which is then clipped to
 * quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1.
 * encode_block() only performs the lookup for b > 0.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
113

    
114
/**
 * Window the buffered samples with ff_sine_128 and run the two MDCTs of one
 * block; the results are stored in the two halves of s->mdct_out.
 *
 * The second transform is windowed in place inside the current buf[] row and
 * takes its last NELLY_BUF_LEN inputs from the start of the other row.
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    float *cur   = s->buf[s->bufsel];
    float *other = s->buf[1 - s->bufsel];
    float *mid   = cur + NELLY_BUF_LEN;

    /* first transform, windowed into the scratch buffer in_buff */
    s->dsp.vector_fmul(s->in_buff, cur, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, mid, ff_sine_128,
                               NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);

    /* second transform, windowed in place starting at cur + NELLY_BUF_LEN */
    s->dsp.vector_fmul(mid, mid, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(cur + 2 * NELLY_BUF_LEN, other, ff_sine_128,
                               NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, mid);
}
127

    
128
/**
 * Validate the stream parameters and set up the encoder context.
 *
 * Rejects anything but mono input and, unless strict_std_compliance is below
 * FF_COMPLIANCE_NORMAL, any sample rate other than 8000, 11025, 16000, 22050
 * or 44100. Initializes the MDCT, the DSP helpers, the sine window and
 * pow_table, and allocates the trellis buffers when avctx->trellis is set.
 *
 * @param avctx codec context whose priv_data is a NellyMoserEncodeContext
 * @return 0 on success, -1 on unsupported parameters or when the MDCT or the
 *         trellis buffers cannot be set up
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int i;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
        return -1;
    }

    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
        avctx->sample_rate != 11025 &&
        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
        return -1;
    }

    avctx->frame_size = NELLY_SAMPLES;
    s->avctx = avctx;
    if (ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Cannot initialize the MDCT\n");
        return -1;
    }
    dsputil_init(&s->dsp, avctx);

    /* Generate overlap window */
    ff_sine_window_init(ff_sine_128, 128);
    for (i = 0; i < POW_TABLE_SIZE; i++)
        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);

    if (s->avctx->trellis) {
        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
        if (!s->opt || !s->path) {
            /* Release everything acquired so far, so that a failed init
             * leaves nothing behind and a later encode_end() stays safe. */
            av_log(avctx, AV_LOG_ERROR, "Cannot allocate the trellis buffers\n");
            av_free(s->opt);
            av_free(s->path);
            s->opt  = NULL;
            s->path = NULL;
            ff_mdct_end(&s->mdct_ctx);
            return -1;
        }
    }

    return 0;
}
163

    
164
/**
 * Release what encode_init() set up: the MDCT context and, when trellis
 * quantization is enabled, the two trellis buffers.
 *
 * @param avctx codec context whose priv_data is a NellyMoserEncodeContext
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *ctx = avctx->priv_data;

    ff_mdct_end(&ctx->mdct_ctx);

    /* without trellis there is nothing else to free */
    if (!ctx->avctx->trellis)
        return 0;

    av_free(ctx->opt);
    av_free(ctx->path);
    return 0;
}
177

    
178
/**
 * Pick the entry of table that is closest to val and store its index in the
 * caller's variable best_idx, which must be in scope.
 *
 * LUT gives a first guess, looked up at (lrintf(val) >> 8) + LUT_add clipped
 * to 0 .. LUT_size - 1; the guess is advanced by one when the following table
 * entry is nearer to val. val is evaluated more than once, so it must not
 * have side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement, including inside an unbraced if / else.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
183

    
184
/**
 * Choose the exponent indices band by band, taking for each band the table
 * entry nearest to what is left of the target after the previous choices.
 *
 * @param s         encoder context (not used here)
 * @param cand      target exponent of each band, NELLY_BANDS entries
 * @param idx_table receives the chosen index for each band: an index into
 *                  ff_nelly_init_table for band 0, into ff_nelly_delta_table
 *                  for the others
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;   /* written by find_best() */
    int power_idx;  /* exponent reconstructed from the choices so far */
    int band;

    /* band 0 is coded as an absolute value */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0] = best_idx;
    power_idx    = ff_nelly_init_table[best_idx];

    /* every further band is coded as a delta to the running exponent */
    band = 1;
    while (band < NELLY_BANDS) {
        float residual = cand[band] - power_idx;

        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[band] = best_idx;
        power_idx      += ff_nelly_delta_table[best_idx];
        band++;
    }
}
201

    
202
/**
 * Squared difference of two values.
 *
 * @param x    first value
 * @param y    second value
 * @param band band number; accepted but not used
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
208

    
209
/**
 * Choose the exponent indices of all bands with a dynamic-programming search
 * that minimizes the summed squared distance to the targets in cand.
 *
 * opt[band][p] holds the lowest accumulated distance with which the
 * reconstructed exponent p can be reached at that band (INFINITY when it
 * cannot), and path[band][p] the table index that was chosen to get there.
 *
 * @param s         encoder context; s->opt and s->path must each point to
 *                  NELLY_BANDS * OPT_SIZE elements
 * @param cand      target exponent of each band, NELLY_BANDS entries
 * @param idx_table receives the chosen index for each band: an index into
 *                  ff_nelly_init_table for band 0, into ff_nelly_delta_table
 *                  for the others
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    /* The buffers are used as NELLY_BANDS rows of OPT_SIZE states each, so
     * they have to be addressed with OPT_SIZE-wide rows; with narrower rows
     * the states of neighbouring bands would share memory. */
    float  (*opt )[OPT_SIZE] = (void *)s->opt ;
    uint8_t(*path)[OPT_SIZE] = (void *)s->path;

    for (band = 0; band < NELLY_BANDS; band++) {
        for (i = 0; i < OPT_SIZE; i++) {
            opt[band][i] = INFINITY;
        }
    }

    /* band 0: every entry of the init table is a possible start state */
    for (i = 0; i < 64; i++) {
        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        /* widen the search window until at least one state gets updated */
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            idx_min = FFMAX(0, cand[band] - q);
            /* clamp to the last valid state so that opt[band][idx] and
             * path[band][idx] below stay inside their rows */
            /* NOTE(review): idx_max is derived from cand[band - 1] while
             * idx_min uses cand[band]; kept as in the original - confirm */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if ( isinf(opt[band - 1][i]) )
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx] = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        assert(c); //FIXME
    }

    /* cheapest end state in the last band */
    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }
    /* walk back through the recorded choices, undoing each delta */
    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
271

    
272
/**
273
 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
274
 *  @param s               encoder context
275
 *  @param output          output buffer
276
 *  @param output_size     size of output buffer
277
 */
278
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
279
{
280
    PutBitContext pb;
281
    int i, j, band, block, best_idx, power_idx = 0;
282
    float power_val, coeff, coeff_sum;
283
    float pows[NELLY_FILL_LEN];
284
    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
285
    float cand[NELLY_BANDS];
286

    
287
    apply_mdct(s);
288

    
289
    init_put_bits(&pb, output, output_size * 8);
290

    
291
    i = 0;
292
    for (band = 0; band < NELLY_BANDS; band++) {
293
        coeff_sum = 0;
294
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
295
            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
296
                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
297
        }
298
        cand[band] =
299
            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
300
    }
301

    
302
    if (s->avctx->trellis) {
303
        get_exponent_dynamic(s, cand, idx_table);
304
    } else {
305
        get_exponent_greedy(s, cand, idx_table);
306
    }
307

    
308
    i = 0;
309
    for (band = 0; band < NELLY_BANDS; band++) {
310
        if (band) {
311
            power_idx += ff_nelly_delta_table[idx_table[band]];
312
            put_bits(&pb, 5, idx_table[band]);
313
        } else {
314
            power_idx = ff_nelly_init_table[idx_table[0]];
315
            put_bits(&pb, 6, idx_table[0]);
316
        }
317
        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
318
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
319
            s->mdct_out[i] *= power_val;
320
            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
321
            pows[i] = power_idx;
322
        }
323
    }
324

    
325
    ff_nelly_get_sample_bits(pows, bits);
326

    
327
    for (block = 0; block < 2; block++) {
328
        for (i = 0; i < NELLY_FILL_LEN; i++) {
329
            if (bits[i] > 0) {
330
                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
331
                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
332
                best_idx =
333
                    quant_lut[av_clip (
334
                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
335
                            quant_lut_offset[bits[i]],
336
                            quant_lut_offset[bits[i]+1] - 1
337
                            )];
338
                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
339
                    best_idx++;
340

    
341
                put_bits(&pb, bits[i], best_idx);
342
            }
343
        }
344
        if (!block)
345
            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
346
    }
347

    
348
    flush_put_bits(&pb);
349
}
350

    
351
/**
 * Buffer one frame of input and, once a previous frame is available, encode
 * one block.
 *
 * The first frame is only stored, so output lags the input by one call. A
 * call with data == NULL flushes the frame that is still buffered; every call
 * after that returns 0.
 *
 * @param avctx    codec context whose priv_data is a NellyMoserEncodeContext
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes
 * @param data     avctx->frame_size input samples, read as int16_t, or NULL
 *                 to flush
 * @return NELLY_BLOCK_LEN when a block was written, 0 when nothing was
 *         written, -1 when the output buffer cannot hold a block
 */
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    const int16_t *samples = data;
    int i;

    if (s->last_frame)
        return 0;

    /* A block is written by this call exactly when a frame is already
     * buffered; refuse before touching any state if it would not fit. */
    if (s->have_saved && buf_size < NELLY_BLOCK_LEN) {
        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
        return -1;
    }

    if (data) {
        for (i = 0; i < avctx->frame_size; i++) {
            s->buf[s->bufsel][i] = samples[i];
        }
        /* zero-pad a short frame */
        for (; i < NELLY_SAMPLES; i++) {
            s->buf[s->bufsel][i] = 0;
        }
        s->bufsel = 1 - s->bufsel;
        if (!s->have_saved) {
            s->have_saved = 1;
            return 0;
        }
    } else {
        /* only the first NELLY_BUF_LEN samples of this row are read by
         * apply_mdct() when the buffered frame is encoded */
        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
        s->bufsel = 1 - s->bufsel;
        s->last_frame = 1;
    }

    if (s->have_saved) {
        encode_block(s, frame, buf_size);
        return NELLY_BLOCK_LEN;
    }
    return 0;
}
384

    
385
AVCodec nellymoser_encoder = {
386
    .name = "nellymoser",
387
    .type = AVMEDIA_TYPE_AUDIO,
388
    .id = CODEC_ID_NELLYMOSER,
389
    .priv_data_size = sizeof(NellyMoserEncodeContext),
390
    .init = encode_init,
391
    .encode = encode_frame,
392
    .close = encode_end,
393
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
394
    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
395
    .sample_fmts = (const enum AVSampleFormat[]){AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE},
396
};