Statistics
| Branch: | Revision:

ffmpeg / libavcodec / nellymoserenc.c @ 84dc2d8a

History | View | Annotate | Download (13.2 KB)

1
/*
2
 * Nellymoser encoder
3
 * This code is developed as part of Google Summer of Code 2008 Program.
4
 *
5
 * Copyright (c) 2008 Bartlomiej Wolowiec
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23

    
24
/**
25
 * @file libavcodec/nellymoserenc.c
26
 * Nellymoser encoder
27
 * by Bartlomiej Wolowiec
28
 *
29
 * Generic codec information: libavcodec/nellymoserdec.c
30
 *
31
 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32
 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33
 *
34
 * for more information about nellymoser format, visit:
35
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36
 */
37

    
38
#include "nellymoser.h"
39
#include "avcodec.h"
40
#include "dsputil.h"
41

    
42
#define BITSTREAM_WRITER_LE
43
#include "put_bits.h"
44

    
45
/** Number of entries in pow_table; encode_block() indexes it with the low 11 bits of the band exponent. */
#define POW_TABLE_SIZE (1<<11)
46
/** Exponent bias: added to the exponent when encode_init() fills pow_table and to the shift count in encode_block(). */
#define POW_TABLE_OFFSET 3
47
/** Number of states scanned per band by the trellis search; encode_init() allocates NELLY_BANDS * OPT_SIZE elements per table. */
#define OPT_SIZE ((1<<15) + 3000)
48

    
49
typedef struct NellyMoserEncodeContext {
50
    AVCodecContext  *avctx;
51
    int             last_frame;
52
    int             bufsel;
53
    int             have_saved;
54
    DSPContext      dsp;
55
    FFTContext      mdct_ctx;
56
    DECLARE_ALIGNED(16, float, mdct_out)[NELLY_SAMPLES];
57
    DECLARE_ALIGNED(16, float, in_buff)[NELLY_SAMPLES];
58
    DECLARE_ALIGNED(16, float, buf)[2][3 * NELLY_BUF_LEN];     ///< sample buffer
59
    float           (*opt )[NELLY_BANDS];
60
    uint8_t         (*path)[NELLY_BANDS];
61
} NellyMoserEncodeContext;
62

    
63
static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled by encode_init()
64

    
65
/**
 * Starting-guess table for the first band exponent.
 * get_exponent_greedy() indexes it with (lrintf(value) >> 8) - 20, clipped to
 * [0, 95]; the entry is a candidate index into ff_nelly_init_table.
 */
static const uint8_t sf_lut[96] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,  /*  0..15 */
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,  /* 16..31 */
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,  /* 32..47 */
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,  /* 48..63 */
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,  /* 64..79 */
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,  /* 80..95 */
};
73

    
74
/**
 * Starting-guess table for the per-band exponent deltas.
 * get_exponent_greedy() indexes it with (lrintf(value) >> 8) + 37, clipped to
 * [0, 77]; the entry is a candidate index into ff_nelly_delta_table.
 */
static const uint8_t sf_delta_lut[78] = {
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,  /*  0..15 */
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,  /* 16..31 */
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,  /* 32..47 */
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,  /* 48..63 */
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,          /* 64..77 */
};
81

    
82
static const uint8_t quant_lut[230] = {
83
     0,
84

    
85
     0,  1,  2,
86

    
87
     0,  1,  2,  3,  4,  5,  6,
88

    
89
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
90
    12, 13, 13, 13, 14,
91

    
92
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
93
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
94
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
95
    30,
96

    
97
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
98
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
99
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
100
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
101
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
102
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
103
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
104
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
105
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
106
    61, 61, 61, 61, 62,
107
};
108

    
109
/*
 * Per-bit-count parameters used by encode_block() to turn a scaled
 * coefficient into an index into quant_lut:
 *     index = coeff * quant_lut_mul[bits] + quant_lut_add[bits]
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1].
 * All three tables are indexed by the number of bits assigned to a coefficient.
 */
static const float quant_lut_mul[7] = {
    0.0, 0.0, 2.0, 2.0, 5.0, 12.0, 36.6
};
static const float quant_lut_add[7] = {
    0.0, 0.0, 2.0, 7.0, 21.0, 56.0, 157.0
};
static const uint8_t quant_lut_offset[8] = {
    0, 0, 1, 4, 11, 32, 81, 230
};
112

    
113
/**
 * Run the two MDCTs of one frame and store their output in s->mdct_out.
 *
 * The first transform reads a copy of the current sample row (s->in_buff);
 * the second one operates in place on the current row, with the first
 * NELLY_BUF_LEN samples of the other row supplying its last part.
 * Both inputs are shaped with ff_sine_128 through the dsp helpers.
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    float *cur     = s->buf[s->bufsel];
    float *other   = s->buf[1 - s->bufsel];
    float *cur_mid = cur + NELLY_BUF_LEN;

    /* First transform: windowed copy of cur[0 .. 2 * NELLY_BUF_LEN). */
    memcpy(s->in_buff, cur, NELLY_BUF_LEN * sizeof(float));
    s->dsp.vector_fmul(s->in_buff, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, cur_mid,
                               ff_sine_128, NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out, s->in_buff);

    /* Second transform: cur[NELLY_BUF_LEN ..] is windowed in place and its
     * tail is produced from the start of the other row. */
    s->dsp.vector_fmul(cur_mid, ff_sine_128, NELLY_BUF_LEN);
    s->dsp.vector_fmul_reverse(cur_mid + NELLY_BUF_LEN, other,
                               ff_sine_128, NELLY_BUF_LEN);
    ff_mdct_calc(&s->mdct_ctx, s->mdct_out + NELLY_BUF_LEN, cur_mid);
}
126

    
127
/**
 * Initialize the encoder: validate the stream parameters, set the frame
 * size, allocate the optional trellis tables and set up the MDCT, the DSP
 * helpers, the sine window and pow_table.
 *
 * @param avctx codec context; its priv_data is the NellyMoserEncodeContext
 * @return 0 on success, -1 on unsupported parameters or when an allocation
 *         or the MDCT setup fails
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int i;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
        return -1;
    }

    /* Non-standard sample rates are only accepted below normal compliance. */
    if (avctx->sample_rate != 8000 && avctx->sample_rate != 16000 &&
        avctx->sample_rate != 11025 &&
        avctx->sample_rate != 22050 && avctx->sample_rate != 44100 &&
        avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
        return -1;
    }

    avctx->frame_size = NELLY_SAMPLES;
    s->avctx = avctx;

    /* Allocate the trellis tables first so that a failure needs no MDCT
     * teardown; both pointers are checked before anything uses them. */
    if (avctx->trellis) {
        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
        if (!s->opt || !s->path)
            goto fail;
    }

    if (ff_mdct_init(&s->mdct_ctx, 8, 0, 1.0) < 0)
        goto fail;
    dsputil_init(&s->dsp, avctx);

    /* Generate overlap window */
    ff_sine_window_init(ff_sine_128, 128);
    for (i = 0; i < POW_TABLE_SIZE; i++)
        pow_table[i] = -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET);

    return 0;

fail:
    av_log(avctx, AV_LOG_ERROR, "Nellymoser encoder initialization failed\n");
    av_free(s->opt);
    av_free(s->path);
    s->opt  = NULL;
    s->path = NULL;
    return -1;
}
162

    
163
/**
 * Release everything encode_init() set up.
 *
 * The trellis tables are freed unconditionally: av_free(NULL) is a no-op, so
 * this stays correct when they were never allocated, and it does not leak
 * when avctx->trellis was changed after initialization.
 *
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct_ctx);

    av_free(s->opt);
    av_free(s->path);
    /* Clear the pointers so a stale context cannot be used or freed twice. */
    s->opt  = NULL;
    s->path = NULL;

    return 0;
}
176

    
177
/**
 * Set the caller's variable best_idx to the index of the table entry
 * closest to val.
 *
 * LUT supplies a first guess, indexed by (lrintf(val) >> 8) + LUT_add clipped
 * to [0, LUT_size - 1]; the guess is then compared against its successor and
 * advanced by one when the successor is closer. table must therefore have an
 * entry at every LUT value + 1.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement (safe
 * inside an unbraced if/else). val is evaluated more than once, so it must
 * not have side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
182

    
183
/**
 * Choose the exponent index of every band one band at a time.
 *
 * The first band is matched against ff_nelly_init_table; each following band
 * is matched, through ff_nelly_delta_table, against the difference between
 * its candidate and the exponent accumulated so far.
 *
 * @param s         encoder context (not used by this function)
 * @param cand      candidate exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;       /* assigned by find_best() */
    int accumulated;    /* exponent reconstructed from the indices chosen so far */
    int b;

    /* base exponent */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0] = best_idx;
    accumulated  = ff_nelly_init_table[best_idx];

    for (b = 1; b < NELLY_BANDS; b++) {
        float residual = cand[b] - accumulated;

        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[b]  = best_idx;
        accumulated  += ff_nelly_delta_table[best_idx];
    }
}
200

    
201
/**
 * Squared difference between two values.
 *
 * @param band accepted for the callers' convenience; it does not affect the result
 * @return (x - y) * (x - y)
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    return diff * diff;
}
207

    
208
/**
 * Choose the exponent index of every band with a dynamic-programming
 * (trellis) search over the reachable exponent values.
 *
 * opt[band][v] holds the smallest accumulated squared error of any index
 * sequence that ends band `band` at exponent value v, and path[band][v] the
 * table index that reached it. The cheapest final state is then traced back
 * to fill idx_table.
 *
 * The tables are addressed as NELLY_BANDS rows of OPT_SIZE states, matching
 * the NELLY_BANDS * OPT_SIZE elements allocated by encode_init(); rows of any
 * other length would make the per-band rows overlap.
 *
 * @param s         encoder context providing the opt / path tables
 * @param cand      candidate exponent per band, NELLY_BANDS entries
 * @param idx_table receives the chosen table index per band
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    /* Reinterpret the allocations with the row length the search really uses. */
    float   *opt_flat        = (float *) s->opt;
    float  (*opt )[OPT_SIZE] = (void *) s->opt;
    uint8_t(*path)[OPT_SIZE] = (void *) s->path;

    /* Mark every state of every band as unreachable. */
    for (i = 0; i < NELLY_BANDS * OPT_SIZE; i++) {
        opt_flat[i] = INFINITY;
    }

    /* Band 0: every entry of the init table is a possible start state. */
    for (i = 0; i < 64; i++) {
        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        /* Widen the search window until at least one state was reached. */
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            /* NOTE(review): the lower bound follows cand[band] while the
             * upper bound follows cand[band - 1]; kept as found. */
            idx_min = FFMAX(0, cand[band] - q);
            /* Last valid state is OPT_SIZE - 1; OPT_SIZE itself would land
             * one element past the end of the row. */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if ( isinf(opt[band - 1][i]) )
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx] = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        assert(c); //FIXME
    }

    /* Pick the cheapest state of the last band. */
    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }

    /* With asserts compiled out the search may have reached no state at all;
     * never index the tables with -1, use the greedy choice instead. */
    if (best_idx < 0) {
        get_exponent_greedy(s, cand, idx_table);
        return;
    }

    /* Trace the chosen path back to band 0. */
    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
270

    
271
/**
272
 * Encodes NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
273
 *  @param s               encoder context
274
 *  @param output          output buffer
275
 *  @param output_size     size of output buffer
276
 */
277
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
278
{
279
    PutBitContext pb;
280
    int i, j, band, block, best_idx, power_idx = 0;
281
    float power_val, coeff, coeff_sum;
282
    float pows[NELLY_FILL_LEN];
283
    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
284
    float cand[NELLY_BANDS];
285

    
286
    apply_mdct(s);
287

    
288
    init_put_bits(&pb, output, output_size * 8);
289

    
290
    i = 0;
291
    for (band = 0; band < NELLY_BANDS; band++) {
292
        coeff_sum = 0;
293
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
294
            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
295
                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
296
        }
297
        cand[band] =
298
            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
299
    }
300

    
301
    if (s->avctx->trellis) {
302
        get_exponent_dynamic(s, cand, idx_table);
303
    } else {
304
        get_exponent_greedy(s, cand, idx_table);
305
    }
306

    
307
    i = 0;
308
    for (band = 0; band < NELLY_BANDS; band++) {
309
        if (band) {
310
            power_idx += ff_nelly_delta_table[idx_table[band]];
311
            put_bits(&pb, 5, idx_table[band]);
312
        } else {
313
            power_idx = ff_nelly_init_table[idx_table[0]];
314
            put_bits(&pb, 6, idx_table[0]);
315
        }
316
        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
317
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
318
            s->mdct_out[i] *= power_val;
319
            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
320
            pows[i] = power_idx;
321
        }
322
    }
323

    
324
    ff_nelly_get_sample_bits(pows, bits);
325

    
326
    for (block = 0; block < 2; block++) {
327
        for (i = 0; i < NELLY_FILL_LEN; i++) {
328
            if (bits[i] > 0) {
329
                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
330
                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
331
                best_idx =
332
                    quant_lut[av_clip (
333
                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
334
                            quant_lut_offset[bits[i]],
335
                            quant_lut_offset[bits[i]+1] - 1
336
                            )];
337
                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
338
                    best_idx++;
339

    
340
                put_bits(&pb, bits[i], best_idx);
341
            }
342
        }
343
        if (!block)
344
            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
345
    }
346

    
347
    flush_put_bits(&pb);
348
}
349

    
350
/**
 * Accept one frame of 16-bit samples and, once a previous frame has been
 * stored, emit one encoded block.
 *
 * The first call only stores its samples and returns 0. A call with
 * data == NULL flushes the encoder: the remaining frame is encoded against
 * silence, and every later call returns 0.
 *
 * @param avctx    codec context
 * @param frame    output buffer
 * @param buf_size size of the output buffer in bytes
 * @param data     input samples (int16_t), or NULL to flush
 * @return NELLY_BLOCK_LEN when a block was written, 0 when nothing was
 *         written, -1 when the output buffer cannot hold a block
 */
static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size, void *data)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int16_t *samples = data;
    int i;

    if (s->last_frame)
        return 0;

    /* A block is written exactly when a frame is already stored; refuse a
     * too-small buffer before any encoder state is modified. */
    if (s->have_saved && buf_size < NELLY_BLOCK_LEN) {
        av_log(avctx, AV_LOG_ERROR, "Output buffer is too small\n");
        return -1;
    }

    if (data) {
        /* frame_size may be smaller for the last frame, but it must never
         * make the copy run past the NELLY_SAMPLES slots filled here. */
        int nb_samples = avctx->frame_size;

        if (nb_samples > NELLY_SAMPLES)
            nb_samples = NELLY_SAMPLES;

        for (i = 0; i < nb_samples; i++) {
            s->buf[s->bufsel][i] = samples[i];
        }
        /* Zero-pad a short frame. */
        for (; i < NELLY_SAMPLES; i++) {
            s->buf[s->bufsel][i] = 0;
        }
        s->bufsel = 1 - s->bufsel;
        if (!s->have_saved) {
            s->have_saved = 1;
            return 0;
        }
    } else {
        /* Flush: the stored frame is encoded against silence. */
        memset(s->buf[s->bufsel], 0, sizeof(s->buf[0][0]) * NELLY_BUF_LEN);
        s->bufsel = 1 - s->bufsel;
        s->last_frame = 1;
    }

    if (s->have_saved) {
        encode_block(s, frame, buf_size);
        return NELLY_BLOCK_LEN;
    }
    return 0;
}
383

    
384
AVCodec nellymoser_encoder = {
385
    .name = "nellymoser",
386
    .type = CODEC_TYPE_AUDIO,
387
    .id = CODEC_ID_NELLYMOSER,
388
    .priv_data_size = sizeof(NellyMoserEncodeContext),
389
    .init = encode_init,
390
    .encode = encode_frame,
391
    .close = encode_end,
392
    .capabilities = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
393
    .long_name = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
394
};