Revision 696d6889

View differences:

libavcodec/adpcm.c
257 257
    return nibble;
258 258
}
259 259

  
260
typedef struct TrellisPath {
261
    int nibble;
262
    int prev;
263
} TrellisPath;
264

  
265
typedef struct TrellisNode {
266
    uint32_t ssd;
267
    int path;
268
    int sample1;
269
    int sample2;
270
    int step;
271
} TrellisNode;
272

  
273
static void adpcm_compress_trellis(AVCodecContext *avctx, const short *samples,
274
                                   uint8_t *dst, ADPCMChannelStatus *c, int n)
275
{
276
#define FREEZE_INTERVAL 128
277
    //FIXME 6% faster if frontier is a compile-time constant
278
    const int frontier = 1 << avctx->trellis;
279
    const int stride = avctx->channels;
280
    const int version = avctx->codec->id;
281
    const int max_paths = frontier*FREEZE_INTERVAL;
282
    TrellisPath paths[max_paths], *p;
283
    TrellisNode node_buf[2][frontier];
284
    TrellisNode *nodep_buf[2][frontier];
285
    TrellisNode **nodes = nodep_buf[0]; // nodes[] is always sorted by .ssd
286
    TrellisNode **nodes_next = nodep_buf[1];
287
    int pathn = 0, froze = -1, i, j, k;
288

  
289
    assert(!(max_paths&(max_paths-1)));
290

  
291
    memset(nodep_buf, 0, sizeof(nodep_buf));
292
    nodes[0] = &node_buf[1][0];
293
    nodes[0]->ssd = 0;
294
    nodes[0]->path = 0;
295
    nodes[0]->step = c->step_index;
296
    nodes[0]->sample1 = c->sample1;
297
    nodes[0]->sample2 = c->sample2;
298
    if(version == CODEC_ID_ADPCM_IMA_WAV)
299
        nodes[0]->sample1 = c->prev_sample;
300
    if(version == CODEC_ID_ADPCM_MS)
301
        nodes[0]->step = c->idelta;
302
    if(version == CODEC_ID_ADPCM_YAMAHA) {
303
        if(c->step == 0) {
304
            nodes[0]->step = 127;
305
            nodes[0]->sample1 = 0;
306
        } else {
307
            nodes[0]->step = c->step;
308
            nodes[0]->sample1 = c->predictor;
309
        }
310
    }
311

  
312
    for(i=0; i<n; i++) {
313
        TrellisNode *t = node_buf[i&1];
314
        TrellisNode **u;
315
        int sample = samples[i*stride];
316
        memset(nodes_next, 0, frontier*sizeof(TrellisNode*));
317
        for(j=0; j<frontier && nodes[j]; j++) {
318
            // higher j have higher ssd already, so they're unlikely to use a suboptimal next sample too
319
            const int range = (j < frontier/2) ? 1 : 0;
320
            const int step = nodes[j]->step;
321
            int nidx;
322
            if(version == CODEC_ID_ADPCM_MS) {
323
                const int predictor = ((nodes[j]->sample1 * c->coeff1) + (nodes[j]->sample2 * c->coeff2)) / 256;
324
                const int div = (sample - predictor) / step;
325
                const int nmin = clip(div-range, -8, 6);
326
                const int nmax = clip(div+range, -7, 7);
327
                for(nidx=nmin; nidx<=nmax; nidx++) {
328
                    const int nibble = nidx & 0xf;
329
                    int dec_sample = predictor + nidx * step;
330
#define STORE_NODE(NAME, STEP_INDEX)\
331
                    int d;\
332
                    uint32_t ssd;\
333
                    CLAMP_TO_SHORT(dec_sample);\
334
                    d = sample - dec_sample;\
335
                    ssd = nodes[j]->ssd + d*d;\
336
                    if(nodes_next[frontier-1] && ssd >= nodes_next[frontier-1]->ssd)\
337
                        continue;\
338
                    /* Collapse any two states with the same previous sample value. \
339
                     * One could also distinguish states by step and by 2nd to last
340
                     * sample, but the effects of that are negligible. */\
341
                    for(k=0; k<frontier && nodes_next[k]; k++) {\
342
                        if(dec_sample == nodes_next[k]->sample1) {\
343
                            assert(ssd >= nodes_next[k]->ssd);\
344
                            goto next_##NAME;\
345
                        }\
346
                    }\
347
                    for(k=0; k<frontier; k++) {\
348
                        if(!nodes_next[k] || ssd < nodes_next[k]->ssd) {\
349
                            TrellisNode *u = nodes_next[frontier-1];\
350
                            if(!u) {\
351
                                assert(pathn < max_paths);\
352
                                u = t++;\
353
                                u->path = pathn++;\
354
                            }\
355
                            u->ssd = ssd;\
356
                            u->step = STEP_INDEX;\
357
                            u->sample2 = nodes[j]->sample1;\
358
                            u->sample1 = dec_sample;\
359
                            paths[u->path].nibble = nibble;\
360
                            paths[u->path].prev = nodes[j]->path;\
361
                            memmove(&nodes_next[k+1], &nodes_next[k], (frontier-k-1)*sizeof(TrellisNode*));\
362
                            nodes_next[k] = u;\
363
                            break;\
364
                        }\
365
                    }\
366
                    next_##NAME:;
367
                    STORE_NODE(ms, FFMAX(16, (AdaptationTable[nibble] * step) >> 8));
368
                }
369
            } else if(version == CODEC_ID_ADPCM_IMA_WAV) {
370
#define LOOP_NODES(NAME, STEP_TABLE, STEP_INDEX)\
371
                const int predictor = nodes[j]->sample1;\
372
                const int div = (sample - predictor) * 4 / STEP_TABLE;\
373
                int nmin = clip(div-range, -7, 6);\
374
                int nmax = clip(div+range, -6, 7);\
375
                if(nmin<=0) nmin--; /* distinguish -0 from +0 */\
376
                if(nmax<0) nmax--;\
377
                for(nidx=nmin; nidx<=nmax; nidx++) {\
378
                    const int nibble = nidx<0 ? 7-nidx : nidx;\
379
                    int dec_sample = predictor + (STEP_TABLE * yamaha_difflookup[nibble]) / 8;\
380
                    STORE_NODE(NAME, STEP_INDEX);\
381
                }
382
                LOOP_NODES(ima, step_table[step], clip(step + index_table[nibble], 0, 88));
383
            } else { //CODEC_ID_ADPCM_YAMAHA
384
                LOOP_NODES(yamaha, step, clip((step * yamaha_indexscale[nibble]) >> 8, 127, 24567));
385
#undef LOOP_NODES
386
#undef STORE_NODE
387
            }
388
        }
389

  
390
        u = nodes;
391
        nodes = nodes_next;
392
        nodes_next = u;
393

  
394
        // prevent overflow
395
        if(nodes[0]->ssd > (1<<28)) {
396
            for(j=1; j<frontier && nodes[j]; j++)
397
                nodes[j]->ssd -= nodes[0]->ssd;
398
            nodes[0]->ssd = 0;
399
        }
400

  
401
        // merge old paths to save memory
402
        if(i == froze + FREEZE_INTERVAL) {
403
            p = &paths[nodes[0]->path];
404
            for(k=i; k>froze; k--) {
405
                dst[k] = p->nibble;
406
                p = &paths[p->prev];
407
            }
408
            froze = i;
409
            pathn = 0;
410
            // other nodes might use paths that don't coincide with the frozen one.
411
            // checking which nodes do so is too slow, so just kill them all.
412
            // this also slightly improves quality, but I don't know why.
413
            memset(nodes+1, 0, (frontier-1)*sizeof(TrellisNode*));
414
        }
415
    }
416

  
417
    p = &paths[nodes[0]->path];
418
    for(i=n-1; i>froze; i--) {
419
        dst[i] = p->nibble;
420
        p = &paths[p->prev];
421
    }
422

  
423
    c->predictor = nodes[0]->sample1;
424
    c->sample1 = nodes[0]->sample1;
425
    c->sample2 = nodes[0]->sample2;
426
    c->step_index = nodes[0]->step;
427
    c->step = nodes[0]->step;
428
    c->idelta = nodes[0]->step;
429
}
430

  
260 431
static int adpcm_encode_frame(AVCodecContext *avctx,
261 432
                            unsigned char *frame, int buf_size, void *data)
262 433
{
......
293 464
            }
294 465

  
295 466
            /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */
467
            if(avctx->trellis > 0) {
468
                uint8_t buf[2][n*8];
469
                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n*8);
470
                if(avctx->channels == 2)
471
                    adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n*8);
472
                for(i=0; i<n; i++) {
473
                    *dst++ = buf[0][8*i+0] | (buf[0][8*i+1] << 4);
474
                    *dst++ = buf[0][8*i+2] | (buf[0][8*i+3] << 4);
475
                    *dst++ = buf[0][8*i+4] | (buf[0][8*i+5] << 4);
476
                    *dst++ = buf[0][8*i+6] | (buf[0][8*i+7] << 4);
477
                    if (avctx->channels == 2) {
478
                        *dst++ = buf[1][8*i+0] | (buf[1][8*i+1] << 4);
479
                        *dst++ = buf[1][8*i+2] | (buf[1][8*i+3] << 4);
480
                        *dst++ = buf[1][8*i+4] | (buf[1][8*i+5] << 4);
481
                        *dst++ = buf[1][8*i+6] | (buf[1][8*i+7] << 4);
482
                    }
483
                }
484
            } else
296 485
            for (; n>0; n--) {
297 486
                *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F;
298 487
                *dst |= (adpcm_ima_compress_sample(&c->status[0], samples[avctx->channels]) << 4) & 0xF0;
......
352 541
            *dst++ = c->status[i].sample2 >> 8;
353 542
        }
354 543

  
544
        if(avctx->trellis > 0) {
545
            int n = avctx->block_align - 7*avctx->channels;
546
            uint8_t buf[2][n];
547
            if(avctx->channels == 1) {
548
                n *= 2;
549
                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
550
                for(i=0; i<n; i+=2)
551
                    *dst++ = (buf[0][i] << 4) | buf[0][i+1];
552
            } else {
553
                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
554
                adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
555
                for(i=0; i<n; i++)
556
                    *dst++ = (buf[0][i] << 4) | buf[1][i];
557
            }
558
        } else
355 559
        for(i=7*avctx->channels; i<avctx->block_align; i++) {
356 560
            int nibble;
357 561
            nibble = adpcm_ms_compress_sample(&c->status[ 0], *samples++)<<4;
......
361 565
        break;
362 566
    case CODEC_ID_ADPCM_YAMAHA:
363 567
        n = avctx->frame_size / 2;
568
        if(avctx->trellis > 0) {
569
            uint8_t buf[2][n*2];
570
            n *= 2;
571
            if(avctx->channels == 1) {
572
                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
573
                for(i=0; i<n; i+=2)
574
                    *dst++ = buf[0][i] | (buf[0][i+1] << 4);
575
            } else {
576
                adpcm_compress_trellis(avctx, samples, buf[0], &c->status[0], n);
577
                adpcm_compress_trellis(avctx, samples+1, buf[1], &c->status[1], n);
578
                for(i=0; i<n; i++)
579
                    *dst++ = buf[0][i] | (buf[1][i] << 4);
580
            }
581
        } else
364 582
        for (; n>0; n--) {
365 583
            for(i = 0; i < avctx->channels; i++) {
366 584
                int nibble;
libavcodec/utils.c
720 720
{"refs", NULL, OFFSET(refs), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
721 721
{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
722 722
{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
723
{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
723
{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E},
724 724
{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
725 725
{"bpyramid", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
726 726
{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},

Also available in: Unified diff