Revision f27e1d64 libavcodec/vorbis_dec.c

View differences:

libavcodec/vorbis_dec.c
149 149
    uint_fast8_t mode_count;
150 150
    vorbis_mode *modes;
151 151
    uint_fast8_t mode_number; // mode number for the current packet
152
    uint_fast8_t previous_window;
152 153
    float *channel_residues;
153 154
    float *channel_floors;
154 155
    float *saved;
155
    uint_fast16_t saved_start;
156 156
    float *ret;
157 157
    float *buf;
158 158
    float *buf_tmp;
......
903 903
    vc->ret             = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
904 904
    vc->buf             = av_malloc( vc->blocksize[1]                       * sizeof(float));
905 905
    vc->buf_tmp         = av_malloc( vc->blocksize[1]                       * sizeof(float));
906
    vc->saved_start=0;
906
    vc->previous_window=0;
907 907

  
908 908
    ff_mdct_init(&vc->mdct[0], bl0, 1);
909 909
    ff_mdct_init(&vc->mdct[1], bl1, 1);
......
1394 1394
    }
1395 1395
}
1396 1396

  
1397
/**
 * Copy len samples from src to dst while applying the output bias.
 *
 * Exactly one of the two biases is applied:
 *  - exp_bias != 0: exp_bias is added to the raw 32-bit pattern of every
 *    sample and add_bias is ignored. Per the original author's note this is
 *    meant to act as dst[i] = src[i] * (1 << bias); that relies on float
 *    being a 32-bit IEEE-754 single and on exp_bias being pre-shifted into
 *    the exponent field by the caller.
 *  - exp_bias == 0: add_bias is added to every sample as a normal float add.
 *
 * @param dst      destination buffer, at least len floats
 * @param src      source buffer, at least len floats
 * @param len      number of samples to process; nothing is written if <= 0
 * @param exp_bias integer added to the bit pattern of each sample, or 0
 * @param add_bias float offset used only when exp_bias is 0
 */
static void copy_normalize(float *dst, float *src, int len, int exp_bias, float add_bias)
{
    int i;
    if (exp_bias) {
        for (i = 0; i < len; i++) {
            uint32_t bits;
            /* Move the bit pattern through a uint32_t with memcpy rather than
             * dereferencing a casted pointer: accessing a float object through
             * a uint32_t lvalue violates the strict-aliasing rules. */
            memcpy(&bits, &src[i], sizeof(bits));
            bits += (uint32_t)exp_bias; /* unsigned add wraps exactly as before */
            memcpy(&dst[i], &bits, sizeof(bits));
        }
    } else {
        for (i = 0; i < len; i++)
            dst[i] = src[i] + add_bias;
    }
}
1408

  
1397 1409
// Decode the audio packet using the functions above
1398 1410

  
1399 1411
static int vorbis_parse_audio_packet(vorbis_context *vc) {
1400 1412
    GetBitContext *gb=&vc->gb;
1401 1413

  
1402
    uint_fast8_t previous_window=0,next_window=0;
1414
    uint_fast8_t previous_window=vc->previous_window;
1403 1415
    uint_fast8_t mode_number;
1416
    uint_fast8_t blockflag;
1404 1417
    uint_fast16_t blocksize;
1405 1418
    int_fast32_t i,j;
1406 1419
    uint_fast8_t no_residue[vc->audio_channels];
......
1411 1424
    uint_fast8_t res_chan[vc->audio_channels];
1412 1425
    uint_fast8_t res_num=0;
1413 1426
    int_fast16_t retlen=0;
1414
    uint_fast16_t saved_start=0;
1415 1427
    float fadd_bias = vc->add_bias;
1416 1428

  
1417 1429
    if (get_bits1(gb)) {
......
1429 1441

  
1430 1442
    AV_DEBUG(" Mode number: %d , mapping: %d , blocktype %d \n", mode_number, vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
1431 1443

  
1432
    if (vc->modes[mode_number].blockflag) {
1433
        previous_window=get_bits1(gb);
1434
        next_window=get_bits1(gb);
1444
    blockflag=vc->modes[mode_number].blockflag;
1445
    blocksize=vc->blocksize[blockflag];
1446
    if (blockflag) {
1447
        skip_bits(gb, 2); // previous_window, next_window
1435 1448
    }
1436 1449

  
1437
    blocksize=vc->blocksize[vc->modes[mode_number].blockflag];
1438 1450
    memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
1439 1451
    memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
1440 1452

  
......
1504 1516

  
1505 1517
// MDCT, overlap/add, save data for next overlapping  FPMATH
1506 1518

  
1519
    retlen = (blocksize + vc->blocksize[previous_window])/4;
1507 1520
    for(j=0;j<vc->audio_channels;++j) {
1508
        uint_fast8_t step=vc->audio_channels;
1509
        uint_fast16_t k;
1510
        float *saved=vc->saved+j*vc->blocksize[1]/2;
1511
        float *ret=vc->ret;
1512
        const float *lwin=vc->win[1];
1513
        const float *swin=vc->win[0];
1521
        uint_fast16_t bs0=vc->blocksize[0];
1522
        uint_fast16_t bs1=vc->blocksize[1];
1523
        float *saved=vc->saved+j*bs1/2;
1524
        float *ret=vc->ret+j*retlen;
1514 1525
        float *buf=vc->buf;
1515
        float *buf_tmp=vc->buf_tmp;
1516

  
1517
        ch_floor_ptr=vc->channel_floors+j*blocksize/2;
1518

  
1519
        saved_start=vc->saved_start;
1526
        const float *win=vc->win[blockflag&previous_window];
1520 1527

  
1521
        vc->mdct[0].fft.imdct_calc(&vc->mdct[vc->modes[mode_number].blockflag], buf, ch_floor_ptr, buf_tmp);
1528
        vc->mdct[0].fft.imdct_calc(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp);
1522 1529

  
1523
        //FIXME process channels together, to allow faster simd vector_fmul_add_add?
1524
        if (vc->modes[mode_number].blockflag) {
1525
            // -- overlap/add
1526
            if (previous_window) {
1527
                vc->dsp.vector_fmul_add_add(ret+j, buf, lwin, saved, vc->add_bias, vc->blocksize[1]/2, step);
1528
                retlen=vc->blocksize[1]/2;
1529
            } else {
1530
                int len = (vc->blocksize[1]-vc->blocksize[0])/4;
1531
                buf += len;
1532
                vc->dsp.vector_fmul_add_add(ret+j, buf, swin, saved, vc->add_bias, vc->blocksize[0]/2, step);
1533
                k = vc->blocksize[0]/2*step + j;
1534
                buf += vc->blocksize[0]/2;
1535
                if(vc->exp_bias){
1536
                    for(i=0; i<len; i++, k+=step)
1537
                        ((uint32_t*)ret)[k] = ((uint32_t*)buf)[i] + vc->exp_bias; // ret[k]=buf[i]*(1<<bias)
1538
                } else {
1539
                    for(i=0; i<len; i++, k+=step)
1540
                        ret[k] = buf[i] + fadd_bias;
1541
                }
1542
                buf=vc->buf;
1543
                retlen=vc->blocksize[0]/2+len;
1544
            }
1545
            // -- save
1546
            if (next_window) {
1547
                buf += vc->blocksize[1]/2;
1548
                vc->dsp.vector_fmul_reverse(saved, buf, lwin, vc->blocksize[1]/2);
1549
                saved_start=0;
1550
            } else {
1551
                saved_start=(vc->blocksize[1]-vc->blocksize[0])/4;
1552
                buf += vc->blocksize[1]/2;
1553
                for(i=0; i<saved_start; i++)
1554
                    ((uint32_t*)saved)[i] = ((uint32_t*)buf)[i] + vc->exp_bias;
1555
                vc->dsp.vector_fmul_reverse(saved+saved_start, buf+saved_start, swin, vc->blocksize[0]/2);
1556
            }
1530
        if(blockflag == previous_window) {
1531
            vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/2);
1532
        } else if(blockflag > previous_window) {
1533
            vc->dsp.vector_fmul_window(ret, saved, buf+(bs1-bs0)/4, win, fadd_bias, bs0/2);
1534
            copy_normalize(ret+bs0/2, buf+(bs1+bs0)/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias);
1557 1535
        } else {
1558
            // --overlap/add
1559
            if(vc->add_bias) {
1560
                for(k=j, i=0;i<saved_start;++i, k+=step)
1561
                    ret[k] = saved[i] + fadd_bias;
1562
            } else {
1563
                for(k=j, i=0;i<saved_start;++i, k+=step)
1564
                    ret[k] = saved[i];
1565
            }
1566
            vc->dsp.vector_fmul_add_add(ret+k, buf, swin, saved+saved_start, vc->add_bias, vc->blocksize[0]/2, step);
1567
            retlen=saved_start+vc->blocksize[0]/2;
1568
            // -- save
1569
            buf += vc->blocksize[0]/2;
1570
            vc->dsp.vector_fmul_reverse(saved, buf, swin, vc->blocksize[0]/2);
1571
            saved_start=0;
1536
            copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias);
1537
            vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/2);
1572 1538
        }
1539
        memcpy(saved, buf+blocksize/2, blocksize/2*sizeof(float));
1573 1540
    }
1574
    vc->saved_start=saved_start;
1575 1541

  
1576
    return retlen*vc->audio_channels;
1542
    vc->previous_window = blockflag;
1543
    return retlen;
1577 1544
}
1578 1545

  
1579 1546
// Return the decoded audio packet through the standard api
......
1610 1577

  
1611 1578
    AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len);
1612 1579

  
1613
    vc->dsp.float_to_int16(data, vc->ret, len);
1614
    *data_size=len*2;
1580
    vc->dsp.float_to_int16_interleave(data, vc->ret, len, vc->audio_channels);
1581
    *data_size=len*2*vc->audio_channels;
1615 1582

  
1616 1583
    return buf_size ;
1617 1584
}

Also available in: Unified diff