Revision f27e1d64
libavcodec/dsputil.c | ||
---|---|---|
3930 | 3930 |
dst[i*step] = src0[i] * src1[i] + src2[i] + src3; |
3931 | 3931 |
} |
3932 | 3932 |
|
3933 |
/**
 * Windowed combination of two blocks (plain C version).
 *
 * For every i in [0, len):
 *     dst[i] = src0[i]*win[len-1-i] + src1[i]*win[i] + add_bias
 * so src0 is weighted by the window read backwards and src1 by the window
 * read forwards.
 *
 * @param dst      output, len floats
 * @param src0     first input, len floats
 * @param src1     second input, len floats
 * @param win      window coefficients, len floats
 * @param add_bias constant added to every output sample
 * @param len      number of samples to produce
 */
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
    int fwd = 0;        /* forward index into dst/src0/src1/win */
    int rev = len - 1;  /* mirrored index into win */

    while(fwd < len){
        dst[fwd] = src0[fwd]*win[rev] + src1[fwd]*win[fwd] + add_bias;
        fwd++;
        rev--;
    }
}
|
3938 |
|
|
3939 |
/**
 * Convert one float to a 16-bit sample by operating on its bit pattern.
 *
 * The input is expected to be pre-biased into [384.0, 386.0]; for such
 * values (bit patterns 0x43c00000..0x43c0ffff) the low 16 bits of the
 * pattern are the sample and bits 16..19 are zero. If any of bits 16..19
 * is set the value is treated as out of range and replaced by 0 or -1,
 * selected by the sign of (0x43c0ffff - bits).
 *
 * The callers store the result into an int16_t, so only the low 16 bits of
 * the returned int are meaningful.
 *
 * @param src pointer to the float to convert
 * @return    (bits or clip value) - 0x8000
 */
static av_always_inline int float_to_int16_one(const float *src){
    /* Fetch the bit pattern through a union: dereferencing a float through
     * an int32_t pointer breaks the strict-aliasing rules. */
    union { float f; int32_t i; } pun;
    int_fast32_t tmp;

    pun.f = *src;
    tmp   = pun.i;
    if(tmp & 0xf0000){
        /* relies on >> of a negative value being an arithmetic shift */
        tmp = (0x43c0ffff - tmp)>>31;
        // is this faster on some gcc/cpu combinations?
//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
//      else                 tmp = 0;
    }
    return tmp - 0x8000;
}
|
3949 |
|
|
3933 | 3950 |
/**
 * Convert len floats to 16-bit samples (plain C version).
 *
 * Each element is converted with float_to_int16_one() and the result is
 * stored, truncated to int16_t, at the same index in dst.
 *
 * @param dst output, len samples
 * @param src input, len floats
 * @param len number of samples to convert
 */
void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
    long i; /* same type as len, so the counter cannot overflow before reaching it */
    for(i=0; i<len; i++)
        dst[i] = float_to_int16_one(src+i);
}
|
3955 |
|
|
3956 |
/**
 * Convert planar float audio to interleaved 16-bit samples (plain C version).
 *
 * src is read as `channels` consecutive planes of len floats each
 * (channel c starts at src + c*len). Sample i of channel c is written to
 * dst[i*channels + c]. Each float is converted with float_to_int16_one().
 *
 * @param dst      output, len*channels samples, interleaved
 * @param src      input, channels planes of len floats
 * @param len      number of samples per channel
 * @param channels number of channels; 2 has a dedicated loop, every other
 *                 count goes through the generic loop
 */
void ff_float_to_int16_interleave_c(int16_t *dst, const float *src, long len, int channels){
    /* i and j index up to len and len*channels elements, so they use the
     * type of len rather than int */
    long i,j;
    int c;
    if(channels==2){
        for(i=0; i<len; i++){
            dst[2*i]   = float_to_int16_one(src+i);
            dst[2*i+1] = float_to_int16_one(src+i+len);
        }
    }else{
        for(c=0; c<channels; c++, src+=len)
            for(i=0, j=c; i<len; i++, j+=channels)
                dst[j] = float_to_int16_one(src+i);
    }
}
3946 | 3969 |
|
... | ... | |
4450 | 4473 |
c->vector_fmul = vector_fmul_c; |
4451 | 4474 |
c->vector_fmul_reverse = vector_fmul_reverse_c; |
4452 | 4475 |
c->vector_fmul_add_add = ff_vector_fmul_add_add_c; |
4476 |
c->vector_fmul_window = ff_vector_fmul_window_c; |
|
4453 | 4477 |
c->float_to_int16 = ff_float_to_int16_c; |
4478 |
c->float_to_int16_interleave = ff_float_to_int16_interleave_c; |
|
4454 | 4479 |
c->add_int16 = add_int16_c; |
4455 | 4480 |
c->sub_int16 = sub_int16_c; |
4456 | 4481 |
c->scalarproduct_int16 = scalarproduct_int16_c; |
libavcodec/dsputil.h | ||
---|---|---|
63 | 63 |
|
64 | 64 |
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, |
65 | 65 |
const float *src2, int src3, int blocksize, int step); |
66 |
void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, |
|
67 |
const float *win, float add_bias, int len); |
|
66 | 68 |
void ff_float_to_int16_c(int16_t *dst, const float *src, long len); |
67 | 69 |
|
68 | 70 |
/* encoding scans */ |
... | ... | |
364 | 366 |
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); |
365 | 367 |
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */ |
366 | 368 |
void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); |
369 |
/* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
|
370 |
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len); |
|
367 | 371 |
|
368 | 372 |
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] |
369 | 373 |
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ |
370 | 374 |
void (*float_to_int16)(int16_t *dst, const float *src, long len); |
375 |
void (*float_to_int16_interleave)(int16_t *dst, const float *src, long len, int channels); |
|
371 | 376 |
|
372 | 377 |
/* (I)DCT */ |
373 | 378 |
void (*fdct)(DCTELEM *block/* align 16*/); |
libavcodec/i386/dsputil_mmx.c | ||
---|---|---|
2022 | 2022 |
ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); |
2023 | 2023 |
} |
2024 | 2024 |
|
2025 |
/**
 * SSE version of ff_vector_fmul_window_c():
 *     dst[k] = src0[k]*win[len-1-k] + src1[k]*win[k]
 *
 * The asm path is used only when add_bias == 0 and HAVE_6REGS is defined
 * (the asm statement needs six general-purpose register operands);
 * otherwise the C version is called.
 *
 * All four base pointers are advanced to the middle of their arrays. Each
 * iteration handles four floats below the middle (byte offset i, counting
 * up to 0) and four floats above it (byte offset j, counting down), using
 * aligned loads and stores (movaps).
 */
static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
                                   const float *win, float add_bias, int len){
#ifdef HAVE_6REGS
    if(add_bias == 0){
        x86_reg i = -len*2;     /* byte offset of the lower group, relative to the middle */
        x86_reg j = len*2-16;   /* byte offset of the upper group, relative to the middle */
        asm volatile(
            "1: \n"
            "movaps (%5,%0), %%xmm0 \n"         /* xmm0 = win[lower] */
            "movaps (%5,%1), %%xmm1 \n"         /* xmm1 = win[upper] */
            "movaps %%xmm0, %%xmm2 \n"
            "movaps %%xmm1, %%xmm3 \n"
            "shufps $0x1b, %%xmm2, %%xmm2 \n"   /* xmm2 = win[lower], element order reversed */
            "shufps $0x1b, %%xmm3, %%xmm3 \n"   /* xmm3 = win[upper], element order reversed */
            "mulps (%4,%0), %%xmm0 \n"          /* src1[lower] * win[lower] */
            "mulps (%4,%1), %%xmm1 \n"          /* src1[upper] * win[upper] */
            "mulps (%3,%0), %%xmm3 \n"          /* src0[lower] * mirrored window */
            "mulps (%3,%1), %%xmm2 \n"          /* src0[upper] * mirrored window */
            "addps %%xmm3, %%xmm0 \n"
            "addps %%xmm2, %%xmm1 \n"
            "movaps %%xmm0, (%2,%0) \n"
            "movaps %%xmm1, (%2,%1) \n"
            "sub $16, %1 \n"
            "add $16, %0 \n"
            "jl 1b \n"
            :"+r"(i), "+r"(j)
            :"r"(dst+len/2), "r"(src0+len/2), "r"(src1+len/2), "r"(win+len/2)
            /* the asm stores to dst and loads from src0/src1/win, none of
             * which are listed as memory operands */
            :"memory"
        );
    }else
#endif
        ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
}
|
2057 |
|
|
2025 | 2058 |
static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ |
2026 | 2059 |
// not bit-exact: pf2id uses different rounding than C and SSE |
2027 | 2060 |
asm volatile( |
... | ... | |
2083 | 2116 |
); |
2084 | 2117 |
} |
2085 | 2118 |
|
2119 |
/**
 * Generate float_to_int16_interleave_<cpu>() and its helper
 * float_to_int16_interleave2_<cpu>().
 *
 * float_to_int16_interleave_<cpu>(dst, src, len, channels):
 *  - channels == 1: forwards to float_to_int16_<cpu>().
 *  - channels  > 2: converts all len*channels floats into a temporary
 *    buffer with float_to_int16_<cpu>(), then interleaves in C.
 *    The temporary is a variable-length array on the stack, so its size
 *    grows with len*channels.
 *  - otherwise (the stereo case): runs the asm loop given in `body`.
 *
 * Before `body` runs, the asm prologue has set up:
 *   %0 = -len*4         byte offset, counts up towards 0
 *   %1 = dst  + len*4 bytes   (end of the interleaved output)
 *   %2 = src  + len*4 bytes   (end of the first channel)
 *   %3 = %2   + len*4 bytes   (end of the second channel)
 * so `body` addresses everything as (base,%0).
 *
 * @param cpu  suffix used for the generated names and for float_to_int16_<cpu>
 * @param body asm string literal(s) implementing the stereo loop
 */
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
static av_noinline void float_to_int16_interleave2_##cpu(int16_t *dst, const float *src, long len, int channels){\
    DECLARE_ALIGNED_16(int16_t, tmp[len*channels]);\
    int i,j,c;\
    float_to_int16_##cpu(tmp, src, len*channels);\
    for(c=0; c<channels; c++){\
        int16_t *ptmp = tmp+c*len;\
        for(i=0, j=c; i<len; i++, j+=channels)\
            dst[j] = ptmp[i];\
    }\
}\
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float *src, long len, int channels){\
    if(channels==1)\
        float_to_int16_##cpu(dst, src, len);\
    else if(channels>2)\
        float_to_int16_interleave2_##cpu(dst, src, len, channels);\
    else{\
        float *src1;\
        asm volatile(\
            "shl $2, %0 \n"\
            "add %0, %1 \n"\
            "add %0, %2 \n"\
            "lea (%2,%0), %3 \n"\
            "neg %0 \n"\
            body\
            :"+r"(len), "+r"(dst), "+r"(src), "=r"(src1)\
            :\
            /* body loads from src and stores to dst through registers only */\
            :"memory"\
        );\
    }\
}
|
2150 |
|
|
2151 |
FLOAT_TO_INT16_INTERLEAVE(3dnow, |
|
2152 |
"1: \n" |
|
2153 |
"pf2id (%2,%0), %%mm0 \n" |
|
2154 |
"pf2id 8(%2,%0), %%mm1 \n" |
|
2155 |
"pf2id (%3,%0), %%mm2 \n" |
|
2156 |
"pf2id 8(%3,%0), %%mm3 \n" |
|
2157 |
"packssdw %%mm1, %%mm0 \n" |
|
2158 |
"packssdw %%mm3, %%mm2 \n" |
|
2159 |
"movq %%mm0, %%mm1 \n" |
|
2160 |
"punpcklwd %%mm2, %%mm0 \n" |
|
2161 |
"punpckhwd %%mm2, %%mm1 \n" |
|
2162 |
"movq %%mm0, (%1,%0)\n" |
|
2163 |
"movq %%mm0, 8(%1,%0)\n" |
|
2164 |
"add $16, %0 \n" |
|
2165 |
"js 1b \n" |
|
2166 |
"femms \n" |
|
2167 |
) |
|
2168 |
|
|
2169 |
FLOAT_TO_INT16_INTERLEAVE(sse, |
|
2170 |
"1: \n" |
|
2171 |
"cvtps2pi (%2,%0), %%mm0 \n" |
|
2172 |
"cvtps2pi 8(%2,%0), %%mm1 \n" |
|
2173 |
"cvtps2pi (%3,%0), %%mm2 \n" |
|
2174 |
"cvtps2pi 8(%3,%0), %%mm3 \n" |
|
2175 |
"packssdw %%mm1, %%mm0 \n" |
|
2176 |
"packssdw %%mm3, %%mm2 \n" |
|
2177 |
"movq %%mm0, %%mm1 \n" |
|
2178 |
"punpcklwd %%mm2, %%mm0 \n" |
|
2179 |
"punpckhwd %%mm2, %%mm1 \n" |
|
2180 |
"movq %%mm0, (%1,%0)\n" |
|
2181 |
"movq %%mm0, 8(%1,%0)\n" |
|
2182 |
"add $16, %0 \n" |
|
2183 |
"js 1b \n" |
|
2184 |
"emms \n" |
|
2185 |
) |
|
2186 |
|
|
2187 |
FLOAT_TO_INT16_INTERLEAVE(sse2, |
|
2188 |
"1: \n" |
|
2189 |
"cvtps2dq (%2,%0), %%xmm0 \n" |
|
2190 |
"cvtps2dq (%3,%0), %%xmm1 \n" |
|
2191 |
"packssdw %%xmm1, %%xmm0 \n" |
|
2192 |
"movhlps %%xmm0, %%xmm1 \n" |
|
2193 |
"punpcklwd %%xmm1, %%xmm0 \n" |
|
2194 |
"movdqa %%xmm0, (%1,%0) \n" |
|
2195 |
"add $16, %0 \n" |
|
2196 |
"js 1b \n" |
|
2197 |
) |
|
2198 |
|
|
2199 |
|
|
2086 | 2200 |
extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width); |
2087 | 2201 |
extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width); |
2088 | 2202 |
extern void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); |
... | ... | |
2519 | 2633 |
if(mm_flags & MM_3DNOW){ |
2520 | 2634 |
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
2521 | 2635 |
c->vector_fmul = vector_fmul_3dnow; |
2522 |
if(!(avctx->flags & CODEC_FLAG_BITEXACT)) |
|
2636 |
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
|
2523 | 2637 |
c->float_to_int16 = float_to_int16_3dnow; |
2638 |
c->float_to_int16_interleave = float_to_int16_interleave_3dnow; |
|
2639 |
} |
|
2524 | 2640 |
} |
2525 | 2641 |
if(mm_flags & MM_3DNOWEXT) |
2526 | 2642 |
c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; |
... | ... | |
2528 | 2644 |
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
2529 | 2645 |
c->vector_fmul = vector_fmul_sse; |
2530 | 2646 |
c->float_to_int16 = float_to_int16_sse; |
2647 |
c->float_to_int16_interleave = float_to_int16_interleave_sse; |
|
2531 | 2648 |
c->vector_fmul_reverse = vector_fmul_reverse_sse; |
2532 | 2649 |
c->vector_fmul_add_add = vector_fmul_add_add_sse; |
2650 |
c->vector_fmul_window = vector_fmul_window_sse; |
|
2533 | 2651 |
} |
2534 | 2652 |
if(mm_flags & MM_SSE2){ |
2535 | 2653 |
c->float_to_int16 = float_to_int16_sse2; |
2654 |
c->float_to_int16_interleave = float_to_int16_interleave_sse2; |
|
2536 | 2655 |
} |
2537 | 2656 |
if(mm_flags & MM_3DNOW) |
2538 | 2657 |
c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse |
libavcodec/vorbis_dec.c | ||
---|---|---|
149 | 149 |
uint_fast8_t mode_count; |
150 | 150 |
vorbis_mode *modes; |
151 | 151 |
uint_fast8_t mode_number; // mode number for the current packet |
152 |
uint_fast8_t previous_window; |
|
152 | 153 |
float *channel_residues; |
153 | 154 |
float *channel_floors; |
154 | 155 |
float *saved; |
155 |
uint_fast16_t saved_start; |
|
156 | 156 |
float *ret; |
157 | 157 |
float *buf; |
158 | 158 |
float *buf_tmp; |
... | ... | |
903 | 903 |
vc->ret = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); |
904 | 904 |
vc->buf = av_malloc( vc->blocksize[1] * sizeof(float)); |
905 | 905 |
vc->buf_tmp = av_malloc( vc->blocksize[1] * sizeof(float)); |
906 |
vc->saved_start=0;
|
|
906 |
vc->previous_window=0;
|
|
907 | 907 |
|
908 | 908 |
ff_mdct_init(&vc->mdct[0], bl0, 1); |
909 | 909 |
ff_mdct_init(&vc->mdct[1], bl1, 1); |
... | ... | |
1394 | 1394 |
} |
1395 | 1395 |
} |
1396 | 1396 |
|
1397 |
/**
 * Copy len floats from src to dst, applying one of two normalizations.
 *
 * - exp_bias != 0: exp_bias is added to the raw 32-bit pattern of every
 *   float (unsigned, wrapping); add_bias is ignored. Assuming IEEE-754
 *   single precision, adding n<<23 bumps the exponent field by n, i.e.
 *   scales a normal value by 2^n.
 * - exp_bias == 0: add_bias is added to every float.
 *
 * @param dst      output, len floats
 * @param src      input, len floats
 * @param len      number of floats to process
 * @param exp_bias integer added to the bit pattern, or 0 to use add_bias
 * @param add_bias float added to each sample when exp_bias is 0
 */
static void copy_normalize(float *dst, float *src, int len, int exp_bias, float add_bias)
{
    int i;
    if(exp_bias) {
        for(i=0; i<len; i++) {
            /* Pun through a union: accessing the float arrays through
             * uint32_t pointers breaks the strict-aliasing rules. */
            union { float f; uint32_t u; } pun;
            pun.f  = src[i];
            pun.u += exp_bias; // dst[i]=src[i]*(1<<bias)
            dst[i] = pun.f;
        }
    } else {
        for(i=0; i<len; i++)
            dst[i] = src[i] + add_bias;
    }
}
|
1408 |
|
|
1397 | 1409 |
// Decode the audio packet using the functions above |
1398 | 1410 |
|
1399 | 1411 |
static int vorbis_parse_audio_packet(vorbis_context *vc) { |
1400 | 1412 |
GetBitContext *gb=&vc->gb; |
1401 | 1413 |
|
1402 |
uint_fast8_t previous_window=0,next_window=0;
|
|
1414 |
uint_fast8_t previous_window=vc->previous_window;
|
|
1403 | 1415 |
uint_fast8_t mode_number; |
1416 |
uint_fast8_t blockflag; |
|
1404 | 1417 |
uint_fast16_t blocksize; |
1405 | 1418 |
int_fast32_t i,j; |
1406 | 1419 |
uint_fast8_t no_residue[vc->audio_channels]; |
... | ... | |
1411 | 1424 |
uint_fast8_t res_chan[vc->audio_channels]; |
1412 | 1425 |
uint_fast8_t res_num=0; |
1413 | 1426 |
int_fast16_t retlen=0; |
1414 |
uint_fast16_t saved_start=0; |
|
1415 | 1427 |
float fadd_bias = vc->add_bias; |
1416 | 1428 |
|
1417 | 1429 |
if (get_bits1(gb)) { |
... | ... | |
1429 | 1441 |
|
1430 | 1442 |
AV_DEBUG(" Mode number: %d , mapping: %d , blocktype %d \n", mode_number, vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag); |
1431 | 1443 |
|
1432 |
if (vc->modes[mode_number].blockflag) { |
|
1433 |
previous_window=get_bits1(gb); |
|
1434 |
next_window=get_bits1(gb); |
|
1444 |
blockflag=vc->modes[mode_number].blockflag; |
|
1445 |
blocksize=vc->blocksize[blockflag]; |
|
1446 |
if (blockflag) { |
|
1447 |
skip_bits(gb, 2); // previous_window, next_window |
|
1435 | 1448 |
} |
1436 | 1449 |
|
1437 |
blocksize=vc->blocksize[vc->modes[mode_number].blockflag]; |
|
1438 | 1450 |
memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ? |
1439 | 1451 |
memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ? |
1440 | 1452 |
|
... | ... | |
1504 | 1516 |
|
1505 | 1517 |
// MDCT, overlap/add, save data for next overlapping FPMATH |
1506 | 1518 |
|
1519 |
retlen = (blocksize + vc->blocksize[previous_window])/4; |
|
1507 | 1520 |
for(j=0;j<vc->audio_channels;++j) { |
1508 |
uint_fast8_t step=vc->audio_channels; |
|
1509 |
uint_fast16_t k; |
|
1510 |
float *saved=vc->saved+j*vc->blocksize[1]/2; |
|
1511 |
float *ret=vc->ret; |
|
1512 |
const float *lwin=vc->win[1]; |
|
1513 |
const float *swin=vc->win[0]; |
|
1521 |
uint_fast16_t bs0=vc->blocksize[0]; |
|
1522 |
uint_fast16_t bs1=vc->blocksize[1]; |
|
1523 |
float *saved=vc->saved+j*bs1/2; |
|
1524 |
float *ret=vc->ret+j*retlen; |
|
1514 | 1525 |
float *buf=vc->buf; |
1515 |
float *buf_tmp=vc->buf_tmp; |
|
1516 |
|
|
1517 |
ch_floor_ptr=vc->channel_floors+j*blocksize/2; |
|
1518 |
|
|
1519 |
saved_start=vc->saved_start; |
|
1526 |
const float *win=vc->win[blockflag&previous_window]; |
|
1520 | 1527 |
|
1521 |
vc->mdct[0].fft.imdct_calc(&vc->mdct[vc->modes[mode_number].blockflag], buf, ch_floor_ptr, buf_tmp);
|
|
1528 |
vc->mdct[0].fft.imdct_calc(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp);
|
|
1522 | 1529 |
|
1523 |
//FIXME process channels together, to allow faster simd vector_fmul_add_add? |
|
1524 |
if (vc->modes[mode_number].blockflag) { |
|
1525 |
// -- overlap/add |
|
1526 |
if (previous_window) { |
|
1527 |
vc->dsp.vector_fmul_add_add(ret+j, buf, lwin, saved, vc->add_bias, vc->blocksize[1]/2, step); |
|
1528 |
retlen=vc->blocksize[1]/2; |
|
1529 |
} else { |
|
1530 |
int len = (vc->blocksize[1]-vc->blocksize[0])/4; |
|
1531 |
buf += len; |
|
1532 |
vc->dsp.vector_fmul_add_add(ret+j, buf, swin, saved, vc->add_bias, vc->blocksize[0]/2, step); |
|
1533 |
k = vc->blocksize[0]/2*step + j; |
|
1534 |
buf += vc->blocksize[0]/2; |
|
1535 |
if(vc->exp_bias){ |
|
1536 |
for(i=0; i<len; i++, k+=step) |
|
1537 |
((uint32_t*)ret)[k] = ((uint32_t*)buf)[i] + vc->exp_bias; // ret[k]=buf[i]*(1<<bias) |
|
1538 |
} else { |
|
1539 |
for(i=0; i<len; i++, k+=step) |
|
1540 |
ret[k] = buf[i] + fadd_bias; |
|
1541 |
} |
|
1542 |
buf=vc->buf; |
|
1543 |
retlen=vc->blocksize[0]/2+len; |
|
1544 |
} |
|
1545 |
// -- save |
|
1546 |
if (next_window) { |
|
1547 |
buf += vc->blocksize[1]/2; |
|
1548 |
vc->dsp.vector_fmul_reverse(saved, buf, lwin, vc->blocksize[1]/2); |
|
1549 |
saved_start=0; |
|
1550 |
} else { |
|
1551 |
saved_start=(vc->blocksize[1]-vc->blocksize[0])/4; |
|
1552 |
buf += vc->blocksize[1]/2; |
|
1553 |
for(i=0; i<saved_start; i++) |
|
1554 |
((uint32_t*)saved)[i] = ((uint32_t*)buf)[i] + vc->exp_bias; |
|
1555 |
vc->dsp.vector_fmul_reverse(saved+saved_start, buf+saved_start, swin, vc->blocksize[0]/2); |
|
1556 |
} |
|
1530 |
if(blockflag == previous_window) { |
|
1531 |
vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/2); |
|
1532 |
} else if(blockflag > previous_window) { |
|
1533 |
vc->dsp.vector_fmul_window(ret, saved, buf+(bs1-bs0)/4, win, fadd_bias, bs0/2); |
|
1534 |
copy_normalize(ret+bs0/2, buf+(bs1+bs0)/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias); |
|
1557 | 1535 |
} else { |
1558 |
// --overlap/add |
|
1559 |
if(vc->add_bias) { |
|
1560 |
for(k=j, i=0;i<saved_start;++i, k+=step) |
|
1561 |
ret[k] = saved[i] + fadd_bias; |
|
1562 |
} else { |
|
1563 |
for(k=j, i=0;i<saved_start;++i, k+=step) |
|
1564 |
ret[k] = saved[i]; |
|
1565 |
} |
|
1566 |
vc->dsp.vector_fmul_add_add(ret+k, buf, swin, saved+saved_start, vc->add_bias, vc->blocksize[0]/2, step); |
|
1567 |
retlen=saved_start+vc->blocksize[0]/2; |
|
1568 |
// -- save |
|
1569 |
buf += vc->blocksize[0]/2; |
|
1570 |
vc->dsp.vector_fmul_reverse(saved, buf, swin, vc->blocksize[0]/2); |
|
1571 |
saved_start=0; |
|
1536 |
copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias); |
|
1537 |
vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/2); |
|
1572 | 1538 |
} |
1539 |
memcpy(saved, buf+blocksize/2, blocksize/2*sizeof(float)); |
|
1573 | 1540 |
} |
1574 |
vc->saved_start=saved_start; |
|
1575 | 1541 |
|
1576 |
return retlen*vc->audio_channels; |
|
1542 |
vc->previous_window = blockflag; |
|
1543 |
return retlen; |
|
1577 | 1544 |
} |
1578 | 1545 |
|
1579 | 1546 |
// Return the decoded audio packet through the standard api |
... | ... | |
1610 | 1577 |
|
1611 | 1578 |
AV_DEBUG("parsed %d bytes %d bits, returned %d samples (*ch*bits) \n", get_bits_count(gb)/8, get_bits_count(gb)%8, len); |
1612 | 1579 |
|
1613 |
vc->dsp.float_to_int16(data, vc->ret, len);
|
|
1614 |
*data_size=len*2; |
|
1580 |
vc->dsp.float_to_int16_interleave(data, vc->ret, len, vc->audio_channels);
|
|
1581 |
*data_size=len*2*vc->audio_channels;
|
|
1615 | 1582 |
|
1616 | 1583 |
return buf_size ; |
1617 | 1584 |
} |
libavutil/x86_cpu.h | ||
---|---|---|
68 | 68 |
# define HAVE_7REGS 1 |
69 | 69 |
#endif |
70 | 70 |
|
71 |
#if defined(ARCH_X86_64) || (defined(ARCH_X86_32) && (defined(HAVE_EBX_AVAILABLE) || defined(HAVE_EBP_AVAILABLE))) |
|
72 |
# define HAVE_6REGS 1 |
|
73 |
#endif |
|
74 |
|
|
71 | 75 |
#if defined(ARCH_X86_64) && defined(PIC) |
72 | 76 |
# define BROKEN_RELOCATIONS 1 |
73 | 77 |
#endif |
Also available in: Unified diff