Revision f1efbca5
libavcodec/ac3dsp.c | ||
---|---|---|
50 | 50 |
return v; |
51 | 51 |
} |
52 | 52 |
|
53 |
/**
 * Left-shift each value in an array of int16_t by a specified amount.
 *
 * Every element is shifted as an unsigned 16-bit quantity: bits moved past
 * bit 15 are discarded and zeros are shifted in from the right.
 * This C version works on one element at a time, so it has no alignment
 * requirement and accepts any length, including 0.
 *
 * @param src   array of values, shifted in place
 * @param len   number of values in the array
 * @param shift left shift amount, range [0,15]
 */
static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    for (i = 0; i < len; i++) {
        /* Shift in unsigned arithmetic: left-shifting a negative signed
         * value is undefined, and accessing the int16_t buffer through a
         * uint32_t pointer would break the aliasing rules. */
        unsigned int widened = (uint16_t)src[i];

        src[i] = (int16_t)(uint16_t)(widened << shift);
    }
}
|
71 |
|
|
72 |
/**
 * Right-shift each value in an array of int32_t by a specified amount.
 *
 * The shift is applied in place with the signed >> operator.
 *
 * @param src   array of values, shifted in place
 * @param len   number of values in the array
 * @param shift right shift amount, range [0,31]
 */
static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
                               unsigned int shift)
{
    unsigned int i;

    /* A counted loop is safe for len == 0 and for lengths that are not a
     * multiple of 8; counting an unsigned length down in steps of 8 would
     * wrap around and run far past the end of the array in those cases. */
    for (i = 0; i < len; i++)
        src[i] >>= shift;
}
|
87 |
|
|
53 | 88 |
av_cold void ff_ac3dsp_init(AC3DSPContext *c) |
54 | 89 |
{ |
55 | 90 |
c->ac3_exponent_min = ac3_exponent_min_c; |
56 | 91 |
c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c; |
92 |
c->ac3_lshift_int16 = ac3_lshift_int16_c; |
|
93 |
c->ac3_rshift_int32 = ac3_rshift_int32_c; |
|
57 | 94 |
|
58 | 95 |
if (HAVE_MMX) |
59 | 96 |
ff_ac3dsp_init_x86(c); |
libavcodec/ac3dsp.h | ||
---|---|---|
46 | 46 |
* @return a value with the same MSB as max(abs(src[])) |
47 | 47 |
*/ |
48 | 48 |
int (*ac3_max_msb_abs_int16)(const int16_t *src, int len); |
49 |
|
|
50 |
/** |
|
51 |
* Left-shift each value in an array of int16_t by a specified amount. |
|
52 |
* @param src input array |
|
53 |
* constraints: align 16 |
|
54 |
* @param len number of values in the array |
|
55 |
* constraints: multiple of 32 greater than 0 |
|
56 |
* @param shift left shift amount |
|
57 |
* constraints: range [0,15] |
|
58 |
*/ |
|
59 |
void (*ac3_lshift_int16)(int16_t *src, unsigned int len, unsigned int shift); |
|
60 |
|
|
61 |
/** |
|
62 |
* Right-shift each value in an array of int32_t by a specified amount. |
|
63 |
* @param src input array |
|
64 |
* constraints: align 16 |
|
65 |
* @param len number of values in the array |
|
66 |
* constraints: multiple of 16 greater than 0 |
|
67 |
* @param shift right shift amount |
|
68 |
* constraints: range [0,31] |
|
69 |
*/ |
|
70 |
void (*ac3_rshift_int32)(int32_t *src, unsigned int len, unsigned int shift); |
|
49 | 71 |
} AC3DSPContext; |
50 | 72 |
|
51 | 73 |
void ff_ac3dsp_init (AC3DSPContext *c); |
libavcodec/ac3enc_fixed.c | ||
---|---|---|
278 | 278 |
|
279 | 279 |
|
280 | 280 |
/** |
281 |
* Left-shift each value in an array by a specified amount. |
|
282 |
* @param tab input array |
|
283 |
* @param n number of values in the array |
|
284 |
* @param lshift left shift amount |
|
285 |
*/ |
|
286 |
static void lshift_tab(int16_t *tab, int n, unsigned int lshift)
{
    int i;

    /* a zero shift leaves the table untouched */
    if (lshift == 0)
        return;

    for (i = 0; i < n; i++) {
        /* Shift the 16-bit pattern in unsigned arithmetic: left-shifting a
         * negative signed value is undefined behaviour. The low 16 bits of
         * the result are stored back, as with a plain <<= on int16_t. */
        unsigned int widened = (uint16_t)tab[i];

        tab[i] = (int16_t)(uint16_t)(widened << lshift);
    }
}
|
295 |
|
|
296 |
|
|
297 |
/** |
|
298 |
* Right-shift each value in an array of int32_t by a specified amount. |
|
299 |
* @param src input array |
|
300 |
* @param len number of values in the array |
|
301 |
* @param shift right shift amount |
|
302 |
*/ |
|
303 |
static void ac3_rshift_int32_c(int32_t *src, unsigned int len, unsigned int shift)
{
    /* the index has the same unsigned type as len, so the comparison needs
     * no signed/unsigned conversion and the index cannot overflow */
    unsigned int i;

    /* shifting by zero changes nothing, so skip the pass over the array */
    if (shift == 0)
        return;

    for (i = 0; i < len; i++)
        src[i] >>= shift;
}
|
312 |
|
|
313 |
|
|
314 |
/** |
|
315 | 281 |
* Normalize the input samples to use the maximum available precision. |
316 | 282 |
* This assumes signed 16-bit input samples. |
317 | 283 |
* |
... | ... | |
320 | 286 |
static int normalize_samples(AC3EncodeContext *s) |
321 | 287 |
{ |
322 | 288 |
int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE); |
323 |
lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v); |
|
289 |
if (v > 0) |
|
290 |
s->ac3dsp.ac3_lshift_int16(s->windowed_samples, AC3_WINDOW_SIZE, v); |
|
324 | 291 |
/* +6 to right-shift from 31-bit to 25-bit */ |
325 | 292 |
return v + 6; |
326 | 293 |
} |
... | ... | |
336 | 303 |
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) { |
337 | 304 |
AC3Block *block = &s->blocks[blk]; |
338 | 305 |
for (ch = 0; ch < s->channels; ch++) { |
339 |
ac3_rshift_int32_c(block->mdct_coef[ch], AC3_MAX_COEFS,
|
|
340 |
block->coeff_shift[ch]); |
|
306 |
s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
|
|
307 |
block->coeff_shift[ch]);
|
|
341 | 308 |
} |
342 | 309 |
} |
343 | 310 |
} |
libavcodec/x86/ac3dsp.asm | ||
---|---|---|
133 | 133 |
AC3_MAX_MSB_ABS_INT16 sse2, min_max |
134 | 134 |
%define ABS2 ABS2_SSSE3 |
135 | 135 |
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs |
136 |
|
|
137 |
;----------------------------------------------------------------------------- |
|
138 |
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32() |
|
139 |
;----------------------------------------------------------------------------- |
|
140 |
|
|
141 |
; AC3_SHIFT: emits ac3_<l/r>shift_int<16/32>_<instruction set>(src, len, shift).
;   %1 = direction letter used in the function name (l or r)
;   %2 = element width in bits (16 or 32)
;   %3 = packed shift instruction applied to every vector
;   %4 = instruction-set suffix used in the function name
; The array at src is shifted in place, four vector registers per iteration.
%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
    movd            m0, shiftd              ; m0 = shift count operand for %3
.loop:
    ; load four consecutive vectors (mmsize*4 bytes) from src
    mova            m1, [srcq+mmsize*0]
    mova            m2, [srcq+mmsize*1]
    mova            m3, [srcq+mmsize*2]
    mova            m4, [srcq+mmsize*3]
    ; shift every element of each vector by the count held in m0
    %3              m1, m0
    %3              m2, m0
    %3              m3, m0
    %3              m4, m0
    ; write the shifted vectors back over the input
    mova [srcq+mmsize*0], m1
    mova [srcq+mmsize*1], m2
    mova [srcq+mmsize*2], m3
    mova [srcq+mmsize*3], m4
    add           srcq, mmsize*4            ; advance past the bytes just processed
    sub           lend, mmsize*32/%2        ; elements consumed: mmsize*4 bytes * 8 / %2 bits
    ja .loop                                ; repeat while the count stayed above zero (unsigned)
.end:
    REP_RET
%endmacro
|
163 |
|
|
164 |
;----------------------------------------------------------------------------- |
|
165 |
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift) |
|
166 |
;----------------------------------------------------------------------------- |
|
167 |
|
|
168 |
INIT_MMX |
|
169 |
AC3_SHIFT l, 16, psllw, mmx |
|
170 |
INIT_XMM |
|
171 |
AC3_SHIFT l, 16, psllw, sse2 |
|
172 |
|
|
173 |
;----------------------------------------------------------------------------- |
|
174 |
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift) |
|
175 |
;----------------------------------------------------------------------------- |
|
176 |
|
|
177 |
INIT_MMX |
|
178 |
AC3_SHIFT r, 32, psrad, mmx |
|
179 |
INIT_XMM |
|
180 |
AC3_SHIFT r, 32, psrad, sse2 |
libavcodec/x86/ac3dsp_mmx.c | ||
---|---|---|
32 | 32 |
extern int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len); |
33 | 33 |
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len); |
34 | 34 |
|
35 |
extern void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift); |
|
36 |
extern void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift); |
|
37 |
|
|
38 |
extern void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift); |
|
39 |
extern void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift); |
|
40 |
|
|
35 | 41 |
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c) |
36 | 42 |
{ |
37 | 43 |
int mm_flags = av_get_cpu_flags(); |
... | ... | |
40 | 46 |
if (mm_flags & AV_CPU_FLAG_MMX) { |
41 | 47 |
c->ac3_exponent_min = ff_ac3_exponent_min_mmx; |
42 | 48 |
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx; |
49 |
c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx; |
|
50 |
c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx; |
|
43 | 51 |
} |
44 | 52 |
if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) { |
45 | 53 |
c->ac3_exponent_min = ff_ac3_exponent_min_mmxext; |
... | ... | |
48 | 56 |
if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { |
49 | 57 |
c->ac3_exponent_min = ff_ac3_exponent_min_sse2; |
50 | 58 |
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; |
59 |
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { |
|
60 |
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; |
|
61 |
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; |
|
62 |
} |
|
51 | 63 |
} |
52 | 64 |
if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) { |
53 | 65 |
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3; |
Also available in: Unified diff