Revision e6e98234
libavcodec/ac3enc.c | ||
---|---|---|
167 | 167 |
static void mdct512(AC3MDCTContext *mdct, CoefType *out, SampleType *in); |
168 | 168 |
|
169 | 169 |
static void apply_window(DSPContext *dsp, SampleType *output, const SampleType *input, |
170 |
const SampleType *window, int n);
|
|
170 |
const SampleType *window, unsigned int len);
|
|
171 | 171 |
|
172 | 172 |
static int normalize_samples(AC3EncodeContext *s); |
173 | 173 |
|
libavcodec/ac3enc_fixed.c | ||
---|---|---|
252 | 252 |
* Apply KBD window to input samples prior to MDCT. |
253 | 253 |
*/ |
254 | 254 |
static void apply_window(DSPContext *dsp, int16_t *output, const int16_t *input, |
255 |
const int16_t *window, int n)
|
|
255 |
const int16_t *window, unsigned int len)
|
|
256 | 256 |
{ |
257 |
int i; |
|
258 |
int n2 = n >> 1; |
|
259 |
|
|
260 |
for (i = 0; i < n2; i++) { |
|
261 |
output[i] = MUL16(input[i], window[i]) >> 15; |
|
262 |
output[n-i-1] = MUL16(input[n-i-1], window[i]) >> 15; |
|
263 |
} |
|
257 |
dsp->apply_window_int16(output, input, window, len); |
|
264 | 258 |
} |
265 | 259 |
|
266 | 260 |
|
libavcodec/ac3enc_float.c | ||
---|---|---|
83 | 83 |
* Apply KBD window to input samples prior to MDCT. |
84 | 84 |
*/ |
85 | 85 |
static void apply_window(DSPContext *dsp, float *output, const float *input, |
86 |
const float *window, int n)
|
|
86 |
const float *window, unsigned int len)
|
|
87 | 87 |
{ |
88 |
dsp->vector_fmul(output, input, window, n); |
|
88 |
dsp->vector_fmul(output, input, window, len);
|
|
89 | 89 |
} |
90 | 90 |
|
91 | 91 |
|
libavcodec/ac3tab.c | ||
---|---|---|
141 | 141 |
/* AC-3 MDCT window */ |
142 | 142 |
|
143 | 143 |
/* MDCT window */ |
144 |
const int16_t ff_ac3_window[AC3_WINDOW_SIZE/2] = {
|
|
144 |
DECLARE_ALIGNED(16, const int16_t, ff_ac3_window)[AC3_WINDOW_SIZE/2] = {
|
|
145 | 145 |
4, 7, 12, 16, 21, 28, 34, 42, |
146 | 146 |
51, 61, 72, 84, 97, 111, 127, 145, |
147 | 147 |
164, 184, 207, 231, 257, 285, 315, 347, |
libavcodec/dsputil.c | ||
---|---|---|
3890 | 3890 |
return res; |
3891 | 3891 |
} |
3892 | 3892 |
|
3893 |
static void apply_window_int16_c(int16_t *output, const int16_t *input, |
|
3894 |
const int16_t *window, unsigned int len) |
|
3895 |
{ |
|
3896 |
int i; |
|
3897 |
int len2 = len >> 1; |
|
3898 |
|
|
3899 |
for (i = 0; i < len2; i++) { |
|
3900 |
int16_t w = window[i]; |
|
3901 |
output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15; |
|
3902 |
output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15; |
|
3903 |
} |
|
3904 |
} |
|
3905 |
|
|
3893 | 3906 |
#define W0 2048 |
3894 | 3907 |
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */ |
3895 | 3908 |
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */ |
... | ... | |
4364 | 4377 |
c->vector_clipf = vector_clipf_c; |
4365 | 4378 |
c->scalarproduct_int16 = scalarproduct_int16_c; |
4366 | 4379 |
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; |
4380 |
c->apply_window_int16 = apply_window_int16_c; |
|
4367 | 4381 |
c->scalarproduct_float = scalarproduct_float_c; |
4368 | 4382 |
c->butterflies_float = butterflies_float_c; |
4369 | 4383 |
c->vector_fmul_scalar = vector_fmul_scalar_c; |
libavcodec/dsputil.h | ||
---|---|---|
524 | 524 |
*/ |
525 | 525 |
int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, const int16_t *v2, const int16_t *v3, int len, int mul); |
526 | 526 |
|
527 |
/** |
|
528 |
* Apply symmetric window in 16-bit fixed-point. |
|
529 |
* @param output destination array |
|
530 |
* constraints: 16-byte aligned |
|
531 |
* @param input source array |
|
532 |
* constraints: 16-byte aligned |
|
533 |
* @param window window array |
|
534 |
* constraints: 16-byte aligned, at least len/2 elements |
|
535 |
* @param len full window length |
|
536 |
* constraints: multiple of 16 greater than zero |
|
537 |
*/ |
|
538 |
void (*apply_window_int16)(int16_t *output, const int16_t *input, |
|
539 |
const int16_t *window, unsigned int len); |
|
540 |
|
|
527 | 541 |
/* rv30 functions */ |
528 | 542 |
qpel_mc_func put_rv30_tpel_pixels_tab[4][16]; |
529 | 543 |
qpel_mc_func avg_rv30_tpel_pixels_tab[4][16]; |
libavcodec/x86/dsputil_mmx.c | ||
---|---|---|
2388 | 2388 |
int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); |
2389 | 2389 |
int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); |
2390 | 2390 |
int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); |
2391 |
|
|
2392 |
void ff_apply_window_int16_mmxext (int16_t *output, const int16_t *input, |
|
2393 |
const int16_t *window, unsigned int len); |
|
2394 |
void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input, |
|
2395 |
const int16_t *window, unsigned int len); |
|
2396 |
void ff_apply_window_int16_sse2 (int16_t *output, const int16_t *input, |
|
2397 |
const int16_t *window, unsigned int len); |
|
2398 |
void ff_apply_window_int16_sse2_ba (int16_t *output, const int16_t *input, |
|
2399 |
const int16_t *window, unsigned int len); |
|
2400 |
void ff_apply_window_int16_ssse3 (int16_t *output, const int16_t *input, |
|
2401 |
const int16_t *window, unsigned int len); |
|
2402 |
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, |
|
2403 |
const int16_t *window, unsigned int len); |
|
2404 |
|
|
2391 | 2405 |
void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); |
2392 | 2406 |
int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); |
2393 | 2407 |
int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); |
... | ... | |
2749 | 2763 |
#if HAVE_YASM |
2750 | 2764 |
c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; |
2751 | 2765 |
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; |
2766 |
if (avctx->flags & CODEC_FLAG_BITEXACT) { |
|
2767 |
c->apply_window_int16 = ff_apply_window_int16_mmxext_ba; |
|
2768 |
} else { |
|
2769 |
c->apply_window_int16 = ff_apply_window_int16_mmxext; |
|
2770 |
} |
|
2752 | 2771 |
#endif |
2753 | 2772 |
} |
2754 | 2773 |
if(mm_flags & AV_CPU_FLAG_SSE){ |
... | ... | |
2771 | 2790 |
#if HAVE_YASM |
2772 | 2791 |
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; |
2773 | 2792 |
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; |
2793 |
if (avctx->flags & CODEC_FLAG_BITEXACT) { |
|
2794 |
c->apply_window_int16 = ff_apply_window_int16_sse2_ba; |
|
2795 |
} else { |
|
2796 |
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { |
|
2797 |
c->apply_window_int16 = ff_apply_window_int16_sse2; |
|
2798 |
} |
|
2799 |
} |
|
2774 | 2800 |
|
2775 | 2801 |
c->emulated_edge_mc = emulated_edge_mc_sse; |
2776 | 2802 |
c->gmc= gmc_sse; |
2777 | 2803 |
#endif |
2778 | 2804 |
} |
2779 |
if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit |
|
2780 |
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; |
|
2805 |
if (mm_flags & AV_CPU_FLAG_SSSE3) { |
|
2806 |
#if HAVE_YASM |
|
2807 |
if (mm_flags & AV_CPU_FLAG_ATOM) { |
|
2808 |
c->apply_window_int16 = ff_apply_window_int16_ssse3_atom; |
|
2809 |
} else { |
|
2810 |
c->apply_window_int16 = ff_apply_window_int16_ssse3; |
|
2811 |
} |
|
2812 |
if (!(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW))) { // cachesplit |
|
2813 |
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; |
|
2814 |
} |
|
2815 |
#endif |
|
2816 |
} |
|
2781 | 2817 |
} |
2782 | 2818 |
|
2783 | 2819 |
if (CONFIG_ENCODERS) |
libavcodec/x86/dsputil_yasm.asm | ||
---|---|---|
27 | 27 |
pb_7: times 8 db 7 |
28 | 28 |
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 |
29 | 29 |
pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 |
30 |
pb_revwords: db 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 |
|
31 |
pd_16384: times 4 dd 16384 |
|
30 | 32 |
|
31 | 33 |
section .text align=16 |
32 | 34 |
|
... | ... | |
202 | 204 |
RET |
203 | 205 |
|
204 | 206 |
|
207 |
;----------------------------------------------------------------------------- |
|
208 |
; void ff_apply_window_int16(int16_t *output, const int16_t *input, |
|
209 |
; const int16_t *window, unsigned int len) |
|
210 |
;----------------------------------------------------------------------------- |
|
211 |
|
|
212 |
%macro REVERSE_WORDS_MMXEXT 1-2 |
|
213 |
pshufw %1, %1, 0x1B |
|
214 |
%endmacro |
|
215 |
|
|
216 |
%macro REVERSE_WORDS_SSE2 1-2 |
|
217 |
pshuflw %1, %1, 0x1B |
|
218 |
pshufhw %1, %1, 0x1B |
|
219 |
pshufd %1, %1, 0x4E |
|
220 |
%endmacro |
|
221 |
|
|
222 |
%macro REVERSE_WORDS_SSSE3 2 |
|
223 |
pshufb %1, %2 |
|
224 |
%endmacro |
|
225 |
|
|
226 |
; dst = (dst * src) >> 15 |
|
227 |
; pmulhw keeps only the high 16 bits of the product (a shift by 16), dropping bit 15, so we have to lshift by 1 and OR that bit back |
|
228 |
; in from the pmullw result. |
|
229 |
%macro MUL16FIXED_MMXEXT 3 ; dst, src, temp |
|
230 |
mova %3, %1 |
|
231 |
pmulhw %1, %2 |
|
232 |
pmullw %3, %2 |
|
233 |
psrlw %3, 15 |
|
234 |
psllw %1, 1 |
|
235 |
por %1, %3 |
|
236 |
%endmacro |
|
237 |
|
|
238 |
; dst = ((dst * src) + (1<<14)) >> 15 |
|
239 |
%macro MUL16FIXED_SSSE3 3 ; dst, src, unused |
|
240 |
pmulhrsw %1, %2 |
|
241 |
%endmacro |
|
242 |
|
|
243 |
%macro APPLY_WINDOW_INT16 3 ; %1=instruction set, %2=mmxext/sse2 bit exact version, %3=has_ssse3 |
|
244 |
cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2 |
|
245 |
lea offset2q, [offsetq-mmsize] |
|
246 |
%if %2 |
|
247 |
mova m5, [pd_16384] |
|
248 |
%elifidn %1, ssse3 |
|
249 |
mova m5, [pb_revwords] |
|
250 |
ALIGN 16 |
|
251 |
%endif |
|
252 |
.loop: |
|
253 |
%if %2 |
|
254 |
; This version expands 16-bit to 32-bit, multiplies by the window, |
|
255 |
; adds 16384 for rounding, right shifts 15, then repacks back to words to |
|
256 |
; save to the output. The window is reversed for the second half. |
|
257 |
mova m3, [windowq+offset2q] |
|
258 |
mova m4, [ inputq+offset2q] |
|
259 |
pxor m0, m0 |
|
260 |
punpcklwd m0, m3 |
|
261 |
punpcklwd m1, m4 |
|
262 |
pmaddwd m0, m1 |
|
263 |
paddd m0, m5 |
|
264 |
psrad m0, 15 |
|
265 |
pxor m2, m2 |
|
266 |
punpckhwd m2, m3 |
|
267 |
punpckhwd m1, m4 |
|
268 |
pmaddwd m2, m1 |
|
269 |
paddd m2, m5 |
|
270 |
psrad m2, 15 |
|
271 |
packssdw m0, m2 |
|
272 |
mova [outputq+offset2q], m0 |
|
273 |
REVERSE_WORDS m3 |
|
274 |
mova m4, [ inputq+offsetq] |
|
275 |
pxor m0, m0 |
|
276 |
punpcklwd m0, m3 |
|
277 |
punpcklwd m1, m4 |
|
278 |
pmaddwd m0, m1 |
|
279 |
paddd m0, m5 |
|
280 |
psrad m0, 15 |
|
281 |
pxor m2, m2 |
|
282 |
punpckhwd m2, m3 |
|
283 |
punpckhwd m1, m4 |
|
284 |
pmaddwd m2, m1 |
|
285 |
paddd m2, m5 |
|
286 |
psrad m2, 15 |
|
287 |
packssdw m0, m2 |
|
288 |
mova [outputq+offsetq], m0 |
|
289 |
%elif %3 |
|
290 |
; This version does the 16x16->16 multiplication in-place without expanding |
|
291 |
; to 32-bit. The ssse3 version is bit-identical to the C version. |
|
292 |
mova m0, [windowq+offset2q] |
|
293 |
mova m1, [ inputq+offset2q] |
|
294 |
pmulhrsw m1, m0 |
|
295 |
REVERSE_WORDS m0, m5 |
|
296 |
pmulhrsw m0, [ inputq+offsetq ] |
|
297 |
mova [outputq+offset2q], m1 |
|
298 |
mova [outputq+offsetq ], m0 |
|
299 |
%else |
|
300 |
; This version does the 16x16->16 multiplication in-place without expanding |
|
301 |
; to 32-bit. The mmxext and sse2 versions do not use rounding, and |
|
302 |
; therefore are not bit-identical to the C version. |
|
303 |
mova m0, [windowq+offset2q] |
|
304 |
mova m1, [ inputq+offset2q] |
|
305 |
mova m2, [ inputq+offsetq ] |
|
306 |
MUL16FIXED m1, m0, m3 |
|
307 |
REVERSE_WORDS m0 |
|
308 |
MUL16FIXED m2, m0, m3 |
|
309 |
mova [outputq+offset2q], m1 |
|
310 |
mova [outputq+offsetq ], m2 |
|
311 |
%endif |
|
312 |
add offsetd, mmsize |
|
313 |
sub offset2d, mmsize |
|
314 |
jae .loop |
|
315 |
REP_RET |
|
316 |
%endmacro |
|
317 |
|
|
318 |
INIT_MMX |
|
319 |
%define REVERSE_WORDS REVERSE_WORDS_MMXEXT |
|
320 |
%define MUL16FIXED MUL16FIXED_MMXEXT |
|
321 |
APPLY_WINDOW_INT16 mmxext, 0, 0 |
|
322 |
APPLY_WINDOW_INT16 mmxext_ba, 1, 0 |
|
323 |
INIT_XMM |
|
324 |
%define REVERSE_WORDS REVERSE_WORDS_SSE2 |
|
325 |
APPLY_WINDOW_INT16 sse2, 0, 0 |
|
326 |
APPLY_WINDOW_INT16 sse2_ba, 1, 0 |
|
327 |
APPLY_WINDOW_INT16 ssse3_atom, 0, 1 |
|
328 |
%define REVERSE_WORDS REVERSE_WORDS_SSSE3 |
|
329 |
APPLY_WINDOW_INT16 ssse3, 0, 1 |
|
330 |
|
|
205 | 331 |
|
206 | 332 |
; void add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top) |
207 | 333 |
cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top |
tests/ref/acodec/ac3_fixed | ||
---|---|---|
1 |
b3a8f0a8809a58b2ece90744f06fff96 *./tests/data/acodec/ac3.rm
|
|
1 |
346073c97eada69330f61e103a170ca1 *./tests/data/acodec/ac3.rm
|
|
2 | 2 |
98751 ./tests/data/acodec/ac3.rm |
tests/ref/lavf/rm | ||
---|---|---|
1 |
7da378131db880bcf2e58305d54418ec *./tests/data/lavf/lavf.rm
|
|
1 |
7b7ede9548a09346675edad36acfbf19 *./tests/data/lavf/lavf.rm
|
|
2 | 2 |
346706 ./tests/data/lavf/lavf.rm |
tests/ref/seek/ac3_rm | ||
---|---|---|
1 | 1 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
2 | 2 |
ret: 0 st:-1 flags:0 ts:-1.000000 |
3 | 3 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
4 |
ret: 0 st:-1 flags:1 ts: 1.894167 |
|
5 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
|
6 |
ret: 0 st: 0 flags:0 ts: 0.788000 |
|
7 |
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916 |
|
4 |
ret:-1 st:-1 flags:1 ts: 1.894167 |
|
5 |
ret:-1 st: 0 flags:0 ts: 0.788000 |
|
8 | 6 |
ret: 0 st: 0 flags:1 ts:-0.317000 |
9 | 7 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
10 |
ret: 0 st:-1 flags:0 ts: 2.576668 |
|
11 |
ret: 0 st: 0 flags:1 dts:524.800000 pts:524.800000 pos: 6155 size: 244 |
|
8 |
ret:-1 st:-1 flags:0 ts: 2.576668 |
|
12 | 9 |
ret:-1 st:-1 flags:1 ts: 1.470835 |
13 |
ret: 0 st: 0 flags:0 ts: 0.365000 |
|
14 |
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916 |
|
10 |
ret:-1 st: 0 flags:0 ts: 0.365000 |
|
15 | 11 |
ret: 0 st: 0 flags:1 ts:-0.741000 |
16 | 12 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
17 | 13 |
ret:-1 st:-1 flags:0 ts: 2.153336 |
18 |
ret: 0 st:-1 flags:1 ts: 1.047503 |
|
19 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
|
14 |
ret:-1 st:-1 flags:1 ts: 1.047503 |
|
20 | 15 |
ret: 0 st: 0 flags:0 ts:-0.058000 |
21 | 16 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
22 |
ret: 0 st: 0 flags:1 ts: 2.836000 |
|
23 |
ret: 0 st: 0 flags:1 dts: 2.681000 pts: 2.681000 pos: 44105 size: 558 |
|
17 |
ret:-1 st: 0 flags:1 ts: 2.836000 |
|
24 | 18 |
ret:-1 st:-1 flags:0 ts: 1.730004 |
25 |
ret: 0 st:-1 flags:1 ts: 0.624171 |
|
26 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
|
19 |
ret:-1 st:-1 flags:1 ts: 0.624171 |
|
27 | 20 |
ret: 0 st: 0 flags:0 ts:-0.482000 |
28 | 21 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
29 | 22 |
ret:-1 st: 0 flags:1 ts: 2.413000 |
30 | 23 |
ret:-1 st:-1 flags:0 ts: 1.306672 |
31 |
ret: 0 st:-1 flags:1 ts: 0.200839 |
|
32 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
|
24 |
ret:-1 st:-1 flags:1 ts: 0.200839 |
|
33 | 25 |
ret: 0 st: 0 flags:0 ts:-0.905000 |
34 | 26 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
35 | 27 |
ret:-1 st: 0 flags:1 ts: 1.989000 |
36 |
ret: 0 st:-1 flags:0 ts: 0.883340 |
|
37 |
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916 |
|
28 |
ret:-1 st:-1 flags:0 ts: 0.883340 |
|
38 | 29 |
ret: 0 st:-1 flags:1 ts:-0.222493 |
39 | 30 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
40 | 31 |
ret:-1 st: 0 flags:0 ts: 2.672000 |
41 | 32 |
ret:-1 st: 0 flags:1 ts: 1.566000 |
42 |
ret: 0 st:-1 flags:0 ts: 0.460008 |
|
43 |
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916 |
|
33 |
ret:-1 st:-1 flags:0 ts: 0.460008 |
|
44 | 34 |
ret: 0 st:-1 flags:1 ts:-0.645825 |
45 | 35 |
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556 |
Also available in: Unified diff