ffmpeg / libavcodec / x86 / ac3dsp.asm @ f7a5e779
History | View | Annotate | Download (3.84 KB)
1 | a30ac54a | Justin Ruggles | ;***************************************************************************** |
---|---|---|---|
2 | ;* x86-optimized AC-3 DSP utils |
||
3 | ;* Copyright (c) 2011 Justin Ruggles |
||
4 | ;* |
||
5 | ;* This file is part of FFmpeg. |
||
6 | ;* |
||
7 | ;* FFmpeg is free software; you can redistribute it and/or |
||
8 | ;* modify it under the terms of the GNU Lesser General Public |
||
9 | ;* License as published by the Free Software Foundation; either |
||
10 | ;* version 2.1 of the License, or (at your option) any later version. |
||
11 | ;* |
||
12 | ;* FFmpeg is distributed in the hope that it will be useful, |
||
13 | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | ;* Lesser General Public License for more details. |
||
16 | ;* |
||
17 | ;* You should have received a copy of the GNU Lesser General Public |
||
18 | ;* License along with FFmpeg; if not, write to the Free Software |
||
19 | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | ;****************************************************************************** |
||
21 | |||
22 | %include "x86inc.asm" |
||
23 | %include "x86util.asm" |
||
24 | |||
25 | SECTION .text |
||
26 | |||
27 | ;----------------------------------------------------------------------------- |
||
28 | ; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs) |
||
29 | ;----------------------------------------------------------------------------- |
||
30 | |||
31 | %macro AC3_EXPONENT_MIN 1 ; %1 = suffix of the emitted function name; merges the per-byte minimum of several 256-byte-spaced blocks into the first block |
||
32 | cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset ; named registers: exp - reuse_blks - expn plus offset as a scratch index (cglobal is not defined in this file; see x86inc.asm for the numeric fields) |
||
33 | shl reuse_blksq, 8 ; reuse_blks *= 256: byte offset of the last block (blocks are stepped in 256-byte units below) |
||
34 | jz .end ; shifted count is zero: nothing to merge so exp is left untouched |
||
35 | LOOP_ALIGN ; expands to whatever the instantiating code %defined for this variant |
||
36 | .nextexp: ; outer loop: one mmsize-byte column per pass |
||
37 | mov offsetq, reuse_blksq ; restart at the last block for this column |
||
38 | mova m0, [expq+offsetq] ; m0 = mmsize bytes of the last block; seeds the running result |
||
39 | sub offsetq, 256 ; step back to the previous block |
||
40 | LOOP_ALIGN |
||
41 | .nextblk: ; inner loop: walk the blocks downwards until offset 0 has been handled |
||
42 | PMINUB m0, [expq+offsetq], m1 ; fold this block into m0; PMINUB is %defined per variant and m1 is presumably scratch for the emulated form - confirm in x86util.asm |
||
43 | sub offsetq, 256 |
||
44 | jae .nextblk ; keep going until the subtraction borrows - i.e. until the block at offset 0 has been folded in |
||
45 | mova [expq], m0 ; store the merged column into the first block |
||
46 | add expq, mmsize ; advance to the next column |
||
47 | sub expnq, mmsize |
||
48 | jg .nextexp ; signed > 0: bytes remain; a trailing partial column is still processed as a full mmsize bytes |
||
49 | .end: |
||
50 | REP_RET ; return macro - not defined in this file (presumably x86inc.asm) |
||
51 | %endmacro |
||
52 | |||
53 | %define PMINUB PMINUB_MMX |
||
54 | %define LOOP_ALIGN |
||
55 | INIT_MMX |
||
56 | AC3_EXPONENT_MIN mmx ; mmx variant: PMINUB routed to the PMINUB_MMX helper (not defined in this file) and LOOP_ALIGN empty |
||
57 | %ifdef HAVE_MMX2 |
||
58 | %define PMINUB PMINUB_MMXEXT |
||
59 | %define LOOP_ALIGN ALIGN 16 |
||
60 | AC3_EXPONENT_MIN mmxext ; mmxext variant: still under INIT_MMX; PMINUB_MMXEXT and 16-byte aligned loop heads |
||
61 | %endif |
||
62 | %ifdef HAVE_SSE |
||
63 | INIT_XMM |
||
64 | AC3_EXPONENT_MIN sse2 ; sse2 variant. NOTE(review): PMINUB and LOOP_ALIGN are inherited from above - with HAVE_SSE but without HAVE_MMX2 this is built with PMINUB_MMX and no alignment; confirm intended |
||
65 | %endif |
||
66 | %undef PMINUB |
||
67 | %undef LOOP_ALIGN |
||
68 | 7539a1fe | Justin Ruggles | |
69 | ;----------------------------------------------------------------------------- |
||
70 | ; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len) |
||
71 | ; |
||
72 | ; This function uses 2 different methods to calculate a valid result. |
||
73 | ; 1) logical 'or' of abs of each element |
||
74 | ; This is used for ssse3 because of the pabsw instruction. |
||
75 | ; It is also used for mmx because of the lack of min/max instructions. |
||
76 | ; 2) calculate min/max for the array, then or(abs(min),abs(max)) |
||
77 | ; This is used for mmxext and sse2 because they have pminsw/pmaxsw. |
||
78 | ;----------------------------------------------------------------------------- |
||
79 | |||
80 | %macro AC3_MAX_MSB_ABS_INT16 2 ; %1 = suffix of the emitted function name; %2 = method: min_max or (anything else) or_abs |
||
81 | cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len ; src = int16 input; len = element count (each loop pass covers mmsize elements); result is returned in eax |
||
82 | pxor m2, m2 ; m2 = 0: running minimum (min_max) or OR accumulator (or_abs) |
||
83 | pxor m3, m3 ; m3 = 0: running maximum (min_max); passed to ABS2 as a temporary in the mmx or_abs path |
||
84 | .loop: ; each pass consumes two vectors = mmsize*2 bytes = mmsize int16 elements |
||
85 | %ifidn %2, min_max |
||
86 | mova m0, [srcq] |
||
87 | mova m1, [srcq+mmsize] |
||
88 | pminsw m2, m0 ; signed word minimum over everything seen so far (never above 0 because m2 starts at 0) |
||
89 | pminsw m2, m1 |
||
90 | pmaxsw m3, m0 ; signed word maximum over everything seen so far (never below 0 because m3 starts at 0) |
||
91 | pmaxsw m3, m1 |
||
92 | %else ; or_abs |
||
93 | %ifidn %1, mmx |
||
94 | mova m0, [srcq] |
||
95 | mova m1, [srcq+mmsize] |
||
96 | ABS2 m0, m1, m3, m4 ; ABS2 is %defined by the instantiating code; presumably abs of m0 and m1 with m3 and m4 as temporaries - confirm in x86util.asm |
||
97 | %else ; ssse3 |
||
98 | ; using memory args is faster for ssse3 |
||
99 | pabsw m0, [srcq] |
||
100 | pabsw m1, [srcq+mmsize] |
||
101 | %endif |
||
102 | por m2, m0 ; accumulate every bit that is set in any absolute value |
||
103 | por m2, m1 |
||
104 | %endif |
||
105 | add srcq, mmsize*2 ; skip the two vectors just consumed |
||
106 | sub lend, mmsize ; mmsize int16 elements done |
||
107 | ja .loop ; continue while the remaining count is above zero (no borrow and not zero) |
||
108 | %ifidn %2, min_max |
||
109 | ABS2 m2, m3, m0, m1 ; turn the running min and max into magnitudes |
||
110 | por m2, m3 ; from here on both methods hold their per-lane OR in m2 |
||
111 | %endif |
||
112 | %ifidn mmsize, 16 |
||
113 | 20a2a3da | Justin Ruggles | movhlps m0, m2 ; xmm only: bring the high 64 bits down |
114 | 7539a1fe | Justin Ruggles | por m2, m0 ; low 64 bits now cover all eight words |
115 | %endif |
||
 | %ifidn %1, mmx |
||
 | ; pshufw is not part of base MMX (it arrived with SSE / AMD extended MMX), |
||
 | ; so the plain mmx variant folds the four words with 64-bit shifts instead. |
||
 | mova m0, m2 |
||
 | psrlq m0, 32 ; words 2-3 move down to words 0-1 |
||
 | por m2, m0 |
||
 | mova m0, m2 |
||
 | psrlq m0, 16 ; word 1 moves down to word 0 |
||
 | por m2, m0 ; word 0 = OR of all four words |
||
 | %else |
||
116 | PSHUFLW m0, m2, 0xe ; selector 0xe: words 2-3 copied down to words 0-1 |
||
117 | por m2, m0 |
||
118 | PSHUFLW m0, m2, 0x1 ; selector 0x1: word 1 copied down to word 0 |
||
119 | por m2, m0 ; word 0 = OR of all four low words |
||
 | %endif |
||
120 | movd eax, m2 |
||
121 | and eax, 0xFFFF ; only word 0 holds the full reduction; drop the rest |
||
122 | RET |
||
123 | %endmacro |
||
124 | |||
125 | INIT_MMX |
||
126 | %define ABS2 ABS2_MMX |
||
127 | %define PSHUFLW pshufw |
||
128 | AC3_MAX_MSB_ABS_INT16 mmx, or_abs ; mmx variant: OR-of-abs method with ABS2 = ABS2_MMX. NOTE(review): PSHUFLW is pshufw here and pshufw is not a base-MMX instruction (SSE / AMD extended MMX) - check what the macro emits for this variant |
||
129 | %define ABS2 ABS2_MMX2 |
||
130 | AC3_MAX_MSB_ABS_INT16 mmxext, min_max ; mmxext variant: min/max method with ABS2 = ABS2_MMX2 and PSHUFLW = pshufw |
||
131 | INIT_XMM |
||
132 | %define PSHUFLW pshuflw |
||
133 | AC3_MAX_MSB_ABS_INT16 sse2, min_max ; sse2 variant: min/max method; ABS2 is still ABS2_MMX2 and PSHUFLW is now pshuflw |
||
134 | %define ABS2 ABS2_SSSE3 |
||
135 | AC3_MAX_MSB_ABS_INT16 ssse3, or_abs ; ssse3 variant: OR-of-abs method; the macro uses pabsw directly on this path and only invokes ABS2 for mmx or min_max - so the ABS2 define above looks unused here |