Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / ac3dsp.asm @ cc4d3dd3

History | View | Annotate | Download (5.69 KB)

1
;*****************************************************************************
2
;* x86-optimized AC-3 DSP utils
3
;* Copyright (c) 2011 Justin Ruggles
4
;*
5
;* This file is part of FFmpeg.
6
;*
7
;* FFmpeg is free software; you can redistribute it and/or
8
;* modify it under the terms of the GNU Lesser General Public
9
;* License as published by the Free Software Foundation; either
10
;* version 2.1 of the License, or (at your option) any later version.
11
;*
12
;* FFmpeg is distributed in the hope that it will be useful,
13
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
;* Lesser General Public License for more details.
16
;*
17
;* You should have received a copy of the GNU Lesser General Public
18
;* License along with FFmpeg; if not, write to the Free Software
19
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
;******************************************************************************
21

    
22
%include "x86inc.asm"
23
%include "x86util.asm"
24

    
25
SECTION .text
26

    
27
;-----------------------------------------------------------------------------
28
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
29
;-----------------------------------------------------------------------------
30

    
31
; AC3_EXPONENT_MIN <suffix>
;   Emits ac3_exponent_min_<suffix>.  For every mmsize-wide group of exponent
;   bytes it reduces block 0 and the reuse_blks blocks that follow it (blocks
;   are 256 bytes apart) with PMINUB and writes the result back into block 0.
;   %1 is only used as the function-name suffix.
;   The caller must %define PMINUB and LOOP_ALIGN before invoking this macro.
;   PMINUB receives m1 as a third operand - presumably a scratch register for
;   the non-native variants; confirm against its definition.
%macro AC3_EXPONENT_MIN 1
32
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
33
    shl  reuse_blksq, 8                     ; reuse_blks *= 256 -> byte offset of the last block
34
    jz .end                                 ; ZF from shl: no reuse blocks, nothing to merge
35
    LOOP_ALIGN
36
.nextexp:
37
    mov      offsetq, reuse_blksq           ; restart at the last block for this group
38
    mova          m0, [expq+offsetq]        ; m0 = exponents of the last block
39
    sub      offsetq, 256                   ; step back one block
40
    LOOP_ALIGN
41
.nextblk:
42
    PMINUB        m0, [expq+offsetq], m1    ; fold the current block into the running result
43
    sub      offsetq, 256                   ; step back one block; borrows once block 0 is done
44
    jae .nextblk                            ; loop while the sub did not borrow (offset was >= 256)
45
    mova      [expq], m0                    ; store the reduced group into block 0
46
    add         expq, mmsize                ; advance to the next group of exponents
47
    sub        expnq, mmsize                ; expn counts the remaining exponents
48
    jg .nextexp                             ; signed test: continue while expn > 0
49
.end:
50
    REP_RET
51
%endmacro
52

    
53
; Instantiate ac3_exponent_min_{mmx,mmxext,sse2}.
; PMINUB / LOOP_ALIGN select the min implementation and the loop alignment
; that AC3_EXPONENT_MIN expands; both are undefined again at the end.
; PMINUB_MMX / PMINUB_MMXEXT are not defined in this file (presumably in
; x86util.asm - confirm).
%define PMINUB PMINUB_MMX
54
; mmx variant: LOOP_ALIGN is empty, so no alignment padding is emitted
%define LOOP_ALIGN
55
INIT_MMX
56
AC3_EXPONENT_MIN mmx
57
%ifdef HAVE_MMX2
58
%define PMINUB PMINUB_MMXEXT
59
%define LOOP_ALIGN ALIGN 16
60
AC3_EXPONENT_MIN mmxext
61
%endif
62
; NOTE(review): the sse2 variant reuses whatever PMINUB / LOOP_ALIGN were last
; defined above, so without HAVE_MMX2 it is built with PMINUB_MMX and an empty
; LOOP_ALIGN.  The guard is HAVE_SSE although the function is named sse2 -
; confirm both are intended.
%ifdef HAVE_SSE
63
INIT_XMM
64
AC3_EXPONENT_MIN sse2
65
%endif
66
%undef PMINUB
67
%undef LOOP_ALIGN
68

    
69
;-----------------------------------------------------------------------------
70
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
71
;
72
; This function uses 2 different methods to calculate a valid result.
73
; 1) logical 'or' of abs of each element
74
;        This is used for ssse3 because of the pabsw instruction.
75
;        It is also used for mmx because of the lack of min/max instructions.
76
; 2) calculate min/max for the array, then or(abs(min),abs(max))
77
;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
78
;-----------------------------------------------------------------------------
79

    
80
; AC3_MAX_MSB_ABS_INT16 <suffix>, <method>
;   %1 = function-name suffix; also selects the mmx code path inside or_abs.
;   %2 = min_max or or_abs (any value other than min_max takes the or_abs path).
;   Returns in eax a 16-bit value formed by OR-ing per-word results over the
;   whole array (or_abs: abs of every element; min_max: abs(min) | abs(max)).
;   Each loop iteration reads 2*mmsize bytes and decrements len by mmsize.
;   The caller must %define ABS2 (where the selected path uses it) and PSHUFLW.
%macro AC3_MAX_MSB_ABS_INT16 2
81
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
82
    pxor        m2, m2                      ; m2 = 0: running OR (or_abs) / running min (min_max)
83
    pxor        m3, m3                      ; m3 = 0: running max (min_max); ABS2 operand in mmx or_abs
84
.loop:
85
%ifidn %2, min_max
86
    mova        m0, [srcq]
87
    mova        m1, [srcq+mmsize]
88
    pminsw      m2, m0                      ; per-word signed minimum so far
89
    pminsw      m2, m1
90
    pmaxsw      m3, m0                      ; per-word signed maximum so far
91
    pmaxsw      m3, m1
92
%else ; or_abs
93
%ifidn %1, mmx
94
    mova        m0, [srcq]
95
    mova        m1, [srcq+mmsize]
96
    ABS2        m0, m1, m3, m4              ; abs of m0/m1; m3/m4 presumably scratch - confirm
97
%else ; ssse3
98
    ; using memory args is faster for ssse3
99
    pabsw       m0, [srcq]
100
    pabsw       m1, [srcq+mmsize]
101
%endif
102
    por         m2, m0                      ; accumulate the absolute values into m2
103
    por         m2, m1
104
%endif
105
    add       srcq, mmsize*2                ; two registers' worth of input consumed
106
    sub       lend, mmsize                  ; 2*mmsize bytes == mmsize 16-bit elements
107
    ja .loop                                ; unsigned: loop while the result is > 0 with no borrow
108
%ifidn %2, min_max
109
    ABS2        m2, m3, m0, m1              ; abs(min), abs(max); m0/m1 are free after the loop
110
    por         m2, m3                      ; m2 = abs(min) | abs(max), per word
111
%endif
112
%ifidn mmsize, 16
113
    movhlps     m0, m2                      ; bring the high 64 bits of m2 down
114
    por         m2, m0                      ; fold 8 words into the low 4
115
%endif
116
    PSHUFLW     m0, m2, 0xe                 ; words 2,3 -> word positions 0,1
117
    por         m2, m0                      ; fold 4 words into 2
118
    PSHUFLW     m0, m2, 0x1                 ; word 1 -> word position 0
119
    por         m2, m0                      ; word 0 now holds the OR of every word
120
    movd       eax, m2
121
    and        eax, 0xFFFF                  ; keep only the reduced low word
122
    RET
123
%endmacro
124

    
125
; Instantiate ac3_max_msb_abs_int16_{mmx,mmxext,sse2,ssse3}.
; ABS2 and PSHUFLW are redefined between instantiations; each definition stays
; in effect until the next %define, so sse2 still uses ABS2_MMX2.
; NOTE(review): the mmx variant expands PSHUFLW to pshufw, which is an
; MMXEXT/SSE instruction - confirm that the mmx function is only selected on
; CPUs that support it.
INIT_MMX
125
%define ABS2 ABS2_MMX
126
%define PSHUFLW pshufw
127
AC3_MAX_MSB_ABS_INT16 mmx, or_abs
128
%define ABS2 ABS2_MMX2
129
AC3_MAX_MSB_ABS_INT16 mmxext, min_max
130
INIT_XMM
131
%define PSHUFLW pshuflw
132
AC3_MAX_MSB_ABS_INT16 sse2, min_max
133
; The ssse3 or_abs path uses pabsw directly, so ABS2 is not expanded in the
; instantiation below.
%define ABS2 ABS2_SSSE3
134
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
136

    
137
;-----------------------------------------------------------------------------
138
; macro used for ff_ac3_lshift_int16() and ff_ac3_shift_int32()
139
;-----------------------------------------------------------------------------
140

    
141
; AC3_SHIFT_4MM <ptr>, <shift instruction>, <shift amount>
;   Shifts 4*mmsize bytes in place at [%1] and then advances %1 past them.
;   %1 must be a register, since it is both the address base and the target of
;   the final add.  Overwrites m1-m4; %3 is only read.
%macro AC3_SHIFT_4MM 3 ; src/dst, shift instruction, shift amount
142
    mova  m1, [%1         ]     ; load four consecutive registers' worth of data
143
    mova  m2, [%1+mmsize  ]
144
    mova  m3, [%1+mmsize*2]
145
    mova  m4, [%1+mmsize*3]
146
    %2    m1, %3                ; apply the requested shift to each register
147
    %2    m2, %3
148
    %2    m3, %3
149
    %2    m4, %3
150
    mova  [%1         ], m1     ; write the results back over the input
151
    mova  [%1+mmsize  ], m2
152
    mova  [%1+mmsize*2], m3
153
    mova  [%1+mmsize*3], m4
154
    add   %1, mmsize*4          ; advance the pointer past the processed data
155
%endmacro
156

    
157
;-----------------------------------------------------------------------------
158
; void ff_ac3_lshift_int16(int16_t *src, int len, unsigned int shift)
159
;-----------------------------------------------------------------------------
160

    
161
; AC3_LSHIFT_INT16 <suffix>
;   Emits ac3_lshift_int16_<suffix>: left-shifts the 16-bit words at src in
;   place by `shift` bits.  A shift of 0 returns without touching memory.
;   Each iteration handles 4*mmsize bytes and decrements len by mmsize*2.
%macro AC3_LSHIFT_INT16 1
162
cglobal ac3_lshift_int16_%1, 3,3,5, src, len, shift
163
    test   shiftd, shiftd
164
    jz .end                                 ; shift == 0: nothing to do
165
    movd       m0, shiftd                   ; shift count goes in m0 for psllw
166
    ALIGN 8
167
.loop:
168
    AC3_SHIFT_4MM srcq, psllw, m0           ; shifts 4*mmsize bytes and advances srcq
169
    sub      lend, mmsize*2                 ; 4*mmsize bytes == mmsize*2 16-bit elements
170
    ja .loop                                ; unsigned: loop while the result is > 0 with no borrow
171
.end:
172
    REP_RET
173
%endmacro
174

    
175
; Instantiate ac3_lshift_int16_mmx and ac3_lshift_int16_sse2.
INIT_MMX
176
AC3_LSHIFT_INT16 mmx
177
INIT_XMM
178
AC3_LSHIFT_INT16 sse2
179

    
180
;-----------------------------------------------------------------------------
181
; void ff_ac3_shift_int32(int32_t *src, int len, int shift)
182
;-----------------------------------------------------------------------------
183

    
184
; AC3_SHIFT_INT32 <suffix>
;   Emits ac3_shift_int32_<suffix>: shifts the 32-bit values at src in place.
;   shift > 0 -> left shift (pslld) by shift bits
;   shift < 0 -> arithmetic right shift (psrad) by -shift bits
;   shift == 0 -> returns without touching memory
;   Each iteration handles 4*mmsize bytes and decrements len by mmsize.
%macro AC3_SHIFT_INT32 1
185
cglobal ac3_shift_int32_%1, 3,3,5, src, len, shift
186
    test   shiftd, shiftd                   ; sets ZF and SF for the two branches below
187
    je .end                                 ; shift == 0: nothing to do
188
    js .shift_right                         ; je leaves flags intact; negative shift -> right
189
    movd       m0, shiftd                   ; shift count goes in m0 for pslld
190
.loop_left:
191
    AC3_SHIFT_4MM srcq, pslld, m0           ; shifts 4*mmsize bytes and advances srcq
192
    sub      lend, mmsize                   ; 4*mmsize bytes == mmsize 32-bit elements
193
    ja .loop_left                           ; unsigned: loop while the result is > 0 with no borrow
194
    jmp .end
195
.shift_right:
196
    neg    shiftd                           ; make the count positive for psrad
197
    movd       m0, shiftd
198
.loop_right:
199
    AC3_SHIFT_4MM srcq, psrad, m0           ; arithmetic shift keeps the sign of each value
200
    sub      lend, mmsize
201
    ja .loop_right
202
.end:
203
    REP_RET
204
%endmacro
205

    
206
; Instantiate ac3_shift_int32_mmx and ac3_shift_int32_sse2.
INIT_MMX
207
AC3_SHIFT_INT32 mmx
208
INIT_XMM
209
AC3_SHIFT_INT32 sse2