Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / ac3dsp.asm @ f1efbca5

History | View | Annotate | Download (5.25 KB)

1
;*****************************************************************************
2
;* x86-optimized AC-3 DSP utils
3
;* Copyright (c) 2011 Justin Ruggles
4
;*
5
;* This file is part of FFmpeg.
6
;*
7
;* FFmpeg is free software; you can redistribute it and/or
8
;* modify it under the terms of the GNU Lesser General Public
9
;* License as published by the Free Software Foundation; either
10
;* version 2.1 of the License, or (at your option) any later version.
11
;*
12
;* FFmpeg is distributed in the hope that it will be useful,
13
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
;* Lesser General Public License for more details.
16
;*
17
;* You should have received a copy of the GNU Lesser General Public
18
;* License along with FFmpeg; if not, write to the Free Software
19
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
;******************************************************************************
21

    
22
%include "x86inc.asm"
23
%include "x86util.asm"
24

    
25
SECTION .text
26

    
27
;-----------------------------------------------------------------------------
28
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
29
;-----------------------------------------------------------------------------
30

    
31
%macro AC3_EXPONENT_MIN 1 ; %1 = instruction-set suffix appended to the symbol name
; For each group of mmsize exponent bytes, combines exp[i + 256*k] for
; k = num_reuse_blocks .. 0 with PMINUB and stores the result to exp[i].
; Returns immediately when num_reuse_blocks is 0.
; PMINUB and LOOP_ALIGN must be %defined by the instantiating code; PMINUB is
; presumably an unsigned byte minimum (defined outside this section - confirm).
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
    ; num_reuse_blocks and nb_coefs are C ints: on x86-64 the upper half of
    ; their argument registers is unspecified, so operate on the 32-bit views.
    ; The 32-bit write also zero-extends, making reuse_blksq safe to use below.
    shl  reuse_blksd, 8                     ; byte offset of the last block (256 bytes per block)
    jz .end
    LOOP_ALIGN
.nextexp:
    mov      offsetq, reuse_blksq
    mova          m0, [expq+offsetq]        ; start from the last block
    sub      offsetq, 256
    LOOP_ALIGN
.nextblk:
    PMINUB        m0, [expq+offsetq], m1
    sub      offsetq, 256
    jae .nextblk                            ; borrow => offset 0 (block 0) was just processed
    mova      [expq], m0
    add         expq, mmsize
    sub        expnd, mmsize
    jg .nextexp                             ; signed: stops once nb_coefs is used up
.end:
    REP_RET
%endmacro
52

    
53
; --- ac3_exponent_min_mmx: MMX registers, no extra loop alignment ---
INIT_MMX
%define LOOP_ALIGN
%define PMINUB PMINUB_MMX
AC3_EXPONENT_MIN mmx

; --- ac3_exponent_min_mmxext: MMXEXT byte-min variant, 16-byte aligned loops ---
%ifdef HAVE_MMX2
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
%endif

; --- ac3_exponent_min_sse2: XMM registers; reuses whichever PMINUB and
;     LOOP_ALIGN definitions are in effect at this point ---
%ifdef HAVE_SSE
INIT_XMM
AC3_EXPONENT_MIN sse2
%endif

; the helper defines are only meaningful for the instantiations above
%undef PMINUB
%undef LOOP_ALIGN
68

    
69
;-----------------------------------------------------------------------------
70
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
71
;
72
; This function uses 2 different methods to calculate a valid result.
73
; 1) logical 'or' of abs of each element
74
;        This is used for ssse3 because of the pabsw instruction.
75
;        It is also used for mmx because of the lack of min/max instructions.
76
; 2) calculate min/max for the array, then or(abs(min),abs(max))
77
;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
78
;-----------------------------------------------------------------------------
79

    
80
%macro AC3_MAX_MSB_ABS_INT16 2 ; %1 = instruction-set suffix, %2 = method (or_abs / min_max)
; Returns in eax a 16-bit value built from the int16 array at src:
;   or_abs : the OR of the absolute values of all elements
;   min_max: OR of abs(running minimum) and abs(running maximum)
; Each loop iteration reads 2*mmsize bytes (mmsize int16 elements) and
; subtracts mmsize from len; the loop body always runs at least once.
; ABS2 and PSHUFLW must be %defined by the instantiating code.
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
    pxor        m2, m2          ; or_abs: OR accumulator; min_max: running minimum (starts at 0)
    pxor        m3, m3          ; min_max: running maximum (starts at 0)
.loop:
%ifidn %2, min_max
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    pminsw      m2, m0
    pminsw      m2, m1
    pmaxsw      m3, m0
    pmaxsw      m3, m1
%else ; or_abs
%ifidn %1, mmx
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    ABS2        m0, m1, m3, m4  ; m3/m4 presumably scratch for the ABS2 macro - confirm in x86util
%else ; ssse3
    ; using memory args is faster for ssse3
    pabsw       m0, [srcq]
    pabsw       m1, [srcq+mmsize]
%endif
    por         m2, m0
    por         m2, m1
%endif
    add       srcq, mmsize*2
    sub       lend, mmsize
    ja .loop
%ifidn %2, min_max
    ABS2        m2, m3, m0, m1
    por         m2, m3
%endif
    ; horizontal OR: fold every 16-bit lane of m2 into lane 0
%ifidn mmsize, 16
    movhlps     m0, m2          ; high 64 bits -> low 64 bits
    por         m2, m0
%endif
%ifidn %1, mmx
    ; pshufw is an SSE/MMXEXT instruction and must not be used in the plain
    ; MMX version; fold with 64-bit shifts instead (same result in lane 0).
    mova        m0, m2
    psrlq       m0, 32          ; words 2,3 -> words 0,1
    por         m2, m0
    mova        m0, m2
    psrlq       m0, 16          ; word 1 -> word 0
    por         m2, m0
%else
    PSHUFLW     m0, m2, 0xe     ; words 2,3 -> words 0,1
    por         m2, m0
    PSHUFLW     m0, m2, 0x1     ; word 1 -> word 0
    por         m2, m0
%endif
    movd       eax, m2
    and        eax, 0xFFFF      ; keep only lane 0
    RET
%endmacro
124

    
125
; --- 64-bit MMX register versions ---
INIT_MMX
%define PSHUFLW pshufw
%define ABS2 ABS2_MMX
AC3_MAX_MSB_ABS_INT16 mmx, or_abs
%define ABS2 ABS2_MMX2
AC3_MAX_MSB_ABS_INT16 mmxext, min_max

; --- 128-bit XMM register versions ---
INIT_XMM
%define PSHUFLW pshuflw
AC3_MAX_MSB_ABS_INT16 sse2, min_max     ; ABS2 is still ABS2_MMX2 here
%define ABS2 ABS2_SSSE3
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
136

    
137
;-----------------------------------------------------------------------------
138
; macro used for ff_ac3_lshift_int16() and ff_ac3_rshift_int32()
139
;-----------------------------------------------------------------------------
140

    
141
%macro AC3_SHIFT 4 ; l/r, 16/32, shift instruction, instruction set
; Emits ac3_<%1>shift_int<%2>_<%4>(src, len, shift): applies instruction %3
; with the count in `shift` to the array at src, in place. Every iteration
; handles four vector registers' worth of data, and the loop body always runs
; at least once.
%assign %%bytes_per_iter mmsize*4       ; four vector registers per pass
%assign %%elems_per_iter mmsize*32/%2   ; the same amount counted in %2-bit elements
cglobal ac3_%1shift_int%2_%4, 3,3,5, src, len, shift
    movd    m0, shiftd                  ; shift count, reused by all four shifts
.next_chunk:
    ; load four consecutive vectors
    mova    m1, [srcq]
    mova    m2, [srcq+mmsize]
    mova    m3, [srcq+mmsize*2]
    mova    m4, [srcq+mmsize*3]
    ; shift each of them by the same count
    %3      m1, m0
    %3      m2, m0
    %3      m3, m0
    %3      m4, m0
    ; write the results back over the input
    mova    [srcq], m1
    mova    [srcq+mmsize], m2
    mova    [srcq+mmsize*2], m3
    mova    [srcq+mmsize*3], m4
    add     srcq, %%bytes_per_iter
    sub     lend, %%elems_per_iter
    ja .next_chunk                      ; unsigned: continue while elements remain
.end:
    REP_RET
%endmacro
163

    
164
;-----------------------------------------------------------------------------
165
; void ff_ac3_lshift_int16(int16_t *src, unsigned int len, unsigned int shift)
166
;-----------------------------------------------------------------------------
167

    
168
; 16-bit left shift (psllw), one instantiation per register width
INIT_MMX
AC3_SHIFT l, 16, psllw, mmx     ; ac3_lshift_int16_mmx, MMX registers
INIT_XMM
AC3_SHIFT l, 16, psllw, sse2    ; ac3_lshift_int16_sse2, XMM registers
172

    
173
;-----------------------------------------------------------------------------
174
; void ff_ac3_rshift_int32(int32_t *src, unsigned int len, unsigned int shift)
175
;-----------------------------------------------------------------------------
176

    
177
; 32-bit arithmetic right shift (psrad), one instantiation per register width
INIT_MMX
AC3_SHIFT r, 32, psrad, mmx     ; ac3_rshift_int32_mmx, MMX registers
INIT_XMM
AC3_SHIFT r, 32, psrad, sse2    ; ac3_rshift_int32_sse2, XMM registers