Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / ac3dsp.asm @ f7a5e779

History | View | Annotate | Download (3.84 KB)

1 a30ac54a Justin Ruggles
;*****************************************************************************
2
;* x86-optimized AC-3 DSP utils
3
;* Copyright (c) 2011 Justin Ruggles
4
;*
5
;* This file is part of FFmpeg.
6
;*
7
;* FFmpeg is free software; you can redistribute it and/or
8
;* modify it under the terms of the GNU Lesser General Public
9
;* License as published by the Free Software Foundation; either
10
;* version 2.1 of the License, or (at your option) any later version.
11
;*
12
;* FFmpeg is distributed in the hope that it will be useful,
13
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
;* Lesser General Public License for more details.
16
;*
17
;* You should have received a copy of the GNU Lesser General Public
18
;* License along with FFmpeg; if not, write to the Free Software
19
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
;******************************************************************************
21
22
%include "x86inc.asm"
23
%include "x86util.asm"
24
25
SECTION .text
26
27
;-----------------------------------------------------------------------------
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
;
; For each group of mmsize exponents, takes the unsigned byte minimum of the
; values found at exp + k*256 for k = num_reuse_blocks down to 0 and stores
; the result back at exp (the k = 0 position). Does nothing when
; num_reuse_blocks is 0.
;
; Macro argument:
;   %1  suffix appended to the exported function name (mmx, mmxext, sse2)
;
; Must be defined by the instantiating code before the macro is expanded:
;   PMINUB      3-operand unsigned byte minimum: dst, src, tmp
;   LOOP_ALIGN  alignment directive placed before loop heads (may be empty)
;
; Registers:
;   expq         current write position / column base
;   reuse_blksq  num_reuse_blocks * 256, byte offset of the last block
;   expnq        coefficients left to process, stepped by mmsize
;   offsetq      byte offset of the block currently being merged
;   m0           running minimum, m1 = PMINUB scratch
;
; NOTE(review): the int arguments are used through their full-width "q"
; names; this presumably relies on the caller passing them with clean
; upper bits on 64-bit targets - confirm.
;-----------------------------------------------------------------------------

%macro AC3_EXPONENT_MIN 1
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
    shl  reuse_blksq, 8                     ; block count -> byte offset (256 bytes per block)
    jz .done                                ; no blocks to merge: leave exp untouched
    LOOP_ALIGN
.next_column:
    ; seed the running minimum with the farthest block, then walk towards block 0
    mov      offsetq, reuse_blksq
    mova          m0, [expq+offsetq]
    sub      offsetq, 256
    LOOP_ALIGN
.next_block:
    PMINUB        m0, [expq+offsetq], m1
    sub      offsetq, 256
    jae .next_block                         ; a borrow means offset 0 has just been merged
    mova      [expq], m0
    add         expq, mmsize
    sub        expnq, mmsize
    jg .next_column                         ; signed test: stop once the count reaches <= 0
.done:
    REP_RET
%endmacro
52
53
; ---- ff_ac3_exponent_min_mmx: 64-bit registers, no loop alignment ----
INIT_MMX
%define LOOP_ALIGN
%define PMINUB PMINUB_MMX
AC3_EXPONENT_MIN mmx

; ---- ff_ac3_exponent_min_mmxext: still 64-bit registers, aligned loop heads ----
%ifdef HAVE_MMX2
%define LOOP_ALIGN ALIGN 16
%define PMINUB PMINUB_MMXEXT
AC3_EXPONENT_MIN mmxext
%endif

; ---- ff_ac3_exponent_min_sse2: 128-bit registers ----
; PMINUB and LOOP_ALIGN keep whatever value they had at this point, i.e. the
; mmxext settings when HAVE_MMX2 is defined and the mmx settings otherwise.
%ifdef HAVE_SSE
INIT_XMM
AC3_EXPONENT_MIN sse2
%endif

; The helper names are private to the instantiations above.
%undef LOOP_ALIGN
%undef PMINUB
68 7539a1fe Justin Ruggles
69
;-----------------------------------------------------------------------------
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
;
; Returns a 16-bit value whose highest set bit is the highest set bit of the
; largest absolute value in src. The lower bits differ between the two
; methods below and carry no meaning.
;
; This function uses 2 different methods to calculate a valid result.
; 1) logical 'or' of abs of each element
;        This is used for ssse3 because of the pabsw instruction.
;        It is also used for mmx because of the lack of min/max instructions.
; 2) calculate min/max for the array, then or(abs(min),abs(max))
;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
;
; Macro arguments:
;   %1  suffix appended to the exported function name (mmx, mmxext, sse2, ssse3)
;   %2  method selector: min_max or or_abs
;
; Must be defined by the instantiating code before the macro is expanded:
;   ABS2     4-operand absolute value of two registers: a, b, tmp1, tmp2
;   PSHUFLW  low-word shuffle matching the register width (unused for mmx)
;
; Each iteration reads 2*mmsize bytes (mmsize int16 values) and lowers len by
; mmsize, and the loop body runs before len is tested. So len has to be
; positive; presumably callers pass a multiple of mmsize and an src pointer
; that is suitably aligned for mova - confirm against the callers.
;-----------------------------------------------------------------------------

%macro AC3_MAX_MSB_ABS_INT16 2
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
    pxor        m2, m2                  ; min_max: running minimum / or_abs: OR accumulator
    pxor        m3, m3                  ; min_max: running maximum / mmx or_abs: ABS2 scratch
.loop:
%ifidn %2, min_max
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    pminsw      m2, m0
    pminsw      m2, m1
    pmaxsw      m3, m0
    pmaxsw      m3, m1
%else ; or_abs
%ifidn %1, mmx
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    ABS2        m0, m1, m3, m4
%else ; ssse3
    ; using memory args is faster for ssse3
    pabsw       m0, [srcq]
    pabsw       m1, [srcq+mmsize]
%endif
    por         m2, m0
    por         m2, m1
%endif
    add       srcq, mmsize*2
    sub       lend, mmsize
    ja .loop
%ifidn %2, min_max
    ; min <= 0 <= max because both started at zero; fold them into one
    ; per-lane magnitude mask
    ABS2        m2, m3, m0, m1
    por         m2, m3
%endif
    ; horizontal reduction: OR every 16-bit lane of m2 into lane 0
%ifidn mmsize, 16
    movhlps     m0, m2
    por         m2, m0
%endif
%ifidn %1, mmx
    ; pshufw is not available on plain MMX (it was added with SSE / the MMX
    ; extensions), so fold the four words with 64-bit shifts instead.
    mova        m0, m2
    psrlq       m0, 32                  ; words 2,3 -> words 0,1
    por         m2, m0
    mova        m0, m2
    psrlq       m0, 16                  ; word 1 -> word 0
    por         m2, m0
%else
    PSHUFLW     m0, m2, 0xe             ; words 2,3 -> words 0,1
    por         m2, m0
    PSHUFLW     m0, m2, 0x1             ; word 1 -> word 0
    por         m2, m0
%endif
    movd       eax, m2
    and        eax, 0xFFFF              ; only lane 0 holds the result
    RET
%endmacro
124
125
; ---- 64-bit (MMX register) variants ----
INIT_MMX
%define PSHUFLW pshufw
%define ABS2 ABS2_MMX
AC3_MAX_MSB_ABS_INT16 mmx, or_abs

%define ABS2 ABS2_MMX2
AC3_MAX_MSB_ABS_INT16 mmxext, min_max

; ---- 128-bit (XMM register) variants ----
INIT_XMM
%define PSHUFLW pshuflw
; ABS2 is still ABS2_MMX2 here; it is expanded with xmm registers for sse2.
AC3_MAX_MSB_ABS_INT16 sse2, min_max

; The macro never expands ABS2 for a non-mmx or_abs instance (it uses pabsw
; directly), so this definition only matters to code that follows.
%define ABS2 ABS2_SSSE3
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs