Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / ac3dsp.asm @ fbb6b49d

History | View | Annotate | Download (3.86 KB)

1
;*****************************************************************************
2
;* x86-optimized AC-3 DSP utils
3
;* Copyright (c) 2011 Justin Ruggles
4
;*
5
;* This file is part of FFmpeg.
6
;*
7
;* FFmpeg is free software; you can redistribute it and/or
8
;* modify it under the terms of the GNU Lesser General Public
9
;* License as published by the Free Software Foundation; either
10
;* version 2.1 of the License, or (at your option) any later version.
11
;*
12
;* FFmpeg is distributed in the hope that it will be useful,
13
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
14
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
;* Lesser General Public License for more details.
16
;*
17
;* You should have received a copy of the GNU Lesser General Public
18
;* License along with FFmpeg; if not, write to the Free Software
19
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
;******************************************************************************
21

    
22
%include "x86inc.asm"
23
%include "x86util.asm"
24

    
25
SECTION .text
26

    
27
;-----------------------------------------------------------------------------
28
; void ff_ac3_exponent_min(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
29
;-----------------------------------------------------------------------------
30

    
31
; AC3_EXPONENT_MIN <suffix>
;   Emits the function ac3_exponent_min_<suffix> (symbol decoration is done
;   by cglobal).  For each group of mmsize bytes starting at exp, the bytes
;   at exp+0 are replaced by the PMINUB reduction of the bytes found at
;   exp+0, exp+256, ..., exp+reuse_blks*256.
;
;   PMINUB and LOOP_ALIGN must be %defined before the macro is expanded.
;
;   Register roles:
;     exp        - pointer to the current group of mmsize bytes
;     reuse_blks - after the shift: byte offset of the farthest block
;     expn       - number of bytes still to process
;     offset     - byte offset of the block currently being merged
;     m0         - running minimum
;     m1         - third operand handed to PMINUB (presumably scratch for
;                  the MMX emulation - confirm in x86util.asm)
;
;   NOTE(review): mova is used for both load and store, so exp presumably
;   has to be mmsize-aligned - confirm against the caller.
%macro AC3_EXPONENT_MIN 1
cglobal ac3_exponent_min_%1, 3,4,2, exp, reuse_blks, expn, offset
    ; reuse_blks is a C int: shift the 32-bit register so that bits 63:32,
    ; which the caller may leave undefined on x86-64, are cleared before the
    ; value is used as a 64-bit offset below.
    shl  reuse_blksd, 8             ; reuse_blks *= 256 (distance between blocks)
    jz .end                         ; nothing to merge when reuse_blks == 0
    LOOP_ALIGN
.nextexp:
    mov      offsetq, reuse_blksq
    mova          m0, [expq+offsetq]    ; start from the farthest block
    sub      offsetq, 256
    LOOP_ALIGN
.nextblk:
    PMINUB        m0, [expq+offsetq], m1
    sub      offsetq, 256
    jae .nextblk                    ; no borrow: offset 0 has not been merged yet
    mova      [expq], m0
    add         expq, mmsize
    ; expn is a C int as well, so count down on the 32-bit register
    sub        expnd, mmsize
    jg .nextexp                     ; signed: stop once the count is <= 0
.end:
    REP_RET
%endmacro
52

    
53
; Instantiate ac3_exponent_min for each instruction set.
; AC3_EXPONENT_MIN reads PMINUB and LOOP_ALIGN at expansion time, so every
; %define below has to appear before the instantiation it applies to.

; mmx: PMINUB comes from the PMINUB_MMX macro, loops are left unaligned
%define PMINUB PMINUB_MMX
%define LOOP_ALIGN
INIT_MMX
AC3_EXPONENT_MIN mmx

%ifdef HAVE_MMX2
; mmxext: PMINUB_MMXEXT, loop heads aligned to 16 bytes
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
AC3_EXPONENT_MIN mmxext
%endif

%ifdef HAVE_SSE
; sse2: same PMINUB/LOOP_ALIGN as mmxext.  They are defined again here so
; that this instance does not silently fall back to the mmx settings when
; HAVE_MMX2 is not defined.
; NOTE(review): the sse2 build is guarded by HAVE_SSE, not an SSE2 symbol -
; confirm this matches the build configuration.
%define PMINUB PMINUB_MMXEXT
%define LOOP_ALIGN ALIGN 16
INIT_XMM
AC3_EXPONENT_MIN sse2
%endif

%undef PMINUB
%undef LOOP_ALIGN
68

    
69
;-----------------------------------------------------------------------------
70
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
71
;
72
; This function uses 2 different methods to calculate a valid result.
73
; 1) logical 'or' of abs of each element
74
;        This is used for ssse3 because of the pabsw instruction.
75
;        It is also used for mmx because of the lack of min/max instructions.
76
; 2) calculate min/max for the array, then or(abs(min),abs(max))
77
;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
78
;-----------------------------------------------------------------------------
79

    
80
; AC3_MAX_MSB_ABS_INT16 <suffix>, <method>
;   Emits the function ac3_max_msb_abs_int16_<suffix>.
;   <method> is either
;     min_max - keep a running signed min (m2) and max (m3) of the input
;               words and combine abs(min) | abs(max) after the loop, or
;     or_abs  - OR the absolute value of every input word into m2.
;   ABS2 and PSHUFLW must be %defined before the macro is expanded.
;
;   Each iteration reads two vectors (2*mmsize bytes = mmsize int16 values),
;   which is why len is decremented by mmsize.  The loop body always runs at
;   least once and repeats while the remaining len is still above zero.
;
;   Returns in eax the low 16 bits of the OR of all words of the accumulator.
;
;   NOTE(review): mova / pabsw with memory operands presumably require src
;   to be mmsize-aligned - confirm against the caller.
%macro AC3_MAX_MSB_ABS_INT16 2
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
    pxor        m2, m2              ; accumulator (min for min_max, OR for or_abs)
    pxor        m3, m3              ; running max for min_max
.loop:
%ifidn %2, min_max
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    pminsw      m2, m0
    pminsw      m2, m1
    pmaxsw      m3, m0
    pmaxsw      m3, m1
%else ; or_abs
%ifidn %1, mmx
    mova        m0, [srcq]
    mova        m1, [srcq+mmsize]
    ABS2        m0, m1, m3, m4      ; m3/m4 are the extra operands ABS2 takes
%else ; ssse3
    ; using memory args is faster for ssse3
    pabsw       m0, [srcq]
    pabsw       m1, [srcq+mmsize]
%endif
    por         m2, m0
    por         m2, m1
%endif
    add       srcq, mmsize*2
    sub       lend, mmsize
    ja .loop
%ifidn %2, min_max
    ; fold the two extremes into one value: abs(min) | abs(max)
    ABS2        m2, m3, m0, m1
    por         m2, m3
%endif
    ; horizontal OR: reduce every word of m2 into its lowest word
%ifidn mmsize, 16
    mova        m0, m2
    punpckhqdq  m0, m0              ; high qword -> low qword
    por         m2, m0
%endif
%ifidn %1, mmx
    ; pshufw is an SSE/MMXEXT instruction and is not available on CPUs that
    ; only implement baseline MMX, so the mmx instance reduces with shifts.
    ; Only the lowest word is consumed below, and it ends up identical to
    ; the result of the shuffle-based reduction.
    mova        m0, m2
    psrlq       m0, 32              ; words 2,3 -> words 0,1
    por         m2, m0
    mova        m0, m2
    psrlq       m0, 16              ; word 1 -> word 0
    por         m2, m0
%else
    PSHUFLW     m0, m2, 0xe         ; words 2,3 -> words 0,1
    por         m2, m0
    PSHUFLW     m0, m2, 0x1         ; word 1 -> word 0
    por         m2, m0
%endif
    movd       eax, m2
    and        eax, 0xFFFF          ; keep only the reduced low word
    RET
%endmacro
125

    
126
; Instantiate ac3_max_msb_abs_int16 for each instruction set.
; AC3_MAX_MSB_ABS_INT16 reads ABS2 and PSHUFLW at expansion time, so every
; %define below stays in effect for all instantiations that follow it until
; it is redefined.

; 64-bit MMX registers
INIT_MMX
%define ABS2 ABS2_MMX
%define PSHUFLW pshufw
AC3_MAX_MSB_ABS_INT16 mmx, or_abs
%define ABS2 ABS2_MMX2
AC3_MAX_MSB_ABS_INT16 mmxext, min_max

; 128-bit XMM registers
INIT_XMM
%define PSHUFLW pshuflw
; sse2 still uses ABS2_MMX2 from above
AC3_MAX_MSB_ABS_INT16 sse2, min_max
%define ABS2 ABS2_SSSE3
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs