/*
 * Copyright (c) 2008 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
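
/*
 * Background note (added for clarity, not part of the original source):
 * both templates below implement H.264 chroma motion compensation, i.e.
 * the standard bilinear interpolation
 *
 *     dst[i] = ((8-x)*(8-y)*A + x*(8-y)*B + (8-x)*y*C + x*y*D + 32) >> 6
 *
 * where A,B are horizontally adjacent source pixels and C,D are the pair
 * one row below. A plain-C sketch of the 8-wide "put" case, using the
 * usual rounding constant of 32, might look like:
 *
 *     static void put_h264_chroma_mc8_c(uint8_t *dst, uint8_t *src,
 *                                       int stride, int h, int x, int y)
 *     {
 *         int i;
 *         for (; h > 0; h--) {
 *             for (i = 0; i < 8; i++)
 *                 dst[i] = ((8-x)*(8-y)*src[i]        + x*(8-y)*src[i+1] +
 *                           (8-x)* y   *src[i+stride] + x* y   *src[i+stride+1]
 *                           + 32) >> 6;
 *             dst += stride;
 *             src += stride;
 *         }
 *     }
 *
 * The SIMD code below folds each coefficient pair into adjacent bytes of
 * an XMM/MMX register so that pmaddubsw computes both products at once.
 */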

/**
 * SSSE3 optimized versions of (put|avg)_h264_chroma_mc8 (and the 4-wide
 * variant below).
 * H264_CHROMA_MC8_TMPL must be defined to the desired function name,
 * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function,
 * and AVG_OP must be defined to empty for put and to the identity for avg.
 */
static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
{
    if(y==0 && x==0) {
        /* no filter needed */
        H264_CHROMA_MC8_MV0(dst, src, stride, h);
        return;
    }

    assert(x<8 && y<8 && x>=0 && y>=0);

    if(y==0 || x==0)
    {
        /* 1 dimensional filter only */
        __asm__ volatile(
            "movd %0, %%xmm7 \n\t"
            "movq %1, %%xmm6 \n\t"
            "pshuflw $0, %%xmm7, %%xmm7 \n\t"
            "movlhps %%xmm6, %%xmm6 \n\t"
            "movlhps %%xmm7, %%xmm7 \n\t"
            :: "r"(255*(x+y)+8), "m"(*(rnd?&ff_pw_4.a:&ff_pw_3))
        );
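
        /* Note (illustrative, not in the original): exactly one of x,y is
         * non-zero here; call it A. 255*A+8 == (A<<8) + (8-A), so xmm7 ends
         * up holding the byte pair (8-A, A) in all eight lanes, and the
         * pmaddubsw below computes (8-A)*s0 + A*s1 per 16-bit lane. xmm6 is
         * the rounding term for the >>3: ff_pw_4 when rnd is set, ff_pw_3
         * otherwise. */
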
        if(x) {
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq 1(%1), %%xmm1 \n\t"
                "movq (%1,%3), %%xmm2 \n\t"
                "movq 1(%1,%3), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
         AVG_OP("movq (%0), %%xmm4 \n\t")
         AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
         AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        } else {
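            /* y-only filter (x==0), noted here for clarity: same arithmetic
             * as the x branch, but the two taps are vertical neighbours,
             * hence rows at (%1), (%1,%3) and (%1,%3,2) instead of
             * offset-by-one columns. */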
            __asm__ volatile(
                "1: \n\t"
                "movq (%1), %%xmm0 \n\t"
                "movq (%1,%3), %%xmm1 \n\t"
                "movdqa %%xmm1, %%xmm2 \n\t"
                "movq (%1,%3,2), %%xmm3 \n\t"
                "punpcklbw %%xmm1, %%xmm0 \n\t"
                "punpcklbw %%xmm3, %%xmm2 \n\t"
                "pmaddubsw %%xmm7, %%xmm0 \n\t"
                "pmaddubsw %%xmm7, %%xmm2 \n\t"
         AVG_OP("movq (%0), %%xmm4 \n\t")
         AVG_OP("movhps (%0,%3), %%xmm4 \n\t")
                "paddw %%xmm6, %%xmm0 \n\t"
                "paddw %%xmm6, %%xmm2 \n\t"
                "psrlw $3, %%xmm0 \n\t"
                "psrlw $3, %%xmm2 \n\t"
                "packuswb %%xmm2, %%xmm0 \n\t"
         AVG_OP("pavgb %%xmm4, %%xmm0 \n\t")
                "movq %%xmm0, (%0) \n\t"
                "movhps %%xmm0, (%0,%3) \n\t"
                "sub $2, %2 \n\t"
                "lea (%1,%3,2), %1 \n\t"
                "lea (%0,%3,2), %0 \n\t"
                "jg 1b \n\t"
                :"+r"(dst), "+r"(src), "+r"(h)
                :"r"((x86_reg)stride)
            );
        }
        return;
    }

    /* general case, bilinear */
    __asm__ volatile(
        "movd %0, %%xmm7 \n\t"
        "movd %1, %%xmm6 \n\t"
        "movdqa %2, %%xmm5 \n\t"
        "pshuflw $0, %%xmm7, %%xmm7 \n\t"
        "pshuflw $0, %%xmm6, %%xmm6 \n\t"
        "movlhps %%xmm7, %%xmm7 \n\t"
        "movlhps %%xmm6, %%xmm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
    );
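
    /* Note (illustrative, not in the original): x*255+8 == (x<<8) + (8-x),
     * so multiplying by (8-y) and by y gives, with no carry between bytes
     * (all products are at most 64):
     *     xmm7 = byte pairs ( (8-x)*(8-y), x*(8-y) )   -- current-row taps
     *     xmm6 = byte pairs ( (8-x)*y,     x*y     )   -- next-row taps
     * Two pmaddubsw plus a paddw then produce the full bilinear sum per
     * 16-bit lane. xmm5 is the rounding term for the >>6: 32 when rnd is
     * set, else 28. */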

    __asm__ volatile(
        "movq (%1), %%xmm0 \n\t"
        "movq 1(%1), %%xmm1 \n\t"
        "punpcklbw %%xmm1, %%xmm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movq (%1), %%xmm1 \n\t"
        "movq 1(%1), %%xmm2 \n\t"
        "movq (%1,%3), %%xmm3 \n\t"
        "movq 1(%1,%3), %%xmm4 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "punpcklbw %%xmm2, %%xmm1 \n\t"
        "punpcklbw %%xmm4, %%xmm3 \n\t"
        "movdqa %%xmm1, %%xmm2 \n\t"
        "movdqa %%xmm3, %%xmm4 \n\t"
        "pmaddubsw %%xmm7, %%xmm0 \n\t"
        "pmaddubsw %%xmm6, %%xmm1 \n\t"
        "pmaddubsw %%xmm7, %%xmm2 \n\t"
        "pmaddubsw %%xmm6, %%xmm3 \n\t"
        "paddw %%xmm5, %%xmm0 \n\t"
        "paddw %%xmm5, %%xmm2 \n\t"
        "paddw %%xmm0, %%xmm1 \n\t"
        "paddw %%xmm2, %%xmm3 \n\t"
        "movdqa %%xmm4, %%xmm0 \n\t"
        "psrlw $6, %%xmm1 \n\t"
        "psrlw $6, %%xmm3 \n\t"
 AVG_OP("movq (%0), %%xmm2 \n\t")
 AVG_OP("movhps (%0,%3), %%xmm2 \n\t")
        "packuswb %%xmm3, %%xmm1 \n\t"
 AVG_OP("pavgb %%xmm2, %%xmm1 \n\t")
        "movq %%xmm1, (%0)\n\t"
        "movhps %%xmm1, (%0,%3)\n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}
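
/* 4-pixel-wide variant of the same bilinear filter, kept in MMX registers
 * (the pmaddubsw used on mm registers still requires SSSE3). Unlike the
 * 8-wide template it takes no rnd argument and always rounds with
 * ff_pw_32. (Comment added for clarity; not in the original source.) */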
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
{
    __asm__ volatile(
        "movd %0, %%mm7 \n\t"
        "movd %1, %%mm6 \n\t"
        "movq %2, %%mm5 \n\t"
        "pshufw $0, %%mm7, %%mm7 \n\t"
        "pshufw $0, %%mm6, %%mm6 \n\t"
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
    );

    __asm__ volatile(
        "movd (%1), %%mm0 \n\t"
        "punpcklbw 1(%1), %%mm0 \n\t"
        "add %3, %1 \n\t"
        "1: \n\t"
        "movd (%1), %%mm1 \n\t"
        "movd (%1,%3), %%mm3 \n\t"
        "punpcklbw 1(%1), %%mm1 \n\t"
        "punpcklbw 1(%1,%3), %%mm3 \n\t"
        "lea (%1,%3,2), %1 \n\t"
        "movq %%mm1, %%mm2 \n\t"
        "movq %%mm3, %%mm4 \n\t"
        "pmaddubsw %%mm7, %%mm0 \n\t"
        "pmaddubsw %%mm6, %%mm1 \n\t"
        "pmaddubsw %%mm7, %%mm2 \n\t"
        "pmaddubsw %%mm6, %%mm3 \n\t"
        "paddw %%mm5, %%mm0 \n\t"
        "paddw %%mm5, %%mm2 \n\t"
        "paddw %%mm0, %%mm1 \n\t"
        "paddw %%mm2, %%mm3 \n\t"
        "movq %%mm4, %%mm0 \n\t"
        "psrlw $6, %%mm1 \n\t"
        "psrlw $6, %%mm3 \n\t"
        "packuswb %%mm1, %%mm1 \n\t"
        "packuswb %%mm3, %%mm3 \n\t"
 AVG_OP("pavgb (%0), %%mm1 \n\t")
 AVG_OP("pavgb (%0,%3), %%mm3 \n\t")
        "movd %%mm1, (%0)\n\t"
        "movd %%mm3, (%0,%3)\n\t"
        "sub $2, %2 \n\t"
        "lea (%0,%3,2), %0 \n\t"
        "jg 1b \n\t"
        :"+r"(dst), "+r"(src), "+r"(h)
        :"r"((x86_reg)stride)
    );
}
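
/* Usage sketch (added for clarity, not part of the original file): the
 * including translation unit is expected to define the template macros
 * before each inclusion, along the lines of:
 *
 *     #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
 *     #define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3
 *     #define H264_CHROMA_MC8_MV0  put_pixels8_mmx
 *     #define AVG_OP(X)
 *     #include "dsputil_h264_template_ssse3.c"
 *     #undef H264_CHROMA_MC8_TMPL
 *     #undef H264_CHROMA_MC4_TMPL
 *     #undef H264_CHROMA_MC8_MV0
 *     #undef AVG_OP
 *
 *     #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3
 *     #define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3
 *     #define H264_CHROMA_MC8_MV0  avg_pixels8_mmx2
 *     #define AVG_OP(X) X
 *     #include "dsputil_h264_template_ssse3.c"
 *
 * The function names above are illustrative; the actual definitions live
 * in the including file (historically dsputil_mmx.c).
 */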