Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / vp8dsp-init.c @ 98c6053c

History | View | Annotate | Download (19 KB)

1 0178d14f Jason Garrett-Glaser
/*
 * VP8 DSP functions x86-optimized
 * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23 c6c98d08 Stefano Sabatini
#include "libavutil/cpu.h"
24 0178d14f Jason Garrett-Glaser
#include "libavutil/x86_cpu.h"
25
#include "libavcodec/vp8dsp.h"
26
27 a173aa89 Jason Garrett-Glaser
#if HAVE_YASM
28
29 0178d14f Jason Garrett-Glaser
/*
 * MC functions
 */
32
/*
 * Externally implemented "epel" MC primitives (declarations only; no
 * definition is visible in this file).
 *
 * Naming: ff_put_vp8_epel<W>_<dir><taps>_<opt>
 *   <W>     block width handled by one call (4 or 8)
 *   <dir>   h = horizontal pass, v = vertical pass
 *   <taps>  4 or 6, the tap count also used by the HVTAP() wrappers below
 *   <opt>   instruction-set flavour (mmxext, sse2, ssse3)
 *
 * Every function shares one signature:
 *   (dst, dststride, src, srcstride, height, mx, my)
 */

/* MMXEXT: width 4 only; width 8/16 are synthesised by TAP_W8/TAP_W16. */
extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);

/* SSE2: width 8 only. */
extern void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);

/* SSSE3: widths 4 and 8. */
extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
82
83 a173aa89 Jason Garrett-Glaser
/*
 * Externally implemented bilinear MC primitives (declarations only).
 *
 * Naming: ff_put_vp8_bilinear<W>_<dir>_<opt>, with <W> the width handled by
 * one call (4 or 8), <dir> h or v, and <opt> the instruction-set flavour.
 * The signature matches the epel primitives:
 *   (dst, dststride, src, srcstride, height, mx, my)
 */

/* Horizontal pass. */
extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);

/* Vertical pass. */
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
108
109 b06855f1 Jason Garrett-Glaser
110 0fecad09 Jason Garrett-Glaser
/*
 * Externally implemented "pixels" primitives (declarations only).
 * ff_vp8dsp_init_x86() installs them in the [0][0] slot of both the epel and
 * the bilinear tables; the comment in that function describes the luma
 * [0][0] case as "a copy".
 */
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
119
120 a173aa89 Jason Garrett-Glaser
/*
 * TAP_W16(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>()
 * as two calls to the 8-wide function of the same family: one on the
 * left half, one with dst and src both advanced by 8 bytes. Strides, height,
 * mx and my are forwarded unchanged.
 *
 * The 8-wide function must already be declared or defined at the point of
 * instantiation.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst + 8, dststride, src + 8, srcstride, height, mx, my); \
}
130 a173aa89 Jason Garrett-Glaser
/*
 * TAP_W8(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>()
 * as two calls to the 4-wide function of the same family: one on the
 * left half, one with dst and src both advanced by 4 bytes. Strides, height,
 * mx and my are forwarded unchanged.
 *
 * The 4-wide function must already be declared or defined at the point of
 * instantiation.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
}
140
141 a173aa89 Jason Garrett-Glaser
TAP_W8 (mmxext, epel, h4)
142
TAP_W8 (mmxext, epel, h6)
143
TAP_W16(mmxext, epel, h6)
144
TAP_W8 (mmxext, epel, v4)
145
TAP_W8 (mmxext, epel, v6)
146
TAP_W16(mmxext, epel, v6)
147
TAP_W8 (mmxext, bilinear, h)
148
TAP_W16(mmxext, bilinear, h)
149
TAP_W8 (mmxext, bilinear, v)
150
TAP_W16(mmxext, bilinear, v)
151
152
TAP_W16(sse2,   epel, h6)
153
TAP_W16(sse2,   epel, v6)
154
TAP_W16(sse2,   bilinear, h)
155
TAP_W16(sse2,   bilinear, v)
156
157
TAP_W16(ssse3,  epel, h6)
158
TAP_W16(ssse3,  epel, v6)
159
TAP_W16(ssse3,  bilinear, h)
160
TAP_W16(ssse3,  bilinear, v)
161 0178d14f Jason Garrett-Glaser
162
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function
 *     ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>()
 * from the existing single-direction functions
 *     ff_put_vp8_epel<SIZE>_h<TAPNUMX>_<OPT>()  and
 *     ff_put_vp8_epel<SIZE>_v<TAPNUMY>_<OPT>().
 *
 * How it works:
 *  - tmp is an on-stack scratch buffer of SIZE * (MAXHEIGHT + TAPNUMY - 1)
 *    bytes, declared through DECLARE_ALIGNED with alignment ALIGN.
 *  - src is moved up by (TAPNUMY / 2 - 1) rows, and the horizontal pass
 *    writes height + TAPNUMY - 1 rows into tmp using SIZE as the row stride.
 *  - tmpptr skips the same (TAPNUMY / 2 - 1) rows inside tmp, so it points at
 *    the row that corresponds to the caller's original src row; the vertical
 *    pass reads from there and writes `height` rows to dst.
 *
 * Nothing here checks height against MAXHEIGHT; a larger height would
 * overrun tmp, so callers must respect that bound.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
    src -= srcstride * (TAPNUMY / 2 - 1); \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        tmp, SIZE,      src,    srcstride, height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, tmpptr, SIZE,      height,               mx, my); \
}
175
176
/*
 * Two-pass (h+v) epel instantiations.
 *
 * HVTAPMMX(x, y): mmxext versions with x horizontal / y vertical taps for
 * width 4 (max height 8) and width 8 (max height 16), 8-byte aligned scratch.
 */
#define HVTAPMMX(x, y) \
HVTAP(mmxext, 8, x, y,  4,  8) \
HVTAP(mmxext, 8, x, y,  8, 16)

HVTAPMMX(4, 4)
HVTAPMMX(4, 6)
HVTAPMMX(6, 4)
HVTAPMMX(6, 6)
/* Width 16 exists only as 6x6 taps. */
HVTAP(mmxext, 8, 6, 6, 16, 16)

/*
 * HVTAPSSE2(x, y, w): sse2 and ssse3 versions for width w, max height 16,
 * 16-byte aligned scratch.
 */
#define HVTAPSSE2(x, y, w) \
HVTAP(sse2,  16, x, y, w, 16) \
HVTAP(ssse3, 16, x, y, w, 16)

HVTAPSSE2(4, 4, 8)
HVTAPSSE2(4, 6, 8)
HVTAPSSE2(6, 4, 8)
HVTAPSSE2(6, 6, 8)
HVTAPSSE2(6, 6, 16)

/* Width 4 is available for ssse3 but not for sse2. */
HVTAP(ssse3, 16, 4, 4, 4, 8)
HVTAP(ssse3, 16, 4, 6, 4, 8)
HVTAP(ssse3, 16, 6, 4, 4, 8)
HVTAP(ssse3, 16, 6, 6, 4, 8)
200
201 a173aa89 Jason Garrett-Glaser
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function
 *     ff_put_vp8_bilinear<SIZE>_hv_<OPT>()
 * from ff_put_vp8_bilinear<SIZE>_h_<OPT>() and
 * ff_put_vp8_bilinear<SIZE>_v_<OPT>().
 *
 * The horizontal pass writes height + 1 rows into the on-stack scratch
 * buffer tmp (row stride SIZE); the vertical pass then reads tmp from its
 * first row and writes `height` rows to dst. tmp is sized for
 * MAXHEIGHT + 2 rows and declared through DECLARE_ALIGNED with alignment
 * ALIGN.
 *
 * Nothing here checks height against MAXHEIGHT; callers must respect it.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        tmp, SIZE,      src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, tmp, SIZE,      height,     mx, my); \
}
212
213
/*
 * Two-pass bilinear instantiations: (flavour, scratch alignment, width,
 * max height). sse2 has no width-4 variant.
 */
HVBILIN(mmxext, 8,  4,  8)
HVBILIN(mmxext, 8,  8, 16)
HVBILIN(mmxext, 8, 16, 16)
HVBILIN(sse2,   8,  8, 16)
HVBILIN(sse2,   8, 16, 16)
HVBILIN(ssse3,  8,  4,  8)
HVBILIN(ssse3,  8,  8, 16)
HVBILIN(ssse3,  8, 16, 16)
221 0178d14f Jason Garrett-Glaser
222
/*
 * Externally implemented inverse-transform primitives (declarations only).
 * ff_vp8dsp_init_x86() assigns them to the VP8DSPContext members
 * vp8_idct_dc_add, vp8_idct_dc_add4y, vp8_idct_dc_add4uv, vp8_luma_dc_wht
 * and vp8_idct_add.
 */
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
231 f2a30bd8 Ronald S. Bultje
232 7dd224a4 Jason Garrett-Glaser
/*
 * DECLARE_LOOP_FILTER(NAME)
 *
 * Declares the ten externally implemented loop-filter entry points for one
 * instruction-set flavour NAME:
 *   - v/h "simple" filters:              (dst, stride, flim)
 *   - v/h 16y "inner" and "mbedge":      (dst, stride, e, i, hvt)
 *   - v/h 8uv "inner" and "mbedge":      (dstU, dstV, s, e, i, hvt)
 *
 * The expansion already ends in ';', so uses take no trailing semicolon.
 */
#define DECLARE_LOOP_FILTER(NAME)\
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);

DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
257 7dd224a4 Jason Garrett-Glaser
258 a173aa89 Jason Garrett-Glaser
#endif
259
260
/*
 * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
 *
 * Fills the six-tap slots of c->put_vp8_epel_pixels_tab[IDX]:
 *   [0][2] <- ..._h6_,  [2][0] <- ..._v6_,  [2][2] <- ..._h6v6_
 * using the ff_put_vp8_epel<SIZE>_*_<OPT> functions. Expects a variable `c`
 * in scope at the point of use.
 *
 * Wrapped in do { } while (0) so that "VP8_LUMA_MC_FUNC(...);" is a single
 * statement and stays correct under an unbraced if/else.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/*
 * VP8_MC_FUNC(IDX, SIZE, OPT)
 *
 * Fills every epel slot of c->put_vp8_epel_pixels_tab[IDX] except [0][0]:
 * the five slots that involve a four-tap pass are set here, then
 * VP8_LUMA_MC_FUNC adds the three six-tap-only slots. In the table the
 * second index follows the vertical tap choice and the third the horizontal
 * one ([1][2] receives h6v4, [2][1] receives h4v6).
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
272
273
/*
 * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT)
 *
 * Fills every slot of c->put_vp8_bilinear_pixels_tab[IDX] except [0][0]
 * with the ff_put_vp8_bilinear<SIZE>_{h,v,hv}_<OPT> functions:
 *   [0][1], [0][2]                  <- _h_
 *   [1][0], [2][0]                  <- _v_
 *   [1][1], [1][2], [2][1], [2][2]  <- _hv_
 * Expects a variable `c` in scope at the point of use.
 *
 * Wrapped in do { } while (0) so that "VP8_BILINEAR_MC_FUNC(...);" is a
 * single statement and stays correct under an unbraced if/else.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
282
283 0178d14f Jason Garrett-Glaser
284
/**
 * Install the x86-optimized VP8 DSP functions into a VP8DSPContext.
 *
 * The CPU flags reported by av_get_cpu_flags() are tested from the oldest
 * instruction set to the newest. Each matching block overwrites the function
 * pointers set by earlier blocks, so the last matching flavour wins for any
 * given slot. Slots with no optimized version are left untouched.
 *
 * Without HAVE_YASM the function does nothing. The flag query sits inside
 * the conditional so that such builds get no unused variable and no
 * needless CPU detection call.
 *
 * @param c  DSP context whose function pointers are (partially) overwritten
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
        /* The [0][0] slot of both MC tables shares the same function. */
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (mm_flags & AV_CPU_FLAG_MMX2) {
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
    }

    if (mm_flags & AV_CPU_FLAG_SSE) {
        c->vp8_idct_add                         = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    /* Installed for both SSE2 and SSE2SLOW: MC functions and the vertical
     * loop filters. */
    if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Installed only for SSE2 proper (not SSE2SLOW): the DC-add helper and
     * the horizontal loop filters.
     * NOTE(review): presumably these are not a win on SSE2SLOW CPUs --
     * confirm against benchmarks or commit history. */
    if (mm_flags & AV_CPU_FLAG_SSE2) {
        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (mm_flags & AV_CPU_FLAG_SSSE3) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    /* SSE4 replaces only the DC add and the horizontal simple/mbedge
     * filters; everything else keeps the pointers installed above. */
    if (mm_flags & AV_CPU_FLAG_SSE4) {
        c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif
}