Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / vp8dsp-init.c @ 4eca52ed

History | View | Annotate | Download (18.9 KB)

1 0178d14f Jason Garrett-Glaser
/*
 * VP8 DSP functions x86-optimized
 * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
23
#include "libavutil/x86_cpu.h"
24
#include "libavcodec/vp8dsp.h"
25
26 a173aa89 Jason Garrett-Glaser
#if HAVE_YASM
27
28 0178d14f Jason Garrett-Glaser
/*
 * MC functions
 */
31
/*
 * Prototypes of the externally implemented subpel ("epel") MC primitives.
 * Naming: ff_put_vp8_epel<width>_<h|v><taps>_<instruction set>.
 * All of them share one signature.
 */

/* 4 pixels wide, MMXEXT */
extern void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);

/* 8 pixels wide, SSE2 */
extern void ff_put_vp8_epel8_h4_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_sse2  (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);

/* 4 pixels wide, SSSE3 */
extern void ff_put_vp8_epel4_h4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_h6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel4_v6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);

/* 8 pixels wide, SSSE3 */
extern void ff_put_vp8_epel8_h4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_h6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v4_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, int dststride,
                                       uint8_t *src, int srcstride,
                                       int height, int mx, int my);
81
82 a173aa89 Jason Garrett-Glaser
/*
 * Prototypes of the externally implemented bilinear MC primitives.
 * Naming: ff_put_vp8_bilinear<width>_<h|v>_<instruction set>.
 */

/* horizontal */
extern void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_sse2  (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_h_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);

/* vertical */
extern void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_sse2  (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear4_v_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
extern void ff_put_vp8_bilinear8_v_ssse3 (uint8_t *dst, int dststride,
                                          uint8_t *src, int srcstride,
                                          int height, int mx, int my);
107
108 b06855f1 Jason Garrett-Glaser
109 0fecad09 Jason Garrett-Glaser
/*
 * Prototypes of the externally implemented full-pel primitives
 * (installed below at table position [..][0][0]).
 */
extern void ff_put_vp8_pixels8_mmx (uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
extern void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride,
                                    uint8_t *src, int srcstride,
                                    int height, int mx, int my);
118
119 a173aa89 Jason Garrett-Glaser
/*
 * TAP_W16(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>()
 * as two calls to the 8-pixel-wide function of the same filter, tap type
 * and instruction set: one on columns 0..7 and one on columns 8..15
 * (dst + 8 / src + 8). All other arguments are forwarded unchanged.
 * The 8-wide function must already be declared at the point of use.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst + 8, dststride, src + 8, srcstride, height, mx, my); \
}
129 a173aa89 Jason Garrett-Glaser
/*
 * TAP_W8(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function
 *     ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>()
 * as two calls to the 4-pixel-wide function of the same filter, tap type
 * and instruction set: one on columns 0..3 and one on columns 4..7
 * (dst + 4 / src + 4). All other arguments are forwarded unchanged.
 * The 4-wide function must already be declared at the point of use.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst,     dststride, src,     srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
}
139
140 a173aa89 Jason Garrett-Glaser
TAP_W8 (mmxext, epel, h4)
141
TAP_W8 (mmxext, epel, h6)
142
TAP_W16(mmxext, epel, h6)
143
TAP_W8 (mmxext, epel, v4)
144
TAP_W8 (mmxext, epel, v6)
145
TAP_W16(mmxext, epel, v6)
146
TAP_W8 (mmxext, bilinear, h)
147
TAP_W16(mmxext, bilinear, h)
148
TAP_W8 (mmxext, bilinear, v)
149
TAP_W16(mmxext, bilinear, v)
150
151
TAP_W16(sse2,   epel, h6)
152
TAP_W16(sse2,   epel, v6)
153
TAP_W16(sse2,   bilinear, h)
154
TAP_W16(sse2,   bilinear, v)
155
156
TAP_W16(ssse3,  epel, h6)
157
TAP_W16(ssse3,  epel, v6)
158
TAP_W16(ssse3,  bilinear, h)
159
TAP_W16(ssse3,  bilinear, v)
160 0178d14f Jason Garrett-Glaser
161
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT)
 *
 * Defines the static 2-D subpel function
 *     ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>()
 * as a horizontal pass followed by a vertical pass:
 *
 *  1. src is moved up by (TAPNUMY / 2 - 1) rows and the horizontal
 *     TAPNUMX-tap filter is run over height + TAPNUMY - 1 rows, writing
 *     into the on-stack buffer tmp with a stride of SIZE.
 *  2. The vertical TAPNUMY-tap filter reads tmp starting (TAPNUMY / 2 - 1)
 *     rows in (tmpptr) and writes the final `height` rows to dst.
 *
 * tmp is ALIGN-aligned and holds SIZE * (MAXHEIGHT + TAPNUMY - 1) bytes, so
 * callers must not pass a height larger than MAXHEIGHT.
 * NOTE(review): this assumes the horizontal primitive writes exactly SIZE
 * bytes per row -- confirm against the asm.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    uint8_t *tmpptr = tmp + SIZE * (TAPNUMY / 2 - 1); \
    src -= srcstride * (TAPNUMY / 2 - 1); \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        tmp, SIZE,      src,    srcstride, height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, tmpptr, SIZE,      height,               mx, my); \
}
174
175
/*
 * HVTAPMMX(x, y): MMXEXT 2-D filters with x horizontal and y vertical taps,
 * for 4-wide (max height 8) and 8-wide (max height 16) blocks, using an
 * 8-byte aligned temporary.
 */
#define HVTAPMMX(x, y) \
HVTAP(mmxext, 8, x, y,  4,  8) \
HVTAP(mmxext, 8, x, y,  8, 16)

HVTAPMMX(4, 4)
HVTAPMMX(4, 6)
HVTAPMMX(6, 4)
HVTAPMMX(6, 6)
/* 16-wide is only generated for the 6x6 tap combination */
HVTAP(mmxext, 8, 6, 6, 16, 16)
184
185
/*
 * HVTAPSSE2(x, y, w): 2-D filters with x horizontal and y vertical taps for
 * w-wide blocks (max height 16), generated for both sse2 and ssse3 with a
 * 16-byte aligned temporary.
 */
#define HVTAPSSE2(x, y, w) \
HVTAP(sse2,  16, x, y, w, 16) \
HVTAP(ssse3, 16, x, y, w, 16)

HVTAPSSE2(4, 4, 8)
HVTAPSSE2(4, 6, 8)
HVTAPSSE2(6, 4, 8)
HVTAPSSE2(6, 6, 8)
HVTAPSSE2(6, 6, 16)

/* 4-wide 2-D filters (max height 8) are generated for ssse3 only */
HVTAP(ssse3, 16, 4, 4, 4, 8)
HVTAP(ssse3, 16, 4, 6, 4, 8)
HVTAP(ssse3, 16, 6, 4, 4, 8)
HVTAP(ssse3, 16, 6, 6, 4, 8)
199
200 a173aa89 Jason Garrett-Glaser
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT)
 *
 * Defines the static 2-D bilinear function
 *     ff_put_vp8_bilinear<SIZE>_hv_<OPT>()
 * as a horizontal pass over height + 1 rows into the on-stack buffer tmp
 * (stride SIZE), followed by a vertical pass from tmp into dst producing
 * `height` rows.
 *
 * tmp is ALIGN-aligned and sized for MAXHEIGHT + 2 rows of SIZE bytes, so
 * callers must not pass a height larger than MAXHEIGHT.
 * NOTE(review): only height + 1 rows are requested from the horizontal
 * pass; the extra row is presumably slack for the asm -- confirm before
 * shrinking the buffer.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, \
    int srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + 2)]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        tmp, SIZE,      src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, tmp, SIZE,      height,     mx, my); \
}
211
212
/* 2-D bilinear functions: (instruction set, tmp alignment, width, max height) */
HVBILIN(mmxext, 8,  4,  8)
HVBILIN(mmxext, 8,  8, 16)
HVBILIN(mmxext, 8, 16, 16)
HVBILIN(sse2,   8,  8, 16)
HVBILIN(sse2,   8, 16, 16)
HVBILIN(ssse3,  8,  4,  8)
HVBILIN(ssse3,  8,  8, 16)
HVBILIN(ssse3,  8, 16, 16)
220 0178d14f Jason Garrett-Glaser
221
/* Prototypes of the externally implemented inverse-transform primitives. */
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
230 f2a30bd8 Ronald S. Bultje
231 7dd224a4 Jason Garrett-Glaser
/*
 * DECLARE_LOOP_FILTER(NAME)
 *
 * Declares the full set of externally implemented loop-filter entry points
 * for instruction set NAME: simple (v/h), inner (16y and 8uv, v/h) and
 * macroblock-edge (16y and 8uv, v/h). The 8uv variants take separate U and
 * V plane pointers.
 */
#define DECLARE_LOOP_FILTER(NAME)\
extern void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim);\
extern void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride,\
                                                    int e, int i, int hvt);\
extern void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);\
extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV,\
                                                    int s, int e, int i, int hvt);

DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
256 7dd224a4 Jason Garrett-Glaser
257 a173aa89 Jason Garrett-Glaser
#endif
258
259
/*
 * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the six-tap functions of width SIZE and instruction set OPT into
 * c->put_vp8_epel_pixels_tab[IDX]: h6 at [0][2], v6 at [2][0] and h6v6 at
 * [2][2]. Expects a variable `c` in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the three assignments behave as a single
 * statement (e.g. under an unbraced `if`); invoke it with a trailing `;`.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)
263
264
/*
 * VP8_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the complete set of subpel functions of width SIZE and
 * instruction set OPT into c->put_vp8_epel_pixels_tab[IDX]: the four-tap
 * and mixed four/six-tap entries set here, plus the six-tap entries set by
 * VP8_LUMA_MC_FUNC. The second table index selects the vertical filter and
 * the third the horizontal one (1 = four-tap, 2 = six-tap). Expects a
 * variable `c` in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the whole group behaves as a single
 * statement; invoke it with a trailing `;`.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
271
272
/*
 * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the bilinear functions of width SIZE and instruction set OPT
 * into c->put_vp8_bilinear_pixels_tab[IDX]. Positions 1 and 2 of each index
 * get the same function: _h for [0][1..2], _v for [1..2][0] and _hv for
 * the remaining four entries; [0][0] is not touched. Expects a variable `c`
 * in scope at the point of use.
 *
 * Wrapped in do { } while (0) so the whole group behaves as a single
 * statement; invoke it with a trailing `;`.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
281
282 0178d14f Jason Garrett-Glaser
283
/**
 * Install the x86 SIMD implementations into a VP8 DSP context.
 *
 * The CPU flags returned by mm_support() are tested from the oldest
 * instruction set to the newest; each block overwrites the function
 * pointers set by the blocks before it, so the last matching block wins.
 * Without HAVE_YASM the function leaves the context untouched.
 *
 * @param c context whose function pointers are filled in
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
{
#if HAVE_YASM
    /* Queried inside the guard so that non-YASM builds neither call
     * mm_support() nor carry an unused variable. */
    int mm_flags = mm_support();

    if (mm_flags & FF_MM_MMX) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;
        /* [..][0][0] is shared by the epel and bilinear tables */
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (mm_flags & FF_MM_MMX2) {
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
    }

    if (mm_flags & FF_MM_SSE) {
        c->vp8_idct_add                         = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    /* Installed for both SSE2 and SSE2SLOW: MC functions and the vertical
     * loop filters. NOTE(review): presumably these are still a win on CPUs
     * flagged SSE2SLOW -- confirm. */
    if (mm_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Installed only when FF_MM_SSE2 itself is set: the DC-only 4y IDCT and
     * the horizontal loop filters. */
    if (mm_flags & FF_MM_SSE2) {
        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (mm_flags & FF_MM_SSSE3) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    /* SSE4 replaces only the DC-only IDCT and three horizontal loop filters
     * (simple and both mbedge); the horizontal inner filters keep whatever
     * was installed above. */
    if (mm_flags & FF_MM_SSE4) {
        c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif
}