Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / vp8dsp-init.c @ 4eca52ed

History | View | Annotate | Download (18.9 KB)

1
/*
2
 * VP8 DSP functions x86-optimized
3
 * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
4
 * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
#include "libavutil/x86_cpu.h"
24
#include "libavcodec/vp8dsp.h"
25

    
26
#if HAVE_YASM
27

    
28
/*
29
 * MC functions
30
 */
31
/* 4-wide sub-pel filters, MMXEXT flavour: horizontal (h) and vertical (v),
 * 4-tap and 6-tap. Only declared here; the definitions live outside this
 * file (presumably in the yasm sources, given the HAVE_YASM guard). */
void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride,
                                uint8_t *src, int srcstride,
                                int height, int mx, int my);
43

    
44
/* 8-wide sub-pel filters, SSE2 flavour: horizontal (h) and vertical (v),
 * 4-tap and 6-tap. Declarations only; defined outside this file. */
void ff_put_vp8_epel8_h4_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_h6_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v4_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
void ff_put_vp8_epel8_v6_sse2(uint8_t *dst, int dststride,
                              uint8_t *src, int srcstride,
                              int height, int mx, int my);
56

    
57
/* SSSE3 sub-pel filters, 4-wide first and 8-wide second; each width comes as
 * horizontal/vertical and 4-tap/6-tap. Declarations only; defined outside
 * this file. */
void ff_put_vp8_epel4_h4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_h6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel4_v6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);

void ff_put_vp8_epel8_h4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_h6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v4_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
void ff_put_vp8_epel8_v6_ssse3(uint8_t *dst, int dststride,
                               uint8_t *src, int srcstride,
                               int height, int mx, int my);
81

    
82
/* Bilinear filters, horizontal pass: 4-wide (MMXEXT, SSSE3) and 8-wide
 * (SSE2, SSSE3). Declarations only; defined outside this file. */
void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride,
                                   uint8_t *src, int srcstride,
                                   int height, int mx, int my);
void ff_put_vp8_bilinear8_h_sse2(uint8_t *dst, int dststride,
                                 uint8_t *src, int srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_h_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_h_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);

/* Bilinear filters, vertical pass: same width/instruction-set combinations
 * as the horizontal ones above. */
void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride,
                                   uint8_t *src, int srcstride,
                                   int height, int mx, int my);
void ff_put_vp8_bilinear8_v_sse2(uint8_t *dst, int dststride,
                                 uint8_t *src, int srcstride,
                                 int height, int mx, int my);
void ff_put_vp8_bilinear4_v_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
void ff_put_vp8_bilinear8_v_ssse3(uint8_t *dst, int dststride,
                                  uint8_t *src, int srcstride,
                                  int height, int mx, int my);
107

    
108

    
109
/* Plain pixel "put" routines; ff_vp8dsp_init_x86() installs them in the
 * [0][0] slots of both the epel and the bilinear tables. They share the
 * signature of the filter functions. Declarations only. */
void ff_put_vp8_pixels8_mmx(uint8_t *dst, int dststride,
                            uint8_t *src, int srcstride,
                            int height, int mx, int my);
void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride,
                             uint8_t *src, int srcstride,
                             int height, int mx, int my);
void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride,
                             uint8_t *src, int srcstride,
                             int height, int mx, int my);
118

    
119
/*
 * TAP_W16(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines a static ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>() that calls the
 * 8-wide function of the same kind twice: once at dst/src and once 8 bytes
 * to the right of each. The 8-wide function must be visible beforehand.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        &dst[0], dststride, &src[0], srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        &dst[8], dststride, &src[8], srcstride, height, mx, my); \
}
129
/*
 * TAP_W8(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines a static ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>() that calls the
 * 4-wide function of the same kind twice: once at dst/src and once 4 bytes
 * to the right of each. The 4-wide function must be visible beforehand.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        &dst[0], dststride, &src[0], srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        &dst[4], dststride, &src[4], srcstride, height, mx, my); \
}
139

    
140
TAP_W8 (mmxext, epel, h4)
141
TAP_W8 (mmxext, epel, h6)
142
TAP_W16(mmxext, epel, h6)
143
TAP_W8 (mmxext, epel, v4)
144
TAP_W8 (mmxext, epel, v6)
145
TAP_W16(mmxext, epel, v6)
146
TAP_W8 (mmxext, bilinear, h)
147
TAP_W16(mmxext, bilinear, h)
148
TAP_W8 (mmxext, bilinear, v)
149
TAP_W16(mmxext, bilinear, v)
150

    
151
TAP_W16(sse2,   epel, h6)
152
TAP_W16(sse2,   epel, v6)
153
TAP_W16(sse2,   bilinear, h)
154
TAP_W16(sse2,   bilinear, v)
155

    
156
TAP_W16(ssse3,  epel, h6)
157
TAP_W16(ssse3,  epel, v6)
158
TAP_W16(ssse3,  bilinear, h)
159
TAP_W16(ssse3,  bilinear, v)
160

    
161
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT)
 *
 * Defines a static ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>() that
 * chains two existing one-dimensional functions through a stack buffer
 * declared with DECLARE_ALIGNED(ALIGN, ...), using SIZE as its stride:
 *   1. the h<TAPNUMX> function is run over height + TAPNUMY - 1 rows,
 *      starting TAPNUMY / 2 - 1 rows above src, and writes to the buffer;
 *   2. the v<TAPNUMY> function reads the buffer, starting at the row that
 *      corresponds to the original src, and writes height rows to dst.
 * The buffer holds MAXHEIGHT + TAPNUMY - 1 rows; nothing here checks that
 * height <= MAXHEIGHT.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    enum { \
        ROWS_ABOVE = TAPNUMY / 2 - 1, \
        ROWS_EXTRA = TAPNUMY - 1 \
    }; \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[SIZE * (MAXHEIGHT + ROWS_EXTRA)]; \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        tmp, SIZE, src - srcstride * ROWS_ABOVE, srcstride, \
        height + ROWS_EXTRA, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, tmp + SIZE * ROWS_ABOVE, SIZE, height, mx, my); \
}
174

    
175
/* HVTAPMMX(TAPX, TAPY): MMXEXT two-pass functions for one tap combination,
 * 4-wide (buffer sized for 8 rows) and 8-wide (buffer sized for 16 rows),
 * both with an 8-byte-aligned buffer. */
#define HVTAPMMX(TAPX, TAPY) \
HVTAP(mmxext, 8, TAPX, TAPY, 4, 8) \
HVTAP(mmxext, 8, TAPX, TAPY, 8, 16)

HVTAPMMX(4, 4)
HVTAPMMX(4, 6)
HVTAPMMX(6, 4)
HVTAPMMX(6, 6)
/* 16-wide exists only as 6-tap in both directions. */
HVTAP(mmxext, 8, 6, 6, 16, 16)
184

    
185
/* HVTAPSSE2(TAPX, TAPY, WIDTH): two-pass functions for one tap combination
 * and width, generated for both SSE2 and SSSE3 with a 16-byte-aligned buffer
 * sized for 16 rows. */
#define HVTAPSSE2(TAPX, TAPY, WIDTH) \
HVTAP(sse2, 16, TAPX, TAPY, WIDTH, 16) \
HVTAP(ssse3, 16, TAPX, TAPY, WIDTH, 16)

HVTAPSSE2(4, 4, 8)
HVTAPSSE2(4, 6, 8)
HVTAPSSE2(6, 4, 8)
HVTAPSSE2(6, 6, 8)
/* 16-wide exists only as 6-tap in both directions. */
HVTAPSSE2(6, 6, 16)
194

    
195
/* 4-wide two-pass functions, SSSE3 only (no SSE2 4-wide primitives are
 * declared in this file); buffer sized for 8 rows. */
HVTAP(ssse3, 16, 4, 4, 4, 8)
HVTAP(ssse3, 16, 4, 6, 4, 8)
HVTAP(ssse3, 16, 6, 4, 4, 8)
HVTAP(ssse3, 16, 6, 6, 4, 8)
199

    
200
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT)
 *
 * Defines a static ff_put_vp8_bilinear<SIZE>_hv_<OPT>() that runs the
 * horizontal bilinear function over height + 1 rows into a stack buffer
 * (declared with DECLARE_ALIGNED(ALIGN, ...), stride SIZE, room for
 * MAXHEIGHT + 2 rows) and then the vertical bilinear function from that
 * buffer into dst. Nothing here checks that height <= MAXHEIGHT.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, int dststride, uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    enum { \
        TMP_STRIDE = SIZE, \
        TMP_ROWS   = MAXHEIGHT + 2 \
    }; \
    DECLARE_ALIGNED(ALIGN, uint8_t, tmp)[TMP_STRIDE * TMP_ROWS]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        tmp, TMP_STRIDE, src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, tmp, TMP_STRIDE, height, mx, my); \
}
211

    
212
/* Two-pass bilinear functions. All of them use an 8-byte-aligned buffer;
 * 4-wide variants are sized for 8 rows, 8- and 16-wide ones for 16 rows.
 * There is no 4-wide SSE2 variant. */
HVBILIN(mmxext, 8, 4, 8)
HVBILIN(mmxext, 8, 8, 16)
HVBILIN(mmxext, 8, 16, 16)

HVBILIN(sse2, 8, 8, 16)
HVBILIN(sse2, 8, 16, 16)

HVBILIN(ssse3, 8, 4, 8)
HVBILIN(ssse3, 8, 8, 16)
HVBILIN(ssse3, 8, 16, 16)
220

    
221
/* Inverse-transform entry points installed by ff_vp8dsp_init_x86().
 * Declarations only; defined outside this file. */
void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);

void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);

void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);

void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
230

    
231
/*
 * DECLARE_LOOP_FILTER(NAME): declare the ten loop-filter entry points that
 * carry the suffix NAME: the "simple" filters, the "inner" filters and the
 * "mbedge" filters, each in a vertical (v) and horizontal (h) version, with
 * the inner/mbedge ones split into a 16y form (one plane pointer plus
 * stride) and an 8uv form (two plane pointers plus stride).
 */
#define DECLARE_LOOP_FILTER(NAME) \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_v_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt);

/* One full set of declarations per instruction-set suffix. */
DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
256

    
257
#endif
258

    
259
/*
 * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the 6-tap functions of width SIZE and suffix OPT into
 * c->put_vp8_epel_pixels_tab[IDX]:
 *     [0][2] <- h6,   [2][0] <- v6,   [2][2] <- h6v6
 * A pointer named `c` must be in scope where the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so the assignments always act as a
 * single statement, including under an unbraced if/else. Invoke it with a
 * trailing semicolon.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/*
 * VP8_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs every filtered entry of c->put_vp8_epel_pixels_tab[IDX] for width
 * SIZE and suffix OPT: the 4-tap and mixed 4/6-tap ones listed below, then
 * the 6-tap ones through VP8_LUMA_MC_FUNC. Entry [0][0] is not touched.
 *     [0][1] <- h4,   [1][0] <- v4,   [1][1] <- h4v4,
 *     [1][2] <- h6v4, [2][1] <- h4v6
 * Same single-statement wrapping and usage rules as VP8_LUMA_MC_FUNC.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
271

    
272
/*
 * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the bilinear functions of width SIZE and suffix OPT into
 * c->put_vp8_bilinear_pixels_tab[IDX]. Indices 1 and 2 of each of the last
 * two dimensions get the same function:
 *     [0][1], [0][2]                 <- h
 *     [1][0], [2][0]                 <- v
 *     [1][1], [1][2], [2][1], [2][2] <- hv
 * Entry [0][0] is not touched. A pointer named `c` must be in scope where
 * the macro is expanded.
 *
 * The body is wrapped in do { } while (0) so the assignments always act as a
 * single statement, including under an unbraced if/else. Invoke it with a
 * trailing semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
281

    
282

    
283
/**
 * Install the x86-optimized VP8 DSP routines into a DSP context.
 *
 * The CPU feature mask is read once from mm_support(). The feature tests
 * below run in a fixed order and each one overwrites entries set by the
 * earlier ones, so the last matching test decides what ends up in a slot.
 * Everything except the feature query is compiled only when HAVE_YASM is set.
 *
 * @param c context whose function pointers are filled in; entries with no
 *          matching x86 version are left as they were
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
{
    int cpu_flags = mm_support();

#if HAVE_YASM
    /* MMX: IDCT, plain pixel copies and a complete set of loop filters. */
    if (cpu_flags & FF_MM_MMX) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;

        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
        c->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_mmx;
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;
        c->put_vp8_epel_pixels_tab[1][0][0]     = ff_put_vp8_pixels8_mmx;

        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
    }

    /* Index 0 (width 16) only gets the 6-tap functions: there are no 4-tap
     * width-16 versions because width 16 is only used for luma, and luma is
     * always either a copy or six-tap. */
    if (cpu_flags & FF_MM_MMX2) {
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);

        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
    }

    /* SSE: replaces two transforms and the 16-wide pixel copy. */
    if (cpu_flags & FF_MM_SSE) {
        c->vp8_idct_add    = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;

        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
        c->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_sse;
    }

    /* Installed when either SSE2 or SSE2SLOW is reported: motion
     * compensation for widths 16 and 8, and the vertical loop filters. */
    if (cpu_flags & (FF_MM_SSE2|FF_MM_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);

        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_sse2;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Installed only when FF_MM_SSE2 itself is set: idct_dc_add4y and the
     * horizontal loop filters. */
    if (cpu_flags & FF_MM_SSE2) {
        c->vp8_idct_dc_add4y = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    /* SSSE3: motion compensation for all three widths plus a complete set
     * of loop filters. */
    if (cpu_flags & FF_MM_SSSE3) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);

        /* vertical loop filters */
        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;

        /* horizontal loop filters */
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    /* SSE4: replaces idct_dc_add and three of the horizontal loop filters. */
    if (cpu_flags & FF_MM_SSE4) {
        c->vp8_idct_dc_add = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y     = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv     = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif
}