Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / vp8dsp-init.c @ 2912e87a

History | View | Annotate | Download (19 KB)

1
/*
 * VP8 DSP functions x86-optimized
 * Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
 * Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

    
23
#include "libavutil/cpu.h"
24
#include "libavutil/x86_cpu.h"
25
#include "libavcodec/vp8dsp.h"
26

    
27
#if HAVE_YASM
28

    
29
/*
 * MC functions
 */
32
/* 4-wide epel put functions, mmxext flavour (h4, h6, v4, v6).
 * Only prototypes are given here; the bodies live outside this file
 * (presumably in yasm assembly, given the HAVE_YASM guard). */
void ff_put_vp8_epel4_h4_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_h6_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_v4_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_v6_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                int srcstride, int height, int mx, int my);
44

    
45
/* 8-wide epel put functions, sse2 flavour (h4, h6, v4, v6).
 * Prototypes only; the bodies are defined outside this file. */
void ff_put_vp8_epel8_h4_sse2(uint8_t *dst, int dststride, uint8_t *src,
                              int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_h6_sse2(uint8_t *dst, int dststride, uint8_t *src,
                              int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_v4_sse2(uint8_t *dst, int dststride, uint8_t *src,
                              int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_v6_sse2(uint8_t *dst, int dststride, uint8_t *src,
                              int srcstride, int height, int mx, int my);
57

    
58
/* 4-wide and 8-wide epel put functions, ssse3 flavour (h4, h6, v4, v6
 * for each width).  Prototypes only; the bodies are defined outside this
 * file. */
void ff_put_vp8_epel4_h4_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_h6_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_v4_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel4_v6_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_h4_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_h6_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_v4_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
void ff_put_vp8_epel8_v6_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                               int srcstride, int height, int mx, int my);
82

    
83
/* "h" bilinear put functions: 4-wide (mmxext, ssse3) and 8-wide
 * (sse2, ssse3).  Prototypes only; the bodies are defined outside this
 * file. */
void ff_put_vp8_bilinear4_h_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                   int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear8_h_sse2(uint8_t *dst, int dststride, uint8_t *src,
                                 int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear4_h_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                                  int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear8_h_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                                  int srcstride, int height, int mx, int my);
95

    
96
/* "v" bilinear put functions: 4-wide (mmxext, ssse3) and 8-wide
 * (sse2, ssse3).  Prototypes only; the bodies are defined outside this
 * file. */
void ff_put_vp8_bilinear4_v_mmxext(uint8_t *dst, int dststride, uint8_t *src,
                                   int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear8_v_sse2(uint8_t *dst, int dststride, uint8_t *src,
                                 int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear4_v_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                                  int srcstride, int height, int mx, int my);
void ff_put_vp8_bilinear8_v_ssse3(uint8_t *dst, int dststride, uint8_t *src,
                                  int srcstride, int height, int mx, int my);
108

    
109

    
110
/* Plain "pixels" put functions.  ff_vp8dsp_init_x86() installs them in
 * the [0][0] slot of both the epel and the bilinear tables.
 * Prototypes only; the bodies are defined outside this file. */
void ff_put_vp8_pixels8_mmx(uint8_t *dst, int dststride, uint8_t *src,
                            int srcstride, int height, int mx, int my);
void ff_put_vp8_pixels16_mmx(uint8_t *dst, int dststride, uint8_t *src,
                             int srcstride, int height, int mx, int my);
void ff_put_vp8_pixels16_sse(uint8_t *dst, int dststride, uint8_t *src,
                             int srcstride, int height, int mx, int my);
119

    
120
/*
 * TAP_W16(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>().
 * It calls ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>() twice: once at
 * dst/src and once 8 bytes further along both pointers.  Strides, height,
 * mx and my are forwarded unchanged.  The 8-wide function must already be
 * declared at the point of expansion.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, int dststride, \
    uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst, dststride, src, srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
        dst + 8, dststride, src + 8, srcstride, height, mx, my); \
}
130
/*
 * TAP_W8(OPT, FILTERTYPE, TAPTYPE)
 *
 * Defines the static function ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>().
 * It calls ff_put_vp8_<FILTERTYPE>4_<TAPTYPE>_<OPT>() twice: once at
 * dst/src and once 4 bytes further along both pointers.  Strides, height,
 * mx and my are forwarded unchanged.  The 4-wide function must already be
 * declared at the point of expansion.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst, int dststride, \
    uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst, dststride, src, srcstride, height, mx, my); \
    ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
        dst + 4, dststride, src + 4, srcstride, height, mx, my); \
}
140

    
141
TAP_W8 (mmxext, epel, h4)
142
TAP_W8 (mmxext, epel, h6)
143
TAP_W16(mmxext, epel, h6)
144
TAP_W8 (mmxext, epel, v4)
145
TAP_W8 (mmxext, epel, v6)
146
TAP_W16(mmxext, epel, v6)
147
TAP_W8 (mmxext, bilinear, h)
148
TAP_W16(mmxext, bilinear, h)
149
TAP_W8 (mmxext, bilinear, v)
150
TAP_W16(mmxext, bilinear, v)
151

    
152
TAP_W16(sse2,   epel, h6)
153
TAP_W16(sse2,   epel, v6)
154
TAP_W16(sse2,   bilinear, h)
155
TAP_W16(sse2,   bilinear, v)
156

    
157
TAP_W16(ssse3,  epel, h6)
158
TAP_W16(ssse3,  epel, v6)
159
TAP_W16(ssse3,  bilinear, h)
160
TAP_W16(ssse3,  bilinear, v)
161

    
162
/*
 * HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function
 * ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>().
 *
 * Pass 1 runs the _h<TAPNUMX> function into an ALIGN-aligned stack buffer
 * of SIZE * (MAXHEIGHT + TAPNUMY - 1) bytes, using SIZE as the buffer
 * stride.  It starts (TAPNUMY / 2 - 1) rows before src and produces
 * height + TAPNUMY - 1 rows.
 * Pass 2 runs the _v<TAPNUMY> function from row (TAPNUMY / 2 - 1) of that
 * buffer into dst for height rows.
 *
 * The buffer is sized from MAXHEIGHT, not from height; the macro itself
 * does not check that height <= MAXHEIGHT.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, int dststride, \
    uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        rows, SIZE, src - srcstride * (TAPNUMY / 2 - 1), srcstride, \
        height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, rows + SIZE * (TAPNUMY / 2 - 1), SIZE, \
        height, mx, my); \
}
175

    
176
/* HVTAPMMX(TAPX, TAPY): emits the mmxext two-pass epel functions for one
 * tap combination in two sizes: 4 wide (MAXHEIGHT 8) and 8 wide
 * (MAXHEIGHT 16), both with an 8-byte-aligned scratch buffer. */
#define HVTAPMMX(TAPX, TAPY) \
HVTAP(mmxext, 8, TAPX, TAPY, 4, 8) \
HVTAP(mmxext, 8, TAPX, TAPY, 8, 16)

HVTAPMMX(4, 4)
HVTAPMMX(4, 6)
HVTAPMMX(6, 4)
HVTAPMMX(6, 6)
/* 16 wide exists only as h6v6. */
HVTAP(mmxext, 8, 6, 6, 16, 16)
185

    
186
/* HVTAPSSE2(TAPX, TAPY, WIDTH): emits the two-pass epel function for one
 * tap combination and width in both the sse2 and the ssse3 flavour, each
 * with a 16-byte-aligned scratch buffer and MAXHEIGHT 16. */
#define HVTAPSSE2(TAPX, TAPY, WIDTH) \
HVTAP(sse2, 16, TAPX, TAPY, WIDTH, 16) \
HVTAP(ssse3, 16, TAPX, TAPY, WIDTH, 16)

HVTAPSSE2(4, 4, 8)
HVTAPSSE2(4, 6, 8)
HVTAPSSE2(6, 4, 8)
HVTAPSSE2(6, 6, 8)
HVTAPSSE2(6, 6, 16)
195

    
196
/* 4-wide two-pass epel functions, ssse3 only (MAXHEIGHT 8, 16-byte-aligned
 * scratch buffer). */
HVTAP(ssse3, 16, 4, 4, 4, 8)
HVTAP(ssse3, 16, 4, 6, 4, 8)
HVTAP(ssse3, 16, 6, 4, 4, 8)
HVTAP(ssse3, 16, 6, 6, 4, 8)
200

    
201
/*
 * HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT)
 *
 * Defines the static two-pass function ff_put_vp8_bilinear<SIZE>_hv_<OPT>().
 *
 * Pass 1 runs the _h_ function from src into an ALIGN-aligned stack buffer
 * of SIZE * (MAXHEIGHT + 2) bytes, using SIZE as the buffer stride, for
 * height + 1 rows.
 * Pass 2 runs the _v_ function from the start of that buffer into dst for
 * height rows.
 *
 * The buffer is sized from MAXHEIGHT, not from height; the macro itself
 * does not check that height <= MAXHEIGHT.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, int dststride, \
    uint8_t *src, int srcstride, \
    int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + 2)]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        rows, SIZE, src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, rows, SIZE, height, mx, my); \
}
212

    
213
/* Two-pass bilinear functions.  Arguments are (flavour, scratch-buffer
 * alignment, width, MAXHEIGHT); every flavour uses 8-byte alignment here.
 * There is no 4-wide sse2 variant. */
HVBILIN(mmxext, 8, 4, 8)
HVBILIN(mmxext, 8, 8, 16)
HVBILIN(mmxext, 8, 16, 16)
HVBILIN(sse2, 8, 8, 16)
HVBILIN(sse2, 8, 16, 16)
HVBILIN(ssse3, 8, 4, 8)
HVBILIN(ssse3, 8, 8, 16)
HVBILIN(ssse3, 8, 16, 16)
221

    
222
/* IDCT and luma-DC WHT routines, one prototype per flavour.
 * Prototypes only; the bodies are defined outside this file. */
void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
void ff_vp8_idct_dc_add4y_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
231

    
232
/*
 * DECLARE_LOOP_FILTER(NAME)
 *
 * Declares the ten loop-filter entry points of flavour NAME:
 *   - v/h "simple"       (dst, stride, flim)
 *   - v/h 16y inner      (dst, stride, e, i, hvt)
 *   - v/h 8uv inner      (dstU, dstV, s, e, i, hvt)
 *   - v/h 16y mbedge     (dst, stride, e, i, hvt)
 *   - v/h 8uv mbedge     (dstU, dstV, s, e, i, hvt)
 * Declarations only; the bodies are defined outside this file.
 */
#define DECLARE_LOOP_FILTER(NAME) \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, int stride, int flim); \
void ff_vp8_v_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME(uint8_t *dst, int stride, \
                                            int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                            int s, int e, int i, int hvt); \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, int stride, \
                                             int e, int i, int hvt); \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             int s, int e, int i, int hvt);
251

    
252
DECLARE_LOOP_FILTER(mmx)
253
DECLARE_LOOP_FILTER(mmxext)
254
DECLARE_LOOP_FILTER(sse2)
255
DECLARE_LOOP_FILTER(ssse3)
256
DECLARE_LOOP_FILTER(sse4)
257

    
258
#endif
259

    
260
/*
 * VP8_LUMA_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the three six-tap epel functions of width SIZE and flavour OPT
 * into c->put_vp8_epel_pixels_tab[IDX]:
 *   [0][2] <- h6,  [2][0] <- v6,  [2][2] <- h6v6.
 *
 * Requires a pointer named `c` in the enclosing scope.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays
 * correct under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)
264

    
265
/*
 * VP8_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs every filtered epel function of width SIZE and flavour OPT into
 * c->put_vp8_epel_pixels_tab[IDX]: the four-tap and mixed slots
 *   [0][1] <- h4,  [1][0] <- v4,  [1][1] <- h4v4,
 *   [1][2] <- h6v4,  [2][1] <- h4v6,
 * followed by the six-tap slots via VP8_LUMA_MC_FUNC.  Slot [0][0] is not
 * touched.
 *
 * Requires a pointer named `c` in the enclosing scope.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays
 * correct under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
272

    
273
/*
 * VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT)
 *
 * Installs the bilinear functions of width SIZE and flavour OPT into
 * c->put_vp8_bilinear_pixels_tab[IDX].  Indices 1 and 2 receive the same
 * function in each dimension:
 *   [0][1], [0][2]                 <- h
 *   [1][0], [2][0]                 <- v
 *   [1][1], [1][2], [2][1], [2][2] <- hv
 * Slot [0][0] is not touched.
 *
 * Requires a pointer named `c` in the enclosing scope.  Wrapped in
 * do { } while (0) so the expansion is a single statement and stays
 * correct under an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
282

    
283

    
284
/**
 * Install the x86-optimized VP8 DSP routines into a DSP context.
 *
 * The CPU flags returned by av_get_cpu_flags() are tested in a fixed
 * order: MMX, MMX2, SSE, SSE2 or SSE2SLOW, SSE2, SSSE3, SSE4.  Each block
 * overwrites pointers set by an earlier block, so on a CPU reporting
 * several flags the last matching block wins for any given pointer.  The
 * order of the blocks must therefore not be changed.
 *
 * When HAVE_YASM is 0 the function does nothing.
 *
 * @param c DSP context whose function pointers are overwritten; pointers
 *          for which no matching flavour exists are left untouched.
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    /* Queried inside the guard: without yasm nothing below is compiled,
     * and the variable would otherwise be unused. */
    int mm_flags = av_get_cpu_flags();

    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;

        /* The [0][0] slot of the epel and bilinear tables share the same
         * "pixels" function. */
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;
        c->put_vp8_epel_pixels_tab[1][0][0]     =
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (mm_flags & AV_CPU_FLAG_MMX2) {
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
    }

    if (mm_flags & AV_CPU_FLAG_SSE) {
        c->vp8_idct_add                         = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
        c->put_vp8_epel_pixels_tab[0][0][0]     =
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    /* Installed when either SSE2 or SSE2SLOW is reported: MC functions
     * (widths 16 and 8 only) and the vertical loop filters. */
    if (mm_flags & (AV_CPU_FLAG_SSE2|AV_CPU_FLAG_SSE2SLOW)) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Installed only when the SSE2 flag itself is reported: the 4y DC
     * IDCT and the horizontal loop filters. */
    if (mm_flags & AV_CPU_FLAG_SSE2) {
        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (mm_flags & AV_CPU_FLAG_SSSE3) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);

        c->vp8_v_loop_filter_simple = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    /* SSE4 replaces only the single-block DC IDCT and three of the
     * horizontal loop filters (simple, 16y mbedge, 8uv mbedge). */
    if (mm_flags & AV_CPU_FLAG_SSE4) {
        c->vp8_idct_dc_add                  = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif
}