ffmpeg / libavcodec / x86 / h264dsp_mmx.c @ 8dbe5856
/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/h264dsp.h"
#include "dsputil_mmx.h"

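/* Packed-byte constant {3,1, 3,1, ...}: per-component bias used by the loop
 * filter strength code below when comparing motion-vector differences. For
 * frame macroblocks ff_pb_3 is used (bias 3, i.e. a threshold of 4 for both
 * components); for field macroblocks this constant is loaded instead, so the
 * vertical component is checked against the smaller field-mode limit. */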
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1) = 0x0103010301030103ULL;

/***********************************/
/* IDCT */

void ff_h264_idct_add_mmx     (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_mmx    (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_sse2   (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);

void ff_h264_idct_add16_mmx      (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx      (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16_mmx2     (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx2     (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_sse2     (uint8_t *dst, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx       (uint8_t **dest, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx2      (uint8_t **dest, const int *block_offset,
                                  DCTELEM *block, int stride, const uint8_t nnzc[6*8]);

void ff_h264_idct_add16_sse2     (uint8_t *dst, const int *block_offset, DCTELEM *block,
                                  int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block,
                                  int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_sse2      (uint8_t **dest, const int *block_offset, DCTELEM *block,
                                  int stride, const uint8_t nnzc[6*8]);
void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);

/***********************************/
/* deblocking */

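/* One pass of boundary-strength (bS) computation for a single edge direction.
 * For every 4x4 block edge it ORs together the usual H.264 deblocking
 * conditions: different reference frames, a motion-vector component that
 * differs by at least the limit held in %mm5/%mm6, or nonzero coefficients
 * (nnz) on either side, which forces bS = 2 instead of 1. d_idx selects the
 * neighbouring block in the nnz/ref/mv arrays (-8 = block above, -1 = block
 * to the left). Roughly, per edge (a scalar sketch, not the exact reference
 * code):
 *
 *     bS = (nnz[b] || nnz[bn]) ? 2
 *        : (ref[b] != ref[bn] ||
 *           FFABS(mv[b][0] - mv[bn][0]) >= 4 ||
 *           FFABS(mv[b][1] - mv[bn][1]) >= 4) ? 1 : 0;
 */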
#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
    do { \
        x86_reg b_idx; \
        mask_mv <<= 3; \
        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
            if (!mask_dir) \
            __asm__ volatile( \
                    "pxor %%mm0, %%mm0 \n\t" \
                    :: \
            ); \
            if(!(mask_mv & b_idx)) { \
                if(bidir) { \
                    __asm__ volatile( \
                            "movd         %a3(%0,%2), %%mm2 \n" \
                            "punpckldq    %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
                            "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
                            "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
                            "pshufw $0x4E, %%mm2, %%mm3 \n" \
                            "psubb         %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
                            "psubb         %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
                            \
                            "por           %%mm1, %%mm0 \n" \
                            "movq   %a5(%1,%2,4), %%mm1 \n" \
                            "movq   %a6(%1,%2,4), %%mm2 \n" \
                            "movq          %%mm1, %%mm3 \n" \
                            "movq          %%mm2, %%mm4 \n" \
                            "psubw   48(%1,%2,4), %%mm1 \n" \
                            "psubw   56(%1,%2,4), %%mm2 \n" \
                            "psubw  208(%1,%2,4), %%mm3 \n" \
                            "psubw  216(%1,%2,4), %%mm4 \n" \
                            "packsswb      %%mm2, %%mm1 \n" \
                            "packsswb      %%mm4, %%mm3 \n" \
                            "paddb         %%mm6, %%mm1 \n" \
                            "paddb         %%mm6, %%mm3 \n" \
                            "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                            "psubusb       %%mm5, %%mm3 \n" \
                            "packsswb      %%mm3, %%mm1 \n" \
                            \
                            "por           %%mm1, %%mm0 \n" \
                            "movq   %a7(%1,%2,4), %%mm1 \n" \
                            "movq   %a8(%1,%2,4), %%mm2 \n" \
                            "movq          %%mm1, %%mm3 \n" \
                            "movq          %%mm2, %%mm4 \n" \
                            "psubw   48(%1,%2,4), %%mm1 \n" \
                            "psubw   56(%1,%2,4), %%mm2 \n" \
                            "psubw  208(%1,%2,4), %%mm3 \n" \
                            "psubw  216(%1,%2,4), %%mm4 \n" \
                            "packsswb      %%mm2, %%mm1 \n" \
                            "packsswb      %%mm4, %%mm3 \n" \
                            "paddb         %%mm6, %%mm1 \n" \
                            "paddb         %%mm6, %%mm3 \n" \
                            "psubusb       %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                            "psubusb       %%mm5, %%mm3 \n" \
                            "packsswb      %%mm3, %%mm1 \n" \
                            \
                            "pshufw $0x4E, %%mm1, %%mm1 \n" \
                            "por           %%mm1, %%mm0 \n" \
                            "pshufw $0x4E, %%mm0, %%mm1 \n" \
                            "pminub        %%mm1, %%mm0 \n" \
                            ::"r"(ref), \
                              "r"(mv), \
                              "r"(b_idx), \
                              "i"(d_idx+12), \
                              "i"(d_idx+52), \
                              "i"(d_idx*4+48), \
                              "i"(d_idx*4+56), \
                              "i"(d_idx*4+208), \
                              "i"(d_idx*4+216) \
                    ); \
                } else { \
                    __asm__ volatile( \
                            "movd      12(%0,%2), %%mm0 \n" \
                            "psubb   %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
                            "movq    48(%1,%2,4), %%mm1 \n" \
                            "movq    56(%1,%2,4), %%mm2 \n" \
                            "psubw %a4(%1,%2,4), %%mm1 \n" \
                            "psubw %a5(%1,%2,4), %%mm2 \n" \
                            "packsswb     %%mm2, %%mm1 \n" \
                            "paddb        %%mm6, %%mm1 \n" \
                            "psubusb      %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                            "packsswb     %%mm1, %%mm1 \n" \
                            "por          %%mm1, %%mm0 \n" \
                            ::"r"(ref), \
                              "r"(mv), \
                              "r"(b_idx), \
                              "i"(d_idx+12), \
                              "i"(d_idx*4+48), \
                              "i"(d_idx*4+56) \
                    ); \
                } \
            } \
            __asm__ volatile( \
                    "movd     12(%0,%1), %%mm1 \n" \
                    "por    %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
                    ::"r"(nnz), \
                      "r"(b_idx), \
                      "i"(d_idx+12) \
            ); \
            __asm__ volatile( \
                    "pminub    %%mm7, %%mm1 \n" \
                    "pminub    %%mm7, %%mm0 \n" \
                    "psllw        $1, %%mm1 \n" \
                    "pxor      %%mm2, %%mm2 \n" \
                    "pmaxub    %%mm0, %%mm1 \n" \
                    "punpcklbw %%mm2, %%mm1 \n" \
                    "movq      %%mm1, %a1(%0,%2) \n" \
                    ::"r"(bS), \
                      "i"(32*dir), \
                      "r"(b_idx) \
                    :"memory" \
            ); \
        } \
    } while (0)

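/* Computes the deblocking boundary strengths for a whole macroblock.
 * %mm7 = ff_pb_1, %mm6 = the mv-difference bias (ff_pb_3, or ff_pb_3_1 for
 * field macroblocks) and %mm5 = 2*%mm6 are loaded once here and stay live
 * across both uses of the iteration macro above. The trailing TRANSPOSE4
 * rearranges the dir==0 results so bS[0] ends up in the layout the caller
 * expects. */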
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                            int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
    __asm__ volatile(
        "movq %0, %%mm7 \n"
        "movq %1, %%mm6 \n"
        ::"m"(ff_pb_1), "m"(ff_pb_3)
    );
    if(field)
        __asm__ volatile(
            "movq %0, %%mm6 \n"
            ::"m"(ff_pb_3_1)
        );
    __asm__ volatile(
        "movq %%mm6, %%mm5 \n"
        "paddb %%mm5, %%mm5 \n"
    :);

    // could do a special case for dir==0 && edges==1, but it only reduces the
    // average filter time by 1.2%
    step  <<= 3;
    edges <<= 3;
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8,  0);
    h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir,    32,    8, mask_mv0, 0, -1, -1);

    __asm__ volatile(
        "movq   (%0), %%mm0 \n\t"
        "movq  8(%0), %%mm1 \n\t"
        "movq 16(%0), %%mm2 \n\t"
        "movq 24(%0), %%mm3 \n\t"
        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
        "movq %%mm0,   (%0) \n\t"
        "movq %%mm3,  8(%0) \n\t"
        "movq %%mm4, 16(%0) \n\t"
        "movq %%mm2, 24(%0) \n\t"
        ::"r"(bS[0])
        :"memory"
    );
}

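/* Prototypes for the deblocking filters implemented in external assembly
 * (ported from x264, hence the ff_x264_ prefix). LF_FUNC declares the normal
 * tc0-based filters, LF_IFUNC the intra variants, which take no tc0 array. */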
#define LF_FUNC(DIR, TYPE, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
                                                       int alpha, int beta, int8_t *tc0);
#define LF_IFUNC(DIR, TYPE, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
                                                       int alpha, int beta);

LF_FUNC (h,  chroma,       mmxext)
LF_IFUNC(h,  chroma_intra, mmxext)
LF_FUNC (v,  chroma,       mmxext)
LF_IFUNC(v,  chroma_intra, mmxext)

LF_FUNC (h,  luma,         mmxext)
LF_IFUNC(h,  luma_intra,   mmxext)
#if HAVE_YASM && ARCH_X86_32
LF_FUNC (v8, luma,         mmxext)
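/* The mmxext luma deblocking code only handles 8 pixels per call (the "v8"
 * variant), so on 32-bit x86, where it is still needed, these wrappers call
 * it twice to cover a full 16-pixel edge. A negative tc0 entry marks a
 * segment as "do not filter"; each half is skipped only if both of its tc0
 * values are negative (checked via the sign bit of tc0[i] & tc0[i+1]). */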
static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    if((tc0[0] & tc0[1]) >= 0)
        ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
    if((tc0[2] & tc0[3]) >= 0)
        ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
}
LF_IFUNC(v8, luma_intra, mmxext)
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
    ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
    ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
}
#endif

LF_FUNC (h,  luma,         sse2)
LF_IFUNC(h,  luma_intra,   sse2)
LF_FUNC (v,  luma,         sse2)
LF_IFUNC(v,  luma_intra,   sse2)

/***********************************/
/* weighted prediction */

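/* Prototype generators for the assembly weighted-prediction routines:
 * H264_WEIGHT declares the unidirectional weighting functions and
 * H264_BIWEIGHT the bidirectional ones, one per WxH block size and
 * instruction set. The instantiations below must stay in sync with the
 * weight_h264_pixels_tab / biweight_h264_pixels_tab assignments in
 * ff_h264dsp_init_x86(). */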
#define H264_WEIGHT(W, H, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    int stride, int log2_denom, int weight, int offset);

#define H264_BIWEIGHT(W, H, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    uint8_t *src, int stride, int log2_denom, int weightd, \
    int weights, int offset);

#define H264_BIWEIGHT_MMX(W,H) \
H264_WEIGHT  (W, H, mmx2) \
H264_BIWEIGHT(W, H, mmx2)

#define H264_BIWEIGHT_MMX_SSE(W,H) \
H264_BIWEIGHT_MMX(W, H) \
H264_WEIGHT      (W, H, sse2) \
H264_BIWEIGHT    (W, H, sse2) \
H264_BIWEIGHT    (W, H, ssse3)

H264_BIWEIGHT_MMX_SSE(16, 16)
H264_BIWEIGHT_MMX_SSE(16,  8)
H264_BIWEIGHT_MMX_SSE( 8, 16)
H264_BIWEIGHT_MMX_SSE( 8,  8)
H264_BIWEIGHT_MMX_SSE( 8,  4)
H264_BIWEIGHT_MMX    ( 4,  8)
H264_BIWEIGHT_MMX    ( 4,  4)
H264_BIWEIGHT_MMX    ( 4,  2)

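/* Runtime dispatch: fill the H264DSPContext function pointers with the best
 * x86 implementations allowed by av_get_cpu_flags(). Only the 8-bit decoding
 * paths are wired up here; for other bit depths the C functions installed by
 * the generic ff_h264dsp_init() remain in place. Everything backed by yasm
 * code is guarded by HAVE_YASM. */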
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
{
    int mm_flags = av_get_cpu_flags();

    if (bit_depth == 8) {
    if (mm_flags & AV_CPU_FLAG_MMX2) {
        c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
    }
#if HAVE_YASM
    if (mm_flags & AV_CPU_FLAG_MMX) {
        c->h264_idct_dc_add=
        c->h264_idct_add= ff_h264_idct_add_mmx;
        c->h264_idct8_dc_add=
        c->h264_idct8_add= ff_h264_idct8_add_mmx;

        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
        c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;

        if (mm_flags & AV_CPU_FLAG_MMX2) {
            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
            c->h264_idct_add16     = ff_h264_idct_add16_mmx2;
            c->h264_idct8_add4     = ff_h264_idct8_add4_mmx2;
            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;

            c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext;
            c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext;
            c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext;
            c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext;
#if ARCH_X86_32
            c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext;
            c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext;
            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
#endif
            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;

            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;

            if (mm_flags&AV_CPU_FLAG_SSE2) {
                c->h264_idct8_add = ff_h264_idct8_add_sse2;
                c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
                c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;

                c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
                c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
                c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
                c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
                c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;

                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
                c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
                c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;

#if HAVE_ALIGNED_STACK
                c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
                c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
                c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
                c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
#endif

                c->h264_idct_add16      = ff_h264_idct_add16_sse2;
                c->h264_idct_add8       = ff_h264_idct_add8_sse2;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
            }
            if (mm_flags&AV_CPU_FLAG_SSSE3) {
                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
                c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
                c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
                c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
            }
        }
    }
#endif
    }
}