ffmpeg / libavcodec / x86 / h264dsp_mmx.c @ 2912e87a
History | View | Annotate | Download (17.2 KB)
/*
 * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 | |||
21 | c6c98d08 | Stefano Sabatini | #include "libavutil/cpu.h" |
22 | 14bc1f24 | Ronald S. Bultje | #include "libavutil/x86_cpu.h" |
23 | #include "libavcodec/h264dsp.h" |
||
24 | 182f56cb | Aurelien Jacobs | #include "dsputil_mmx.h" |
25 | d2bb7db1 | Loren Merritt | |
26 | 84dc2d8a | Måns Rullgård | DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL; |
27 | 4f20b45f | Michael Niedermayer | |
28 | d2bb7db1 | Loren Merritt | /***********************************/
|
29 | /* IDCT */
|
||
30 | |||
31 | 1d16a1cf | Ronald S. Bultje | void ff_h264_idct_add_mmx (uint8_t *dst, int16_t *block, int stride); |
32 | void ff_h264_idct8_add_mmx (uint8_t *dst, int16_t *block, int stride); |
||
33 | void ff_h264_idct8_add_sse2 (uint8_t *dst, int16_t *block, int stride); |
||
34 | void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride); |
||
35 | void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride); |
||
36 | |||
37 | void ff_h264_idct_add16_mmx (uint8_t *dst, const int *block_offset, |
||
38 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
39 | void ff_h264_idct8_add4_mmx (uint8_t *dst, const int *block_offset, |
||
40 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
41 | void ff_h264_idct_add16_mmx2 (uint8_t *dst, const int *block_offset, |
||
42 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
43 | void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset, |
||
44 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
45 | void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, |
||
46 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
47 | void ff_h264_idct8_add4_mmx2 (uint8_t *dst, const int *block_offset, |
||
48 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
49 | void ff_h264_idct8_add4_sse2 (uint8_t *dst, const int *block_offset, |
||
50 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
51 | void ff_h264_idct_add8_mmx (uint8_t **dest, const int *block_offset, |
||
52 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
53 | void ff_h264_idct_add8_mmx2 (uint8_t **dest, const int *block_offset, |
||
54 | DCTELEM *block, int stride, const uint8_t nnzc[6*8]); |
||
55 | |||
56 | void ff_h264_idct_add16_sse2 (uint8_t *dst, const int *block_offset, DCTELEM *block, |
||
57 | int stride, const uint8_t nnzc[6*8]); |
||
58 | void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, |
||
59 | int stride, const uint8_t nnzc[6*8]); |
||
60 | void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTELEM *block, |
||
61 | int stride, const uint8_t nnzc[6*8]); |
||
62 | 19fb234e | Jason Garrett-Glaser | void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul); |
63 | void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); |
||
64 | 37fed100 | Jason Garrett-Glaser | |
/***********************************/
/* deblocking */

/*
 * Compute boundary strengths for one filtering direction.
 *
 * For every b_idx in [0, edges) stepping by `step` (both already scaled by 8),
 * four strengths are produced for the blocks b = 12 + b_idx + i (i = 0..3)
 * against their neighbours bn = b + d_idx, and stored as four 16-bit words at
 * byte offset 32*dir + b_idx from bS.
 *
 *   bS        destination array
 *   nz        non-zero-coefficient flags, one byte per block
 *   ref       reference indices; the second list sits 40 bytes after the first
 *   mv        motion vectors, 4 bytes per block; second list 160 bytes later
 *   bidir     non-zero: compare both lists, straight and crossed, and keep
 *             the minimum of the two results
 *   mask_mv   modified in place (shifted left by 3), so it must be a
 *             modifiable lvalue; the ref/mv comparison is skipped for
 *             iterations where (mask_mv & b_idx) is non-zero
 *   dir       selects the output half of bS (byte offset 32*dir)
 *   d_idx     neighbour offset in blocks; must be an integer constant
 *             expression, as must dir (both feed "i" asm constraints)
 *   mask_dir  when zero, mm0 is cleared at the start of every iteration;
 *             otherwise mm0 keeps its previous contents
 *
 * Register contract (set up by the caller, see
 * h264_loop_filter_strength_mmx2()): mm7 is the per-byte clamp used with
 * pminub, mm6 is the per-byte bias added to the packed mv differences and
 * mm5 is the value subtracted with unsigned saturation afterwards.
 * mm0-mm4 are overwritten.  The asm statements declare no MMX clobbers.
 *
 * Each stored word is 2 where nz[b] | nz[bn] is non-zero, otherwise 1 where
 * the ref/mv comparison left a non-zero byte, otherwise 0 (given mm7 holds
 * bytes of 1).
 */
#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
    do { \
        x86_reg b_idx; \
        (mask_mv) <<= 3; \
        for (b_idx = 0; b_idx < (edges); b_idx += (step)) { \
            if (!(mask_dir)) \
                __asm__ volatile( \
                    "pxor %%mm0, %%mm0 \n\t" \
                    :: \
                ); \
            if (!((mask_mv) & b_idx)) { \
                if (bidir) { \
                    __asm__ volatile( \
                        "movd %a3(%0,%2), %%mm2 \n" \
                        "punpckldq %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
                        "pshufw $0x4E, %%mm2, %%mm3 \n" \
                        "psubb %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
                        "psubb %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
                        \
                        "por %%mm1, %%mm0 \n" \
                        "movq %a5(%1,%2,4), %%mm1 \n" \
                        "movq %a6(%1,%2,4), %%mm2 \n" \
                        "movq %%mm1, %%mm3 \n" \
                        "movq %%mm2, %%mm4 \n" \
                        "psubw 48(%1,%2,4), %%mm1 \n" \
                        "psubw 56(%1,%2,4), %%mm2 \n" \
                        "psubw 208(%1,%2,4), %%mm3 \n" \
                        "psubw 216(%1,%2,4), %%mm4 \n" \
                        "packsswb %%mm2, %%mm1 \n" \
                        "packsswb %%mm4, %%mm3 \n" \
                        "paddb %%mm6, %%mm1 \n" \
                        "paddb %%mm6, %%mm3 \n" \
                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb %%mm5, %%mm3 \n" \
                        "packsswb %%mm3, %%mm1 \n" \
                        \
                        "por %%mm1, %%mm0 \n" \
                        "movq %a7(%1,%2,4), %%mm1 \n" \
                        "movq %a8(%1,%2,4), %%mm2 \n" \
                        "movq %%mm1, %%mm3 \n" \
                        "movq %%mm2, %%mm4 \n" \
                        "psubw 48(%1,%2,4), %%mm1 \n" \
                        "psubw 56(%1,%2,4), %%mm2 \n" \
                        "psubw 208(%1,%2,4), %%mm3 \n" \
                        "psubw 216(%1,%2,4), %%mm4 \n" \
                        "packsswb %%mm2, %%mm1 \n" \
                        "packsswb %%mm4, %%mm3 \n" \
                        "paddb %%mm6, %%mm1 \n" \
                        "paddb %%mm6, %%mm3 \n" \
                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "psubusb %%mm5, %%mm3 \n" \
                        "packsswb %%mm3, %%mm1 \n" \
                        \
                        "pshufw $0x4E, %%mm1, %%mm1 \n" \
                        "por %%mm1, %%mm0 \n" \
                        "pshufw $0x4E, %%mm0, %%mm1 \n" \
                        "pminub %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"((d_idx)+12), \
                          "i"((d_idx)+52), \
                          "i"((d_idx)*4+48), \
                          "i"((d_idx)*4+56), \
                          "i"((d_idx)*4+208), \
                          "i"((d_idx)*4+216) \
                    ); \
                } else { \
                    __asm__ volatile( \
                        "movd 12(%0,%2), %%mm0 \n" \
                        "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
                        "movq 48(%1,%2,4), %%mm1 \n" \
                        "movq 56(%1,%2,4), %%mm2 \n" \
                        "psubw %a4(%1,%2,4), %%mm1 \n" \
                        "psubw %a5(%1,%2,4), %%mm2 \n" \
                        "packsswb %%mm2, %%mm1 \n" \
                        "paddb %%mm6, %%mm1 \n" \
                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
                        "packsswb %%mm1, %%mm1 \n" \
                        "por %%mm1, %%mm0 \n" \
                        ::"r"(ref), \
                          "r"(mv), \
                          "r"(b_idx), \
                          "i"((d_idx)+12), \
                          "i"((d_idx)*4+48), \
                          "i"((d_idx)*4+56) \
                    ); \
                } \
            } \
            /* Use the nz argument; the macro used to read a variable */ \
            /* literally named nnz from the expansion site instead.   */ \
            __asm__ volatile( \
                "movd 12(%0,%1), %%mm1 \n" \
                "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
                ::"r"(nz), \
                  "r"(b_idx), \
                  "i"((d_idx)+12) \
            ); \
            __asm__ volatile( \
                "pminub %%mm7, %%mm1 \n" \
                "pminub %%mm7, %%mm0 \n" \
                "psllw $1, %%mm1 \n" \
                "pxor %%mm2, %%mm2 \n" \
                "pmaxub %%mm0, %%mm1 \n" \
                "punpcklbw %%mm2, %%mm1 \n" \
                "movq %%mm1, %a1(%0,%2) \n" \
                ::"r"(bS), \
                  "i"(32*(dir)), \
                  "r"(b_idx) \
                :"memory" \
            ); \
        } \
    } while (0)
181 | 4b81511c | Ronald S. Bultje | |
182 | static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], |
||
183 | int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { |
||
184 | __asm__ volatile(
|
||
185 | "movq %0, %%mm7 \n"
|
||
186 | "movq %1, %%mm6 \n"
|
||
187 | ::"m"(ff_pb_1), "m"(ff_pb_3) |
||
188 | ); |
||
189 | if(field)
|
||
190 | __asm__ volatile(
|
||
191 | "movq %0, %%mm6 \n"
|
||
192 | ::"m"(ff_pb_3_1)
|
||
193 | ); |
||
194 | __asm__ volatile(
|
||
195 | "movq %%mm6, %%mm5 \n"
|
||
196 | "paddb %%mm5, %%mm5 \n"
|
||
197 | :); |
||
198 | |||
199 | // could do a special case for dir==0 && edges==1, but it only reduces the
|
||
200 | // average filter time by 1.2%
|
||
201 | cd17285e | Ronald S. Bultje | step <<= 3;
|
202 | edges <<= 3;
|
||
203 | 0cc8a5d0 | Ronald S. Bultje | h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges, step, mask_mv1, 1, -8, 0); |
204 | cd17285e | Ronald S. Bultje | h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, 32, 8, mask_mv0, 0, -1, -1); |
205 | 4b81511c | Ronald S. Bultje | |
206 | be449fca | Diego Pettenò | __asm__ volatile(
|
207 | 3e20143e | Loren Merritt | "movq (%0), %%mm0 \n\t"
|
208 | "movq 8(%0), %%mm1 \n\t"
|
||
209 | "movq 16(%0), %%mm2 \n\t"
|
||
210 | "movq 24(%0), %%mm3 \n\t"
|
||
211 | TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4) |
||
212 | "movq %%mm0, (%0) \n\t"
|
||
213 | "movq %%mm3, 8(%0) \n\t"
|
||
214 | "movq %%mm4, 16(%0) \n\t"
|
||
215 | "movq %%mm2, 24(%0) \n\t"
|
||
216 | ::"r"(bS[0]) |
||
217 | :"memory"
|
||
218 | ); |
||
219 | } |
||
220 | d2bb7db1 | Loren Merritt | |
/* Declare an inter deblocking filter ff_x264_deblock_<DIR>_<TYPE>_<OPT>,
 * which takes a tc0 table. */
#define LF_FUNC(DIR, TYPE, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
                                                      int alpha, int beta, int8_t *tc0);
/* Declare an intra deblocking filter ff_x264_deblock_<DIR>_<TYPE>_<OPT>,
 * which has no tc0 argument. */
#define LF_IFUNC(DIR, TYPE, OPT) \
void ff_x264_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
                                                      int alpha, int beta);

LF_FUNC (h,  chroma,       mmxext)
LF_IFUNC(h,  chroma_intra, mmxext)
LF_FUNC (v,  chroma,       mmxext)
LF_IFUNC(v,  chroma_intra, mmxext)

LF_FUNC (h,  luma,         mmxext)
LF_IFUNC(h,  luma_intra,   mmxext)
#if HAVE_YASM && ARCH_X86_32
LF_FUNC (v8, luma,         mmxext)
/* Vertical luma filter built from two calls to the 8-pixel-wide routine.
 * A half is skipped only when both of its tc0 entries are negative. */
static void ff_x264_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
    if ((tc0[0] & tc0[1]) >= 0)
        ff_x264_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
    if ((tc0[2] & tc0[3]) >= 0)
        ff_x264_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
}
LF_IFUNC(v8, luma_intra,   mmxext)
/* Vertical intra luma filter: both 8-pixel-wide halves are always filtered. */
static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
{
    ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
    ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
}
#endif

LF_FUNC (h,  luma,         sse2)
LF_IFUNC(h,  luma_intra,   sse2)
LF_FUNC (v,  luma,         sse2)
LF_IFUNC(v,  luma_intra,   sse2)
256 | |||
/***********************************/
/* weighted prediction */

/* Declare the explicit-weight routine ff_h264_weight_<W>x<H>_<OPT>. */
#define H264_WEIGHT(W, H, OPT) \
void ff_h264_weight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    int stride, int log2_denom, int weight, int offset);

/* Declare the bi-weight routine ff_h264_biweight_<W>x<H>_<OPT>. */
#define H264_BIWEIGHT(W, H, OPT) \
void ff_h264_biweight_ ## W ## x ## H ## _ ## OPT(uint8_t *dst, \
    uint8_t *src, int stride, int log2_denom, int weightd, \
    int weights, int offset);

/* Despite the name, declares both the weight and biweight mmx2 routines. */
#define H264_BIWEIGHT_MMX(W,H) \
H264_WEIGHT  (W, H, mmx2) \
H264_BIWEIGHT(W, H, mmx2)

/* mmx2 pair plus sse2 weight/biweight and ssse3 biweight (no ssse3 weight). */
#define H264_BIWEIGHT_MMX_SSE(W,H) \
H264_BIWEIGHT_MMX(W, H) \
H264_WEIGHT      (W, H, sse2) \
H264_BIWEIGHT    (W, H, sse2) \
H264_BIWEIGHT    (W, H, ssse3)

H264_BIWEIGHT_MMX_SSE(16, 16)
H264_BIWEIGHT_MMX_SSE(16,  8)
H264_BIWEIGHT_MMX_SSE( 8, 16)
H264_BIWEIGHT_MMX_SSE( 8,  8)
H264_BIWEIGHT_MMX_SSE( 8,  4)
H264_BIWEIGHT_MMX    ( 4,  8)
H264_BIWEIGHT_MMX    ( 4,  4)
H264_BIWEIGHT_MMX    ( 4,  2)
287 | 14bc1f24 | Ronald S. Bultje | |
288 | void ff_h264dsp_init_x86(H264DSPContext *c)
|
||
289 | { |
||
290 | c6c98d08 | Stefano Sabatini | int mm_flags = av_get_cpu_flags();
|
291 | 14bc1f24 | Ronald S. Bultje | |
292 | 1d16a1cf | Ronald S. Bultje | if (mm_flags & AV_CPU_FLAG_MMX2) {
|
293 | c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; |
||
294 | } |
||
295 | #if HAVE_YASM
|
||
296 | 7160bb71 | Stefano Sabatini | if (mm_flags & AV_CPU_FLAG_MMX) {
|
297 | 14bc1f24 | Ronald S. Bultje | c->h264_idct_dc_add= |
298 | c->h264_idct_add= ff_h264_idct_add_mmx; |
||
299 | c->h264_idct8_dc_add= |
||
300 | c->h264_idct8_add= ff_h264_idct8_add_mmx; |
||
301 | |||
302 | c->h264_idct_add16 = ff_h264_idct_add16_mmx; |
||
303 | c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; |
||
304 | c->h264_idct_add8 = ff_h264_idct_add8_mmx; |
||
305 | c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; |
||
306 | 19fb234e | Jason Garrett-Glaser | c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; |
307 | 14bc1f24 | Ronald S. Bultje | |
308 | 7160bb71 | Stefano Sabatini | if (mm_flags & AV_CPU_FLAG_MMX2) {
|
309 | 14bc1f24 | Ronald S. Bultje | c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; |
310 | c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; |
||
311 | c->h264_idct_add16 = ff_h264_idct_add16_mmx2; |
||
312 | c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; |
||
313 | c->h264_idct_add8 = ff_h264_idct_add8_mmx2; |
||
314 | c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; |
||
315 | |||
316 | 2c166c3a | Ronald S. Bultje | c->h264_v_loop_filter_chroma= ff_x264_deblock_v_chroma_mmxext; |
317 | c->h264_h_loop_filter_chroma= ff_x264_deblock_h_chroma_mmxext; |
||
318 | c->h264_v_loop_filter_chroma_intra= ff_x264_deblock_v_chroma_intra_mmxext; |
||
319 | c->h264_h_loop_filter_chroma_intra= ff_x264_deblock_h_chroma_intra_mmxext; |
||
320 | a33a2562 | Ronald S. Bultje | #if ARCH_X86_32
|
321 | 2c166c3a | Ronald S. Bultje | c->h264_v_loop_filter_luma= ff_x264_deblock_v_luma_mmxext; |
322 | c->h264_h_loop_filter_luma= ff_x264_deblock_h_luma_mmxext; |
||
323 | a33a2562 | Ronald S. Bultje | c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; |
324 | c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; |
||
325 | #endif
|
||
326 | 14bc1f24 | Ronald S. Bultje | c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
|
327 | c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
|
||
328 | c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
|
||
329 | c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
|
||
330 | c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
|
||
331 | c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
|
||
332 | c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
|
||
333 | c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
|
||
334 | |||
335 | c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
|
||
336 | c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
|
||
337 | c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
|
||
338 | c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
|
||
339 | c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
|
||
340 | c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
|
||
341 | c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
|
||
342 | c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
|
||
343 | |||
344 | 7160bb71 | Stefano Sabatini | if (mm_flags&AV_CPU_FLAG_SSE2) {
|
345 | 1d16a1cf | Ronald S. Bultje | c->h264_idct8_add = ff_h264_idct8_add_sse2; |
346 | c->h264_idct8_add4= ff_h264_idct8_add4_sse2; |
||
347 | 19fb234e | Jason Garrett-Glaser | c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; |
348 | 1d16a1cf | Ronald S. Bultje | |
349 | a33a2562 | Ronald S. Bultje | c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
|
350 | c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_sse2;
|
||
351 | c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_sse2;
|
||
352 | c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_sse2;
|
||
353 | c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_sse2;
|
||
354 | |||
355 | 14bc1f24 | Ronald S. Bultje | c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
|
356 | a33a2562 | Ronald S. Bultje | c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_sse2;
|
357 | c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_sse2;
|
||
358 | 14bc1f24 | Ronald S. Bultje | c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
|
359 | a33a2562 | Ronald S. Bultje | c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
|
360 | |||
361 | c0bc8b9a | Måns Rullgård | #if HAVE_ALIGNED_STACK
|
362 | 14bc1f24 | Ronald S. Bultje | c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; |
363 | c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; |
||
364 | c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; |
||
365 | c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; |
||
366 | c0bc8b9a | Måns Rullgård | #endif
|
367 | f41237c9 | Måns Rullgård | |
368 | 14bc1f24 | Ronald S. Bultje | c->h264_idct_add16 = ff_h264_idct_add16_sse2; |
369 | c->h264_idct_add8 = ff_h264_idct_add8_sse2; |
||
370 | c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; |
||
371 | } |
||
372 | 7160bb71 | Stefano Sabatini | if (mm_flags&AV_CPU_FLAG_SSSE3) {
|
373 | 14bc1f24 | Ronald S. Bultje | c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
|
374 | a33a2562 | Ronald S. Bultje | c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_ssse3;
|
375 | c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_ssse3;
|
||
376 | 14bc1f24 | Ronald S. Bultje | c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
|
377 | a33a2562 | Ronald S. Bultje | c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_ssse3;
|
378 | 14bc1f24 | Ronald S. Bultje | } |
379 | } |
||
380 | } |
||
381 | 1d16a1cf | Ronald S. Bultje | #endif
|
382 | 14bc1f24 | Ronald S. Bultje | } |