ffmpeg / libavcodec / i386 / motion_est_mmx.c @ 5509bffa
History | View | Annotate | Download (14.2 KB)
1 | 694ec061 | Fabrice Bellard | /*
|
---|---|---|---|
2 | * MMX optimized motion estimation
|
||
3 | ff4ec49e | Fabrice Bellard | * Copyright (c) 2001 Fabrice Bellard.
|
4 | 8f2ab833 | Michael Niedermayer | * Copyright (c) 2002-2004 Michael Niedermayer
|
5 | 694ec061 | Fabrice Bellard | *
|
6 | ff4ec49e | Fabrice Bellard | * This library is free software; you can redistribute it and/or
|
7 | * modify it under the terms of the GNU Lesser General Public
|
||
8 | * License as published by the Free Software Foundation; either
|
||
9 | * version 2 of the License, or (at your option) any later version.
|
||
10 | 694ec061 | Fabrice Bellard | *
|
11 | ff4ec49e | Fabrice Bellard | * This library is distributed in the hope that it will be useful,
|
12 | 694ec061 | Fabrice Bellard | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13 | ff4ec49e | Fabrice Bellard | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14 | * Lesser General Public License for more details.
|
||
15 | 694ec061 | Fabrice Bellard | *
|
16 | ff4ec49e | Fabrice Bellard | * You should have received a copy of the GNU Lesser General Public
|
17 | * License along with this library; if not, write to the Free Software
|
||
18 | 5509bffa | Diego Biurrun | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
19 | 694ec061 | Fabrice Bellard | *
|
20 | ba6802de | Michael Niedermayer | * mostly by Michael Niedermayer <michaelni@gmx.at>
|
21 | 694ec061 | Fabrice Bellard | */
|
22 | #include "../dsputil.h" |
||
23 | 053dea12 | Aurelien Jacobs | #include "mmx.h" |
24 | 694ec061 | Fabrice Bellard | |
/*
 * Rounding biases for the plain-MMX averaging helpers.
 * Entry i holds the value i replicated in each of the four 16-bit words of a
 * 64-bit quantity. In this file entry 1 is loaded into mm5 before the
 * two-sample average (sum + 1, then >> 1) and entry 2 before the four-sample
 * average (sum + 2, then >> 2). Entry 0 is not referenced in the code shown.
 */
25 | 0c1a9eda | Zdenek Kabelac | static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ |
26 | 1b245cc2 | Panagiotis Issaris | 0x0000000000000000ULL,
|
27 | 0x0001000100010001ULL,
|
||
28 | 0x0002000200020002ULL,
|
||
29 | ba6802de | Michael Niedermayer | }; |
30 | 694ec061 | Fabrice Bellard | |
/*
 * "Byte one": the value 1 in each of the eight bytes. sad8_4_mmx2 loads it
 * into mm5 through MANGLE(bone) and subtracts it (with unsigned saturation)
 * from one intermediate average.
 * NOTE(review): attribute_used is presumably needed because the only
 * reference is from inside an asm string, which the compiler cannot see -
 * confirm. The literal carries a signed LL suffix although the object is
 * uint64_t; the value fits, so the stored bits are as written.
 */
31 | 5c0513bd | Dmitry Baryshkov | static attribute_used __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL; |
32 | ba8d0be9 | Michael Niedermayer | |
/*
 * Accumulate the sum of absolute differences between two 8-pixel-wide blocks
 * into mm6, using plain MMX instructions.
 *
 * blk1, blk2: top-left bytes of the two blocks being compared
 * stride:     byte distance between consecutive rows (same for both blocks)
 * h:          number of rows
 *
 * Register contract (not expressed in the asm constraints):
 *   - mm7 must be zero on entry; it is the zero operand of the byte->word
 *     unpacks.
 *   - mm6 is the running accumulator (four 16-bit partial sums). It is only
 *     added to here; the caller clears it first and reduces it afterwards.
 *   - mm0..mm5 are overwritten. The asm statement has no clobber list.
 *
 * The loop handles two rows per iteration (two "add stride" steps), so it
 * assumes h is even and stride is positive - TODO confirm against callers.
 */
33 | bb198e19 | Michael Niedermayer | static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
34 | 694ec061 | Fabrice Bellard | { |
/* Negative byte offset: the pointers below are biased by -len, so the index
 * register runs from -(stride*h) up towards zero and "js" exits the loop once
 * it is no longer negative. */
35 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
36 | ba6802de | Michael Niedermayer | asm volatile( |
37 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
38 | "1: \n\t"
|
||
39 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
40 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
41 | "movq (%2, %%"REG_a"), %%mm4 \n\t" |
||
42 | "add %3, %%"REG_a" \n\t" |
||
/* |a-b| per byte: both saturating differences are formed and OR-ed together
 * (one of the two is always zero). */
43 | "psubusb %%mm0, %%mm2 \n\t"
|
||
44 | "psubusb %%mm4, %%mm0 \n\t"
|
||
45 | "movq (%1, %%"REG_a"), %%mm1 \n\t" |
||
46 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
47 | "movq (%2, %%"REG_a"), %%mm5 \n\t" |
||
48 | "psubusb %%mm1, %%mm3 \n\t"
|
||
49 | "psubusb %%mm5, %%mm1 \n\t"
|
||
50 | "por %%mm2, %%mm0 \n\t"
|
||
51 | "por %%mm1, %%mm3 \n\t"
|
||
52 | "movq %%mm0, %%mm1 \n\t"
|
||
53 | "movq %%mm3, %%mm2 \n\t"
|
||
/* Widen the byte differences of both rows to 16-bit words (mm7 == 0) and add
 * them into the accumulator. */
54 | "punpcklbw %%mm7, %%mm0 \n\t"
|
||
55 | "punpckhbw %%mm7, %%mm1 \n\t"
|
||
56 | "punpcklbw %%mm7, %%mm3 \n\t"
|
||
57 | "punpckhbw %%mm7, %%mm2 \n\t"
|
||
58 | "paddw %%mm1, %%mm0 \n\t"
|
||
59 | "paddw %%mm3, %%mm2 \n\t"
|
||
60 | "paddw %%mm2, %%mm0 \n\t"
|
||
61 | "paddw %%mm0, %%mm6 \n\t"
|
||
62 | "add %3, %%"REG_a" \n\t" |
||
63 | " js 1b \n\t"
|
||
64 | ba6802de | Michael Niedermayer | : "+a" (len)
|
65 | 053dea12 | Aurelien Jacobs | : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) |
66 | ba6802de | Michael Niedermayer | ); |
67 | 694ec061 | Fabrice Bellard | } |
68 | |||
/*
 * MMX2 variant of sad8_1_mmx: accumulate the sum of absolute differences of
 * two 8-pixel-wide blocks into mm6 using psadbw.
 *
 * blk1, blk2: top-left bytes of the two blocks
 * stride:     byte distance between consecutive rows
 * h:          number of rows
 *
 * mm6 is only added to (the caller clears and later reads it); mm0..mm3 are
 * overwritten; mm7 is not used. The asm statement has no clobber list.
 * Two rows are processed per iteration, so h is assumed even and stride
 * positive - TODO confirm against callers.
 */
69 | bb198e19 | Michael Niedermayer | static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
70 | 694ec061 | Fabrice Bellard | { |
/* Index starts at -(stride*h); the pointers passed to the asm are biased by
 * -len so that index 0 is one past the last row. */
71 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
72 | ba6802de | Michael Niedermayer | asm volatile( |
73 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
74 | "1: \n\t"
|
||
75 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
76 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
77 | "psadbw %%mm2, %%mm0 \n\t"
|
||
78 | "add %3, %%"REG_a" \n\t" |
||
79 | "movq (%1, %%"REG_a"), %%mm1 \n\t" |
||
80 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
81 | "psadbw %%mm1, %%mm3 \n\t"
|
||
82 | "paddw %%mm3, %%mm0 \n\t"
|
||
83 | "paddw %%mm0, %%mm6 \n\t"
|
||
84 | "add %3, %%"REG_a" \n\t" |
||
85 | " js 1b \n\t"
|
||
86 | ba6802de | Michael Niedermayer | : "+a" (len)
|
87 | 053dea12 | Aurelien Jacobs | : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride) |
88 | ba6802de | Michael Niedermayer | ); |
89 | 694ec061 | Fabrice Bellard | } |
90 | |||
/*
 * MMX2: accumulate into mm6 the SAD between blk2 and the per-byte average
 * (pavgb) of two source blocks blk1a and blk1b, 8 pixels wide.
 *
 * blk1a, blk1b: the two blocks that are averaged
 * blk2:         block the average is compared against
 * stride:       byte distance between rows (shared by all three pointers)
 * h:            number of rows
 *
 * mm6 is only added to; mm0..mm3 are overwritten; no clobbers are declared.
 * Two rows per iteration, so h is assumed even and stride positive - TODO
 * confirm against callers.
 */
91 | 0c1a9eda | Zdenek Kabelac | static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
92 | 694ec061 | Fabrice Bellard | { |
/* Negative running offset; all three pointers are biased by -len below. */
93 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
94 | ba6802de | Michael Niedermayer | asm volatile( |
95 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
96 | "1: \n\t"
|
||
97 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
98 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
99 | "pavgb %%mm2, %%mm0 \n\t"
|
||
100 | "movq (%3, %%"REG_a"), %%mm2 \n\t" |
||
101 | "psadbw %%mm2, %%mm0 \n\t"
|
||
102 | "add %4, %%"REG_a" \n\t" |
||
103 | "movq (%1, %%"REG_a"), %%mm1 \n\t" |
||
104 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
105 | "pavgb %%mm1, %%mm3 \n\t"
|
||
106 | "movq (%3, %%"REG_a"), %%mm1 \n\t" |
||
107 | "psadbw %%mm1, %%mm3 \n\t"
|
||
108 | "paddw %%mm3, %%mm0 \n\t"
|
||
109 | "paddw %%mm0, %%mm6 \n\t"
|
||
110 | "add %4, %%"REG_a" \n\t" |
||
111 | " js 1b \n\t"
|
||
112 | ba6802de | Michael Niedermayer | : "+a" (len)
|
113 | 053dea12 | Aurelien Jacobs | : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) |
114 | ba6802de | Michael Niedermayer | ); |
115 | 694ec061 | Fabrice Bellard | } |
116 | |||
/*
 * MMX2: accumulate into mm6 the SAD between blk2 and a four-sample blend of
 * blk1, built from the bytes at x and x+1 of each row and of the row below
 * it (operand %2 is blk1 advanced by one stride).
 *
 * The blend is built from chained pavgb operations, with the constant "bone"
 * (1 in every byte) subtracted with saturation from one intermediate average.
 * NOTE(review): this looks like an approximation of the sum+2>>2 average
 * computed by sad8_4_mmx rather than a bit-identical result; the init
 * function only installs the *_xy2_mmx2 variants when CODEC_FLAG_BITEXACT is
 * not set - confirm.
 *
 * blk1:   source block (rows 0..h and byte columns 0..8 are read)
 * blk2:   block compared against
 * stride: byte distance between rows
 * h:      number of rows; two are processed per iteration, so h is assumed
 *         even and stride positive - TODO confirm against callers
 *
 * mm6 is only added to; mm0..mm5 are overwritten; no clobbers are declared.
 * Note that ".balign 16" precedes the one-time load of bone, not the loop
 * label "1:".
 */
117 | 0c1a9eda | Zdenek Kabelac | static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
118 | ba6802de | Michael Niedermayer | { //FIXME reuse src
|
119 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
120 | ba6802de | Michael Niedermayer | asm volatile( |
121 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
122 | "movq "MANGLE(bone)", %%mm5 \n\t" |
||
123 | "1: \n\t"
|
||
124 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
125 | "movq (%2, %%"REG_a"), %%mm2 \n\t" |
||
126 | "movq 1(%1, %%"REG_a"), %%mm1 \n\t" |
||
127 | "movq 1(%2, %%"REG_a"), %%mm3 \n\t" |
||
128 | "pavgb %%mm2, %%mm0 \n\t"
|
||
129 | "pavgb %%mm1, %%mm3 \n\t"
|
||
130 | "psubusb %%mm5, %%mm3 \n\t"
|
||
131 | "pavgb %%mm3, %%mm0 \n\t"
|
||
132 | "movq (%3, %%"REG_a"), %%mm2 \n\t" |
||
133 | "psadbw %%mm2, %%mm0 \n\t"
|
||
134 | "add %4, %%"REG_a" \n\t" |
||
135 | "movq (%1, %%"REG_a"), %%mm1 \n\t" |
||
136 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
137 | "movq 1(%1, %%"REG_a"), %%mm2 \n\t" |
||
138 | "movq 1(%2, %%"REG_a"), %%mm4 \n\t" |
||
139 | "pavgb %%mm3, %%mm1 \n\t"
|
||
140 | "pavgb %%mm4, %%mm2 \n\t"
|
||
141 | "psubusb %%mm5, %%mm2 \n\t"
|
||
142 | "pavgb %%mm1, %%mm2 \n\t"
|
||
143 | "movq (%3, %%"REG_a"), %%mm1 \n\t" |
||
144 | "psadbw %%mm1, %%mm2 \n\t"
|
||
145 | "paddw %%mm2, %%mm0 \n\t"
|
||
146 | "paddw %%mm0, %%mm6 \n\t"
|
||
147 | "add %4, %%"REG_a" \n\t" |
||
148 | " js 1b \n\t"
|
||
149 | ba6802de | Michael Niedermayer | : "+a" (len)
|
150 | 053dea12 | Aurelien Jacobs | : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride) |
151 | ba6802de | Michael Niedermayer | ); |
152 | 694ec061 | Fabrice Bellard | } |
153 | |||
/*
 * Plain MMX: accumulate into mm6 the SAD between blk2 and the rounded average
 * of blk1a and blk1b, 8 pixels wide.
 *
 * Per row: both source rows are widened to 16-bit words, added, biased by
 * mm5, shifted right by 1, packed back to bytes, and the absolute difference
 * against the blk2 row is summed into mm6.
 *
 * blk1a, blk1b: the two blocks that are averaged
 * blk2:         block the average is compared against
 * stride:       byte distance between rows
 * h:            number of rows (one row per loop iteration)
 *
 * Register contract (not expressed in the asm constraints):
 *   - mm7 must be zero (unpack operand).
 *   - mm5 must hold the per-word rounding bias; the callers in this file
 *     load round_tab[1] into it.
 *   - mm6 is the accumulator and is only added to.
 *   - mm0..mm4 are overwritten; no clobbers are declared.
 * Assumes stride is positive so the negative index counts up to zero - TODO
 * confirm against callers.
 */
154 | 0c1a9eda | Zdenek Kabelac | static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
155 | 694ec061 | Fabrice Bellard | { |
156 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
157 | ba6802de | Michael Niedermayer | asm volatile( |
158 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
159 | "1: \n\t"
|
||
160 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
161 | "movq (%2, %%"REG_a"), %%mm1 \n\t" |
||
162 | "movq (%1, %%"REG_a"), %%mm2 \n\t" |
||
163 | "movq (%2, %%"REG_a"), %%mm3 \n\t" |
||
164 | "punpcklbw %%mm7, %%mm0 \n\t"
|
||
165 | "punpcklbw %%mm7, %%mm1 \n\t"
|
||
166 | "punpckhbw %%mm7, %%mm2 \n\t"
|
||
167 | "punpckhbw %%mm7, %%mm3 \n\t"
|
||
168 | "paddw %%mm0, %%mm1 \n\t"
|
||
169 | "paddw %%mm2, %%mm3 \n\t"
|
||
170 | "movq (%3, %%"REG_a"), %%mm4 \n\t" |
||
171 | "movq (%3, %%"REG_a"), %%mm2 \n\t" |
||
/* (a + b + bias) >> 1 on 16-bit words, then pack back to 8 bytes. */
172 | "paddw %%mm5, %%mm1 \n\t"
|
||
173 | "paddw %%mm5, %%mm3 \n\t"
|
||
174 | "psrlw $1, %%mm1 \n\t"
|
||
175 | "psrlw $1, %%mm3 \n\t"
|
||
176 | "packuswb %%mm3, %%mm1 \n\t"
|
||
/* Absolute difference against the blk2 row via two saturating subtractions. */
177 | "psubusb %%mm1, %%mm4 \n\t"
|
||
178 | "psubusb %%mm2, %%mm1 \n\t"
|
||
179 | "por %%mm4, %%mm1 \n\t"
|
||
180 | "movq %%mm1, %%mm0 \n\t"
|
||
181 | "punpcklbw %%mm7, %%mm0 \n\t"
|
||
182 | "punpckhbw %%mm7, %%mm1 \n\t"
|
||
183 | "paddw %%mm1, %%mm0 \n\t"
|
||
184 | "paddw %%mm0, %%mm6 \n\t"
|
||
185 | "add %4, %%"REG_a" \n\t" |
||
186 | " js 1b \n\t"
|
||
187 | ba6802de | Michael Niedermayer | : "+a" (len)
|
188 | 053dea12 | Aurelien Jacobs | : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride) |
189 | ba6802de | Michael Niedermayer | ); |
190 | 694ec061 | Fabrice Bellard | } |
191 | |||
/*
 * Plain MMX: accumulate into mm6 the SAD between blk2 and the four-sample
 * average of blk1, 8 pixels wide.
 *
 * Per row and per byte x, the four samples are row[x], row[x+1] and the same
 * two bytes of the row below (operand %2 is blk1 advanced by one stride).
 * They are widened to 16-bit words, summed, biased by mm5, shifted right by
 * 2 and packed back to bytes before the absolute difference against blk2 is
 * added to mm6.
 *
 * blk1:   source block (rows 0..h and byte columns 0..8 are read)
 * blk2:   block compared against
 * stride: byte distance between rows
 * h:      number of rows (one row per loop iteration)
 *
 * Register contract (not expressed in the asm constraints):
 *   - mm7 must be zero (unpack operand).
 *   - mm5 must hold the per-word rounding bias; the callers in this file
 *     load round_tab[2] into it.
 *   - mm6 is the accumulator and is only added to.
 *   - mm0..mm4 are overwritten; no clobbers are declared.
 * Assumes stride is positive - TODO confirm against callers.
 */
192 | 0c1a9eda | Zdenek Kabelac | static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
193 | 694ec061 | Fabrice Bellard | { |
194 | 053dea12 | Aurelien Jacobs | long len= -(stride*h);
|
195 | ba6802de | Michael Niedermayer | asm volatile( |
196 | bb270c08 | Diego Biurrun | ".balign 16 \n\t"
|
197 | "1: \n\t"
|
||
198 | "movq (%1, %%"REG_a"), %%mm0 \n\t" |
||
199 | "movq (%2, %%"REG_a"), %%mm1 \n\t" |
||
200 | "movq %%mm0, %%mm4 \n\t"
|
||
201 | "movq %%mm1, %%mm2 \n\t"
|
||
202 | "punpcklbw %%mm7, %%mm0 \n\t"
|
||
203 | "punpcklbw %%mm7, %%mm1 \n\t"
|
||
204 | "punpckhbw %%mm7, %%mm4 \n\t"
|
||
205 | "punpckhbw %%mm7, %%mm2 \n\t"
|
||
206 | "paddw %%mm1, %%mm0 \n\t"
|
||
207 | "paddw %%mm2, %%mm4 \n\t"
|
||
208 | "movq 1(%1, %%"REG_a"), %%mm2 \n\t" |
||
209 | "movq 1(%2, %%"REG_a"), %%mm3 \n\t" |
||
210 | "movq %%mm2, %%mm1 \n\t"
|
||
211 | "punpcklbw %%mm7, %%mm2 \n\t"
|
||
212 | "punpckhbw %%mm7, %%mm1 \n\t"
|
||
213 | "paddw %%mm0, %%mm2 \n\t"
|
||
214 | "paddw %%mm4, %%mm1 \n\t"
|
||
215 | "movq %%mm3, %%mm4 \n\t"
|
||
216 | "punpcklbw %%mm7, %%mm3 \n\t"
|
||
217 | "punpckhbw %%mm7, %%mm4 \n\t"
|
||
218 | "paddw %%mm3, %%mm2 \n\t"
|
||
219 | "paddw %%mm4, %%mm1 \n\t"
|
||
220 | "movq (%3, %%"REG_a"), %%mm3 \n\t" |
||
221 | "movq (%3, %%"REG_a"), %%mm4 \n\t" |
||
/* (four-sample sum + bias) >> 2 on 16-bit words, then pack back to bytes. */
222 | "paddw %%mm5, %%mm2 \n\t"
|
||
223 | "paddw %%mm5, %%mm1 \n\t"
|
||
224 | "psrlw $2, %%mm2 \n\t"
|
||
225 | "psrlw $2, %%mm1 \n\t"
|
||
226 | "packuswb %%mm1, %%mm2 \n\t"
|
||
/* Absolute difference against the blk2 row via two saturating subtractions. */
227 | "psubusb %%mm2, %%mm3 \n\t"
|
||
228 | "psubusb %%mm4, %%mm2 \n\t"
|
||
229 | "por %%mm3, %%mm2 \n\t"
|
||
230 | "movq %%mm2, %%mm0 \n\t"
|
||
231 | "punpcklbw %%mm7, %%mm0 \n\t"
|
||
232 | "punpckhbw %%mm7, %%mm2 \n\t"
|
||
233 | "paddw %%mm2, %%mm0 \n\t"
|
||
234 | "paddw %%mm0, %%mm6 \n\t"
|
||
235 | "add %4, %%"REG_a" \n\t" |
||
236 | " js 1b \n\t"
|
||
237 | ba6802de | Michael Niedermayer | : "+a" (len)
|
238 | 053dea12 | Aurelien Jacobs | : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) |
239 | ba6802de | Michael Niedermayer | ); |
240 | 694ec061 | Fabrice Bellard | } |
241 | |||
/*
 * Reduce the four 16-bit partial sums held in mm6 to a single value.
 *
 * The words are folded pairwise (shift right by 32 bits and add, then shift
 * right by 16 bits and add) so the low word ends up holding the sum of all
 * four; the low 32 bits are then moved to a general register and masked to
 * 16 bits.
 *
 * Returns the accumulated sum truncated to its low 16 bits.
 *
 * The asm statement declares no inputs: mm6 is expected to have been filled
 * by the sad8_*_mmx helpers executed just before. mm0 and mm6 are
 * overwritten; no clobbers are declared.
 */
242 | 5c91a675 | Zdenek Kabelac | static inline int sum_mmx(void) |
243 | 694ec061 | Fabrice Bellard | { |
244 | ba6802de | Michael Niedermayer | int ret;
|
245 | asm volatile( |
||
246 | bb270c08 | Diego Biurrun | "movq %%mm6, %%mm0 \n\t"
|
247 | "psrlq $32, %%mm6 \n\t"
|
||
248 | "paddw %%mm0, %%mm6 \n\t"
|
||
249 | "movq %%mm6, %%mm0 \n\t"
|
||
250 | "psrlq $16, %%mm6 \n\t"
|
||
251 | "paddw %%mm0, %%mm6 \n\t"
|
||
252 | "movd %%mm6, %0 \n\t"
|
||
253 | ba6802de | Michael Niedermayer | : "=r" (ret)
|
254 | ); |
||
/* Only the low word holds the full sum; the upper half of ret is leftover
 * partial data from the fold. */
255 | return ret&0xFFFF; |
||
256 | 694ec061 | Fabrice Bellard | } |
257 | |||
/*
 * Return the low 32 bits of the mm6 accumulator, unmasked.
 *
 * No folding is done here, unlike sum_mmx. NOTE(review): this relies on the
 * *_mmx2 helpers accumulating psadbw results, which place the row sum in the
 * low word with the words above it zero - confirm against the instruction
 * reference.
 *
 * The asm statement declares no inputs: mm6 is expected to have been filled
 * by the sad8_*_mmx2 helpers executed just before.
 */
258 | 5c91a675 | Zdenek Kabelac | static inline int sum_mmx2(void) |
259 | 694ec061 | Fabrice Bellard | { |
260 | ba6802de | Michael Niedermayer | int ret;
|
261 | asm volatile( |
||
262 | bb270c08 | Diego Biurrun | "movd %%mm6, %0 \n\t"
|
263 | ba6802de | Michael Niedermayer | : "=r" (ret)
|
264 | ); |
||
265 | return ret;
|
||
266 | 694ec061 | Fabrice Bellard | } |
267 | |||
268 | 843342ef | Michael Niedermayer | |
/*
 * PIX_SAD(suf): generate the eight public SAD entry points for one
 * instruction-set suffix (mmx or mmx2):
 *
 *   sad8_suf,  sad8_x2_suf,  sad8_y2_suf,  sad8_xy2_suf   (8 pixels wide)
 *   sad16_suf, sad16_x2_suf, sad16_y2_suf, sad16_xy2_suf  (16 pixels wide)
 *
 * All share the signature (void *v, uint8_t *blk2, uint8_t *blk1,
 * int stride, int h) and return the value produced by sum_suf().
 * The first argument v is not used. Note the parameter order: blk2 comes
 * before blk1, and the helpers are called with (blk1, blk2).
 *
 * Each function:
 *   1. clears mm7 (zero for unpacking) and mm6 (accumulator); the x2/y2
 *      variants also load round_tab[1] into mm5 and the xy2 variants load
 *      round_tab[2];
 *   2. calls the matching sad8_1_/sad8_2_/sad8_4_ helper:
 *        plain: blk1 against blk2
 *        x2:    average of blk1 and blk1+1 against blk2
 *        y2:    average of blk1 and blk1+stride against blk2
 *        xy2:   four-sample average of blk1 against blk2
 *   3. returns sum_suf().
 *
 * The sad8_* variants assert h==8 and pass the literal 8 to the helper.
 * The sad16_* variants pass h through and call the helper twice, once for
 * the left 8 columns and once at +8 for the right 8 columns.
 *
 * The register set-up, the helper and the final sum are separate asm
 * statements that communicate through mm5/mm6/mm7, so nothing may touch
 * the MMX registers in between.
 * NOTE(review): assert() is used but <assert.h> is not included directly in
 * the lines shown; presumably it arrives through dsputil.h - confirm.
 */
269 | ba6802de | Michael Niedermayer | #define PIX_SAD(suf)\
|
270 | bb198e19 | Michael Niedermayer | static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
271 | ba6802de | Michael Niedermayer | {\ |
272 | bb198e19 | Michael Niedermayer | assert(h==8);\
|
273 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
274 | "pxor %%mm6, %%mm6 \n\t":);\
|
||
275 | ba6802de | Michael Niedermayer | \ |
276 | bb198e19 | Michael Niedermayer | sad8_1_ ## suf(blk1, blk2, stride, 8);\ |
277 | ba6802de | Michael Niedermayer | \ |
278 | return sum_ ## suf();\ |
||
279 | }\ |
||
280 | bb198e19 | Michael Niedermayer | static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
281 | ba6802de | Michael Niedermayer | {\ |
282 | bb198e19 | Michael Niedermayer | assert(h==8);\
|
283 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
284 | "pxor %%mm6, %%mm6 \n\t"\
|
||
285 | "movq %0, %%mm5 \n\t"\
|
||
286 | ba6802de | Michael Niedermayer | :: "m"(round_tab[1]) \ |
287 | );\ |
||
288 | \ |
||
289 | bb198e19 | Michael Niedermayer | sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 8);\ |
290 | ba6802de | Michael Niedermayer | \ |
291 | return sum_ ## suf();\ |
||
292 | }\ |
||
293 | \ |
||
294 | bb198e19 | Michael Niedermayer | static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
295 | ba6802de | Michael Niedermayer | {\ |
296 | bb198e19 | Michael Niedermayer | assert(h==8);\
|
297 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
298 | "pxor %%mm6, %%mm6 \n\t"\
|
||
299 | "movq %0, %%mm5 \n\t"\
|
||
300 | ba6802de | Michael Niedermayer | :: "m"(round_tab[1]) \ |
301 | );\ |
||
302 | \ |
||
303 | bb198e19 | Michael Niedermayer | sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 8);\ |
304 | ba6802de | Michael Niedermayer | \ |
305 | return sum_ ## suf();\ |
||
306 | }\ |
||
307 | \ |
||
308 | bb198e19 | Michael Niedermayer | static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
309 | ba6802de | Michael Niedermayer | {\ |
310 | bb198e19 | Michael Niedermayer | assert(h==8);\
|
311 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
312 | "pxor %%mm6, %%mm6 \n\t"\
|
||
313 | "movq %0, %%mm5 \n\t"\
|
||
314 | ba6802de | Michael Niedermayer | :: "m"(round_tab[2]) \ |
315 | );\ |
||
316 | \ |
||
317 | bb198e19 | Michael Niedermayer | sad8_4_ ## suf(blk1, blk2, stride, 8);\ |
318 | ba6802de | Michael Niedermayer | \ |
319 | return sum_ ## suf();\ |
||
320 | }\ |
||
321 | \ |
||
322 | bb198e19 | Michael Niedermayer | static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
323 | 1457ab52 | Michael Niedermayer | {\ |
324 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
325 | "pxor %%mm6, %%mm6 \n\t":);\
|
||
326 | 1457ab52 | Michael Niedermayer | \ |
327 | bb198e19 | Michael Niedermayer | sad8_1_ ## suf(blk1 , blk2 , stride, h);\ |
328 | sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
329 | 1457ab52 | Michael Niedermayer | \ |
330 | return sum_ ## suf();\ |
||
331 | }\ |
||
332 | bb198e19 | Michael Niedermayer | static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
333 | ba6802de | Michael Niedermayer | {\ |
334 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
335 | "pxor %%mm6, %%mm6 \n\t"\
|
||
336 | "movq %0, %%mm5 \n\t"\
|
||
337 | ba6802de | Michael Niedermayer | :: "m"(round_tab[1]) \ |
338 | );\ |
||
339 | \ |
||
340 | bb198e19 | Michael Niedermayer | sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, h);\ |
341 | sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, h);\ |
||
342 | ba6802de | Michael Niedermayer | \ |
343 | return sum_ ## suf();\ |
||
344 | }\ |
||
345 | bb198e19 | Michael Niedermayer | static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
346 | ba6802de | Michael Niedermayer | {\ |
347 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
348 | "pxor %%mm6, %%mm6 \n\t"\
|
||
349 | "movq %0, %%mm5 \n\t"\
|
||
350 | ba6802de | Michael Niedermayer | :: "m"(round_tab[1]) \ |
351 | );\ |
||
352 | \ |
||
353 | bb198e19 | Michael Niedermayer | sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, h);\ |
354 | sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, h);\ |
||
355 | ba6802de | Michael Niedermayer | \ |
356 | return sum_ ## suf();\ |
||
357 | }\ |
||
358 | bb198e19 | Michael Niedermayer | static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ |
359 | ba6802de | Michael Niedermayer | {\ |
360 | bb270c08 | Diego Biurrun | asm volatile("pxor %%mm7, %%mm7 \n\t"\ |
361 | "pxor %%mm6, %%mm6 \n\t"\
|
||
362 | "movq %0, %%mm5 \n\t"\
|
||
363 | ba6802de | Michael Niedermayer | :: "m"(round_tab[2]) \ |
364 | );\ |
||
365 | \ |
||
366 | bb198e19 | Michael Niedermayer | sad8_4_ ## suf(blk1 , blk2 , stride, h);\ |
367 | sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\ |
||
368 | ba6802de | Michael Niedermayer | \ |
369 | return sum_ ## suf();\ |
||
370 | }\ |
||
371 | 694ec061 | Fabrice Bellard | |
/* Instantiate the sad8_* and sad16_* function families once for the plain-MMX
 * helpers and once for the MMX2 helpers. */
372 | ba6802de | Michael Niedermayer | PIX_SAD(mmx) |
373 | PIX_SAD(mmx2) |
||
374 | 5c91a675 | Zdenek Kabelac | |
/*
 * Install the MMX / MMX2 SAD functions into a DSPContext.
 *
 * c:     context whose pix_abs[][] and sad[] function pointers are set
 * avctx: codec context; only avctx->flags is read, to test
 *        CODEC_FLAG_BITEXACT
 *
 * Index layout as used here: pix_abs[0][*] and sad[0] receive the sad16_*
 * functions, pix_abs[1][*] and sad[1] the sad8_* functions; the second
 * index selects plain (0), x2 (1), y2 (2) or xy2 (3).
 *
 * If neither MM_MMX nor MM_MMXEXT is set in mm_flags, c is left untouched.
 */
375 | b0368839 | Michael Niedermayer | void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
|
376 | 5c91a675 | Zdenek Kabelac | { |
/* Baseline: the plain-MMX versions for every slot. */
377 | if (mm_flags & MM_MMX) {
|
||
378 | bb198e19 | Michael Niedermayer | c->pix_abs[0][0] = sad16_mmx; |
379 | c->pix_abs[0][1] = sad16_x2_mmx; |
||
380 | c->pix_abs[0][2] = sad16_y2_mmx; |
||
381 | c->pix_abs[0][3] = sad16_xy2_mmx; |
||
382 | c->pix_abs[1][0] = sad8_mmx; |
||
383 | c->pix_abs[1][1] = sad8_x2_mmx; |
||
384 | c->pix_abs[1][2] = sad8_y2_mmx; |
||
385 | c->pix_abs[1][3] = sad8_xy2_mmx; |
||
386 | 5c91a675 | Zdenek Kabelac | |
387 | bb270c08 | Diego Biurrun | c->sad[0]= sad16_mmx;
|
388 | bb198e19 | Michael Niedermayer | c->sad[1]= sad8_mmx;
|
389 | 5c91a675 | Zdenek Kabelac | } |
/* MMX2 overrides: the plain SAD slots are always replaced. */
390 | if (mm_flags & MM_MMXEXT) {
|
||
391 | bb270c08 | Diego Biurrun | c->pix_abs[0][0] = sad16_mmx2; |
392 | c->pix_abs[1][0] = sad8_mmx2; |
||
393 | 5c91a675 | Zdenek Kabelac | |
394 | bb270c08 | Diego Biurrun | c->sad[0]= sad16_mmx2;
|
395 | c->sad[1]= sad8_mmx2;
|
||
396 | 115329f1 | Diego Biurrun | |
/* The interpolating (x2/y2/xy2) MMX2 versions are installed only when
 * bit-exact output has not been requested. */
397 | b0368839 | Michael Niedermayer | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
|
398 | bb198e19 | Michael Niedermayer | c->pix_abs[0][1] = sad16_x2_mmx2; |
399 | c->pix_abs[0][2] = sad16_y2_mmx2; |
||
400 | c->pix_abs[0][3] = sad16_xy2_mmx2; |
||
401 | c->pix_abs[1][1] = sad8_x2_mmx2; |
||
402 | c->pix_abs[1][2] = sad8_y2_mmx2; |
||
403 | c->pix_abs[1][3] = sad8_xy2_mmx2; |
||
404 | b0368839 | Michael Niedermayer | } |
405 | 5c91a675 | Zdenek Kabelac | } |
406 | } |