Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / idct_sse2_xvid.c @ fbb6b49d

History | View | Annotate | Download (15.3 KB)

/*
 * XVID MPEG-4 VIDEO CODEC
 * - SSE2 inverse discrete cosine transform -
 *
 * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
 *
 * Conversion to gcc syntax with modifications
 * by Alexander Strange <astrange@ithinksw.com>
 *
 * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
 *
 * This file is part of FFmpeg.
 *
 * Vertical pass is an implementation of the scheme:
 *  Loeffler C., Ligtenberg A., and Moschytz C.S.:
 *  Practical Fast 1D DCT Algorithm with Eleven Multiplications,
 *  Proc. ICASSP 1989, 988-991.
 *
 * Horizontal pass is a double 4x4 vector/matrix multiplication,
 * (see also Intel's Application Note 922:
 *  http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
 *  Copyright (C) 1999 Intel Corporation)
 *
 * More details at http://skal.planet-d.net/coding/dct.html
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
40
41 245976da Diego Biurrun
#include "libavcodec/dsputil.h"
42 616735eb Ramiro Polla
#include "libavutil/x86_cpu.h"
43 c4ff7c53 Diego Biurrun
#include "idct_xvid.h"
44 4e36a5b4 Måns Rullgård
#include "dsputil_mmx.h"
45 f73a6393 Alexander Strange
46
/*!
 * @file
 * @brief SSE2 idct compatible with xvidmmx
 */
50
51
/* Replicate x eight times as a comma-separated initializer list. */
#define X8(x)     x,x,x,x,x,x,x,x

/* Right shift applied after the row pass (matches "psrad $11" in iMTX_MULT). */
#define ROW_SHIFT 11
/* Right shift applied after the column pass (matches "psraw $6" in iLLM_PASS). */
#define COL_SHIFT 6
55
56 c6727809 Måns Rullgård
DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16)
57
DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
58
DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1
59
DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2)
60
DECLARE_ASM_CONST(8,  uint8_t, m127)[] = {X8(127)};
61 f73a6393 Alexander Strange
62 c6727809 Måns Rullgård
/*
 * Row-pass coefficient table used by iMTX_MULT for rows 0 and 4.
 * Four groups of eight 16-bit words, read by pmaddwd at byte offsets
 * 0, 16, 32 and 48.
 */
DECLARE_ASM_CONST(16, int16_t, iTab1)[] = {
 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
};
68
69 c6727809 Måns Rullgård
/*
 * Row-pass coefficient table used by iMTX_MULT for rows 1 and 7.
 * Four groups of eight 16-bit words, read by pmaddwd at byte offsets
 * 0, 16, 32 and 48.
 */
DECLARE_ASM_CONST(16, int16_t, iTab2)[] = {
 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
};
75
76 c6727809 Måns Rullgård
/*
 * Row-pass coefficient table used by iMTX_MULT for rows 2 and 6.
 * Four groups of eight 16-bit words, read by pmaddwd at byte offsets
 * 0, 16, 32 and 48.
 */
DECLARE_ASM_CONST(16, int16_t, iTab3)[] = {
 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
};
82
83 c6727809 Måns Rullgård
/*
 * Row-pass coefficient table used by iMTX_MULT for rows 3 and 5.
 * Four groups of eight 16-bit words, read by pmaddwd at byte offsets
 * 0, 16, 32 and 48.
 */
DECLARE_ASM_CONST(16, int16_t, iTab4)[] = {
 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
};
89
90 c6727809 Måns Rullgård
/*
 * Per-row rounding constants added (paddd) before the row-pass shift.
 * Each group of four int32 values is 16 bytes, so group k is addressed as
 * walkenIdctRounders+k*16.  ff_idct_xvid_sse2 uses group 0..3 for rows 0..3,
 * group 4 for row 5 and group 5 for both rows 6 and 7; row 4 gets no rounder.
 */
DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {
 65536, 65536, 65536, 65536,
  3597,  3597,  3597,  3597,
  2260,  2260,  2260,  2260,
  1203,  1203,  1203,  1203,
   120,   120,   120,   120,
   512,   512,   512,   512
};
98
99
// Temporary storage before the column pass:
// the transformed odd rows are kept in these XMM registers on every target.
#define ROW1 "%%xmm6"
#define ROW3 "%%xmm4"
#define ROW5 "%%xmm5"
#define ROW7 "%%xmm7"
104
105
/* Asm text that zeroes register r by xor-ing it with itself. */
#define CLEAR_ODD(r) "pxor  "r","r" \n\t"
/*
 * Asm text that writes the row result held in %xmm2 to dst, reordering its
 * four high words with pshufhw selector 0x1B on the way.
 */
#define PUT_ODD(dst) "pshufhw  $0x1B, %%xmm2, "dst"   \n\t"
107
108 b250f9c6 Aurelien Jacobs
/*
 * Storage for the even rows and scratch registers, per target.
 *
 * x86-64: sixteen XMM registers are available, so the even rows stay in
 * %xmm8..%xmm11 (ROWn and REGn are the same register), and the scratch
 * register and tan constants get registers of their own.  MOV_32_ONLY is
 * "#", which turns the rest of that asm line into an assembler comment.
 *
 * x86-32: the even rows live in the block itself (ROWn is a memory operand
 * relative to %0) and are loaded into a working register REGn on demand with
 * MOV_32_ONLY ("movdqa ").  CLEAR_EVEN is empty because nothing needs
 * zeroing in registers.  XMMS and TAN1 share %xmm2 here, which is why the
 * column passes spill TAN1 to (%0) around their use of XMMS.
 */
#if ARCH_X86_64

# define ROW0 "%%xmm8"
# define REG0 ROW0
# define ROW2 "%%xmm9"
# define REG2 ROW2
# define ROW4 "%%xmm10"
# define REG4 ROW4
# define ROW6 "%%xmm11"
# define REG6 ROW6
# define CLEAR_EVEN(r) CLEAR_ODD(r)
# define PUT_EVEN(dst) PUT_ODD(dst)
# define XMMS "%%xmm12"
# define MOV_32_ONLY "#"
# define SREG2 REG2
# define TAN3 "%%xmm13"
# define TAN1 "%%xmm14"

#else

# define ROW0 "(%0)"
# define REG0 "%%xmm4"
# define ROW2 "2*16(%0)"
# define REG2 "%%xmm4"
# define ROW4 "4*16(%0)"
# define REG4 "%%xmm6"
# define ROW6 "6*16(%0)"
# define REG6 "%%xmm6"
# define CLEAR_EVEN(r)
# define PUT_EVEN(dst) \
    "pshufhw  $0x1B, %%xmm2, %%xmm2   \n\t" \
    "movdqa          %%xmm2, "dst"    \n\t"
# define XMMS "%%xmm2"
# define MOV_32_ONLY "movdqa "
# define SREG2 "%%xmm7"
# define TAN3 "%%xmm0"
# define TAN1 "%%xmm2"

#endif
147
148
/*
 * Start of a "paddd <x>" instruction whose source is the mangled symbol x.
 * The destination operand and line terminator are appended by iMTX_MULT.
 */
#define ROUND(x) "paddd   "MANGLE(x)
149
150
/* Asm text: jump to label `to` when the 32-bit register `reg` is zero. */
#define JZ(reg, to)                         \
    "testl     "reg","reg"            \n\t" \
    "jz        "to"                   \n\t"
153
154
/* Asm text: jump to label `to` when the 32-bit register `reg` is non-zero. */
#define JNZ(reg, to)                        \
    "testl     "reg","reg"            \n\t" \
    "jnz       "to"                   \n\t"
157
158
/*
 * Asm text that checks whether the 16-byte row at `src` holds any non-zero
 * byte, after first emitting `clear`.
 *
 * The two 8-byte halves of the row are OR-ed together, %mm0 is added with
 * unsigned saturation and the byte sign bits are collected into `reg`.
 * With %mm0 holding 127 in every byte (ff_idct_xvid_sse2 loads it from m127),
 * `reg` ends up non-zero exactly when some byte of the row is non-zero.
 * Clobbers %mm1.
 */
#define TEST_ONE_ROW(src, reg, clear)       \
    clear                                   \
    "movq     "src", %%mm1            \n\t" \
    "por    8+"src", %%mm1            \n\t" \
    "paddusb  %%mm0, %%mm1            \n\t" \
    "pmovmskb %%mm1, "reg"            \n\t"
164
165
/*
 * Two-row variant of TEST_ONE_ROW: emits `clear1` and `clear2`, then tests
 * the 16-byte rows at `row1` and `row2` for non-zero bytes, leaving the
 * byte-sign masks in `reg1` and `reg2` respectively.
 * Expects %mm0 to hold 127 in every byte; clobbers %mm1 and %mm2.
 */
#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
    clear1                                  \
    clear2                                  \
    "movq     "row1", %%mm1           \n\t" \
    "por    8+"row1", %%mm1           \n\t" \
    "movq     "row2", %%mm2           \n\t" \
    "por    8+"row2", %%mm2           \n\t" \
    "paddusb   %%mm0, %%mm1           \n\t" \
    "paddusb   %%mm0, %%mm2           \n\t" \
    "pmovmskb  %%mm1, "reg1"          \n\t" \
    "pmovmskb  %%mm2, "reg2"          \n\t"
176
177
/**
 * IDCT pass on rows.
 *
 * Asm text that transforms one 8-coefficient row.
 * @param src     memory operand of the 16-byte input row
 * @param table   mangled symbol of the 64-byte coefficient table (iTab1..4),
 *                read at offsets 0, 16, 32 and 48
 * @param rounder beginning of the instruction that adds the rounding constant
 *                to %xmm0 (see ROUND); passing "#" comments the line out so
 *                that no rounder is applied
 * @param put     asm text that stores the packed result from %xmm2
 *                (PUT_EVEN / PUT_ODD)
 *
 * The result is shifted right by 11 (ROW_SHIFT) and packed to 16 bits with
 * signed saturation.  Clobbers %xmm0-%xmm3.  Every expansion ends with the
 * local label "1:", so a preceding `JZ(reg, "1f")` skips the whole expansion.
 */
#define iMTX_MULT(src, table, rounder, put) \
    "movdqa        "src", %%xmm3      \n\t" \
    "movdqa       %%xmm3, %%xmm0      \n\t" \
    "pshufd   $0x11, %%xmm3, %%xmm1   \n\t" /* 4602 */ \
    "punpcklqdq   %%xmm0, %%xmm0      \n\t" /* 0246 */ \
    "pmaddwd     "table", %%xmm0      \n\t" \
    "pmaddwd  16+"table", %%xmm1      \n\t" \
    "pshufd   $0xBB, %%xmm3, %%xmm2   \n\t" /* 5713 */ \
    "punpckhqdq   %%xmm3, %%xmm3      \n\t" /* 1357 */ \
    "pmaddwd  32+"table", %%xmm2      \n\t" \
    "pmaddwd  48+"table", %%xmm3      \n\t" \
    "paddd        %%xmm1, %%xmm0      \n\t" \
    "paddd        %%xmm3, %%xmm2      \n\t" \
    rounder",     %%xmm0              \n\t" \
    "movdqa       %%xmm2, %%xmm3      \n\t" \
    "paddd        %%xmm0, %%xmm2      \n\t" \
    "psubd        %%xmm3, %%xmm0      \n\t" \
    "psrad           $11, %%xmm2      \n\t" \
    "psrad           $11, %%xmm0      \n\t" \
    "packssdw     %%xmm0, %%xmm2      \n\t" \
    put                                     \
    "1:                               \n\t"
200
201
/*
 * Asm text that loads the tan3 and tan1 constant vectors into the TAN3 and
 * TAN1 registers, as expected on entry by iLLM_PASS and iLLM_PASS_SPARSE.
 * (No line continuation after the last line: a trailing backslash would
 * splice whatever follows into the macro.)
 */
#define iLLM_HEAD                           \
    "movdqa   "MANGLE(tan3)", "TAN3"  \n\t" \
    "movdqa   "MANGLE(tan1)", "TAN1"  \n\t"
204
205
/**
 * IDCT pass on columns.
 *
 * Asm text that transforms all eight columns at once and stores the eight
 * result rows to k*16(dct), k = 0..7, each shifted right by 6 (COL_SHIFT).
 *
 * On entry: TAN3 and TAN1 hold the tan3/tan1 vectors (see iLLM_HEAD), the odd
 * rows are in ROW1 (%xmm6), ROW3 (%xmm4), ROW5 (%xmm5) and ROW7 (%xmm7), and
 * the even rows are in ROW0/2/4/6 (registers on x86-64, memory at %0 on
 * x86-32, loaded through the MOV_32_ONLY lines).
 *
 * On x86-32 XMMS and TAN1 are the same register, so TAN1 is spilled to (%0)
 * while XMMS is used as scratch and reloaded afterwards; on x86-64 those
 * MOV_32_ONLY lines are assembler comments.  Note that the spill slot is
 * hard-coded as (%0) and does not use the `dct` argument.
 */
#define iLLM_PASS(dct)                      \
    "movdqa   "TAN3", %%xmm1          \n\t" \
    "movdqa   "TAN1", %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "pmulhw   %%xmm5, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   %%xmm5, %%xmm1          \n\t" \
    "psubsw   %%xmm5, "TAN3"          \n\t" \
    "paddsw   %%xmm4, %%xmm1          \n\t" \
    "pmulhw   %%xmm7, %%xmm3          \n\t" \
    "pmulhw   %%xmm6, "TAN1"          \n\t" \
    "paddsw   %%xmm6, %%xmm3          \n\t" \
    "psubsw   %%xmm7, "TAN1"          \n\t" \
    "movdqa   %%xmm3, %%xmm7          \n\t" \
    "movdqa   "TAN1", %%xmm6          \n\t" \
    "psubsw   %%xmm1, %%xmm3          \n\t" \
    "psubsw   "TAN3", "TAN1"          \n\t" \
    "paddsw   %%xmm7, %%xmm1          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   %%xmm3, %%xmm6          \n\t" \
    "psubsw   "TAN3", %%xmm3          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw   %%xmm4, %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   "TAN3", "TAN3"          \n\t" \
    "paddsw   %%xmm3, %%xmm3          \n\t" \
    /* rows 2 and 6 combined using tan2 */  \
    "movdqa   "MANGLE(tan2)", %%xmm7  \n\t" \
    MOV_32_ONLY ROW2", "REG2"         \n\t" \
    MOV_32_ONLY ROW6", "REG6"         \n\t" \
    "movdqa   %%xmm7, %%xmm5          \n\t" \
    "pmulhw   "REG6", %%xmm7          \n\t" \
    "pmulhw   "REG2", %%xmm5          \n\t" \
    "paddsw   "REG2", %%xmm7          \n\t" \
    "psubsw   "REG6", %%xmm5          \n\t" \
    /* rows 0 and 4; TAN1 spilled on x86-32 */ \
    MOV_32_ONLY ROW0", "REG0"         \n\t" \
    MOV_32_ONLY ROW4", "REG4"         \n\t" \
    MOV_32_ONLY"  "TAN1", (%0)        \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   "REG4", "REG0"          \n\t" \
    "paddsw   "XMMS", "REG4"          \n\t" \
    "movdqa   "REG4", "XMMS"          \n\t" \
    "psubsw   %%xmm7, "REG4"          \n\t" \
    "paddsw   "XMMS", %%xmm7          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm5, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm5          \n\t" \
    "movdqa   %%xmm5, "XMMS"          \n\t" \
    "psubsw   "TAN3", %%xmm5          \n\t" \
    "paddsw   "XMMS", "TAN3"          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm3, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm3          \n\t" \
    MOV_32_ONLY"  (%0), "TAN1"        \n\t" \
    /* shift and store output rows 1, 2, 5, 6 */ \
    "psraw        $6, %%xmm5          \n\t" \
    "psraw        $6, "REG0"          \n\t" \
    "psraw        $6, "TAN3"          \n\t" \
    "psraw        $6, %%xmm3          \n\t" \
    "movdqa   "TAN3", 1*16("dct")     \n\t" \
    "movdqa   %%xmm3, 2*16("dct")     \n\t" \
    "movdqa   "REG0", 5*16("dct")     \n\t" \
    "movdqa   %%xmm5, 6*16("dct")     \n\t" \
    /* shift and store output rows 0, 3, 4, 7 */ \
    "movdqa   %%xmm7, %%xmm0          \n\t" \
    "movdqa   "REG4", %%xmm4          \n\t" \
    "psubsw   %%xmm1, %%xmm7          \n\t" \
    "psubsw   "TAN1", "REG4"          \n\t" \
    "paddsw   %%xmm0, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN1"          \n\t" \
    "psraw        $6, %%xmm1          \n\t" \
    "psraw        $6, %%xmm7          \n\t" \
    "psraw        $6, "TAN1"          \n\t" \
    "psraw        $6, "REG4"          \n\t" \
    "movdqa   %%xmm1, ("dct")         \n\t" \
    "movdqa   "TAN1", 3*16("dct")     \n\t" \
    "movdqa   "REG4", 4*16("dct")     \n\t" \
    "movdqa   %%xmm7, 7*16("dct")     \n\t"
282
283
/**
 * IDCT pass on columns, assuming rows 4-7 are zero.
 *
 * Cheaper variant of iLLM_PASS: it reads only rows 0-3 (ROW1 in %xmm6, ROW3
 * in %xmm4, ROW0 and ROW2 via REG0 / SREG2) and never touches ROW4..ROW7.
 * It still writes all eight output rows to k*16(dct), k = 0..7, each shifted
 * right by 6 (COL_SHIFT).
 *
 * Like iLLM_PASS it expects TAN3 and TAN1 to be preloaded (see iLLM_HEAD)
 * and, on x86-32, spills TAN1 to the hard-coded slot (%0) while XMMS (the
 * same register there) is used as scratch.
 */
#define iLLM_PASS_SPARSE(dct)               \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   %%xmm4, "TAN3"          \n\t" \
    "movdqa   %%xmm6, %%xmm3          \n\t" \
    "pmulhw   %%xmm6, "TAN1"          \n\t" \
    "movdqa   %%xmm4, %%xmm1          \n\t" \
    "psubsw   %%xmm1, %%xmm3          \n\t" \
    "paddsw   %%xmm6, %%xmm1          \n\t" \
    "movdqa   "TAN1", %%xmm6          \n\t" \
    "psubsw   "TAN3", "TAN1"          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   %%xmm3, %%xmm6          \n\t" \
    "psubsw   "TAN3", %%xmm3          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw   %%xmm4, %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   "TAN3", "TAN3"          \n\t" \
    "paddsw   %%xmm3, %%xmm3          \n\t" \
    /* rows 0 and 2; row 2 scaled by tan2 */ \
    "movdqa   "MANGLE(tan2)", %%xmm5  \n\t" \
    MOV_32_ONLY ROW2", "SREG2"        \n\t" \
    "pmulhw   "SREG2", %%xmm5         \n\t" \
    MOV_32_ONLY ROW0", "REG0"         \n\t" \
    "movdqa   "REG0", %%xmm6          \n\t" \
    "psubsw   "SREG2", %%xmm6         \n\t" \
    "paddsw   "REG0", "SREG2"         \n\t" \
    MOV_32_ONLY"  "TAN1", (%0)        \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm5, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm5          \n\t" \
    "movdqa   %%xmm5, "XMMS"          \n\t" \
    "psubsw   "TAN3", %%xmm5          \n\t" \
    "paddsw   "XMMS", "TAN3"          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm3, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm3          \n\t" \
    MOV_32_ONLY"  (%0), "TAN1"        \n\t" \
    /* shift and store output rows 1, 2, 5, 6 */ \
    "psraw        $6, %%xmm5          \n\t" \
    "psraw        $6, "REG0"          \n\t" \
    "psraw        $6, "TAN3"          \n\t" \
    "psraw        $6, %%xmm3          \n\t" \
    "movdqa   "TAN3", 1*16("dct")     \n\t" \
    "movdqa   %%xmm3, 2*16("dct")     \n\t" \
    "movdqa   "REG0", 5*16("dct")     \n\t" \
    "movdqa   %%xmm5, 6*16("dct")     \n\t" \
    /* shift and store output rows 0, 3, 4, 7 */ \
    "movdqa   "SREG2", %%xmm0         \n\t" \
    "movdqa   %%xmm6, %%xmm4          \n\t" \
    "psubsw   %%xmm1, "SREG2"         \n\t" \
    "psubsw   "TAN1", %%xmm6          \n\t" \
    "paddsw   %%xmm0, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN1"          \n\t" \
    "psraw        $6, %%xmm1          \n\t" \
    "psraw        $6, "SREG2"         \n\t" \
    "psraw        $6, "TAN1"          \n\t" \
    "psraw        $6, %%xmm6          \n\t" \
    "movdqa   %%xmm1, ("dct")         \n\t" \
    "movdqa   "TAN1", 3*16("dct")     \n\t" \
    "movdqa   %%xmm6, 4*16("dct")     \n\t" \
    "movdqa   "SREG2", 7*16("dct")    \n\t"
343
344
inline void ff_idct_xvid_sse2(short *block)
345
{
346 be449fca Diego Pettenò
    __asm__ volatile(
347 f73a6393 Alexander Strange
    "movq     "MANGLE(m127)", %%mm0                              \n\t"
348
    iMTX_MULT("(%0)",     MANGLE(iTab1), ROUND(walkenIdctRounders),      PUT_EVEN(ROW0))
349
    iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
350
    iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))
351
352
    TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
353
    JZ("%%eax", "1f")
354
    iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))
355
356
    TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
357
    TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
358
    iLLM_HEAD
359 ef4a6514 Mans Rullgard
    ".p2align 4 \n\t"
360 f73a6393 Alexander Strange
    JNZ("%%ecx", "2f")
361
    JNZ("%%eax", "3f")
362
    JNZ("%%edx", "4f")
363
    JNZ("%%esi", "5f")
364
    iLLM_PASS_SPARSE("%0")
365
    "jmp 6f                                                      \n\t"
366
    "2:                                                          \n\t"
367
    iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))
368
    "3:                                                          \n\t"
369
    iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))
370
    JZ("%%edx", "1f")
371
    "4:                                                          \n\t"
372
    iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))
373
    JZ("%%esi", "1f")
374
    "5:                                                          \n\t"
375
    iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))
376 b250f9c6 Aurelien Jacobs
#if !ARCH_X86_64
377 f73a6393 Alexander Strange
    iLLM_HEAD
378
#endif
379
    iLLM_PASS("%0")
380
    "6:                                                          \n\t"
381
    : "+r"(block)
382
    :
383 153ca56b Ramiro Polla
    : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
384
                   "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
385 ba404520 Ramiro Polla
#if ARCH_X86_64
386 153ca56b Ramiro Polla
      XMM_CLOBBERS("%xmm8" , "%xmm9" , "%xmm10", "%xmm11",
387
                   "%xmm12", "%xmm13", "%xmm14",)
388 ba404520 Ramiro Polla
#endif
389 153ca56b Ramiro Polla
      "%eax", "%ecx", "%edx", "%esi", "memory"
390 616735eb Ramiro Polla
    );
391 f73a6393 Alexander Strange
}
392
393
/**
 * Run ff_idct_xvid_sse2() on block in place, then hand the result to
 * ff_put_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination passed through to ff_put_pixels_clamped_mmx()
 * @param line_size presumably the destination stride in bytes -- confirm
 *                  against ff_put_pixels_clamped_mmx()
 * @param block     8x8 coefficient block; overwritten by the IDCT
 */
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    ff_put_pixels_clamped_mmx(block, dest, line_size);
}
398
399
/**
 * Run ff_idct_xvid_sse2() on block in place, then hand the result to
 * ff_add_pixels_clamped_mmx() together with dest and line_size.
 *
 * @param dest      destination passed through to ff_add_pixels_clamped_mmx()
 * @param line_size presumably the destination stride in bytes -- confirm
 *                  against ff_add_pixels_clamped_mmx()
 * @param block     8x8 coefficient block; overwritten by the IDCT
 */
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    ff_add_pixels_clamped_mmx(block, dest, line_size);
}