Statistics
| Branch: | Revision:

ffmpeg / libavfilter / x86 / yadif_template.c @ 4b9c03b7

History | View | Annotate | Download (9.98 KB)

/*
 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * Register-width abstraction for the line-filter template.
 *
 * The including file selects the flavour by defining COMPILE_TEMPLATE_SSE
 * (128-bit xmm registers, 8 pixels per iteration) or leaving it undefined
 * (64-bit mm registers, 4 pixels per iteration).  Every macro expands to a
 * string-literal fragment meant to be pasted into a GCC extended-asm
 * template, hence the doubled '%' in the register prefix.
 *
 *   MM          register-name prefix; MM"3" names register 3
 *   MOV         load/store of STEP bytes (low half of the register)
 *   MOVQ        full-register move; the SSE flavour is movdqa, so memory
 *               operands used with it must be 16-byte aligned
 *   MOVQU       full-register move without alignment requirement
 *   STEP        number of pixels produced per loop iteration
 *   LOAD(m,d)   load STEP bytes from m into d and widen them to 16-bit
 *               words by interleaving with register 7, which the caller
 *               must have zeroed
 *   PSRL1(r)    shift the whole register r right by one byte
 *   PSRL2(r)    shift the whole register r right by two bytes
 *   PSHUF(s,d)  NOTE: despite the parameter names, the FIRST argument is
 *               the one written: s receives d shifted right by two bytes
 *               (the MMX flavour uses pshufw $9, whose two low words equal
 *               that shift; only the low half is consumed afterwards)
 */
#ifdef COMPILE_TEMPLATE_SSE
#define MM "%%xmm"
#define MOV  "movq"
#define MOVQ "movdqa"
#define MOVQU "movdqu"
#define STEP 8
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
                       "psrldq $2, "src"     \n\t"
#else
#define MM "%%mm"
#define MOV  "movd"
#define MOVQ "movq"
#define MOVQU "movq"
#define STEP 4
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
#endif

/*
 * PABS(tmp, dst): replace each signed 16-bit word of dst with its
 * absolute value.
 *
 * With COMPILE_TEMPLATE_SSSE3 a single pabsw is emitted and tmp is not
 * touched.  Otherwise tmp is clobbered: it is cleared, set to -dst, and
 * dst becomes max(dst, -dst).
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
            "pabsw     "dst", "dst" \n\t"
#else
#define PABS(tmp,dst) \
            "pxor     "tmp", "tmp" \n\t"\
            "psubw    "dst", "tmp" \n\t"\
            "pmaxsw   "tmp", "dst" \n\t"
#endif

/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringified (#pj / #mj) and used as byte displacements
 * into the line at cur+mrefs and the line at cur+prefs respectively.
 *
 * Expects:  register 7 == 0; asm operands [cur], [mrefs], [prefs].
 * Produces: register 5 = candidate prediction, the truncating average
 *                        of the two diagonal neighbours, as 16-bit words
 *           register 2 = score, the sum of three adjacent absolute byte
 *                        differences, as 16-bit words
 * Clobbers: registers 3 and 4.
 *
 * pavgb rounds up, so the low bit of (a ^ b) is subtracted again to get
 * (a + b) >> 1.  This relies on pb_1 being a vector of byte 1s, which is
 * defined outside this file -- TODO confirm.
 */
#define CHECK(pj,mj) \
            MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
            MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            MOVQ"      "MM"2, "MM"5 \n\t"\
            "pxor      "MM"3, "MM"4 \n\t"\
            "pavgb     "MM"3, "MM"5 \n\t"\
            "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
            "psubusb   "MM"4, "MM"5 \n\t"\
            PSRL1(MM"5")                 \
            "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            "psubusb   "MM"3, "MM"2 \n\t"\
            "psubusb   "MM"4, "MM"3 \n\t"\
            "pmaxub    "MM"3, "MM"2 \n\t"\
            MOVQ"      "MM"2, "MM"3 \n\t"\
            MOVQ"      "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
            PSRL1(MM"3")                  /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
            PSRL2(MM"4")                  /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
            "punpcklbw "MM"7, "MM"2 \n\t"\
            "punpcklbw "MM"7, "MM"3 \n\t"\
            "punpcklbw "MM"7, "MM"4 \n\t"\
            "paddw     "MM"3, "MM"2 \n\t"\
            "paddw     "MM"4, "MM"2 \n\t" /* score */

/*
 * CHECK1: accept the direction just evaluated by CHECK wherever it
 * scores better than the best so far.
 *
 * Expects:  register 0 = spatial_score, register 1 = spatial_pred,
 *           register 2 = score and register 5 = candidate (from CHECK).
 * Produces: register 0 = min(spatial_score, score)
 *           register 1 = candidate where score < spatial_score, else the
 *                        previous spatial_pred
 *           register 6 = the per-word comparison mask, kept for CHECK2
 * Clobbers: registers 3 and 5.
 */
#define CHECK1 \
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
            "pminsw    "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
            MOVQ"      "MM"3, "MM"6 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

/*
 * CHECK2: like CHECK1, but only lets the new direction win in lanes
 * where the preceding CHECK1 also won.
 *
 * Register 6 holds the CHECK1 mask (all ones where CHECK1 improved the
 * score, zero elsewhere).  Adding pw_1 and shifting left by 14 turns it
 * into a penalty that is zero where CHECK1 improved and large elsewhere;
 * the penalty is added to the score with signed saturation before the
 * usual compare/select.  This relies on pw_1 being a vector of word 1s,
 * which is defined outside this file -- TODO confirm.
 *
 * Expects and updates the same registers as CHECK1, and additionally
 * modifies registers 2 and 6.
 */
#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
                  hurts both quality and speed, but matches the C version. */\
            "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
            "psllw     $14,   "MM"6 \n\t"\
            "paddsw    "MM"6, "MM"2 \n\t"\
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t"\
            "pminsw    "MM"2, "MM"0 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t"

106
void RENAME(ff_yadif_filter_line)(uint8_t *dst,
107
                                  uint8_t *prev, uint8_t *cur, uint8_t *next,
108 bad82d3d Michael Niedermayer
                                  int w, int prefs, int mrefs, int parity, int mode)
109 1ef64490 Baptiste Coudurier
{
110 4b9c03b7 Michael Niedermayer
    uint8_t tmp[5*16];
111
    uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
112 1ef64490 Baptiste Coudurier
    int x;
113
114
#define FILTER\
115
    for(x=0; x<w; x+=STEP){\
116
        __asm__ volatile(\
117
            "pxor      "MM"7, "MM"7 \n\t"\
118
            LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
119
            LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
120
            LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
121
            LOAD("(%["next2"])", MM"3") /* next2[x] */\
122
            MOVQ"      "MM"3, "MM"4 \n\t"\
123
            "paddw     "MM"2, "MM"3 \n\t"\
124
            "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
125 4b9c03b7 Michael Niedermayer
            MOVQ"      "MM"0, (%[tmpA]) \n\t" /* c */\
126
            MOVQ"      "MM"3, 16(%[tmpA]) \n\t" /* d */\
127
            MOVQ"      "MM"1, 32(%[tmpA]) \n\t" /* e */\
128 1ef64490 Baptiste Coudurier
            "psubw     "MM"4, "MM"2 \n\t"\
129
            PABS(      MM"4", MM"2") /* temporal_diff0 */\
130
            LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
131
            LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
132
            "psubw     "MM"0, "MM"3 \n\t"\
133
            "psubw     "MM"1, "MM"4 \n\t"\
134
            PABS(      MM"5", MM"3")\
135
            PABS(      MM"5", MM"4")\
136
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
137
            "psrlw     $1,    "MM"2 \n\t"\
138
            "psrlw     $1,    "MM"3 \n\t"\
139
            "pmaxsw    "MM"3, "MM"2 \n\t"\
140
            LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
141
            LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
142
            "psubw     "MM"0, "MM"3 \n\t"\
143
            "psubw     "MM"1, "MM"4 \n\t"\
144
            PABS(      MM"5", MM"3")\
145
            PABS(      MM"5", MM"4")\
146
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
147
            "psrlw     $1,    "MM"3 \n\t"\
148
            "pmaxsw    "MM"3, "MM"2 \n\t"\
149 4b9c03b7 Michael Niedermayer
            MOVQ"      "MM"2, 48(%[tmpA]) \n\t" /* diff */\
150 1ef64490 Baptiste Coudurier
\
151
            "paddw     "MM"0, "MM"1 \n\t"\
152
            "paddw     "MM"0, "MM"0 \n\t"\
153
            "psubw     "MM"1, "MM"0 \n\t"\
154
            "psrlw     $1,    "MM"1 \n\t" /* spatial_pred */\
155
            PABS(      MM"2", MM"0")      /* ABS(c-e) */\
156
\
157
            MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
158
            MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
159
            MOVQ"      "MM"2, "MM"4 \n\t"\
160
            "psubusb   "MM"3, "MM"2 \n\t"\
161
            "psubusb   "MM"4, "MM"3 \n\t"\
162
            "pmaxub    "MM"3, "MM"2 \n\t"\
163
            PSHUF(MM"3", MM"2") \
164
            "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
165
            "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
166
            "paddw     "MM"2, "MM"0 \n\t"\
167
            "paddw     "MM"3, "MM"0 \n\t"\
168 90f1f3bf Baptiste Coudurier
            "psubw    "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
169 1ef64490 Baptiste Coudurier
\
170
            CHECK(-2,0)\
171
            CHECK1\
172
            CHECK(-3,1)\
173
            CHECK2\
174
            CHECK(0,-2)\
175
            CHECK1\
176
            CHECK(1,-3)\
177
            CHECK2\
178
\
179
            /* if(p->mode<2) ... */\
180 4b9c03b7 Michael Niedermayer
            MOVQ"    48(%[tmpA]), "MM"6 \n\t" /* diff */\
181 1ef64490 Baptiste Coudurier
            "cmpl      $2, %[mode] \n\t"\
182
            "jge       1f \n\t"\
183
            LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
184
            LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
185
            LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
186
            LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
187
            "paddw     "MM"4, "MM"2 \n\t"\
188
            "paddw     "MM"5, "MM"3 \n\t"\
189
            "psrlw     $1,    "MM"2 \n\t" /* b */\
190
            "psrlw     $1,    "MM"3 \n\t" /* f */\
191 4b9c03b7 Michael Niedermayer
            MOVQ"    (%[tmpA]), "MM"4 \n\t" /* c */\
192
            MOVQ"    16(%[tmpA]), "MM"5 \n\t" /* d */\
193
            MOVQ"    32(%[tmpA]), "MM"7 \n\t" /* e */\
194 1ef64490 Baptiste Coudurier
            "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
195
            "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
196
            MOVQ"      "MM"5, "MM"0 \n\t"\
197
            "psubw     "MM"4, "MM"5 \n\t" /* d-c */\
198
            "psubw     "MM"7, "MM"0 \n\t" /* d-e */\
199
            MOVQ"      "MM"2, "MM"4 \n\t"\
200
            "pminsw    "MM"3, "MM"2 \n\t"\
201
            "pmaxsw    "MM"4, "MM"3 \n\t"\
202
            "pmaxsw    "MM"5, "MM"2 \n\t"\
203
            "pminsw    "MM"5, "MM"3 \n\t"\
204
            "pmaxsw    "MM"0, "MM"2 \n\t" /* max */\
205
            "pminsw    "MM"0, "MM"3 \n\t" /* min */\
206
            "pxor      "MM"4, "MM"4 \n\t"\
207
            "pmaxsw    "MM"3, "MM"6 \n\t"\
208
            "psubw     "MM"2, "MM"4 \n\t" /* -max */\
209
            "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
210
            "1: \n\t"\
211
\
212 4b9c03b7 Michael Niedermayer
            MOVQ"    16(%[tmpA]), "MM"2 \n\t" /* d */\
213 1ef64490 Baptiste Coudurier
            MOVQ"      "MM"2, "MM"3 \n\t"\
214
            "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
215
            "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
216
            "pmaxsw    "MM"2, "MM"1 \n\t"\
217
            "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
218
            "packuswb  "MM"1, "MM"1 \n\t"\
219
\
220 4b9c03b7 Michael Niedermayer
            :\
221
            :[tmpA] "r"(tmpA),\
222
             [prev] "r"(prev),\
223 1ef64490 Baptiste Coudurier
             [cur]  "r"(cur),\
224
             [next] "r"(next),\
225 bad82d3d Michael Niedermayer
             [prefs]"r"((x86_reg)prefs),\
226
             [mrefs]"r"((x86_reg)mrefs),\
227 1ef64490 Baptiste Coudurier
             [mode] "g"(mode)\
228
        );\
229
        __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
230
        dst += STEP;\
231
        prev+= STEP;\
232
        cur += STEP;\
233
        next+= STEP;\
234
    }
235
236
    if (parity) {
237
#define prev2 "prev"
238
#define next2 "cur"
239
        FILTER
240
#undef prev2
241
#undef next2
242
    } else {
243
#define prev2 "cur"
244
#define next2 "next"
245
        FILTER
246
#undef prev2
247
#undef next2
248
    }
249
}
/* Drop every template macro so the file can be included again with a
 * different COMPILE_TEMPLATE_* selection. */
#undef STEP
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef PSHUF
#undef PSRL1
#undef PSRL2
#undef LOAD
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER