Statistics
| Branch: | Revision:

ffmpeg / libavfilter / x86 / yadif_template.c @ 2912e87a

History | View | Annotate | Download (10.1 KB)

/*
 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with Libav; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

    
/*
 * Instruction-set dependent building blocks for the inline-asm template.
 *
 * All of these expand to string literals that are pasted into an asm
 * template, which is why register names carry a doubled '%'.
 *
 *   MM          register-name prefix ("%%xmm" or "%%mm"); a digit is appended
 *               by the user, e.g. MM"7".
 *   MOV         move used for the narrow loads in LOAD and for the final
 *               store of the result (movq in the SSE variant, movd otherwise).
 *   MOVQ        full-register move (movdqa in the SSE variant).
 *   MOVQU       full-register move used for the loads at byte offsets in
 *               CHECK/FILTER (movdqu in the SSE variant).
 *   STEP        number the loop counter and the row pointers advance by
 *               per iteration (8 in the SSE variant, 4 otherwise).
 *   LOAD        MOV from mem into dst, then punpcklbw with register 7.
 *               Register 7 must hold zero for this to widen bytes to words;
 *               the FILTER code clears it with pxor before the first LOAD.
 *   PSRL1/2     shift a whole register right by one / two bytes.
 *   PSHUF       produce in src a copy of dst moved down by two bytes
 *               (SSE: movdqa + psrldq $2; otherwise: pshufw $9).
 */
#ifdef COMPILE_TEMPLATE_SSE
#define MM "%%xmm"
#define MOV  "movq"
#define MOVQ "movdqa"
#define MOVQU "movdqu"
#define STEP 8
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
                       "psrldq $2, "src"     \n\t"
#else
#define MM "%%mm"
#define MOV  "movd"
#define MOVQ "movq"
#define MOVQU "movq"
#define STEP 4
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
#endif

    
/*
 * PABS(tmp, dst): replace each signed 16-bit lane of dst by its absolute value.
 *
 * With COMPILE_TEMPLATE_SSSE3 this is a single pabsw and tmp is not touched.
 * Otherwise it is emulated as max(dst, 0 - dst): tmp is cleared, dst is
 * subtracted from it, and the signed maximum is written back to dst, so tmp is
 * clobbered in that variant.
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
            "pabsw     "dst", "dst" \n\t"
#else
#define PABS(tmp,dst) \
            "pxor     "tmp", "tmp" \n\t"\
            "psubw    "dst", "tmp" \n\t"\
            "pmaxsw   "tmp", "dst" \n\t"
#endif

    
/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj is the byte displacement applied to the load from the row at
 * cur + mrefs, mj the displacement applied to the load from the row at
 * cur + prefs; both are pasted into the addressing mode via '#'.
 *
 * Register usage (MM7 must be zero on entry):
 *   MM5 (out) - per-pixel average of the two loaded rows, rounded down:
 *               pavgb rounds up, so the low bit of (a ^ b) is subtracted
 *               again. The result is shifted by one byte and widened to
 *               16-bit lanes.
 *   MM2 (out) - score: sum of three neighbouring absolute byte differences
 *               (byte offsets 0, 1 and 2 of the loaded vectors), as 16-bit
 *               lanes.
 *   MM3, MM4  - scratch, clobbered.
 *
 * MANGLE(pb_1) names a constant defined outside this file.
 */
#define CHECK(pj,mj) \
            MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
            MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            MOVQ"      "MM"2, "MM"5 \n\t"\
            "pxor      "MM"3, "MM"4 \n\t"\
            "pavgb     "MM"3, "MM"5 \n\t"\
            "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
            "psubusb   "MM"4, "MM"5 \n\t"\
            PSRL1(MM"5")                 \
            "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            "psubusb   "MM"3, "MM"2 \n\t"\
            "psubusb   "MM"4, "MM"3 \n\t"\
            "pmaxub    "MM"3, "MM"2 \n\t"\
            MOVQ"      "MM"2, "MM"3 \n\t"\
            MOVQ"      "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
            PSRL1(MM"3")                  /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
            PSRL2(MM"4")                  /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
            "punpcklbw "MM"7, "MM"2 \n\t"\
            "punpcklbw "MM"7, "MM"3 \n\t"\
            "punpcklbw "MM"7, "MM"4 \n\t"\
            "paddw     "MM"3, "MM"2 \n\t"\
            "paddw     "MM"4, "MM"2 \n\t" /* score */

    
/*
 * CHECK1: accept the direction just evaluated by CHECK where it scores better.
 *
 * On entry MM0 holds spatial_score, MM1 spatial_pred, MM2 the new score and
 * MM5 the new prediction (both produced by CHECK).
 *
 * Per 16-bit lane: build a mask of the lanes where spatial_score > score,
 * lower MM0 to min(MM0, MM2), and replace MM1 by MM5 in the masked lanes.
 * The mask is also left in MM6 for a following CHECK2. MM3 and MM5 are
 * clobbered.
 */
#define CHECK1 \
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
            "pminsw    "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
            MOVQ"      "MM"3, "MM"6 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

    
/*
 * CHECK2: like CHECK1, but only lets the new direction win in lanes where the
 * preceding CHECK1 accepted its direction.
 *
 * MM6 holds the comparison mask left by CHECK1 (all ones or zero per 16-bit
 * lane). Adding MANGLE(pw_1) and shifting left by 14 turns that into a penalty
 * which is added with signed saturation to the score in MM2 before the same
 * compare/select sequence as CHECK1 runs. Unlike CHECK1 the resulting mask is
 * not saved to MM6 (MM6 holds the penalty afterwards).
 *
 * NOTE(review): pw_1 is defined outside this file; presumably every 16-bit
 * lane is 1, which makes the penalty 0 for accepted lanes and 0x4000 for
 * rejected ones - confirm against its definition.
 */
#define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
                  hurts both quality and speed, but matches the C version. */\
            "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
            "psllw     $14,   "MM"6 \n\t"\
            "paddsw    "MM"6, "MM"2 \n\t"\
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t"\
            "pminsw    "MM"2, "MM"0 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t"

    
106
void RENAME(ff_yadif_filter_line)(uint8_t *dst,
107
                                  uint8_t *prev, uint8_t *cur, uint8_t *next,
108
                                  int w, int refs, int parity, int mode)
109
{
110
    DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
111
    DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
112
    DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
113
    DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
114
    int x;
115

    
116
#define FILTER\
117
    for(x=0; x<w; x+=STEP){\
118
        __asm__ volatile(\
119
            "pxor      "MM"7, "MM"7 \n\t"\
120
            LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
121
            LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
122
            LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
123
            LOAD("(%["next2"])", MM"3") /* next2[x] */\
124
            MOVQ"      "MM"3, "MM"4 \n\t"\
125
            "paddw     "MM"2, "MM"3 \n\t"\
126
            "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
127
            MOVQ"      "MM"0, %[tmp0] \n\t" /* c */\
128
            MOVQ"      "MM"3, %[tmp1] \n\t" /* d */\
129
            MOVQ"      "MM"1, %[tmp2] \n\t" /* e */\
130
            "psubw     "MM"4, "MM"2 \n\t"\
131
            PABS(      MM"4", MM"2") /* temporal_diff0 */\
132
            LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
133
            LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
134
            "psubw     "MM"0, "MM"3 \n\t"\
135
            "psubw     "MM"1, "MM"4 \n\t"\
136
            PABS(      MM"5", MM"3")\
137
            PABS(      MM"5", MM"4")\
138
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
139
            "psrlw     $1,    "MM"2 \n\t"\
140
            "psrlw     $1,    "MM"3 \n\t"\
141
            "pmaxsw    "MM"3, "MM"2 \n\t"\
142
            LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
143
            LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
144
            "psubw     "MM"0, "MM"3 \n\t"\
145
            "psubw     "MM"1, "MM"4 \n\t"\
146
            PABS(      MM"5", MM"3")\
147
            PABS(      MM"5", MM"4")\
148
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
149
            "psrlw     $1,    "MM"3 \n\t"\
150
            "pmaxsw    "MM"3, "MM"2 \n\t"\
151
            MOVQ"      "MM"2, %[tmp3] \n\t" /* diff */\
152
\
153
            "paddw     "MM"0, "MM"1 \n\t"\
154
            "paddw     "MM"0, "MM"0 \n\t"\
155
            "psubw     "MM"1, "MM"0 \n\t"\
156
            "psrlw     $1,    "MM"1 \n\t" /* spatial_pred */\
157
            PABS(      MM"2", MM"0")      /* ABS(c-e) */\
158
\
159
            MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
160
            MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
161
            MOVQ"      "MM"2, "MM"4 \n\t"\
162
            "psubusb   "MM"3, "MM"2 \n\t"\
163
            "psubusb   "MM"4, "MM"3 \n\t"\
164
            "pmaxub    "MM"3, "MM"2 \n\t"\
165
            PSHUF(MM"3", MM"2") \
166
            "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
167
            "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
168
            "paddw     "MM"2, "MM"0 \n\t"\
169
            "paddw     "MM"3, "MM"0 \n\t"\
170
            "psubw    "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
171
\
172
            CHECK(-2,0)\
173
            CHECK1\
174
            CHECK(-3,1)\
175
            CHECK2\
176
            CHECK(0,-2)\
177
            CHECK1\
178
            CHECK(1,-3)\
179
            CHECK2\
180
\
181
            /* if(p->mode<2) ... */\
182
            MOVQ"    %[tmp3], "MM"6 \n\t" /* diff */\
183
            "cmpl      $2, %[mode] \n\t"\
184
            "jge       1f \n\t"\
185
            LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
186
            LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
187
            LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
188
            LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
189
            "paddw     "MM"4, "MM"2 \n\t"\
190
            "paddw     "MM"5, "MM"3 \n\t"\
191
            "psrlw     $1,    "MM"2 \n\t" /* b */\
192
            "psrlw     $1,    "MM"3 \n\t" /* f */\
193
            MOVQ"    %[tmp0], "MM"4 \n\t" /* c */\
194
            MOVQ"    %[tmp1], "MM"5 \n\t" /* d */\
195
            MOVQ"    %[tmp2], "MM"7 \n\t" /* e */\
196
            "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
197
            "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
198
            MOVQ"      "MM"5, "MM"0 \n\t"\
199
            "psubw     "MM"4, "MM"5 \n\t" /* d-c */\
200
            "psubw     "MM"7, "MM"0 \n\t" /* d-e */\
201
            MOVQ"      "MM"2, "MM"4 \n\t"\
202
            "pminsw    "MM"3, "MM"2 \n\t"\
203
            "pmaxsw    "MM"4, "MM"3 \n\t"\
204
            "pmaxsw    "MM"5, "MM"2 \n\t"\
205
            "pminsw    "MM"5, "MM"3 \n\t"\
206
            "pmaxsw    "MM"0, "MM"2 \n\t" /* max */\
207
            "pminsw    "MM"0, "MM"3 \n\t" /* min */\
208
            "pxor      "MM"4, "MM"4 \n\t"\
209
            "pmaxsw    "MM"3, "MM"6 \n\t"\
210
            "psubw     "MM"2, "MM"4 \n\t" /* -max */\
211
            "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
212
            "1: \n\t"\
213
\
214
            MOVQ"    %[tmp1], "MM"2 \n\t" /* d */\
215
            MOVQ"      "MM"2, "MM"3 \n\t"\
216
            "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
217
            "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
218
            "pmaxsw    "MM"2, "MM"1 \n\t"\
219
            "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
220
            "packuswb  "MM"1, "MM"1 \n\t"\
221
\
222
            :[tmp0]"=m"(tmp0),\
223
             [tmp1]"=m"(tmp1),\
224
             [tmp2]"=m"(tmp2),\
225
             [tmp3]"=m"(tmp3)\
226
            :[prev] "r"(prev),\
227
             [cur]  "r"(cur),\
228
             [next] "r"(next),\
229
             [prefs]"r"((x86_reg)refs),\
230
             [mrefs]"r"((x86_reg)-refs),\
231
             [mode] "g"(mode)\
232
        );\
233
        __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
234
        dst += STEP;\
235
        prev+= STEP;\
236
        cur += STEP;\
237
        next+= STEP;\
238
    }
239

    
240
    if (parity) {
241
#define prev2 "prev"
242
#define next2 "cur"
243
        FILTER
244
#undef prev2
245
#undef next2
246
    } else {
247
#define prev2 "cur"
248
#define next2 "next"
249
        FILTER
250
#undef prev2
251
#undef next2
252
    }
253
}
/*
 * Drop every helper macro defined above so that none of them leaks past this
 * file. NOTE(review): presumably this lets the template be included again
 * with different COMPILE_TEMPLATE_* / RENAME settings - confirm at the
 * include site.
 */
#undef STEP
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef PSHUF
#undef PSRL1
#undef PSRL2
#undef LOAD
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER