Statistics
| Branch: | Revision:

ffmpeg / libavfilter / x86 / yadif_template.c @ 4b9c03b7

History | View | Annotate | Download (9.98 KB)

/*
 * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * Instruction-set selection for this template.
 *
 * With COMPILE_TEMPLATE_SSE defined the code below is assembled for the
 * 128-bit %xmm registers and produces STEP = 8 output bytes per loop
 * iteration; otherwise it is assembled for the 64-bit %mm registers and
 * produces STEP = 4.
 *
 *   MM     register-name prefix; "%%" because the string is pasted into an
 *          extended-asm template that has operands
 *   MOV    mnemonic that moves STEP bytes (movq = 8 bytes, movd = 4 bytes)
 *   MOVQ   full-register move; the SSE form (movdqa) requires 16-byte
 *          aligned memory operands
 *   MOVQU  full-register move usable on unaligned memory
 *   STEP   number of pixels handled per iteration
 *   PSRL1  shift the whole register right by one byte
 *   PSRL2  shift the whole register right by two bytes
 *   PSHUF  PSHUF(a, b) writes register a from register b: the low half of a
 *          receives b moved down by two bytes.  Beware of the parameter
 *          names: the first argument, although called "src", is the
 *          register that is written, and "dst" is the one that is read.
 */
#ifdef COMPILE_TEMPLATE_SSE
#define MM "%%xmm"
#define MOV  "movq"
#define MOVQ "movdqa"
#define MOVQU "movdqu"
#define STEP 8
#define PSRL1(reg) "psrldq $1, "reg" \n\t"
#define PSRL2(reg) "psrldq $2, "reg" \n\t"
#define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
                       "psrldq $2, "src"     \n\t"
#else
#define MM "%%mm"
#define MOV  "movd"
#define MOVQ "movq"
#define MOVQU "movq"
#define STEP 4
#define PSRL1(reg) "psrlq $8, "reg" \n\t"
#define PSRL2(reg) "psrlq $16, "reg" \n\t"
#define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
#endif

/*
 * LOAD(mem, dst): read STEP bytes from the memory operand mem into register
 * dst and widen them to 16-bit words by interleaving with register 7, which
 * therefore has to contain zero when LOAD is used.
 * The definition is the same for both instruction sets, so it lives outside
 * the #ifdef; MOV and MM are resolved when LOAD is expanded.
 */
#define LOAD(mem,dst) \
            MOV"       "mem", "dst" \n\t"\
            "punpcklbw "MM"7, "dst" \n\t"

/*
 * PABS(tmp, dst): replace every signed 16-bit word of register dst with its
 * absolute value.
 *
 * With COMPILE_TEMPLATE_SSSE3 a single pabsw does the job and tmp is not
 * touched.  Otherwise tmp is overwritten: it is cleared, dst is subtracted
 * from it (tmp = -dst) and the word-wise signed maximum of tmp and dst is
 * left in dst.
 */
#ifdef COMPILE_TEMPLATE_SSSE3
#define PABS(tmp,dst) \
            "pabsw     "dst", "dst" \n\t"
#else
#define PABS(tmp,dst) \
            "pxor     "tmp", "tmp" \n\t"\
            "psubw    "dst", "tmp" \n\t"\
            "pmaxsw   "tmp", "dst" \n\t"
#endif

/*
 * CHECK(pj, mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are literal byte displacements; they are stringified and pasted
 * in front of the (%[cur],%[mrefs]) and (%[cur],%[prefs]) addresses.  The
 * filter uses the pairs (-2,0), (-3,1), (0,-2) and (1,-3), i.e.
 * pj = -1+j and mj = -1-j in the notation of the comments below.
 *
 * Expects: register 7 == 0 and the asm operands cur, mrefs and prefs.
 * Leaves:  register 5 = (cur[x-refs+j] + cur[x+refs-j])>>1 as words.  pavgb
 *                       rounds up, so the low bit of the XOR of the two
 *                       inputs is subtracted to obtain the truncating
 *                       average.
 *          register 2 = score, the sum of the three absolute differences
 *                       at horizontal offsets -1, 0 and +1, as words.
 * Clobbers registers 3 and 4.
 * Both loads use MOVQU because the displaced addresses are not aligned.
 */
#define CHECK(pj,mj) \
            MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
            MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            MOVQ"      "MM"2, "MM"5 \n\t"\
            "pxor      "MM"3, "MM"4 \n\t"\
            "pavgb     "MM"3, "MM"5 \n\t"\
            "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
            "psubusb   "MM"4, "MM"5 \n\t"\
            PSRL1(MM"5")                 \
            "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
            MOVQ"      "MM"2, "MM"4 \n\t"\
            "psubusb   "MM"3, "MM"2 \n\t"\
            "psubusb   "MM"4, "MM"3 \n\t"\
            "pmaxub    "MM"3, "MM"2 \n\t"\
            MOVQ"      "MM"2, "MM"3 \n\t"\
            MOVQ"      "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
            PSRL1(MM"3")                  /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
            PSRL2(MM"4")                  /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
            "punpcklbw "MM"7, "MM"2 \n\t"\
            "punpcklbw "MM"7, "MM"3 \n\t"\
            "punpcklbw "MM"7, "MM"4 \n\t"\
            "paddw     "MM"3, "MM"2 \n\t"\
            "paddw     "MM"4, "MM"2 \n\t" /* score */

/*
 * CHECK1: keep the direction just evaluated by CHECK if it scores better.
 *
 * Expects: register 0 = spatial_score, register 1 = spatial_pred,
 *          register 2 = score and register 5 = candidate prediction (the
 *          last two as left by CHECK).
 * Per word, where score < spatial_score:
 *          spatial_score = score and spatial_pred = candidate.
 * Leaves the comparison mask (all ones where the candidate won, zero
 * elsewhere) in register 6 for a following CHECK2.
 * Clobbers register 3; register 5 is masked in place.
 */
#define CHECK1 \
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
            "pminsw    "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
            MOVQ"      "MM"3, "MM"6 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */

/*
 * CHECK2: like CHECK1, but only for words where the preceding CHECK1
 * accepted its direction.
 *
 * Pretend not to have checked dir=2 if dir=1 was bad.
 * Hurts both quality and speed, but matches the C version.
 *
 * Expects register 6 to hold the mask left by CHECK1 and registers
 * 0, 1, 2 and 5 as for CHECK1.  pw_1 is added to the mask and the result is
 * shifted left by 14 bits, then added to the score with signed saturation;
 * presumably pw_1 is a vector of 16-bit ones (confirm at its definition),
 * which makes this a penalty of 1<<14 on exactly the words where CHECK1
 * rejected its direction.  The usual compare-and-select follows.
 * Clobbers registers 3 and 6; register 5 is masked in place.
 */
#define CHECK2 \
            "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
            "psllw     $14,   "MM"6 \n\t"\
            "paddsw    "MM"6, "MM"2 \n\t"\
            MOVQ"      "MM"0, "MM"3 \n\t"\
            "pcmpgtw   "MM"2, "MM"3 \n\t"\
            "pminsw    "MM"2, "MM"0 \n\t"\
            "pand      "MM"3, "MM"5 \n\t"\
            "pandn     "MM"1, "MM"3 \n\t"\
            "por       "MM"5, "MM"3 \n\t"\
            MOVQ"      "MM"3, "MM"1 \n\t"

void RENAME(ff_yadif_filter_line)(uint8_t *dst,
107
                                  uint8_t *prev, uint8_t *cur, uint8_t *next,
108
                                  int w, int prefs, int mrefs, int parity, int mode)
109
{
110
    uint8_t tmp[5*16];
111
    uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
112
    int x;
113

    
114
#define FILTER\
115
    for(x=0; x<w; x+=STEP){\
116
        __asm__ volatile(\
117
            "pxor      "MM"7, "MM"7 \n\t"\
118
            LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
119
            LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
120
            LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
121
            LOAD("(%["next2"])", MM"3") /* next2[x] */\
122
            MOVQ"      "MM"3, "MM"4 \n\t"\
123
            "paddw     "MM"2, "MM"3 \n\t"\
124
            "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
125
            MOVQ"      "MM"0, (%[tmpA]) \n\t" /* c */\
126
            MOVQ"      "MM"3, 16(%[tmpA]) \n\t" /* d */\
127
            MOVQ"      "MM"1, 32(%[tmpA]) \n\t" /* e */\
128
            "psubw     "MM"4, "MM"2 \n\t"\
129
            PABS(      MM"4", MM"2") /* temporal_diff0 */\
130
            LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
131
            LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
132
            "psubw     "MM"0, "MM"3 \n\t"\
133
            "psubw     "MM"1, "MM"4 \n\t"\
134
            PABS(      MM"5", MM"3")\
135
            PABS(      MM"5", MM"4")\
136
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
137
            "psrlw     $1,    "MM"2 \n\t"\
138
            "psrlw     $1,    "MM"3 \n\t"\
139
            "pmaxsw    "MM"3, "MM"2 \n\t"\
140
            LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
141
            LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
142
            "psubw     "MM"0, "MM"3 \n\t"\
143
            "psubw     "MM"1, "MM"4 \n\t"\
144
            PABS(      MM"5", MM"3")\
145
            PABS(      MM"5", MM"4")\
146
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
147
            "psrlw     $1,    "MM"3 \n\t"\
148
            "pmaxsw    "MM"3, "MM"2 \n\t"\
149
            MOVQ"      "MM"2, 48(%[tmpA]) \n\t" /* diff */\
150
\
151
            "paddw     "MM"0, "MM"1 \n\t"\
152
            "paddw     "MM"0, "MM"0 \n\t"\
153
            "psubw     "MM"1, "MM"0 \n\t"\
154
            "psrlw     $1,    "MM"1 \n\t" /* spatial_pred */\
155
            PABS(      MM"2", MM"0")      /* ABS(c-e) */\
156
\
157
            MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
158
            MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
159
            MOVQ"      "MM"2, "MM"4 \n\t"\
160
            "psubusb   "MM"3, "MM"2 \n\t"\
161
            "psubusb   "MM"4, "MM"3 \n\t"\
162
            "pmaxub    "MM"3, "MM"2 \n\t"\
163
            PSHUF(MM"3", MM"2") \
164
            "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
165
            "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
166
            "paddw     "MM"2, "MM"0 \n\t"\
167
            "paddw     "MM"3, "MM"0 \n\t"\
168
            "psubw    "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
169
\
170
            CHECK(-2,0)\
171
            CHECK1\
172
            CHECK(-3,1)\
173
            CHECK2\
174
            CHECK(0,-2)\
175
            CHECK1\
176
            CHECK(1,-3)\
177
            CHECK2\
178
\
179
            /* if(p->mode<2) ... */\
180
            MOVQ"    48(%[tmpA]), "MM"6 \n\t" /* diff */\
181
            "cmpl      $2, %[mode] \n\t"\
182
            "jge       1f \n\t"\
183
            LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
184
            LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
185
            LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
186
            LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
187
            "paddw     "MM"4, "MM"2 \n\t"\
188
            "paddw     "MM"5, "MM"3 \n\t"\
189
            "psrlw     $1,    "MM"2 \n\t" /* b */\
190
            "psrlw     $1,    "MM"3 \n\t" /* f */\
191
            MOVQ"    (%[tmpA]), "MM"4 \n\t" /* c */\
192
            MOVQ"    16(%[tmpA]), "MM"5 \n\t" /* d */\
193
            MOVQ"    32(%[tmpA]), "MM"7 \n\t" /* e */\
194
            "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
195
            "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
196
            MOVQ"      "MM"5, "MM"0 \n\t"\
197
            "psubw     "MM"4, "MM"5 \n\t" /* d-c */\
198
            "psubw     "MM"7, "MM"0 \n\t" /* d-e */\
199
            MOVQ"      "MM"2, "MM"4 \n\t"\
200
            "pminsw    "MM"3, "MM"2 \n\t"\
201
            "pmaxsw    "MM"4, "MM"3 \n\t"\
202
            "pmaxsw    "MM"5, "MM"2 \n\t"\
203
            "pminsw    "MM"5, "MM"3 \n\t"\
204
            "pmaxsw    "MM"0, "MM"2 \n\t" /* max */\
205
            "pminsw    "MM"0, "MM"3 \n\t" /* min */\
206
            "pxor      "MM"4, "MM"4 \n\t"\
207
            "pmaxsw    "MM"3, "MM"6 \n\t"\
208
            "psubw     "MM"2, "MM"4 \n\t" /* -max */\
209
            "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
210
            "1: \n\t"\
211
\
212
            MOVQ"    16(%[tmpA]), "MM"2 \n\t" /* d */\
213
            MOVQ"      "MM"2, "MM"3 \n\t"\
214
            "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
215
            "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
216
            "pmaxsw    "MM"2, "MM"1 \n\t"\
217
            "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
218
            "packuswb  "MM"1, "MM"1 \n\t"\
219
\
220
            :\
221
            :[tmpA] "r"(tmpA),\
222
             [prev] "r"(prev),\
223
             [cur]  "r"(cur),\
224
             [next] "r"(next),\
225
             [prefs]"r"((x86_reg)prefs),\
226
             [mrefs]"r"((x86_reg)mrefs),\
227
             [mode] "g"(mode)\
228
        );\
229
        __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
230
        dst += STEP;\
231
        prev+= STEP;\
232
        cur += STEP;\
233
        next+= STEP;\
234
    }
235

    
236
    if (parity) {
237
#define prev2 "prev"
238
#define next2 "cur"
239
        FILTER
240
#undef prev2
241
#undef next2
242
    } else {
243
#define prev2 "cur"
244
#define next2 "next"
245
        FILTER
246
#undef prev2
247
#undef next2
248
    }
249
}
250
/*
 * Remove every macro this template defined so that none of them leaks into
 * the code that follows and so that they can be defined afresh.
 * NOTE(review): presumably the template is included once per instruction
 * set with a different COMPILE_TEMPLATE_* selection -- confirm against the
 * including file.
 */
#undef STEP
#undef MM
#undef MOV
#undef MOVQ
#undef MOVQU
#undef PSHUF
#undef PSRL1
#undef PSRL2
#undef LOAD
#undef PABS
#undef CHECK
#undef CHECK1
#undef CHECK2
#undef FILTER