Revision 4b9c03b7 libavfilter/x86/yadif_template.c

View differences:

libavfilter/x86/yadif_template.c
107 107
                                  uint8_t *prev, uint8_t *cur, uint8_t *next,
108 108
                                  int w, int prefs, int mrefs, int parity, int mode)
109 109
{
110
    DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
111
    DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
112
    DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
113
    DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
110
    uint8_t tmp[5*16];
111
    uint8_t *tmpA= (uint8_t*)(((uint64_t)(tmp+15)) & ~15);
114 112
    int x;
115 113

  
116 114
#define FILTER\
......
124 122
            MOVQ"      "MM"3, "MM"4 \n\t"\
125 123
            "paddw     "MM"2, "MM"3 \n\t"\
126 124
            "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
127
            MOVQ"      "MM"0, %[tmp0] \n\t" /* c */\
128
            MOVQ"      "MM"3, %[tmp1] \n\t" /* d */\
129
            MOVQ"      "MM"1, %[tmp2] \n\t" /* e */\
125
            MOVQ"      "MM"0, (%[tmpA]) \n\t" /* c */\
126
            MOVQ"      "MM"3, 16(%[tmpA]) \n\t" /* d */\
127
            MOVQ"      "MM"1, 32(%[tmpA]) \n\t" /* e */\
130 128
            "psubw     "MM"4, "MM"2 \n\t"\
131 129
            PABS(      MM"4", MM"2") /* temporal_diff0 */\
132 130
            LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
......
148 146
            "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
149 147
            "psrlw     $1,    "MM"3 \n\t"\
150 148
            "pmaxsw    "MM"3, "MM"2 \n\t"\
151
            MOVQ"      "MM"2, %[tmp3] \n\t" /* diff */\
149
            MOVQ"      "MM"2, 48(%[tmpA]) \n\t" /* diff */\
152 150
\
153 151
            "paddw     "MM"0, "MM"1 \n\t"\
154 152
            "paddw     "MM"0, "MM"0 \n\t"\
......
179 177
            CHECK2\
180 178
\
181 179
            /* if(p->mode<2) ... */\
182
            MOVQ"    %[tmp3], "MM"6 \n\t" /* diff */\
180
            MOVQ"    48(%[tmpA]), "MM"6 \n\t" /* diff */\
183 181
            "cmpl      $2, %[mode] \n\t"\
184 182
            "jge       1f \n\t"\
185 183
            LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
......
190 188
            "paddw     "MM"5, "MM"3 \n\t"\
191 189
            "psrlw     $1,    "MM"2 \n\t" /* b */\
192 190
            "psrlw     $1,    "MM"3 \n\t" /* f */\
193
            MOVQ"    %[tmp0], "MM"4 \n\t" /* c */\
194
            MOVQ"    %[tmp1], "MM"5 \n\t" /* d */\
195
            MOVQ"    %[tmp2], "MM"7 \n\t" /* e */\
191
            MOVQ"    (%[tmpA]), "MM"4 \n\t" /* c */\
192
            MOVQ"    16(%[tmpA]), "MM"5 \n\t" /* d */\
193
            MOVQ"    32(%[tmpA]), "MM"7 \n\t" /* e */\
196 194
            "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
197 195
            "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
198 196
            MOVQ"      "MM"5, "MM"0 \n\t"\
......
211 209
            "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
212 210
            "1: \n\t"\
213 211
\
214
            MOVQ"    %[tmp1], "MM"2 \n\t" /* d */\
212
            MOVQ"    16(%[tmpA]), "MM"2 \n\t" /* d */\
215 213
            MOVQ"      "MM"2, "MM"3 \n\t"\
216 214
            "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
217 215
            "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
......
219 217
            "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
220 218
            "packuswb  "MM"1, "MM"1 \n\t"\
221 219
\
222
            :[tmp0]"=m"(tmp0),\
223
             [tmp1]"=m"(tmp1),\
224
             [tmp2]"=m"(tmp2),\
225
             [tmp3]"=m"(tmp3)\
226
            :[prev] "r"(prev),\
220
            :\
221
            :[tmpA] "r"(tmpA),\
222
             [prev] "r"(prev),\
227 223
             [cur]  "r"(cur),\
228 224
             [next] "r"(next),\
229 225
             [prefs]"r"((x86_reg)prefs),\

Also available in: Unified diff