## Revision 16e0bf73 libpostproc/postprocess_template.c

View differences:

libpostproc/postprocess_template.c
42 42
```#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
```
43 43
```#elif defined (HAVE_MMX)
```
44 44
```#define PMINUB(b,a,t) \
```
45
```        "movq " #a ", " #t " \n\t"\
```
46
```        "psubusb " #b ", " #t " \n\t"\
```
47
```        "psubb " #t ", " #a " \n\t"
```
45
```    "movq " #a ", " #t " \n\t"\
```
46
```    "psubusb " #b ", " #t " \n\t"\
```
47
```    "psubb " #t ", " #a " \n\t"
```
48 48
```#endif
```
49 49

50 50
```#ifdef HAVE_MMX2
```
51 51
```#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
```
52 52
```#elif defined (HAVE_MMX)
```
53 53
```#define PMAXUB(a,b) \
```
54
```        "psubusb " #a ", " #b " \n\t"\
```
55
```        "paddb " #a ", " #b " \n\t"
```
54
```    "psubusb " #a ", " #b " \n\t"\
```
55
```    "paddb " #a ", " #b " \n\t"
```
56 56
```#endif
```
57 57

58 58
```//FIXME? |255-0| = 1 (should not be a problem ...)
```
......
61 61
``` * Check if the middle 8x8 Block in the given 8x16 block is flat
```
62 62
``` */
```
63 63
```static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
```
64
```        int numEq= 0, dcOk;
```
65
```        src+= stride*4; // src points to begin of the 8x8 Block
```
66
```asm volatile(
```
67
```                "movq %0, %%mm7                         \n\t"
```
68
```                "movq %1, %%mm6                         \n\t"
```
69
```                : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
```
70
```                );
```
71

72
```asm volatile(
```
73
```                "lea (%2, %3), %%"REG_a"                \n\t"
```
64
```    int numEq= 0, dcOk;
```
65
```    src+= stride*4; // src points to begin of the 8x8 Block
```
66
```    asm volatile(
```
67
```        "movq %0, %%mm7                         \n\t"
```
68
```        "movq %1, %%mm6                         \n\t"
```
69
```        : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
```
70
```        );
```
71

72
```    asm volatile(
```
73
```        "lea (%2, %3), %%"REG_a"                \n\t"
```
74 74
```//      0       1       2       3       4       5       6       7       8       9
```
75 75
```//      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
```
76 76

77
```                "movq (%2), %%mm0                       \n\t"
```
78
```                "movq (%%"REG_a"), %%mm1                \n\t"
```
79
```                "movq %%mm0, %%mm3                      \n\t"
```
80
```                "movq %%mm0, %%mm4                      \n\t"
```
81
```                PMAXUB(%%mm1, %%mm4)
```
82
```                PMINUB(%%mm1, %%mm3, %%mm5)
```
83
```                "psubb %%mm1, %%mm0                     \n\t" // mm0 = difference
```
84
```                "paddb %%mm7, %%mm0                     \n\t"
```
85
```                "pcmpgtb %%mm6, %%mm0                   \n\t"
```
86

87
```                "movq (%%"REG_a",%3), %%mm2             \n\t"
```
88
```                PMAXUB(%%mm2, %%mm4)
```
89
```                PMINUB(%%mm2, %%mm3, %%mm5)
```
90
```                "psubb %%mm2, %%mm1                     \n\t"
```
91
```                "paddb %%mm7, %%mm1                     \n\t"
```
92
```                "pcmpgtb %%mm6, %%mm1                   \n\t"
```
93
```                "paddb %%mm1, %%mm0                     \n\t"
```
94

95
```                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
```
96
```                PMAXUB(%%mm1, %%mm4)
```
97
```                PMINUB(%%mm1, %%mm3, %%mm5)
```
98
```                "psubb %%mm1, %%mm2                     \n\t"
```
99
```                "paddb %%mm7, %%mm2                     \n\t"
```
100
```                "pcmpgtb %%mm6, %%mm2                   \n\t"
```
101
```                "paddb %%mm2, %%mm0                     \n\t"
```
102

103
```                "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
```
104

105
```                "movq (%2, %3, 4), %%mm2                \n\t"
```
106
```                PMAXUB(%%mm2, %%mm4)
```
107
```                PMINUB(%%mm2, %%mm3, %%mm5)
```
108
```                "psubb %%mm2, %%mm1                     \n\t"
```
109
```                "paddb %%mm7, %%mm1                     \n\t"
```
110
```                "pcmpgtb %%mm6, %%mm1                   \n\t"
```
111
```                "paddb %%mm1, %%mm0                     \n\t"
```
112

113
```                "movq (%%"REG_a"), %%mm1                \n\t"
```
114
```                PMAXUB(%%mm1, %%mm4)
```
115
```                PMINUB(%%mm1, %%mm3, %%mm5)
```
116
```                "psubb %%mm1, %%mm2                     \n\t"
```
117
```                "paddb %%mm7, %%mm2                     \n\t"
```
118
```                "pcmpgtb %%mm6, %%mm2                   \n\t"
```
119
```                "paddb %%mm2, %%mm0                     \n\t"
```
120

121
```                "movq (%%"REG_a", %3), %%mm2            \n\t"
```
122
```                PMAXUB(%%mm2, %%mm4)
```
123
```                PMINUB(%%mm2, %%mm3, %%mm5)
```
124
```                "psubb %%mm2, %%mm1                     \n\t"
```
125
```                "paddb %%mm7, %%mm1                     \n\t"
```
126
```                "pcmpgtb %%mm6, %%mm1                   \n\t"
```
127
```                "paddb %%mm1, %%mm0                     \n\t"
```
128

129
```                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
```
130
```                PMAXUB(%%mm1, %%mm4)
```
131
```                PMINUB(%%mm1, %%mm3, %%mm5)
```
132
```                "psubb %%mm1, %%mm2                     \n\t"
```
133
```                "paddb %%mm7, %%mm2                     \n\t"
```
134
```                "pcmpgtb %%mm6, %%mm2                   \n\t"
```
135
```                "paddb %%mm2, %%mm0                     \n\t"
```
136
```                "psubusb %%mm3, %%mm4                   \n\t"
```
137

138
```                "                                       \n\t"
```
77
```        "movq (%2), %%mm0                       \n\t"
```
78
```        "movq (%%"REG_a"), %%mm1                \n\t"
```
79
```        "movq %%mm0, %%mm3                      \n\t"
```
80
```        "movq %%mm0, %%mm4                      \n\t"
```
81
```        PMAXUB(%%mm1, %%mm4)
```
82
```        PMINUB(%%mm1, %%mm3, %%mm5)
```
83
```        "psubb %%mm1, %%mm0                     \n\t" // mm0 = difference
```
84
```        "paddb %%mm7, %%mm0                     \n\t"
```
85
```        "pcmpgtb %%mm6, %%mm0                   \n\t"
```
86

87
```        "movq (%%"REG_a",%3), %%mm2             \n\t"
```
88
```        PMAXUB(%%mm2, %%mm4)
```
89
```        PMINUB(%%mm2, %%mm3, %%mm5)
```
90
```        "psubb %%mm2, %%mm1                     \n\t"
```
91
```        "paddb %%mm7, %%mm1                     \n\t"
```
92
```        "pcmpgtb %%mm6, %%mm1                   \n\t"
```
93
```        "paddb %%mm1, %%mm0                     \n\t"
```
94

95
```        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
```
96
```        PMAXUB(%%mm1, %%mm4)
```
97
```        PMINUB(%%mm1, %%mm3, %%mm5)
```
98
```        "psubb %%mm1, %%mm2                     \n\t"
```
99
```        "paddb %%mm7, %%mm2                     \n\t"
```
100
```        "pcmpgtb %%mm6, %%mm2                   \n\t"
```
101
```        "paddb %%mm2, %%mm0                     \n\t"
```
102

103
```        "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
```
104

105
```        "movq (%2, %3, 4), %%mm2                \n\t"
```
106
```        PMAXUB(%%mm2, %%mm4)
```
107
```        PMINUB(%%mm2, %%mm3, %%mm5)
```
108
```        "psubb %%mm2, %%mm1                     \n\t"
```
109
```        "paddb %%mm7, %%mm1                     \n\t"
```
110
```        "pcmpgtb %%mm6, %%mm1                   \n\t"
```
111
```        "paddb %%mm1, %%mm0                     \n\t"
```
112

113
```        "movq (%%"REG_a"), %%mm1                \n\t"
```
114
```        PMAXUB(%%mm1, %%mm4)
```
115
```        PMINUB(%%mm1, %%mm3, %%mm5)
```
116
```        "psubb %%mm1, %%mm2                     \n\t"
```
117
```        "paddb %%mm7, %%mm2                     \n\t"
```
118
```        "pcmpgtb %%mm6, %%mm2                   \n\t"
```
119
```        "paddb %%mm2, %%mm0                     \n\t"
```
120

121
```        "movq (%%"REG_a", %3), %%mm2            \n\t"
```
122
```        PMAXUB(%%mm2, %%mm4)
```
123
```        PMINUB(%%mm2, %%mm3, %%mm5)
```
124
```        "psubb %%mm2, %%mm1                     \n\t"
```
125
```        "paddb %%mm7, %%mm1                     \n\t"
```
126
```        "pcmpgtb %%mm6, %%mm1                   \n\t"
```
127
```        "paddb %%mm1, %%mm0                     \n\t"
```
128

129
```        "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
```
130
```        PMAXUB(%%mm1, %%mm4)
```
131
```        PMINUB(%%mm1, %%mm3, %%mm5)
```
132
```        "psubb %%mm1, %%mm2                     \n\t"
```
133
```        "paddb %%mm7, %%mm2                     \n\t"
```
134
```        "pcmpgtb %%mm6, %%mm2                   \n\t"
```
135
```        "paddb %%mm2, %%mm0                     \n\t"
```
136
```        "psubusb %%mm3, %%mm4                   \n\t"
```
137

138
```        "                                       \n\t"
```
139 139
```#ifdef HAVE_MMX2
```
140
```                "pxor %%mm7, %%mm7                      \n\t"
```
141
```                "psadbw %%mm7, %%mm0                    \n\t"
```
140
```        "pxor %%mm7, %%mm7                      \n\t"
```
141
```        "psadbw %%mm7, %%mm0                    \n\t"
```
142 142
```#else
```
143
```                "movq %%mm0, %%mm1                      \n\t"
```
144
```                "psrlw $8, %%mm0                        \n\t"
```
145
```                "paddb %%mm1, %%mm0                     \n\t"
```
146
```                "movq %%mm0, %%mm1                      \n\t"
```
147
```                "psrlq $16, %%mm0                       \n\t"
```
148
```                "paddb %%mm1, %%mm0                     \n\t"
```
149
```                "movq %%mm0, %%mm1                      \n\t"
```
150
```                "psrlq $32, %%mm0                       \n\t"
```
151
```                "paddb %%mm1, %%mm0                     \n\t"
```
143
```        "movq %%mm0, %%mm1                      \n\t"
```
144
```        "psrlw $8, %%mm0                        \n\t"
```
145
```        "paddb %%mm1, %%mm0                     \n\t"
```
146
```        "movq %%mm0, %%mm1                      \n\t"
```
147
```        "psrlq $16, %%mm0                       \n\t"
```
148
```        "paddb %%mm1, %%mm0                     \n\t"
```
149
```        "movq %%mm0, %%mm1                      \n\t"
```
150
```        "psrlq $32, %%mm0                       \n\t"
```
151
```        "paddb %%mm1, %%mm0                     \n\t"
```
152 152
```#endif
```
153
```                "movq %4, %%mm7                         \n\t" // QP,..., QP
```
154
```                "paddusb %%mm7, %%mm7                   \n\t" // 2QP ... 2QP
```
155
```                "psubusb %%mm7, %%mm4                   \n\t" // Diff <= 2QP -> 0
```
156
```                "packssdw %%mm4, %%mm4                  \n\t"
```
157
```                "movd %%mm0, %0                         \n\t"
```
158
```                "movd %%mm4, %1                         \n\t"
```
159

160
```                : "=r" (numEq), "=r" (dcOk)
```
161
```                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
162
```                : "%"REG_a
```
163
```                );
```
164

165
```        numEq= (-numEq) &0xFF;
```
166
```        if(numEq > c->ppMode.flatnessThreshold){
```
167
```            if(dcOk) return 0;
```
168
```            else     return 1;
```
169
```        }else{
```
170
```            return 2;
```
171
```        }
```
153
```        "movq %4, %%mm7                         \n\t" // QP,..., QP
```
154
```        "paddusb %%mm7, %%mm7                   \n\t" // 2QP ... 2QP
```
155
```        "psubusb %%mm7, %%mm4                   \n\t" // Diff <= 2QP -> 0
```
156
```        "packssdw %%mm4, %%mm4                  \n\t"
```
157
```        "movd %%mm0, %0                         \n\t"
```
158
```        "movd %%mm4, %1                         \n\t"
```
159

160
```        : "=r" (numEq), "=r" (dcOk)
```
161
```        : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
162
```        : "%"REG_a
```
163
```        );
```
164

165
```    numEq= (-numEq) &0xFF;
```
166
```    if(numEq > c->ppMode.flatnessThreshold){
```
167
```        if(dcOk) return 0;
```
168
```        else     return 1;
```
169
```    }else{
```
170
```        return 2;
```
171
```    }
```
172 172
```}
```
173 173
```#endif //HAVE_MMX
```
174 174

......
180 180
```static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
```
181 181
```{
```
182 182
```#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
183
```        src+= stride*3;
```
184
```        asm volatile(        //"movv %0 %1 %2\n\t"
```
185
```                "movq %2, %%mm0                         \n\t"  // QP,..., QP
```
186
```                "pxor %%mm4, %%mm4                      \n\t"
```
187

188
```                "movq (%0), %%mm6                       \n\t"
```
189
```                "movq (%0, %1), %%mm5                   \n\t"
```
190
```                "movq %%mm5, %%mm1                      \n\t"
```
191
```                "movq %%mm6, %%mm2                      \n\t"
```
192
```                "psubusb %%mm6, %%mm5                   \n\t"
```
193
```                "psubusb %%mm1, %%mm2                   \n\t"
```
194
```                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
```
195
```                "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
```
196
```                "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
```
197

198
```                "pand %%mm2, %%mm6                      \n\t"
```
199
```                "pandn %%mm1, %%mm2                     \n\t"
```
200
```                "por %%mm2, %%mm6                       \n\t"// First Line to Filter
```
201

202
```                "movq (%0, %1, 8), %%mm5                \n\t"
```
203
```                "lea (%0, %1, 4), %%"REG_a"             \n\t"
```
204
```                "lea (%0, %1, 8), %%"REG_c"             \n\t"
```
205
```                "sub %1, %%"REG_c"                      \n\t"
```
206
```                "add %1, %0                             \n\t" // %0 points to line 1 not 0
```
207
```                "movq (%0, %1, 8), %%mm7                \n\t"
```
208
```                "movq %%mm5, %%mm1                      \n\t"
```
209
```                "movq %%mm7, %%mm2                      \n\t"
```
210
```                "psubusb %%mm7, %%mm5                   \n\t"
```
211
```                "psubusb %%mm1, %%mm2                   \n\t"
```
212
```                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
```
213
```                "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
```
214
```                "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
```
215

216
```                "pand %%mm2, %%mm7                      \n\t"
```
217
```                "pandn %%mm1, %%mm2                     \n\t"
```
218
```                "por %%mm2, %%mm7                       \n\t" // First Line to Filter
```
219

220

221
```                //      1       2       3       4       5       6       7       8
```
222
```                //      %0      %0+%1   %0+2%1  eax     %0+4%1  eax+2%1 ecx     eax+4%1
```
223
```                // 6 4 2 2 1 1
```
224
```                // 6 4 4 2
```
225
```                // 6 8 2
```
226

227
```                "movq (%0, %1), %%mm0                   \n\t" //  1
```
228
```                "movq %%mm0, %%mm1                      \n\t" //  1
```
229
```                PAVGB(%%mm6, %%mm0)                           //1 1        /2
```
230
```                PAVGB(%%mm6, %%mm0)                           //3 1        /4
```
231

232
```                "movq (%0, %1, 4), %%mm2                \n\t" //     1
```
233
```                "movq %%mm2, %%mm5                      \n\t" //     1
```
234
```                PAVGB((%%REGa), %%mm2)                        //    11        /2
```
235
```                PAVGB((%0, %1, 2), %%mm2)                     //   211        /4
```
236
```                "movq %%mm2, %%mm3                      \n\t" //   211        /4
```
237
```                "movq (%0), %%mm4                       \n\t" // 1
```
238
```                PAVGB(%%mm4, %%mm3)                           // 4 211        /8
```
239
```                PAVGB(%%mm0, %%mm3)                           //642211        /16
```
240
```                "movq %%mm3, (%0)                       \n\t" // X
```
241
```                // mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
```
242
```                "movq %%mm1, %%mm0                      \n\t" //  1
```
243
```                PAVGB(%%mm6, %%mm0)                           //1 1        /2
```
244
```                "movq %%mm4, %%mm3                      \n\t" // 1
```
245
```                PAVGB((%0,%1,2), %%mm3)                       // 1 1        /2
```
246
```                PAVGB((%%REGa,%1,2), %%mm5)                   //     11        /2
```
247
```                PAVGB((%%REGa), %%mm5)                        //    211 /4
```
248
```                PAVGB(%%mm5, %%mm3)                           // 2 2211 /8
```
249
```                PAVGB(%%mm0, %%mm3)                           //4242211 /16
```
250
```                "movq %%mm3, (%0,%1)                    \n\t" //  X
```
251
```                // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
```
252
```                PAVGB(%%mm4, %%mm6)                                   //11        /2
```
253
```                "movq (%%"REG_c"), %%mm0                \n\t" //       1
```
254
```                PAVGB((%%REGa, %1, 2), %%mm0)                 //      11/2
```
255
```                "movq %%mm0, %%mm3                      \n\t" //      11/2
```
256
```                PAVGB(%%mm1, %%mm0)                           //  2   11/4
```
257
```                PAVGB(%%mm6, %%mm0)                           //222   11/8
```
258
```                PAVGB(%%mm2, %%mm0)                           //22242211/16
```
259
```                "movq (%0, %1, 2), %%mm2                \n\t" //   1
```
260
```                "movq %%mm0, (%0, %1, 2)                \n\t" //   X
```
261
```                // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
```
262
```                "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
```
263
```                PAVGB((%%REGc), %%mm0)                        //       11        /2
```
264
```                PAVGB(%%mm0, %%mm6)                           //11     11        /4
```
265
```                PAVGB(%%mm1, %%mm4)                           // 11                /2
```
266
```                PAVGB(%%mm2, %%mm1)                           //  11                /2
```
267
```                PAVGB(%%mm1, %%mm6)                           //1122   11        /8
```
268
```                PAVGB(%%mm5, %%mm6)                           //112242211        /16
```
269
```                "movq (%%"REG_a"), %%mm5                \n\t" //    1
```
270
```                "movq %%mm6, (%%"REG_a")                \n\t" //    X
```
271
```                // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
```
272
```                "movq (%%"REG_a", %1, 4), %%mm6         \n\t" //        1
```
273
```                PAVGB(%%mm7, %%mm6)                           //        11        /2
```
274
```                PAVGB(%%mm4, %%mm6)                           // 11     11        /4
```
275
```                PAVGB(%%mm3, %%mm6)                           // 11   2211        /8
```
276
```                PAVGB(%%mm5, %%mm2)                           //   11                /2
```
277
```                "movq (%0, %1, 4), %%mm4                \n\t" //     1
```
278
```                PAVGB(%%mm4, %%mm2)                           //   112                /4
```
279
```                PAVGB(%%mm2, %%mm6)                           // 112242211        /16
```
280
```                "movq %%mm6, (%0, %1, 4)                \n\t" //     X
```
281
```                // mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
```
282
```                PAVGB(%%mm7, %%mm1)                           //  11     2        /4
```
283
```                PAVGB(%%mm4, %%mm5)                           //    11                /2
```
284
```                PAVGB(%%mm5, %%mm0)                           //    11 11        /4
```
285
```                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //      1
```
286
```                PAVGB(%%mm6, %%mm1)                           //  11  4  2        /8
```
287
```                PAVGB(%%mm0, %%mm1)                           //  11224222        /16
```
288
```                "movq %%mm1, (%%"REG_a", %1, 2)         \n\t" //      X
```
289
```                // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
```
290
```                PAVGB((%%REGc), %%mm2)                        //   112 4        /8
```
291
```                "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
```
292
```                PAVGB(%%mm0, %%mm6)                           //      1 1        /2
```
293
```                PAVGB(%%mm7, %%mm6)                           //      1 12        /4
```
294
```                PAVGB(%%mm2, %%mm6)                           //   1122424        /4
```
295
```                "movq %%mm6, (%%"REG_c")                \n\t" //       X
```
296
```                // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
```
297
```                PAVGB(%%mm7, %%mm5)                           //    11   2        /4
```
298
```                PAVGB(%%mm7, %%mm5)                           //    11   6        /8
```
299

300
```                PAVGB(%%mm3, %%mm0)                           //      112        /4
```
301
```                PAVGB(%%mm0, %%mm5)                           //    112246        /16
```
302
```                "movq %%mm5, (%%"REG_a", %1, 4)         \n\t" //        X
```
303
```                "sub %1, %0                             \n\t"
```
304

305
```                :
```
306
```                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
307
```                : "%"REG_a, "%"REG_c
```
308
```        );
```
183
```    src+= stride*3;
```
184
```    asm volatile(        //"movv %0 %1 %2\n\t"
```
185
```        "movq %2, %%mm0                         \n\t"  // QP,..., QP
```
186
```        "pxor %%mm4, %%mm4                      \n\t"
```
187

188
```        "movq (%0), %%mm6                       \n\t"
```
189
```        "movq (%0, %1), %%mm5                   \n\t"
```
190
```        "movq %%mm5, %%mm1                      \n\t"
```
191
```        "movq %%mm6, %%mm2                      \n\t"
```
192
```        "psubusb %%mm6, %%mm5                   \n\t"
```
193
```        "psubusb %%mm1, %%mm2                   \n\t"
```
194
```        "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
```
195
```        "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
```
196
```        "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
```
197

198
```        "pand %%mm2, %%mm6                      \n\t"
```
199
```        "pandn %%mm1, %%mm2                     \n\t"
```
200
```        "por %%mm2, %%mm6                       \n\t"// First Line to Filter
```
201

202
```        "movq (%0, %1, 8), %%mm5                \n\t"
```
203
```        "lea (%0, %1, 4), %%"REG_a"             \n\t"
```
204
```        "lea (%0, %1, 8), %%"REG_c"             \n\t"
```
205
```        "sub %1, %%"REG_c"                      \n\t"
```
206
```        "add %1, %0                             \n\t" // %0 points to line 1 not 0
```
207
```        "movq (%0, %1, 8), %%mm7                \n\t"
```
208
```        "movq %%mm5, %%mm1                      \n\t"
```
209
```        "movq %%mm7, %%mm2                      \n\t"
```
210
```        "psubusb %%mm7, %%mm5                   \n\t"
```
211
```        "psubusb %%mm1, %%mm2                   \n\t"
```
212
```        "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
```
213
```        "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
```
214
```        "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
```
215

216
```        "pand %%mm2, %%mm7                      \n\t"
```
217
```        "pandn %%mm1, %%mm2                     \n\t"
```
218
```        "por %%mm2, %%mm7                       \n\t" // First Line to Filter
```
219

220

221
```        //      1       2       3       4       5       6       7       8
```
222
```        //      %0      %0+%1   %0+2%1  eax     %0+4%1  eax+2%1 ecx     eax+4%1
```
223
```        // 6 4 2 2 1 1
```
224
```        // 6 4 4 2
```
225
```        // 6 8 2
```
226

227
```        "movq (%0, %1), %%mm0                   \n\t" //  1
```
228
```        "movq %%mm0, %%mm1                      \n\t" //  1
```
229
```        PAVGB(%%mm6, %%mm0)                           //1 1        /2
```
230
```        PAVGB(%%mm6, %%mm0)                           //3 1        /4
```
231

232
```        "movq (%0, %1, 4), %%mm2                \n\t" //     1
```
233
```        "movq %%mm2, %%mm5                      \n\t" //     1
```
234
```        PAVGB((%%REGa), %%mm2)                        //    11        /2
```
235
```        PAVGB((%0, %1, 2), %%mm2)                     //   211        /4
```
236
```        "movq %%mm2, %%mm3                      \n\t" //   211        /4
```
237
```        "movq (%0), %%mm4                       \n\t" // 1
```
238
```        PAVGB(%%mm4, %%mm3)                           // 4 211        /8
```
239
```        PAVGB(%%mm0, %%mm3)                           //642211        /16
```
240
```        "movq %%mm3, (%0)                       \n\t" // X
```
241
```        // mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
```
242
```        "movq %%mm1, %%mm0                      \n\t" //  1
```
243
```        PAVGB(%%mm6, %%mm0)                           //1 1        /2
```
244
```        "movq %%mm4, %%mm3                      \n\t" // 1
```
245
```        PAVGB((%0,%1,2), %%mm3)                       // 1 1        /2
```
246
```        PAVGB((%%REGa,%1,2), %%mm5)                   //     11        /2
```
247
```        PAVGB((%%REGa), %%mm5)                        //    211 /4
```
248
```        PAVGB(%%mm5, %%mm3)                           // 2 2211 /8
```
249
```        PAVGB(%%mm0, %%mm3)                           //4242211 /16
```
250
```        "movq %%mm3, (%0,%1)                    \n\t" //  X
```
251
```        // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
```
252
```        PAVGB(%%mm4, %%mm6)                                   //11        /2
```
253
```        "movq (%%"REG_c"), %%mm0                \n\t" //       1
```
254
```        PAVGB((%%REGa, %1, 2), %%mm0)                 //      11/2
```
255
```        "movq %%mm0, %%mm3                      \n\t" //      11/2
```
256
```        PAVGB(%%mm1, %%mm0)                           //  2   11/4
```
257
```        PAVGB(%%mm6, %%mm0)                           //222   11/8
```
258
```        PAVGB(%%mm2, %%mm0)                           //22242211/16
```
259
```        "movq (%0, %1, 2), %%mm2                \n\t" //   1
```
260
```        "movq %%mm0, (%0, %1, 2)                \n\t" //   X
```
261
```        // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
```
262
```        "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
```
263
```        PAVGB((%%REGc), %%mm0)                        //       11        /2
```
264
```        PAVGB(%%mm0, %%mm6)                           //11     11        /4
```
265
```        PAVGB(%%mm1, %%mm4)                           // 11                /2
```
266
```        PAVGB(%%mm2, %%mm1)                           //  11                /2
```
267
```        PAVGB(%%mm1, %%mm6)                           //1122   11        /8
```
268
```        PAVGB(%%mm5, %%mm6)                           //112242211        /16
```
269
```        "movq (%%"REG_a"), %%mm5                \n\t" //    1
```
270
```        "movq %%mm6, (%%"REG_a")                \n\t" //    X
```
271
```        // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
```
272
```        "movq (%%"REG_a", %1, 4), %%mm6         \n\t" //        1
```
273
```        PAVGB(%%mm7, %%mm6)                           //        11        /2
```
274
```        PAVGB(%%mm4, %%mm6)                           // 11     11        /4
```
275
```        PAVGB(%%mm3, %%mm6)                           // 11   2211        /8
```
276
```        PAVGB(%%mm5, %%mm2)                           //   11                /2
```
277
```        "movq (%0, %1, 4), %%mm4                \n\t" //     1
```
278
```        PAVGB(%%mm4, %%mm2)                           //   112                /4
```
279
```        PAVGB(%%mm2, %%mm6)                           // 112242211        /16
```
280
```        "movq %%mm6, (%0, %1, 4)                \n\t" //     X
```
281
```        // mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
```
282
```        PAVGB(%%mm7, %%mm1)                           //  11     2        /4
```
283
```        PAVGB(%%mm4, %%mm5)                           //    11                /2
```
284
```        PAVGB(%%mm5, %%mm0)                           //    11 11        /4
```
285
```        "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //      1
```
286
```        PAVGB(%%mm6, %%mm1)                           //  11  4  2        /8
```
287
```        PAVGB(%%mm0, %%mm1)                           //  11224222        /16
```
288
```        "movq %%mm1, (%%"REG_a", %1, 2)         \n\t" //      X
```
289
```        // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
```
290
```        PAVGB((%%REGc), %%mm2)                        //   112 4        /8
```
291
```        "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
```
292
```        PAVGB(%%mm0, %%mm6)                           //      1 1        /2
```
293
```        PAVGB(%%mm7, %%mm6)                           //      1 12        /4
```
294
```        PAVGB(%%mm2, %%mm6)                           //   1122424        /4
```
295
```        "movq %%mm6, (%%"REG_c")                \n\t" //       X
```
296
```        // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
```
297
```        PAVGB(%%mm7, %%mm5)                           //    11   2        /4
```
298
```        PAVGB(%%mm7, %%mm5)                           //    11   6        /8
```
299

300
```        PAVGB(%%mm3, %%mm0)                           //      112        /4
```
301
```        PAVGB(%%mm0, %%mm5)                           //    112246        /16
```
302
```        "movq %%mm5, (%%"REG_a", %1, 4)         \n\t" //        X
```
303
```        "sub %1, %0                             \n\t"
```
304

305
```        :
```
306
```        : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
307
```        : "%"REG_a, "%"REG_c
```
308
```    );
```
309 309
```#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
310
```        const int l1= stride;
```
311
```        const int l2= stride + l1;
```
312
```        const int l3= stride + l2;
```
313
```        const int l4= stride + l3;
```
314
```        const int l5= stride + l4;
```
315
```        const int l6= stride + l5;
```
316
```        const int l7= stride + l6;
```
317
```        const int l8= stride + l7;
```
318
```        const int l9= stride + l8;
```
319
```        int x;
```
320
```        src+= stride*3;
```
321
```        for(x=0; x<BLOCK_SIZE; x++)
```
322
```        {
```
323
```                const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
```
324
```                const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
```
325

326
```                int sums[10];
```
327
```                sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
```
328
```                sums[1] = sums[0] - first  + src[l4];
```
329
```                sums[2] = sums[1] - first  + src[l5];
```
330
```                sums[3] = sums[2] - first  + src[l6];
```
331
```                sums[4] = sums[3] - first  + src[l7];
```
332
```                sums[5] = sums[4] - src[l1] + src[l8];
```
333
```                sums[6] = sums[5] - src[l2] + last;
```
334
```                sums[7] = sums[6] - src[l3] + last;
```
335
```                sums[8] = sums[7] - src[l4] + last;
```
336
```                sums[9] = sums[8] - src[l5] + last;
```
337

338
```                src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
```
339
```                src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
```
340
```                src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
```
341
```                src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
```
342
```                src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
```
343
```                src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
```
344
```                src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
```
345
```                src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
```
346

347
```                src++;
```
348
```        }
```
310
```    const int l1= stride;
```
311
```    const int l2= stride + l1;
```
312
```    const int l3= stride + l2;
```
313
```    const int l4= stride + l3;
```
314
```    const int l5= stride + l4;
```
315
```    const int l6= stride + l5;
```
316
```    const int l7= stride + l6;
```
317
```    const int l8= stride + l7;
```
318
```    const int l9= stride + l8;
```
319
```    int x;
```
320
```    src+= stride*3;
```
321
```    for(x=0; x<BLOCK_SIZE; x++){
```
322
```        const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
```
323
```        const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
```
324

325
```        int sums[10];
```
326
```        sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
```
327
```        sums[1] = sums[0] - first  + src[l4];
```
328
```        sums[2] = sums[1] - first  + src[l5];
```
329
```        sums[3] = sums[2] - first  + src[l6];
```
330
```        sums[4] = sums[3] - first  + src[l7];
```
331
```        sums[5] = sums[4] - src[l1] + src[l8];
```
332
```        sums[6] = sums[5] - src[l2] + last;
```
333
```        sums[7] = sums[6] - src[l3] + last;
```
334
```        sums[8] = sums[7] - src[l4] + last;
```
335
```        sums[9] = sums[8] - src[l5] + last;
```
336

337
```        src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
```
338
```        src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
```
339
```        src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
```
340
```        src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
```
341
```        src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
```
342
```        src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
```
343
```        src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
```
344
```        src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
```
345

346
```        src++;
```
347
```    }
```
349 348
```#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
350 349
```}
```
351 350
```#endif //HAVE_ALTIVEC
```
......
366 365
```static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
```
367 366
```{
```
368 367
```#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
369
```        src+= stride*3;
```
368
```    src+= stride*3;
```
370 369
```// FIXME rounding
```
371
```        asm volatile(
```
372
```                "pxor %%mm7, %%mm7                      \n\t" // 0
```
373
```                "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE
```
374
```                "leal (%0, %1), %%"REG_a"               \n\t"
```
375
```                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
```
370
```    asm volatile(
```
371
```        "pxor %%mm7, %%mm7                      \n\t" // 0
```
372
```        "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE
```
373
```        "leal (%0, %1), %%"REG_a"               \n\t"
```
374
```        "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
```
376 375
```//      0       1       2       3       4       5       6       7       8       9
```
377 376
```//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
```
378
```                "movq "MANGLE(pQPb)", %%mm0             \n\t" // QP,..., QP
```
379
```                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
```
380
```                "paddusb "MANGLE(b02)", %%mm0           \n\t"
```
381
```                "psrlw \$2, %%mm0                        \n\t"
```
382
```                "pand "MANGLE(b3F)", %%mm0              \n\t" // QP/4,..., QP/4
```
383
```                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
```
384
```                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
```
385
```                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
```
386
```                "movq %%mm2, %%mm4                      \n\t" // line 4
```
387
```                "pcmpeqb %%mm5, %%mm5                   \n\t" // -1
```
388
```                "pxor %%mm2, %%mm5                      \n\t" // -line 4 - 1
```
389
```                PAVGB(%%mm3, %%mm5)
```
390
```                "paddb %%mm6, %%mm5                     \n\t" // (l5-l4)/2
```
391
```                "psubusb %%mm3, %%mm4                   \n\t"
```
392
```                "psubusb %%mm2, %%mm3                   \n\t"
```
393
```                "por %%mm3, %%mm4                       \n\t" // |l4 - l5|
```
394
```                "psubusb %%mm0, %%mm4                   \n\t"
```
395
```                "pcmpeqb %%mm7, %%mm4                   \n\t"
```
396
```                "pand %%mm4, %%mm5                      \n\t" // d/2
```
397

398
```//                "paddb %%mm6, %%mm2                     \n\t" // line 4 + 0x80
```
399
```                "paddb %%mm5, %%mm2                     \n\t"
```
400
```//                "psubb %%mm6, %%mm2                     \n\t"
```
401
```                "movq %%mm2, (%0,%1, 4)                 \n\t"
```
402

403
```                "movq (%%"REG_c"), %%mm2                \n\t"
```
404
```//                "paddb %%mm6, %%mm2                     \n\t" // line 5 + 0x80
```
405
```                "psubb %%mm5, %%mm2                     \n\t"
```
406
```//                "psubb %%mm6, %%mm2                     \n\t"
```
407
```                "movq %%mm2, (%%"REG_c")                \n\t"
```
408

409
```                "paddb %%mm6, %%mm5                     \n\t"
```
410
```                "psrlw \$2, %%mm5                        \n\t"
```
411
```                "pand "MANGLE(b3F)", %%mm5              \n\t"
```
412
```                "psubb "MANGLE(b20)", %%mm5             \n\t" // (l5-l4)/8
```
413

414
```                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
```
415
```                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
```
416
```                "paddsb %%mm5, %%mm2                    \n\t"
```
417
```                "psubb %%mm6, %%mm2                     \n\t"
```
418
```                "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
```
419

420
```                "movq (%%"REG_c", %1), %%mm2            \n\t"
```
421
```                "paddb %%mm6, %%mm2                     \n\t" // line 6 + 0x80
```
422
```                "psubsb %%mm5, %%mm2                    \n\t"
```
423
```                "psubb %%mm6, %%mm2                     \n\t"
```
424
```                "movq %%mm2, (%%"REG_c", %1)            \n\t"
```
425

426
```                :
```
427
```                : "r" (src), "r" ((long)stride)
```
428
```                : "%"REG_a, "%"REG_c
```
429
```        );
```
377
```        "movq "MANGLE(pQPb)", %%mm0             \n\t" // QP,..., QP
```
378
```        "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
```
379
```        "paddusb "MANGLE(b02)", %%mm0           \n\t"
```
380
```        "psrlw \$2, %%mm0                        \n\t"
```
381
```        "pand "MANGLE(b3F)", %%mm0              \n\t" // QP/4,..., QP/4
```
382
```        "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
```
383
```        "movq (%0, %1, 4), %%mm2                \n\t" // line 4
```
384
```        "movq (%%"REG_c"), %%mm3                \n\t" // line 5
```
385
```        "movq %%mm2, %%mm4                      \n\t" // line 4
```
386
```        "pcmpeqb %%mm5, %%mm5                   \n\t" // -1
```
387
```        "pxor %%mm2, %%mm5                      \n\t" // -line 4 - 1
```
388
```        PAVGB(%%mm3, %%mm5)
```
389
```        "paddb %%mm6, %%mm5                     \n\t" // (l5-l4)/2
```
390
```        "psubusb %%mm3, %%mm4                   \n\t"
```
391
```        "psubusb %%mm2, %%mm3                   \n\t"
```
392
```        "por %%mm3, %%mm4                       \n\t" // |l4 - l5|
```
393
```        "psubusb %%mm0, %%mm4                   \n\t"
```
394
```        "pcmpeqb %%mm7, %%mm4                   \n\t"
```
395
```        "pand %%mm4, %%mm5                      \n\t" // d/2
```
396

397
```//        "paddb %%mm6, %%mm2                     \n\t" // line 4 + 0x80
```
398
```        "paddb %%mm5, %%mm2                     \n\t"
```
399
```//        "psubb %%mm6, %%mm2                     \n\t"
```
400
```        "movq %%mm2, (%0,%1, 4)                 \n\t"
```
401

402
```        "movq (%%"REG_c"), %%mm2                \n\t"
```
403
```//        "paddb %%mm6, %%mm2                     \n\t" // line 5 + 0x80
```
404
```        "psubb %%mm5, %%mm2                     \n\t"
```
405
```//        "psubb %%mm6, %%mm2                     \n\t"
```
406
```        "movq %%mm2, (%%"REG_c")                \n\t"
```
407

408
```        "paddb %%mm6, %%mm5                     \n\t"
```
409
```        "psrlw \$2, %%mm5                        \n\t"
```
410
```        "pand "MANGLE(b3F)", %%mm5              \n\t"
```
411
```        "psubb "MANGLE(b20)", %%mm5             \n\t" // (l5-l4)/8
```
412

413
```        "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
```
414
```        "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
```
415
```        "paddsb %%mm5, %%mm2                    \n\t"
```
416
```        "psubb %%mm6, %%mm2                     \n\t"
```
417
```        "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
```
418

419
```        "movq (%%"REG_c", %1), %%mm2            \n\t"
```
420
```        "paddb %%mm6, %%mm2                     \n\t" // line 6 + 0x80
```
421
```        "psubsb %%mm5, %%mm2                    \n\t"
```
422
```        "psubb %%mm6, %%mm2                     \n\t"
```
423
```        "movq %%mm2, (%%"REG_c", %1)            \n\t"
```
424

425
```        :
```
426
```        : "r" (src), "r" ((long)stride)
```
427
```        : "%"REG_a, "%"REG_c
```
428
```    );
```
430 429
```#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
431
```         const int l1= stride;
```
432
```        const int l2= stride + l1;
```
433
```        const int l3= stride + l2;
```
434
```        const int l4= stride + l3;
```
435
```        const int l5= stride + l4;
```
436
```        const int l6= stride + l5;
```
437
```//        const int l7= stride + l6;
```
438
```//        const int l8= stride + l7;
```
439
```//        const int l9= stride + l8;
```
440
```        int x;
```
441
```        const int QP15= QP + (QP>>2);
```
442
```        src+= stride*3;
```
443
```        for(x=0; x<BLOCK_SIZE; x++)
```
444
```        {
```
445
```                const int v = (src[x+l5] - src[x+l4]);
```
446
```                if(FFABS(v) < QP15)
```
447
```                {
```
448
```                        src[x+l3] +=v>>3;
```
449
```                        src[x+l4] +=v>>1;
```
450
```                        src[x+l5] -=v>>1;
```
451
```                        src[x+l6] -=v>>3;
```
452

453
```                }
```
430
```    const int l1= stride;
```
431
```    const int l2= stride + l1;
```
432
```    const int l3= stride + l2;
```
433
```    const int l4= stride + l3;
```
434
```    const int l5= stride + l4;
```
435
```    const int l6= stride + l5;
```
436
```//    const int l7= stride + l6;
```
437
```//    const int l8= stride + l7;
```
438
```//    const int l9= stride + l8;
```
439
```    int x;
```
440
```    const int QP15= QP + (QP>>2);
```
441
```    src+= stride*3;
```
442
```    for(x=0; x<BLOCK_SIZE; x++){
```
443
```        const int v = (src[x+l5] - src[x+l4]);
```
444
```        if(FFABS(v) < QP15){
```
445
```            src[x+l3] +=v>>3;
```
446
```            src[x+l4] +=v>>1;
```
447
```            src[x+l5] -=v>>1;
```
448
```            src[x+l6] -=v>>3;
```
454 449
```        }
```
450
```    }
```
455 451

456 452
```#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
457 453
```}
```
......
467 463
```static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
```
468 464
```{
```
469 465
```#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
470
```        src+= stride*3;
```
466
```    src+= stride*3;
```
471 467

472
```        asm volatile(
```
473
```                "pxor %%mm7, %%mm7                      \n\t" // 0
```
474
```                "lea (%0, %1), %%"REG_a"                \n\t"
```
475
```                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
468
```    asm volatile(
```
469
```        "pxor %%mm7, %%mm7                      \n\t" // 0
```
470
```        "lea (%0, %1), %%"REG_a"                \n\t"
```
471
```        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
476 472
```//      0       1       2       3       4       5       6       7       8       9
```
477 473
```//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
```
478
```                "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
```
479
```                "movq (%0, %1, 4), %%mm1                \n\t" // line 4
```
480
```                "movq %%mm1, %%mm2                      \n\t" // line 4
```
481
```                "psubusb %%mm0, %%mm1                   \n\t"
```
482
```                "psubusb %%mm2, %%mm0                   \n\t"
```
483
```                "por %%mm1, %%mm0                       \n\t" // |l3 - l4|
```
484
```                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
```
485
```                "movq (%%"REG_c", %1), %%mm4            \n\t" // line 6
```
486
```                "movq %%mm3, %%mm5                      \n\t" // line 5
```
487
```                "psubusb %%mm4, %%mm3                   \n\t"
```
488
```                "psubusb %%mm5, %%mm4                   \n\t"
```
489
```                "por %%mm4, %%mm3                       \n\t" // |l5 - l6|
```
490
```                PAVGB(%%mm3, %%mm0)                           // (|l3 - l4| + |l5 - l6|)/2
```
491
```                "movq %%mm2, %%mm1                      \n\t" // line 4
```
492
```                "psubusb %%mm5, %%mm2                   \n\t"
```
493
```                "movq %%mm2, %%mm4                      \n\t"
```
494
```                "pcmpeqb %%mm7, %%mm2                   \n\t" // (l4 - l5) <= 0 ? -1 : 0
```
495
```                "psubusb %%mm1, %%mm5                   \n\t"
```
496
```                "por %%mm5, %%mm4                       \n\t" // |l4 - l5|
```
497
```                "psubusb %%mm0, %%mm4                   \n\t" //d = MAX(0, |l4-l5| - (|l3-l4| + |l5-l6|)/2)
```
498
```                "movq %%mm4, %%mm3                      \n\t" // d
```
499
```                "movq %2, %%mm0                         \n\t"
```
500
```                "paddusb %%mm0, %%mm0                   \n\t"
```
501
```                "psubusb %%mm0, %%mm4                   \n\t"
```
502
```                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= 2*QP ? -1 : 0
```
503
```                "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
504
```                "pand %%mm4, %%mm3                      \n\t" // d <= 2*QP ? d : 0
```
505

506
```                PAVGB(%%mm7, %%mm3)                           // d/2
```
507
```                "movq %%mm3, %%mm1                      \n\t" // d/2
```
508
```                PAVGB(%%mm7, %%mm3)                           // d/4
```
509
```                PAVGB(%%mm1, %%mm3)                           // 3*d/8
```
510

511
```                "movq (%0, %1, 4), %%mm0                \n\t" // line 4
```
512
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
```
513
```                "psubusb %%mm3, %%mm0                   \n\t"
```
514
```                "pxor %%mm2, %%mm0                      \n\t"
```
515
```                "movq %%mm0, (%0, %1, 4)                \n\t" // line 4
```
516

517
```                "movq (%%"REG_c"), %%mm0                \n\t" // line 5
```
518
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
```
519
```                "paddusb %%mm3, %%mm0                   \n\t"
```
520
```                "pxor %%mm2, %%mm0                      \n\t"
```
521
```                "movq %%mm0, (%%"REG_c")                \n\t" // line 5
```
522

523
```                PAVGB(%%mm7, %%mm1)                           // d/4
```
524

525
```                "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
```
526
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l3-1 : l3
```
527
```                "psubusb %%mm1, %%mm0                   \n\t"
```
528
```                "pxor %%mm2, %%mm0                      \n\t"
```
529
```                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // line 3
```
530

531
```                "movq (%%"REG_c", %1), %%mm0            \n\t" // line 6
```
532
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l6-1 : l6
```
533
```                "paddusb %%mm1, %%mm0                   \n\t"
```
534
```                "pxor %%mm2, %%mm0                      \n\t"
```
535
```                "movq %%mm0, (%%"REG_c", %1)            \n\t" // line 6
```
536

537
```                PAVGB(%%mm7, %%mm1)                           // d/8
```
538

539
```                "movq (%%"REG_a", %1), %%mm0            \n\t" // line 2
```
540
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
```
541
```                "psubusb %%mm1, %%mm0                   \n\t"
```
542
```                "pxor %%mm2, %%mm0                      \n\t"
```
543
```                "movq %%mm0, (%%"REG_a", %1)            \n\t" // line 2
```
544

545
```                "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // line 7
```
546
```                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
```
547
```                "paddusb %%mm1, %%mm0                   \n\t"
```
548
```                "pxor %%mm2, %%mm0                      \n\t"
```
549
```                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
```
550

551
```                :
```
552
```                : "r" (src), "r" ((long)stride), "m" (co->pQPb)
```
553
```                : "%"REG_a, "%"REG_c
```
554
```        );
```
474
```        "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
```
475
```        "movq (%0, %1, 4), %%mm1                \n\t" // line 4
```
476
```        "movq %%mm1, %%mm2                      \n\t" // line 4
```
477
```        "psubusb %%mm0, %%mm1                   \n\t"
```
478
```        "psubusb %%mm2, %%mm0                   \n\t"
```
479
```        "por %%mm1, %%mm0                       \n\t" // |l3 - l4|
```
480
```        "movq (%%"REG_c"), %%mm3                \n\t" // line 5
```
481
```        "movq (%%"REG_c", %1), %%mm4            \n\t" // line 6
```
482
```        "movq %%mm3, %%mm5                      \n\t" // line 5
```
483
```        "psubusb %%mm4, %%mm3                   \n\t"
```
484
```        "psubusb %%mm5, %%mm4                   \n\t"
```
485
```        "por %%mm4, %%mm3                       \n\t" // |l5 - l6|
```
486
```        PAVGB(%%mm3, %%mm0)                           // (|l3 - l4| + |l5 - l6|)/2
```
487
```        "movq %%mm2, %%mm1                      \n\t" // line 4
```
488
```        "psubusb %%mm5, %%mm2                   \n\t"
```
489
```        "movq %%mm2, %%mm4                      \n\t"
```
490
```        "pcmpeqb %%mm7, %%mm2                   \n\t" // (l4 - l5) <= 0 ? -1 : 0
```
491
```        "psubusb %%mm1, %%mm5                   \n\t"
```
492
```        "por %%mm5, %%mm4                       \n\t" // |l4 - l5|
```
493
```        "psubusb %%mm0, %%mm4                   \n\t" //d = MAX(0, |l4-l5| - (|l3-l4| + |l5-l6|)/2)
```
494
```        "movq %%mm4, %%mm3                      \n\t" // d
```
495
```        "movq %2, %%mm0                         \n\t"
```
496
```        "paddusb %%mm0, %%mm0                   \n\t"
```
497
```        "psubusb %%mm0, %%mm4                   \n\t"
```
498
```        "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= 2*QP ? -1 : 0
```
499
```        "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
500
```        "pand %%mm4, %%mm3                      \n\t" // d <= 2*QP ? d : 0
```
501

502
```        PAVGB(%%mm7, %%mm3)                           // d/2
```
503
```        "movq %%mm3, %%mm1                      \n\t" // d/2
```
504
```        PAVGB(%%mm7, %%mm3)                           // d/4
```
505
```        PAVGB(%%mm1, %%mm3)                           // 3*d/8
```
506

507
```        "movq (%0, %1, 4), %%mm0                \n\t" // line 4
```
508
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
```
509
```        "psubusb %%mm3, %%mm0                   \n\t"
```
510
```        "pxor %%mm2, %%mm0                      \n\t"
```
511
```        "movq %%mm0, (%0, %1, 4)                \n\t" // line 4
```
512

513
```        "movq (%%"REG_c"), %%mm0                \n\t" // line 5
```
514
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
```
515
```        "paddusb %%mm3, %%mm0                   \n\t"
```
516
```        "pxor %%mm2, %%mm0                      \n\t"
```
517
```        "movq %%mm0, (%%"REG_c")                \n\t" // line 5
```
518

519
```        PAVGB(%%mm7, %%mm1)                           // d/4
```
520

521
```        "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
```
522
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l3-1 : l3
```
523
```        "psubusb %%mm1, %%mm0                   \n\t"
```
524
```        "pxor %%mm2, %%mm0                      \n\t"
```
525
```        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // line 3
```
526

527
```        "movq (%%"REG_c", %1), %%mm0            \n\t" // line 6
```
528
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l6-1 : l6
```
529
```        "paddusb %%mm1, %%mm0                   \n\t"
```
530
```        "pxor %%mm2, %%mm0                      \n\t"
```
531
```        "movq %%mm0, (%%"REG_c", %1)            \n\t" // line 6
```
532

533
```        PAVGB(%%mm7, %%mm1)                           // d/8
```
534

535
```        "movq (%%"REG_a", %1), %%mm0            \n\t" // line 2
```
536
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
```
537
```        "psubusb %%mm1, %%mm0                   \n\t"
```
538
```        "pxor %%mm2, %%mm0                      \n\t"
```
539
```        "movq %%mm0, (%%"REG_a", %1)            \n\t" // line 2
```
540

541
```        "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // line 7
```
542
```        "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
```
543
```        "paddusb %%mm1, %%mm0                   \n\t"
```
544
```        "pxor %%mm2, %%mm0                      \n\t"
```
545
```        "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
```
546

547
```        :
```
548
```        : "r" (src), "r" ((long)stride), "m" (co->pQPb)
```
549
```        : "%"REG_a, "%"REG_c
```
550
```    );
```
555 551
```#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
556 552

557
```        const int l1= stride;
```
558
```        const int l2= stride + l1;
```
559
```        const int l3= stride + l2;
```
560
```        const int l4= stride + l3;
```
561
```        const int l5= stride + l4;
```
562
```        const int l6= stride + l5;
```
563
```        const int l7= stride + l6;
```
564
```//        const int l8= stride + l7;
```
565
```//        const int l9= stride + l8;
```
566
```        int x;
```
567

568
```        src+= stride*3;
```
569
```        for(x=0; x<BLOCK_SIZE; x++)
```
570
```        {
```
571
```                int a= src[l3] - src[l4];
```
572
```                int b= src[l4] - src[l5];
```
573
```                int c= src[l5] - src[l6];
```
574

575
```                int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
```
576
```                d= FFMAX(d, 0);
```
577

578
```                if(d < co->QP*2)
```
579
```                {
```
580
```                        int v = d * FFSIGN(-b);
```
581

582
```                        src[l2] +=v>>3;
```
583
```                        src[l3] +=v>>2;
```
584
```                        src[l4] +=(3*v)>>3;
```
585
```                        src[l5] -=(3*v)>>3;
```
586
```                        src[l6] -=v>>2;
```
587
```                        src[l7] -=v>>3;
```
588

589
```                }
```
590
```                src++;
```
553
```    const int l1= stride;
```
554
```    const int l2= stride + l1;
```
555
```    const int l3= stride + l2;
```
556
```    const int l4= stride + l3;
```
557
```    const int l5= stride + l4;
```
558
```    const int l6= stride + l5;
```
559
```    const int l7= stride + l6;
```
560
```//    const int l8= stride + l7;
```
561
```//    const int l9= stride + l8;
```
562
```    int x;
```
563

564
```    src+= stride*3;
```
565
```    for(x=0; x<BLOCK_SIZE; x++){
```
566
```        int a= src[l3] - src[l4];
```
567
```        int b= src[l4] - src[l5];
```
568
```        int c= src[l5] - src[l6];
```
569

570
```        int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
```
571
```        d= FFMAX(d, 0);
```
572

573
```        if(d < co->QP*2){
```
574
```            int v = d * FFSIGN(-b);
```
575

576
```            src[l2] +=v>>3;
```
577
```            src[l3] +=v>>2;
```
578
```            src[l4] +=(3*v)>>3;
```
579
```            src[l5] -=(3*v)>>3;
```
580
```            src[l6] -=v>>2;
```
581
```            src[l7] -=v>>3;
```
591 582
```        }
```
583
```        src++;
```
584
```    }
```
592 585
```#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
593 586
```}
```
594 587

......
597 590
```{
```
598 591
```#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
```
599 592
```/*
```
600
```        uint8_t tmp[16];
```
601
```        const int l1= stride;
```
602
```        const int l2= stride + l1;
```
603
```        const int l3= stride + l2;
```
604
```        const int l4= (int)tmp - (int)src - stride*3;
```
605
```        const int l5= (int)tmp - (int)src - stride*3 + 8;
```
606
```        const int l6= stride*3 + l3;
```
607
```        const int l7= stride + l6;
```
608
```        const int l8= stride + l7;
```
609

610
```        memcpy(tmp, src+stride*7, 8);
```
611
```        memcpy(tmp+8, src+stride*8, 8);
```
593
```    uint8_t tmp[16];
```
594
```    const int l1= stride;
```
595
```    const int l2= stride + l1;
```
596
```    const int l3= stride + l2;
```
597
```    const int l4= (int)tmp - (int)src - stride*3;
```
598
```    const int l5= (int)tmp - (int)src - stride*3 + 8;
```
599
```    const int l6= stride*3 + l3;
```
600
```    const int l7= stride + l6;
```
601
```    const int l8= stride + l7;
```
602

603
```    memcpy(tmp, src+stride*7, 8);
```
604
```    memcpy(tmp+8, src+stride*8, 8);
```
612 605
```*/
```
613
```        src+= stride*4;
```
614
```        asm volatile(
```
606
```    src+= stride*4;
```
607
```    asm volatile(
```
615 608

616 609
```#if 0 //slightly more accurate and slightly slower
```
617
```                "pxor %%mm7, %%mm7                      \n\t" // 0
```
618
```                "lea (%0, %1), %%"REG_a"                \n\t"
```
619
```                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
610
```        "pxor %%mm7, %%mm7                      \n\t" // 0
```
611
```        "lea (%0, %1), %%"REG_a"                \n\t"
```
612
```        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
620 613
```//      0       1       2       3       4       5       6       7
```
621 614
```//      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
```
622 615
```//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
```
623 616

624 617

625
```                "movq (%0, %1, 2), %%mm0                \n\t" // l2
```
626
```                "movq (%0), %%mm1                       \n\t" // l0
```
627
```                "movq %%mm0, %%mm2                      \n\t" // l2
```
628
```                PAVGB(%%mm7, %%mm0)                           // ~l2/2
```
629
```                PAVGB(%%mm1, %%mm0)                           // ~(l2 + 2l0)/4
```
630
```                PAVGB(%%mm2, %%mm0)                           // ~(5l2 + 2l0)/8
```
631

632
```                "movq (%%"REG_a"), %%mm1                \n\t" // l1
```
633
```                "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // l3
```
634
```                "movq %%mm1, %%mm4                      \n\t" // l1
```
635
```                PAVGB(%%mm7, %%mm1)                           // ~l1/2
```
636
```                PAVGB(%%mm3, %%mm1)                           // ~(l1 + 2l3)/4
```
637
```                PAVGB(%%mm4, %%mm1)                           // ~(5l1 + 2l3)/8
```
638

639
```                "movq %%mm0, %%mm4                      \n\t" // ~(5l2 + 2l0)/8
```
640
```                "psubusb %%mm1, %%mm0                   \n\t"
```
641
```                "psubusb %%mm4, %%mm1                   \n\t"
```
642
```                "por %%mm0, %%mm1                       \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8
```
618
```        "movq (%0, %1, 2), %%mm0                \n\t" // l2
```
619
```        "movq (%0), %%mm1                       \n\t" // l0
```
620
```        "movq %%mm0, %%mm2                      \n\t" // l2
```
621
```        PAVGB(%%mm7, %%mm0)                           // ~l2/2
```
622
```        PAVGB(%%mm1, %%mm0)                           // ~(l2 + 2l0)/4
```
623
```        PAVGB(%%mm2, %%mm0)                           // ~(5l2 + 2l0)/8
```
624

625
```        "movq (%%"REG_a"), %%mm1                \n\t" // l1
```
626
```        "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // l3
```
627
```        "movq %%mm1, %%mm4                      \n\t" // l1
```
628
```        PAVGB(%%mm7, %%mm1)                           // ~l1/2
```
629
```        PAVGB(%%mm3, %%mm1)                           // ~(l1 + 2l3)/4
```
630
```        PAVGB(%%mm4, %%mm1)                           // ~(5l1 + 2l3)/8
```
631

632
```        "movq %%mm0, %%mm4                      \n\t" // ~(5l2 + 2l0)/8
```
633
```        "psubusb %%mm1, %%mm0                   \n\t"
```
634
```        "psubusb %%mm4, %%mm1                   \n\t"
```
635
```        "por %%mm0, %%mm1                       \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8
```
643 636
```// mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0
```
644 637

645
```                "movq (%0, %1, 4), %%mm0                \n\t" // l4
```
646
```                "movq %%mm0, %%mm4                      \n\t" // l4
```
647
```                PAVGB(%%mm7, %%mm0)                           // ~l4/2
```
648
```                PAVGB(%%mm2, %%mm0)                           // ~(l4 + 2l2)/4
```
649
```                PAVGB(%%mm4, %%mm0)                           // ~(5l4 + 2l2)/8
```
650

651
```                "movq (%%"REG_c"), %%mm2                \n\t" // l5
```
652
```                "movq %%mm3, %%mm5                      \n\t" // l3
```
653
```                PAVGB(%%mm7, %%mm3)                           // ~l3/2
```
654
```                PAVGB(%%mm2, %%mm3)                           // ~(l3 + 2l5)/4
```
655
```                PAVGB(%%mm5, %%mm3)                           // ~(5l3 + 2l5)/8
```
656

657
```                "movq %%mm0, %%mm6                      \n\t" // ~(5l4 + 2l2)/8
```
658
```                "psubusb %%mm3, %%mm0                   \n\t"
```
659
```                "psubusb %%mm6, %%mm3                   \n\t"
```
660
```                "por %%mm0, %%mm3                       \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8
```
661
```                "pcmpeqb %%mm7, %%mm0                   \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
```
638
```        "movq (%0, %1, 4), %%mm0                \n\t" // l4
```
639
```        "movq %%mm0, %%mm4                      \n\t" // l4
```
640
```        PAVGB(%%mm7, %%mm0)                           // ~l4/2
```
641
```        PAVGB(%%mm2, %%mm0)                           // ~(l4 + 2l2)/4
```
642
```        PAVGB(%%mm4, %%mm0)                           // ~(5l4 + 2l2)/8
```
643

644
```        "movq (%%"REG_c"), %%mm2                \n\t" // l5
```
645
```        "movq %%mm3, %%mm5                      \n\t" // l3
```
646
```        PAVGB(%%mm7, %%mm3)                           // ~l3/2
```
647
```        PAVGB(%%mm2, %%mm3)                           // ~(l3 + 2l5)/4
```
648
```        PAVGB(%%mm5, %%mm3)                           // ~(5l3 + 2l5)/8
```
649

650
```        "movq %%mm0, %%mm6                      \n\t" // ~(5l4 + 2l2)/8
```
651
```        "psubusb %%mm3, %%mm0                   \n\t"
```
652
```        "psubusb %%mm6, %%mm3                   \n\t"
```
653
```        "por %%mm0, %%mm3                       \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8
```
654
```        "pcmpeqb %%mm7, %%mm0                   \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
```
662 655
```// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
```
663 656

664
```                "movq (%%"REG_c", %1), %%mm6            \n\t" // l6
```
665
```                "movq %%mm6, %%mm5                      \n\t" // l6
```
666
```                PAVGB(%%mm7, %%mm6)                           // ~l6/2
```
667
```                PAVGB(%%mm4, %%mm6)                           // ~(l6 + 2l4)/4
```
668
```                PAVGB(%%mm5, %%mm6)                           // ~(5l6 + 2l4)/8
```
669

670
```                "movq (%%"REG_c", %1, 2), %%mm5         \n\t" // l7
```
671
```                "movq %%mm2, %%mm4                      \n\t" // l5
```
672
```                PAVGB(%%mm7, %%mm2)                           // ~l5/2
```
673
```                PAVGB(%%mm5, %%mm2)                           // ~(l5 + 2l7)/4
```
674
```                PAVGB(%%mm4, %%mm2)                           // ~(5l5 + 2l7)/8
```
675

676
```                "movq %%mm6, %%mm4                      \n\t" // ~(5l6 + 2l4)/8
```
677
```                "psubusb %%mm2, %%mm6                   \n\t"
```
678
```                "psubusb %%mm4, %%mm2                   \n\t"
```
679
```                "por %%mm6, %%mm2                       \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
```
657
```        "movq (%%"REG_c", %1), %%mm6            \n\t" // l6
```
658
```        "movq %%mm6, %%mm5                      \n\t" // l6
```
659
```        PAVGB(%%mm7, %%mm6)                           // ~l6/2
```
660
```        PAVGB(%%mm4, %%mm6)                           // ~(l6 + 2l4)/4
```
661
```        PAVGB(%%mm5, %%mm6)                           // ~(5l6 + 2l4)/8
```
662

663
```        "movq (%%"REG_c", %1, 2), %%mm5         \n\t" // l7
```
664
```        "movq %%mm2, %%mm4                      \n\t" // l5
```
665
```        PAVGB(%%mm7, %%mm2)                           // ~l5/2
```
666
```        PAVGB(%%mm5, %%mm2)                           // ~(l5 + 2l7)/4
```
667
```        PAVGB(%%mm4, %%mm2)                           // ~(5l5 + 2l7)/8
```
668

669
```        "movq %%mm6, %%mm4                      \n\t" // ~(5l6 + 2l4)/8
```
670
```        "psubusb %%mm2, %%mm6                   \n\t"
```
671
```        "psubusb %%mm4, %%mm2                   \n\t"
```
672
```        "por %%mm6, %%mm2                       \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
```
680 673
```// mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0
```
681 674

682 675

683
```                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
```
684
```                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
```
685
```                "paddusb "MANGLE(b01)", %%mm4           \n\t"
```
686
```                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
```
687
```                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
```
688
```                "pand %%mm4, %%mm3                      \n\t"
```
689

690
```                "movq %%mm3, %%mm1                      \n\t"
```
691
```//                "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
692
```                PAVGB(%%mm7, %%mm3)
```
693
```                PAVGB(%%mm7, %%mm3)
```
694
```                "paddusb %%mm1, %%mm3                   \n\t"
```
695
```//                "paddusb "MANGLE(b01)", %%mm3           \n\t"
```
696

697
```                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
```
698
```                "movq (%0, %1, 4), %%mm5                \n\t" //l4
```
699
```                "movq (%0, %1, 4), %%mm4                \n\t" //l4
```
700
```                "psubusb %%mm6, %%mm5                   \n\t"
```
701
```                "psubusb %%mm4, %%mm6                   \n\t"
```
702
```                "por %%mm6, %%mm5                       \n\t" // |l3-l4|
```
703
```                "pcmpeqb %%mm7, %%mm6                   \n\t" // SIGN(l3-l4)
```
704
```                "pxor %%mm6, %%mm0                      \n\t"
```
705
```                "pand %%mm0, %%mm3                      \n\t"
```
706
```                PMINUB(%%mm5, %%mm3, %%mm0)
```
707

708
```                "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
709
```                PAVGB(%%mm7, %%mm3)
```
710

711
```                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
```
712
```                "movq (%0, %1, 4), %%mm2                \n\t"
```
713
```                "pxor %%mm6, %%mm0                      \n\t"
```
714
```                "pxor %%mm6, %%mm2                      \n\t"
```
715
```                "psubb %%mm3, %%mm0                     \n\t"
```
716
```                "paddb %%mm3, %%mm2                     \n\t"
```
717
```                "pxor %%mm6, %%mm0                      \n\t"
```
718
```                "pxor %%mm6, %%mm2                      \n\t"
```
719
```                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
```
720
```                "movq %%mm2, (%0, %1, 4)                \n\t"
```
676
```        PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
```
677
```        "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
```
678
```        "paddusb "MANGLE(b01)", %%mm4           \n\t"
```
679
```        "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
```
680
```        "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
```
681
```        "pand %%mm4, %%mm3                      \n\t"
```
682

683
```        "movq %%mm3, %%mm1                      \n\t"
```
684
```//        "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
685
```        PAVGB(%%mm7, %%mm3)
```
686
```        PAVGB(%%mm7, %%mm3)
```
687
```        "paddusb %%mm1, %%mm3                   \n\t"
```
688
```//        "paddusb "MANGLE(b01)", %%mm3           \n\t"
```
689

690
```        "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
```
691
```        "movq (%0, %1, 4), %%mm5                \n\t" //l4
```
692
```        "movq (%0, %1, 4), %%mm4                \n\t" //l4
```
693
```        "psubusb %%mm6, %%mm5                   \n\t"
```
694
```        "psubusb %%mm4, %%mm6                   \n\t"
```
695
```        "por %%mm6, %%mm5                       \n\t" // |l3-l4|
```
696
```        "pcmpeqb %%mm7, %%mm6                   \n\t" // SIGN(l3-l4)
```
697
```        "pxor %%mm6, %%mm0                      \n\t"
```
698
```        "pand %%mm0, %%mm3                      \n\t"
```
699
```        PMINUB(%%mm5, %%mm3, %%mm0)
```
700

701
```        "psubusb "MANGLE(b01)", %%mm3           \n\t"
```
702
```        PAVGB(%%mm7, %%mm3)
```
703

704
```        "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
```
705
```        "movq (%0, %1, 4), %%mm2                \n\t"
```
706
```        "pxor %%mm6, %%mm0                      \n\t"
```
707
```        "pxor %%mm6, %%mm2                      \n\t"
```
708
```        "psubb %%mm3, %%mm0                     \n\t"
```
709
```        "paddb %%mm3, %%mm2                     \n\t"
```
710
```        "pxor %%mm6, %%mm0                      \n\t"
```
711
```        "pxor %%mm6, %%mm2                      \n\t"
```
712
```        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
```
713
```        "movq %%mm2, (%0, %1, 4)                \n\t"
```
721 714
```#endif //0
```
722 715

723
```                "lea (%0, %1), %%"REG_a"                \n\t"
```
724
```                "pcmpeqb %%mm6, %%mm6                   \n\t" // -1
```
716
```        "lea (%0, %1), %%"REG_a"                \n\t"
```
717
```        "pcmpeqb %%mm6, %%mm6                   \n\t" // -1
```
725 718
```//      0       1       2       3       4       5       6       7
```
726 719
```//      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
```
727 720
```//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
```
728 721

729 722

730
```                "movq (%%"REG_a", %1, 2), %%mm1         \n\t" // l3
```
731
```                "movq (%0, %1, 4), %%mm0                \n\t" // l4
```
732
```                "pxor %%mm6, %%mm1                      \n\t" // -l3-1
```
733
```                PAVGB(%%mm1, %%mm0)                           // -q+128 = (l4-l3+256)/2
```
723
```        "movq (%%"REG_a", %1, 2), %%mm1         \n\t" // l3
```
724
```        "movq (%0, %1, 4), %%mm0                \n\t" // l4
```
725
```        "pxor %%mm6, %%mm1                      \n\t" // -l3-1
```
726
```        PAVGB(%%mm1, %%mm0)                           // -q+128 = (l4-l3+256)/2
```
734 727
```// mm1=-l3-1, mm0=128-q
```
735 728

736
```                "movq (%%"REG_a", %1, 4), %%mm2         \n\t" // l5
```
737
```                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
```
738
```                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
```
739
```                "movq %%mm2, %%mm5                      \n\t" // -l5-1
```
740
```                "movq "MANGLE(b80)", %%mm4              \n\t" // 128
```
741
```                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
742
```                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
```
743
```                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
```
744
```                PAVGB(%%mm2, %%mm4)                           // ~(l2-l5)/4 +(l4-l3)/8 + 128
```
745
```                PAVGB(%%mm0, %%mm4)                           // ~(l2-l5)/8 +5(l4-l3)/16 + 128
```
729
```        "movq (%%"REG_a", %1, 4), %%mm2         \n\t" // l5
```
730
```        "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
```
731
```        "pxor %%mm6, %%mm2                      \n\t" // -l5-1
```
732
```        "movq %%mm2, %%mm5                      \n\t" // -l5-1
```
733
```        "movq "MANGLE(b80)", %%mm4              \n\t" // 128
```
734
```        "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
```
735
```        PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
```
736
```        PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
```
737
```        PAVGB(%%mm2, %%mm4)                           // ~(l2-l5)/4 +(l4-l3)/8 + 128
```
738
```        PAVGB(%%mm0, %%mm4)                           // ~(l2-l5)/8 +5(l4-l3)/16 + 128
```
746 739
```// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1
```
747 740

748
```                "movq (%%"REG_a"), %%mm2                \n\t" // l1
```
749
```                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
```
750
```                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
```
751
```                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
```
752
```                "movq "MANGLE(b80)", %%mm3              \n\t" // 128
```
753
```                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
```
754
```                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
```
755
```                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
```
741
```        "movq (%%"REG_a"), %%mm2                \n\t" // l1
```
742
```        "pxor %%mm6, %%mm2                      \n\t" // -l1-1
```
743
```        PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
```
744
```        PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
```
745
```        "movq "MANGLE(b80)", %%mm3              \n\t" // 128
```
746
```        PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
```
747
```        PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
```
748
```        PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
```
756 749
```// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
```
757 750

758
```                PAVGB((%%REGc, %1), %%mm5)                    // (l6-l5+256)/2
```
759
```                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
```
760
```                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
```
761
```                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
```
762
```                "movq "MANGLE(b80)", %%mm2              \n\t" // 128
```
763
```                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
```
764
```                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
```
765
```                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
```
751
```        PAVGB((%%REGc, %1), %%mm5)                    // (l6-l5+256)/2
```
752
```        "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
```
753
```        "pxor %%mm6, %%mm1                      \n\t" // -l7-1
```
754
```        PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
```
755
```        "movq "MANGLE(b80)", %%mm2              \n\t" // 128
```
756
```        PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
```
757
```        PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
```
758
```        PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
```
766 759
```// mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
```
767 760

768
```                "movq "MANGLE(b00)", %%mm1              \n\t" // 0
```
769
```                "movq "MANGLE(b00)", %%mm5              \n\t" // 0
```
770
```                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
```
771
```                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
```
772
```                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
```
773
```                 PMAXUB(%%mm5, %%mm3)                         // 128 + |lenergy/16|
```
774
```                PMINUB(%%mm2, %%mm3, %%mm1)                   // 128 + MIN(|lenergy|,|renergy|)/16
```
761
```        "movq "MANGLE(b00)", %%mm1              \n\t" // 0
```
762
```        "movq "MANGLE(b00)", %%mm5              \n\t" // 0
```
763
```        "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
```
764
```        "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
```
765
```        PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
```
766
```        PMAXUB(%%mm5, %%mm3)                          // 128 + |lenergy/16|
```
767
```        PMINUB(%%mm2, %%mm3, %%mm1)                   // 128 + MIN(|lenergy|,|renergy|)/16
```
775 768

776 769
```// mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
```
777 770

778
```                "movq "MANGLE(b00)", %%mm7              \n\t" // 0
```
779
```                "movq %2, %%mm2                         \n\t" // QP
```
780
```                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
```
781
```                "psubb %%mm6, %%mm2                     \n\t"
```
782

783
```                "movq %%mm4, %%mm1                      \n\t"
```
784
```                "pcmpgtb %%mm7, %%mm1                   \n\t" // SIGN(menergy)
```
785
```                "pxor %%mm1, %%mm4                      \n\t"
```
786
```                "psubb %%mm1, %%mm4                     \n\t" // 128 + |menergy|/16
```
787
```                "pcmpgtb %%mm4, %%mm2                   \n\t" // |menergy|/16 < QP/2
```
788
```                "psubusb %%mm3, %%mm4                   \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
```
771
```        "movq "MANGLE(b00)", %%mm7              \n\t" // 0
```
772
```        "movq %2, %%mm2                         \n\t" // QP
```
773
```        PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
```
774
```        "psubb %%mm6, %%mm2                     \n\t"
```
775

776
```        "movq %%mm4, %%mm1                      \n\t"
```
777
```        "pcmpgtb %%mm7, %%mm1                   \n\t" // SIGN(menergy)
```
778
```        "pxor %%mm1, %%mm4                      \n\t"
```
779
```        "psubb %%mm1, %%mm4                     \n\t" // 128 + |menergy|/16
```
780
```        "pcmpgtb %%mm4, %%mm2                   \n\t" // |menergy|/16 < QP/2
```
781
```        "psubusb %%mm3, %%mm4                   \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
```
789 782
```// mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
```
790 783

791
```                "movq %%mm4, %%mm3                      \n\t" // d
```
792
```                "psubusb "MANGLE(b01)", %%mm4           \n\t"
```
793
```                PAVGB(%%mm7, %%mm4)                           // d/32
```
794
```                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
```
795
```                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
```
796
```                "pand %%mm2, %%mm4                      \n\t"
```
797

798
```                "movq "MANGLE(b80)", %%mm5              \n\t" // 128
```
799
```                "psubb %%mm0, %%mm5                     \n\t" // q
```
800
```                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
```
801
```                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
```
802
```                "pxor %%mm7, %%mm5                      \n\t"
```
803

804
```                PMINUB(%%mm5, %%mm4, %%mm3)                   // MIN(|q|, 5d/64)
```
805
```                "pxor %%mm1, %%mm7                      \n\t" // SIGN(d*q)
```
806

807
```                "pand %%mm7, %%mm4                      \n\t"
```
808
```                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
```
809
```                "movq (%0, %1, 4), %%mm2                \n\t"
```
810
```                "pxor %%mm1, %%mm0                      \n\t"
```
811
```                "pxor %%mm1, %%mm2                      \n\t"
```
812
```                "paddb %%mm4, %%mm0                     \n\t"
```
813
```                "psubb %%mm4, %%mm2                     \n\t"
```
814
```                "pxor %%mm1, %%mm0                      \n\t"
```
815
```                "pxor %%mm1, %%mm2                      \n\t"
```
816
```                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
```
817
```                "movq %%mm2, (%0, %1, 4)                \n\t"
```
818

819
```                :
```
820
```                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
821
```                : "%"REG_a, "%"REG_c
```
822
```        );
```
784
```        "movq %%mm4, %%mm3                      \n\t" // d
```
785
```        "psubusb "MANGLE(b01)", %%mm4           \n\t"
```
786
```        PAVGB(%%mm7, %%mm4)                           // d/32
```
787
```        PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
```
788
```        "paddb %%mm3, %%mm4                     \n\t" // 5d/64
```
789
```        "pand %%mm2, %%mm4                      \n\t"
```
790

791
```        "movq "MANGLE(b80)", %%mm5              \n\t" // 128
```
792
```        "psubb %%mm0, %%mm5                     \n\t" // q
```
793
```        "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
```
794
```        "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
```
795
```        "pxor %%mm7, %%mm5                      \n\t"
```
796

797
```        PMINUB(%%mm5, %%mm4, %%mm3)                   // MIN(|q|, 5d/64)
```
798
```        "pxor %%mm1, %%mm7                      \n\t" // SIGN(d*q)
```
799

800
```        "pand %%mm7, %%mm4                      \n\t"
```
801
```        "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
```
802
```        "movq (%0, %1, 4), %%mm2                \n\t"
```
803
```        "pxor %%mm1, %%mm0                      \n\t"
```
804
```        "pxor %%mm1, %%mm2                      \n\t"
```
805
```        "paddb %%mm4, %%mm0                     \n\t"
```
806
```        "psubb %%mm4, %%mm2                     \n\t"
```
807
```        "pxor %%mm1, %%mm0                      \n\t"
```
808
```        "pxor %%mm1, %%mm2                      \n\t"
```
809
```        "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
```
810
```        "movq %%mm2, (%0, %1, 4)                \n\t"
```
811

812
```        :
```
813
```        : "r" (src), "r" ((long)stride), "m" (c->pQPb)
```
814
```        : "%"REG_a, "%"REG_c
```
815
```    );
```
823 816

824 817
```/*
```
825
```        {
```
826
```        int x;
```
827
```        src-= stride;
```
828
```        for(x=0; x<BLOCK_SIZE; x++)
```
829
```        {
```
830
```                const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
```
831
```                if(FFABS(middleEnergy)< 8*QP)
```
832
```                {
```
833
```                        const int q=(src[l4] - src[l5])/2;
```
834
```                        const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
```
835
```                        const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
```
836

837
```                        int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
```
838
```                        d= FFMAX(d, 0);
```
839

840
```                        d= (5*d + 32) >> 6;
```
841
```                        d*= FFSIGN(-middleEnergy);
```
842

843
```                        if(q>0)
```
844
```                        {
```
845
```                                d= d<0 ? 0 : d;
```
846
```                                d= d>q ? q : d;
```
847
```                        }
```
848
```                        else
```
849
```                        {
```
850
```                                d= d>0 ? 0 : d;
```
851
```                                d= d<q ? q : d;
```
852
```                        }
```
853

854
```                        src[l4]-= d;
```
855
```                        src[l5]+= d;
```
856
```                }
```
857
```                src++;
```
818
```    {
```
819
```    int x;
```
820
```    src-= stride;
```
821
```    for(x=0; x<BLOCK_SIZE; x++){
```
822
```        const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
```
823
```        if(FFABS(middleEnergy)< 8*QP){
```
824
```            const int q=(src[l4] - src[l5])/2;
```
825
```            const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
```
826
```            const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
```
827

828
```            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
```
829
```            d= FFMAX(d, 0);
```
830

831
```            d= (5*d + 32) >> 6;
```
832
```            d*= FFSIGN(-middleEnergy);
```
833

834
```            if(q>0){
```
835
```                d= d<0 ? 0 : d;
```
836
```                d= d>q ? q : d;
```
837
```            }else{
```
838
```                d= d>0 ? 0 : d;
```
839
```                d= d<q ? q : d;
```
840
```            }
```
841

842
```            src[l4]-= d;
```
843
```            src[l5]+= d;
```
858 844
```        }
```
859
```src-=8;
```
860
```        for(x=0; x<8; x++)
```
861
```        {
```
862
```                int y;
```
863
```                for(y=4; y<6; y++)
```
864
```                {
```
865
```                        int d= src[x+y*stride] - tmp[x+(y-4)*8];
```
866
```                        int ad= FFABS(d);
```
867
```                        static int max=0;
```
868
```                        static int sum=0;
```
869
```                        static int num=0;
```
870
```                        static int bias=0;
```
871

872
```                        if(max<ad) max=ad;
```
873
```                        sum+= ad>3 ? 1 : 0;
```
874
```                        if(ad>3)
```
875
```                        {
```
876
```                                src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
```
877
```                        }
```
878
```                        if(y==4) bias+=d;
```
879
```                        num++;
```
880
```                        if(num%1000000 == 0)
```
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff