Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ b6204677

History | View | Annotate | Download (140 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 ff4ec49e Fabrice Bellard
 * Copyright (c) 2000, 2001 Fabrice Bellard.
4 8f2ab833 Michael Niedermayer
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 de6d9b64 Fabrice Bellard
 *
6 ff4ec49e Fabrice Bellard
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10 de6d9b64 Fabrice Bellard
 *
11 ff4ec49e Fabrice Bellard
 * This library is distributed in the hope that it will be useful,
12 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15 de6d9b64 Fabrice Bellard
 *
16 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 7ff037e9 Michael Niedermayer
 *
20 59fe111e Michael Niedermayer
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
21 de6d9b64 Fabrice Bellard
 */
22 115329f1 Diego Biurrun
23 983e3246 Michael Niedermayer
/**
24
 * @file dsputil.c
25
 * DSP utils
26
 */
27 115329f1 Diego Biurrun
28 de6d9b64 Fabrice Bellard
#include "avcodec.h"
29
#include "dsputil.h"
30 1457ab52 Michael Niedermayer
#include "mpegvideo.h"
31 b0368839 Michael Niedermayer
#include "simple_idct.h"
32 65e4c8c9 Michael Niedermayer
#include "faandct.h"
33 5596c60c Michael Niedermayer
34 88730be6 Måns Rullgård
/* snow.c */
35
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
36
37 8b69867f Michael Niedermayer
/* Clipping LUT; indexed as cropTbl + MAX_NEG_CROP so out-of-range values
 * (negative or >255) clamp into 0..255.  Zeroed here; presumably filled
 * by the dsputil init code — confirm against dsputil_init. */
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares LUT; used as squareTbl + 256 so negative differences in the
 * range -255..255 can be looked up directly (see sse*_c below). */
uint32_t squareTbl[512] = {0, };
39 de6d9b64 Fabrice Bellard
40 0c1a9eda Zdenek Kabelac
/* Classic 8x8 zigzag scan order (frame/progressive DCT coefficients). */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
50
51 10acc479 Roman Shaposhnik
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
63
64 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* Zeroed here; presumably filled at init from ff_zigzag_direct — confirm. */
uint16_t __align8 inv_zigzag_direct16[64] = {0, };
66 2f349de2 Michael Niedermayer
67 0c1a9eda Zdenek Kabelac
/* Alternate horizontal scan order for 8x8 coefficient blocks. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
77
78 0c1a9eda Zdenek Kabelac
/* Alternate vertical scan order for 8x8 coefficient blocks. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
88
89 2f349de2 Michael Niedermayer
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
/* Reciprocal table for division-by-multiplication: entry b is
 * round-to-nearest of 2^32/b (entry 0 is unused/0, entry 1 saturates). */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
124
125 b0368839 Michael Niedermayer
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
136
137 0c1a9eda Zdenek Kabelac
/**
 * Sum all 256 pixel values of a 16x16 block.
 * @param pix       pointer to the top-left pixel of the block
 * @param line_size stride in bytes between rows
 * @return the sum of the pixels
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;   /* advance one source row */
    }
    return sum;
}
158
159 0c1a9eda Zdenek Kabelac
static int pix_norm1_c(uint8_t * pix, int line_size)
160 3aa102be Michael Niedermayer
{
161
    int s, i, j;
162 0c1a9eda Zdenek Kabelac
    uint32_t *sq = squareTbl + 256;
163 3aa102be Michael Niedermayer
164
    s = 0;
165
    for (i = 0; i < 16; i++) {
166 bb270c08 Diego Biurrun
        for (j = 0; j < 16; j += 8) {
167 2a006cd3 Felix von Leitner
#if 0
168 bb270c08 Diego Biurrun
            s += sq[pix[0]];
169
            s += sq[pix[1]];
170
            s += sq[pix[2]];
171
            s += sq[pix[3]];
172
            s += sq[pix[4]];
173
            s += sq[pix[5]];
174
            s += sq[pix[6]];
175
            s += sq[pix[7]];
176 2a006cd3 Felix von Leitner
#else
177
#if LONG_MAX > 2147483647
178 bb270c08 Diego Biurrun
            register uint64_t x=*(uint64_t*)pix;
179
            s += sq[x&0xff];
180
            s += sq[(x>>8)&0xff];
181
            s += sq[(x>>16)&0xff];
182
            s += sq[(x>>24)&0xff];
183 2a006cd3 Felix von Leitner
            s += sq[(x>>32)&0xff];
184
            s += sq[(x>>40)&0xff];
185
            s += sq[(x>>48)&0xff];
186
            s += sq[(x>>56)&0xff];
187
#else
188 bb270c08 Diego Biurrun
            register uint32_t x=*(uint32_t*)pix;
189
            s += sq[x&0xff];
190
            s += sq[(x>>8)&0xff];
191
            s += sq[(x>>16)&0xff];
192
            s += sq[(x>>24)&0xff];
193 2a006cd3 Felix von Leitner
            x=*(uint32_t*)(pix+4);
194
            s += sq[x&0xff];
195
            s += sq[(x>>8)&0xff];
196
            s += sq[(x>>16)&0xff];
197
            s += sq[(x>>24)&0xff];
198
#endif
199
#endif
200 bb270c08 Diego Biurrun
            pix += 8;
201
        }
202
        pix += line_size - 16;
203 3aa102be Michael Niedermayer
    }
204
    return s;
205
}
206
207 3d2e8cce Michael Niedermayer
/**
 * Byte-swap w 32-bit words from src into dst.
 * Processes eight words per iteration, then handles the remainder.
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int n = 0;

    /* unrolled main loop: 8 words at a time */
    while (n + 8 <= w) {
        dst[n    ] = bswap_32(src[n    ]);
        dst[n + 1] = bswap_32(src[n + 1]);
        dst[n + 2] = bswap_32(src[n + 2]);
        dst[n + 3] = bswap_32(src[n + 3]);
        dst[n + 4] = bswap_32(src[n + 4]);
        dst[n + 5] = bswap_32(src[n + 5]);
        dst[n + 6] = bswap_32(src[n + 6]);
        dst[n + 7] = bswap_32(src[n + 7]);
        n += 8;
    }
    /* tail: remaining 0..7 words */
    while (n < w) {
        dst[n] = bswap_32(src[n]);
        n++;
    }
}
224 3aa102be Michael Niedermayer
225 26efc54e Michael Niedermayer
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
226
{
227
    int s, i;
228
    uint32_t *sq = squareTbl + 256;
229
230
    s = 0;
231
    for (i = 0; i < h; i++) {
232
        s += sq[pix1[0] - pix2[0]];
233
        s += sq[pix1[1] - pix2[1]];
234
        s += sq[pix1[2] - pix2[2]];
235
        s += sq[pix1[3] - pix2[3]];
236
        pix1 += line_size;
237
        pix2 += line_size;
238
    }
239
    return s;
240
}
241
242 bb198e19 Michael Niedermayer
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
243 1457ab52 Michael Niedermayer
{
244
    int s, i;
245 0c1a9eda Zdenek Kabelac
    uint32_t *sq = squareTbl + 256;
246 1457ab52 Michael Niedermayer
247
    s = 0;
248 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
249 1457ab52 Michael Niedermayer
        s += sq[pix1[0] - pix2[0]];
250
        s += sq[pix1[1] - pix2[1]];
251
        s += sq[pix1[2] - pix2[2]];
252
        s += sq[pix1[3] - pix2[3]];
253
        s += sq[pix1[4] - pix2[4]];
254
        s += sq[pix1[5] - pix2[5]];
255
        s += sq[pix1[6] - pix2[6]];
256
        s += sq[pix1[7] - pix2[7]];
257
        pix1 += line_size;
258
        pix2 += line_size;
259
    }
260
    return s;
261
}
262
263 bb198e19 Michael Niedermayer
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
264 9c76bd48 Brian Foley
{
265 6b026927 Falk Hüffner
    int s, i;
266
    uint32_t *sq = squareTbl + 256;
267 9c76bd48 Brian Foley
268
    s = 0;
269 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
270 6b026927 Falk Hüffner
        s += sq[pix1[ 0] - pix2[ 0]];
271
        s += sq[pix1[ 1] - pix2[ 1]];
272
        s += sq[pix1[ 2] - pix2[ 2]];
273
        s += sq[pix1[ 3] - pix2[ 3]];
274
        s += sq[pix1[ 4] - pix2[ 4]];
275
        s += sq[pix1[ 5] - pix2[ 5]];
276
        s += sq[pix1[ 6] - pix2[ 6]];
277
        s += sq[pix1[ 7] - pix2[ 7]];
278
        s += sq[pix1[ 8] - pix2[ 8]];
279
        s += sq[pix1[ 9] - pix2[ 9]];
280
        s += sq[pix1[10] - pix2[10]];
281
        s += sq[pix1[11] - pix2[11]];
282
        s += sq[pix1[12] - pix2[12]];
283
        s += sq[pix1[13] - pix2[13]];
284
        s += sq[pix1[14] - pix2[14]];
285
        s += sq[pix1[15] - pix2[15]];
286 2a006cd3 Felix von Leitner
287 6b026927 Falk Hüffner
        pix1 += line_size;
288
        pix2 += line_size;
289 9c76bd48 Brian Foley
    }
290
    return s;
291
}
292
293 26efc54e Michael Niedermayer
294
/**
 * Wavelet-domain difference score (snow encoder comparison function).
 * Scales the pix1-pix2 differences by 16, applies a spatial DWT
 * (dec_count levels; 3 for w==8, 4 for w==16), then returns the sum of
 * absolute transform coefficients, divided by 4.
 *
 * @param v         unused; matches the comparison-function signature
 * @param w         block width, 8 or 16 (per the w*_8_c / w*_16_c wrappers)
 * @param type      wavelet selector passed to ff_spatial_dwt
 *                  (wrappers pass 1 for the 5/3 variants, 0 for 9/7)
 *
 * NOTE(review): when CONFIG_SNOW_ENCODER is not defined the whole body
 * is compiled out and control falls off the end of a non-void function;
 * presumably such builds never call this — confirm before relying on it.
 */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;
    int tmp[16*16];
#if 0
    /* Disabled per-subband weighting experiment; kept for reference. */
    int level, ori;
    static const int scale[2][2][4][4]={
      {
        {
            //8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            //16x16 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {//FIXME 5/3
            //8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            //16x16 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };
#endif

    /* Difference block, scaled by 16 (<<4); tmp always uses stride 16. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    /* Forward wavelet transform in place (implemented in snow.c). */
    ff_spatial_dwt(tmp, w, h, 16, type, dec_count);

    s=0;
#if 0
    /* Disabled weighted sum matching the scale[] table above. */
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int sx= (ori&1) ? 1<<level: 0;
            int stride= 16<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;
            int size= 1<<level;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += ABS(v);
                }
            }
        }
    }
#endif
    /* Unweighted sum of absolute coefficients. */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            s+= ABS(tmp[16*i+j+0]);
            s+= ABS(tmp[16*i+j+1]);
            s+= ABS(tmp[16*i+j+2]);
            s+= ABS(tmp[16*i+j+3]);
        }
    }
    assert(s>=0);

    return s>>2;
#endif
}
376
377
/* Wavelet score on an 8xh block, type 1 (5/3 per the scale[] comment in w_c). */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

/* Wavelet score on an 8xh block, type 0 (9/7). */
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

/* Wavelet score on a 16xh block, type 1 (5/3). */
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

/* Wavelet score on a 16xh block, type 0 (9/7). */
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}
392
393 0c1a9eda Zdenek Kabelac
/**
 * Copy an 8x8 pixel block into a DCT coefficient block (stride 8).
 * @param block     destination, 64 DCTELEMs
 * @param pixels    source pixels
 * @param line_size source stride in bytes
 */
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
411
412 0c1a9eda Zdenek Kabelac
/**
 * Store the 8x8 difference s1 - s2 into a DCT coefficient block (stride 8).
 * @param block  destination, 64 DCTELEMs
 * @param s1     first source block
 * @param s2     second source block
 * @param stride source stride in bytes (shared by s1 and s2)
 */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
431
432
433 0c1a9eda Zdenek Kabelac
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
434 bb270c08 Diego Biurrun
                                 int line_size)
435 de6d9b64 Fabrice Bellard
{
436
    int i;
437 0c1a9eda Zdenek Kabelac
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
438 115329f1 Diego Biurrun
439 de6d9b64 Fabrice Bellard
    /* read the pixels */
440
    for(i=0;i<8;i++) {
441 c13e1abd Falk Hüffner
        pixels[0] = cm[block[0]];
442
        pixels[1] = cm[block[1]];
443
        pixels[2] = cm[block[2]];
444
        pixels[3] = cm[block[3]];
445
        pixels[4] = cm[block[4]];
446
        pixels[5] = cm[block[5]];
447
        pixels[6] = cm[block[6]];
448
        pixels[7] = cm[block[7]];
449
450
        pixels += line_size;
451
        block += 8;
452 de6d9b64 Fabrice Bellard
    }
453
}
454
455 178fcca8 Michael Niedermayer
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
456 bb270c08 Diego Biurrun
                                 int line_size)
457 178fcca8 Michael Niedermayer
{
458
    int i;
459
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
460 115329f1 Diego Biurrun
461 178fcca8 Michael Niedermayer
    /* read the pixels */
462
    for(i=0;i<4;i++) {
463
        pixels[0] = cm[block[0]];
464
        pixels[1] = cm[block[1]];
465
        pixels[2] = cm[block[2]];
466
        pixels[3] = cm[block[3]];
467
468
        pixels += line_size;
469
        block += 8;
470
    }
471
}
472
473 9ca358b9 Michael Niedermayer
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
474 bb270c08 Diego Biurrun
                                 int line_size)
475 9ca358b9 Michael Niedermayer
{
476
    int i;
477
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
478 115329f1 Diego Biurrun
479 9ca358b9 Michael Niedermayer
    /* read the pixels */
480
    for(i=0;i<2;i++) {
481
        pixels[0] = cm[block[0]];
482
        pixels[1] = cm[block[1]];
483
484
        pixels += line_size;
485
        block += 8;
486
    }
487
}
488
489 115329f1 Diego Biurrun
/**
 * Write an 8x8 signed coefficient block to pixels: each value is offset
 * by +128 and clamped to 0..255 (no LUT; works for any DCTELEM range).
 * @param block     source, 64 DCTELEMs (stride 8)
 * @param pixels    destination pixels
 * @param line_size destination stride in bytes
 */
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            int v = block[col] + 128;
            if (v < 0)
                v = 0;
            else if (v > 255)
                v = 255;
            pixels[col] = (uint8_t)v;
        }
        block  += 8;
        pixels += line_size;
    }
}
509
510 0c1a9eda Zdenek Kabelac
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
511 c13e1abd Falk Hüffner
                          int line_size)
512 de6d9b64 Fabrice Bellard
{
513
    int i;
514 0c1a9eda Zdenek Kabelac
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
515 115329f1 Diego Biurrun
516 de6d9b64 Fabrice Bellard
    /* read the pixels */
517
    for(i=0;i<8;i++) {
518 c13e1abd Falk Hüffner
        pixels[0] = cm[pixels[0] + block[0]];
519
        pixels[1] = cm[pixels[1] + block[1]];
520
        pixels[2] = cm[pixels[2] + block[2]];
521
        pixels[3] = cm[pixels[3] + block[3]];
522
        pixels[4] = cm[pixels[4] + block[4]];
523
        pixels[5] = cm[pixels[5] + block[5]];
524
        pixels[6] = cm[pixels[6] + block[6]];
525
        pixels[7] = cm[pixels[7] + block[7]];
526
        pixels += line_size;
527
        block += 8;
528 de6d9b64 Fabrice Bellard
    }
529
}
530 178fcca8 Michael Niedermayer
531
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
532
                          int line_size)
533
{
534
    int i;
535
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
536 115329f1 Diego Biurrun
537 178fcca8 Michael Niedermayer
    /* read the pixels */
538
    for(i=0;i<4;i++) {
539
        pixels[0] = cm[pixels[0] + block[0]];
540
        pixels[1] = cm[pixels[1] + block[1]];
541
        pixels[2] = cm[pixels[2] + block[2]];
542
        pixels[3] = cm[pixels[3] + block[3]];
543
        pixels += line_size;
544
        block += 8;
545
    }
546
}
547 9ca358b9 Michael Niedermayer
548
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
549
                          int line_size)
550
{
551
    int i;
552
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
553 115329f1 Diego Biurrun
554 9ca358b9 Michael Niedermayer
    /* read the pixels */
555
    for(i=0;i<2;i++) {
556
        pixels[0] = cm[pixels[0] + block[0]];
557
        pixels[1] = cm[pixels[1] + block[1]];
558
        pixels += line_size;
559
        block += 8;
560
    }
561
}
562 36940eca Loren Merritt
563
/**
 * Add an 8x8 coefficient block (stride 8) to pixels, without clamping.
 * Overflow wraps via the uint8_t store, matching plain += semantics.
 */
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 8;
    }
}
579
580
/**
 * Add a 4x4 coefficient block to pixels, without clamping.
 * Note: unlike the 8-wide variants, the coefficients here are packed
 * with stride 4 (block advances by 4 per row).
 */
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 4;
    }
}
592
593 59fe111e Michael Niedermayer
#if 0
594

595
#define PIXOP2(OPNAME, OP) \
596 b3184779 Michael Niedermayer
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
597 59fe111e Michael Niedermayer
{\
598
    int i;\
599
    for(i=0; i<h; i++){\
600
        OP(*((uint64_t*)block), LD64(pixels));\
601
        pixels+=line_size;\
602
        block +=line_size;\
603
    }\
604
}\
605
\
606 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
607 59fe111e Michael Niedermayer
{\
608
    int i;\
609
    for(i=0; i<h; i++){\
610
        const uint64_t a= LD64(pixels  );\
611
        const uint64_t b= LD64(pixels+1);\
612
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
613
        pixels+=line_size;\
614
        block +=line_size;\
615
    }\
616
}\
617
\
618 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
619 59fe111e Michael Niedermayer
{\
620
    int i;\
621
    for(i=0; i<h; i++){\
622
        const uint64_t a= LD64(pixels  );\
623
        const uint64_t b= LD64(pixels+1);\
624
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
625
        pixels+=line_size;\
626
        block +=line_size;\
627
    }\
628
}\
629
\
630 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
631 59fe111e Michael Niedermayer
{\
632
    int i;\
633
    for(i=0; i<h; i++){\
634
        const uint64_t a= LD64(pixels          );\
635
        const uint64_t b= LD64(pixels+line_size);\
636
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
637
        pixels+=line_size;\
638
        block +=line_size;\
639
    }\
640
}\
641
\
642 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
643 59fe111e Michael Niedermayer
{\
644
    int i;\
645
    for(i=0; i<h; i++){\
646
        const uint64_t a= LD64(pixels          );\
647
        const uint64_t b= LD64(pixels+line_size);\
648
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
649
        pixels+=line_size;\
650
        block +=line_size;\
651
    }\
652
}\
653
\
654 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
655 59fe111e Michael Niedermayer
{\
656
        int i;\
657
        const uint64_t a= LD64(pixels  );\
658
        const uint64_t b= LD64(pixels+1);\
659
        uint64_t l0=  (a&0x0303030303030303ULL)\
660
                    + (b&0x0303030303030303ULL)\
661
                    + 0x0202020202020202ULL;\
662
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
663
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
664
        uint64_t l1,h1;\
665
\
666
        pixels+=line_size;\
667
        for(i=0; i<h; i+=2){\
668
            uint64_t a= LD64(pixels  );\
669
            uint64_t b= LD64(pixels+1);\
670
            l1=  (a&0x0303030303030303ULL)\
671
               + (b&0x0303030303030303ULL);\
672
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
673
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
674
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
675
            pixels+=line_size;\
676
            block +=line_size;\
677
            a= LD64(pixels  );\
678
            b= LD64(pixels+1);\
679
            l0=  (a&0x0303030303030303ULL)\
680
               + (b&0x0303030303030303ULL)\
681
               + 0x0202020202020202ULL;\
682
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
683
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
684
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
685
            pixels+=line_size;\
686
            block +=line_size;\
687
        }\
688
}\
689
\
690 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
691 59fe111e Michael Niedermayer
{\
692
        int i;\
693
        const uint64_t a= LD64(pixels  );\
694
        const uint64_t b= LD64(pixels+1);\
695
        uint64_t l0=  (a&0x0303030303030303ULL)\
696
                    + (b&0x0303030303030303ULL)\
697
                    + 0x0101010101010101ULL;\
698
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
699
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
700
        uint64_t l1,h1;\
701
\
702
        pixels+=line_size;\
703
        for(i=0; i<h; i+=2){\
704
            uint64_t a= LD64(pixels  );\
705
            uint64_t b= LD64(pixels+1);\
706
            l1=  (a&0x0303030303030303ULL)\
707
               + (b&0x0303030303030303ULL);\
708
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
709
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
710
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
711
            pixels+=line_size;\
712
            block +=line_size;\
713
            a= LD64(pixels  );\
714
            b= LD64(pixels+1);\
715
            l0=  (a&0x0303030303030303ULL)\
716
               + (b&0x0303030303030303ULL)\
717
               + 0x0101010101010101ULL;\
718
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
719
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
720
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
721
            pixels+=line_size;\
722
            block +=line_size;\
723
        }\
724
}\
725
\
726 45553457 Zdenek Kabelac
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
727
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
728
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
729
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
730
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
731
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
732
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
733 59fe111e Michael Niedermayer

734
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
735
#else // 64 bit variant
736
737
#define PIXOP2(OPNAME, OP) \
738 669ac79c Michael Niedermayer
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
739
    int i;\
740
    for(i=0; i<h; i++){\
741
        OP(*((uint16_t*)(block  )), LD16(pixels  ));\
742
        pixels+=line_size;\
743
        block +=line_size;\
744
    }\
745
}\
746 0da71265 Michael Niedermayer
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
747
    int i;\
748
    for(i=0; i<h; i++){\
749
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
750
        pixels+=line_size;\
751
        block +=line_size;\
752
    }\
753
}\
754 45553457 Zdenek Kabelac
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
755 59fe111e Michael Niedermayer
    int i;\
756
    for(i=0; i<h; i++){\
757
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
758
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
759
        pixels+=line_size;\
760
        block +=line_size;\
761
    }\
762
}\
763 45553457 Zdenek Kabelac
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
764
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
765 b3184779 Michael Niedermayer
}\
766 59fe111e Michael Niedermayer
\
767 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
768
                                                int src_stride1, int src_stride2, int h){\
769 59fe111e Michael Niedermayer
    int i;\
770
    for(i=0; i<h; i++){\
771 b3184779 Michael Niedermayer
        uint32_t a,b;\
772
        a= LD32(&src1[i*src_stride1  ]);\
773
        b= LD32(&src2[i*src_stride2  ]);\
774 d8085ea7 Michael Niedermayer
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
775 b3184779 Michael Niedermayer
        a= LD32(&src1[i*src_stride1+4]);\
776
        b= LD32(&src2[i*src_stride2+4]);\
777 d8085ea7 Michael Niedermayer
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
778 59fe111e Michael Niedermayer
    }\
779
}\
780
\
781 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
782
                                                int src_stride1, int src_stride2, int h){\
783 59fe111e Michael Niedermayer
    int i;\
784
    for(i=0; i<h; i++){\
785 b3184779 Michael Niedermayer
        uint32_t a,b;\
786
        a= LD32(&src1[i*src_stride1  ]);\
787
        b= LD32(&src2[i*src_stride2  ]);\
788 d8085ea7 Michael Niedermayer
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
789 b3184779 Michael Niedermayer
        a= LD32(&src1[i*src_stride1+4]);\
790
        b= LD32(&src2[i*src_stride2+4]);\
791 d8085ea7 Michael Niedermayer
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
792 59fe111e Michael Niedermayer
    }\
793
}\
794
\
795 0da71265 Michael Niedermayer
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
796
                                                int src_stride1, int src_stride2, int h){\
797
    int i;\
798
    for(i=0; i<h; i++){\
799
        uint32_t a,b;\
800
        a= LD32(&src1[i*src_stride1  ]);\
801
        b= LD32(&src2[i*src_stride2  ]);\
802 d8085ea7 Michael Niedermayer
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
803 0da71265 Michael Niedermayer
    }\
804
}\
805
\
806 669ac79c Michael Niedermayer
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
807
                                                int src_stride1, int src_stride2, int h){\
808
    int i;\
809
    for(i=0; i<h; i++){\
810
        uint32_t a,b;\
811
        a= LD16(&src1[i*src_stride1  ]);\
812
        b= LD16(&src2[i*src_stride2  ]);\
813
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
814
    }\
815
}\
816
\
817 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
818
                                                int src_stride1, int src_stride2, int h){\
819
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
820
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
821
}\
822
\
823
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
824
                                                int src_stride1, int src_stride2, int h){\
825
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
826
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
827
}\
828
\
829 45553457 Zdenek Kabelac
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
830 b3184779 Michael Niedermayer
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
831
}\
832
\
833 45553457 Zdenek Kabelac
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
834 b3184779 Michael Niedermayer
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
835
}\
836
\
837 45553457 Zdenek Kabelac
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
838 b3184779 Michael Niedermayer
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
839
}\
840
\
841 45553457 Zdenek Kabelac
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
842 b3184779 Michael Niedermayer
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
843
}\
844
\
845
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
846
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
847 59fe111e Michael Niedermayer
    int i;\
848
    for(i=0; i<h; i++){\
849 b3184779 Michael Niedermayer
        uint32_t a, b, c, d, l0, l1, h0, h1;\
850
        a= LD32(&src1[i*src_stride1]);\
851
        b= LD32(&src2[i*src_stride2]);\
852
        c= LD32(&src3[i*src_stride3]);\
853
        d= LD32(&src4[i*src_stride4]);\
854
        l0=  (a&0x03030303UL)\
855
           + (b&0x03030303UL)\
856
           + 0x02020202UL;\
857
        h0= ((a&0xFCFCFCFCUL)>>2)\
858
          + ((b&0xFCFCFCFCUL)>>2);\
859
        l1=  (c&0x03030303UL)\
860
           + (d&0x03030303UL);\
861
        h1= ((c&0xFCFCFCFCUL)>>2)\
862
          + ((d&0xFCFCFCFCUL)>>2);\
863
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
864
        a= LD32(&src1[i*src_stride1+4]);\
865
        b= LD32(&src2[i*src_stride2+4]);\
866
        c= LD32(&src3[i*src_stride3+4]);\
867
        d= LD32(&src4[i*src_stride4+4]);\
868
        l0=  (a&0x03030303UL)\
869
           + (b&0x03030303UL)\
870
           + 0x02020202UL;\
871
        h0= ((a&0xFCFCFCFCUL)>>2)\
872
          + ((b&0xFCFCFCFCUL)>>2);\
873
        l1=  (c&0x03030303UL)\
874
           + (d&0x03030303UL);\
875
        h1= ((c&0xFCFCFCFCUL)>>2)\
876
          + ((d&0xFCFCFCFCUL)>>2);\
877
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
878 59fe111e Michael Niedermayer
    }\
879
}\
880 669ac79c Michael Niedermayer
\
881
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
882
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
883
}\
884
\
885
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
886
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
887
}\
888
\
889
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
890
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
891
}\
892
\
893
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
894
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
895
}\
896
\
897 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
898
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
899 59fe111e Michael Niedermayer
    int i;\
900
    for(i=0; i<h; i++){\
901 b3184779 Michael Niedermayer
        uint32_t a, b, c, d, l0, l1, h0, h1;\
902
        a= LD32(&src1[i*src_stride1]);\
903
        b= LD32(&src2[i*src_stride2]);\
904
        c= LD32(&src3[i*src_stride3]);\
905
        d= LD32(&src4[i*src_stride4]);\
906
        l0=  (a&0x03030303UL)\
907
           + (b&0x03030303UL)\
908
           + 0x01010101UL;\
909
        h0= ((a&0xFCFCFCFCUL)>>2)\
910
          + ((b&0xFCFCFCFCUL)>>2);\
911
        l1=  (c&0x03030303UL)\
912
           + (d&0x03030303UL);\
913
        h1= ((c&0xFCFCFCFCUL)>>2)\
914
          + ((d&0xFCFCFCFCUL)>>2);\
915
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
916
        a= LD32(&src1[i*src_stride1+4]);\
917
        b= LD32(&src2[i*src_stride2+4]);\
918
        c= LD32(&src3[i*src_stride3+4]);\
919
        d= LD32(&src4[i*src_stride4+4]);\
920
        l0=  (a&0x03030303UL)\
921
           + (b&0x03030303UL)\
922
           + 0x01010101UL;\
923
        h0= ((a&0xFCFCFCFCUL)>>2)\
924
          + ((b&0xFCFCFCFCUL)>>2);\
925
        l1=  (c&0x03030303UL)\
926
           + (d&0x03030303UL);\
927
        h1= ((c&0xFCFCFCFCUL)>>2)\
928
          + ((d&0xFCFCFCFCUL)>>2);\
929
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
930 59fe111e Michael Niedermayer
    }\
931
}\
932 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
933
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
934
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
935
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
936
}\
937
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
938
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
939
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
940
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
941
}\
942 59fe111e Michael Niedermayer
\
943 669ac79c Michael Niedermayer
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
944
{\
945
        int i, a0, b0, a1, b1;\
946
        a0= pixels[0];\
947
        b0= pixels[1] + 2;\
948
        a0 += b0;\
949
        b0 += pixels[2];\
950
\
951
        pixels+=line_size;\
952
        for(i=0; i<h; i+=2){\
953
            a1= pixels[0];\
954
            b1= pixels[1];\
955
            a1 += b1;\
956
            b1 += pixels[2];\
957
\
958
            block[0]= (a1+a0)>>2; /* FIXME non put */\
959
            block[1]= (b1+b0)>>2;\
960
\
961
            pixels+=line_size;\
962
            block +=line_size;\
963
\
964
            a0= pixels[0];\
965
            b0= pixels[1] + 2;\
966
            a0 += b0;\
967
            b0 += pixels[2];\
968
\
969
            block[0]= (a1+a0)>>2;\
970
            block[1]= (b1+b0)>>2;\
971
            pixels+=line_size;\
972
            block +=line_size;\
973
        }\
974
}\
975
\
976
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
977
{\
978
        int i;\
979
        const uint32_t a= LD32(pixels  );\
980
        const uint32_t b= LD32(pixels+1);\
981
        uint32_t l0=  (a&0x03030303UL)\
982
                    + (b&0x03030303UL)\
983
                    + 0x02020202UL;\
984
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
985
                   + ((b&0xFCFCFCFCUL)>>2);\
986
        uint32_t l1,h1;\
987
\
988
        pixels+=line_size;\
989
        for(i=0; i<h; i+=2){\
990
            uint32_t a= LD32(pixels  );\
991
            uint32_t b= LD32(pixels+1);\
992
            l1=  (a&0x03030303UL)\
993
               + (b&0x03030303UL);\
994
            h1= ((a&0xFCFCFCFCUL)>>2)\
995
              + ((b&0xFCFCFCFCUL)>>2);\
996
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
997
            pixels+=line_size;\
998
            block +=line_size;\
999
            a= LD32(pixels  );\
1000
            b= LD32(pixels+1);\
1001
            l0=  (a&0x03030303UL)\
1002
               + (b&0x03030303UL)\
1003
               + 0x02020202UL;\
1004
            h0= ((a&0xFCFCFCFCUL)>>2)\
1005
              + ((b&0xFCFCFCFCUL)>>2);\
1006
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1007
            pixels+=line_size;\
1008
            block +=line_size;\
1009
        }\
1010
}\
1011
\
1012 45553457 Zdenek Kabelac
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1013 59fe111e Michael Niedermayer
{\
1014
    int j;\
1015
    for(j=0; j<2; j++){\
1016
        int i;\
1017
        const uint32_t a= LD32(pixels  );\
1018
        const uint32_t b= LD32(pixels+1);\
1019
        uint32_t l0=  (a&0x03030303UL)\
1020
                    + (b&0x03030303UL)\
1021
                    + 0x02020202UL;\
1022
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1023
                   + ((b&0xFCFCFCFCUL)>>2);\
1024
        uint32_t l1,h1;\
1025
\
1026
        pixels+=line_size;\
1027
        for(i=0; i<h; i+=2){\
1028
            uint32_t a= LD32(pixels  );\
1029
            uint32_t b= LD32(pixels+1);\
1030
            l1=  (a&0x03030303UL)\
1031
               + (b&0x03030303UL);\
1032
            h1= ((a&0xFCFCFCFCUL)>>2)\
1033
              + ((b&0xFCFCFCFCUL)>>2);\
1034
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1035
            pixels+=line_size;\
1036
            block +=line_size;\
1037
            a= LD32(pixels  );\
1038
            b= LD32(pixels+1);\
1039
            l0=  (a&0x03030303UL)\
1040
               + (b&0x03030303UL)\
1041
               + 0x02020202UL;\
1042
            h0= ((a&0xFCFCFCFCUL)>>2)\
1043
              + ((b&0xFCFCFCFCUL)>>2);\
1044
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1045
            pixels+=line_size;\
1046
            block +=line_size;\
1047
        }\
1048
        pixels+=4-line_size*(h+1);\
1049
        block +=4-line_size*h;\
1050
    }\
1051
}\
1052
\
1053 45553457 Zdenek Kabelac
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1054 59fe111e Michael Niedermayer
{\
1055
    int j;\
1056
    for(j=0; j<2; j++){\
1057
        int i;\
1058
        const uint32_t a= LD32(pixels  );\
1059
        const uint32_t b= LD32(pixels+1);\
1060
        uint32_t l0=  (a&0x03030303UL)\
1061
                    + (b&0x03030303UL)\
1062
                    + 0x01010101UL;\
1063
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1064
                   + ((b&0xFCFCFCFCUL)>>2);\
1065
        uint32_t l1,h1;\
1066
\
1067
        pixels+=line_size;\
1068
        for(i=0; i<h; i+=2){\
1069
            uint32_t a= LD32(pixels  );\
1070
            uint32_t b= LD32(pixels+1);\
1071
            l1=  (a&0x03030303UL)\
1072
               + (b&0x03030303UL);\
1073
            h1= ((a&0xFCFCFCFCUL)>>2)\
1074
              + ((b&0xFCFCFCFCUL)>>2);\
1075
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1076
            pixels+=line_size;\
1077
            block +=line_size;\
1078
            a= LD32(pixels  );\
1079
            b= LD32(pixels+1);\
1080
            l0=  (a&0x03030303UL)\
1081
               + (b&0x03030303UL)\
1082
               + 0x01010101UL;\
1083
            h0= ((a&0xFCFCFCFCUL)>>2)\
1084
              + ((b&0xFCFCFCFCUL)>>2);\
1085
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1086
            pixels+=line_size;\
1087
            block +=line_size;\
1088
        }\
1089
        pixels+=4-line_size*(h+1);\
1090
        block +=4-line_size*h;\
1091
    }\
1092
}\
1093
\
1094 45553457 Zdenek Kabelac
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
1095
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1096
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1097
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1098
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
1099
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1100
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1101
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
1102 b3184779 Michael Niedermayer
1103 d8085ea7 Michael Niedermayer
#define op_avg(a, b) a = rnd_avg32(a, b)
1104 59fe111e Michael Niedermayer
#endif
1105
#define op_put(a, b) a = b
1106
1107
PIXOP2(avg, op_avg)
1108
PIXOP2(put, op_put)
1109
#undef op_avg
1110
#undef op_put
1111
1112 de6d9b64 Fabrice Bellard
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

/* Convenience wrappers around the no-rounding two-source averagers for the
 * common case where destination and both sources share a single stride. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}

static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
1122 073b013d Michael Niedermayer
1123 0c1a9eda Zdenek Kabelac
/* 1/16-pel bilinear interpolation over an 8-pixel-wide block.
 * (x16, y16) is the fractional position in sixteenths, each in [0,16);
 * the four corner weights A..D sum to 256, so the accumulated value is
 * normalized by >>8 after adding the caller-supplied rounder. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
1145
1146 115329f1 Diego Biurrun
/* Global motion compensation for one 8-pixel-wide row block: each pixel gets
 * its own sub-pel source position (vx, vy), advanced by (dxx, dyx) across a
 * row and (dxy, dyy) down rows (an affine motion field). Positions outside
 * the [0,width)x[0,height) source area are clamped via clip(), degrading the
 * bilinear interpolation to 1-D or nearest where one axis is out of range.
 * r is the rounding constant added before the final >>(shift*2). */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* vertically clamped: horizontal-only interpolation */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally clamped: vertical-only interpolation */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both clamped: nearest sample */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1203 669ac79c Michael Niedermayer
1204
/* Full-pel copy: dispatch to the fixed-width copy routine for this width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_c (dst, src, stride, height); break;
    case 4: put_pixels4_c (dst, src, stride, height); break;
    case 8: put_pixels8_c (dst, src, stride, height); break;
    case 16:put_pixels16_c(dst, src, stride, height); break;
    }
}
1212
1213
/* Third-pel interpolation, horizontal offset 1/3:
 * dst ~= (2*src[x] + src[x+1]) / 3, using 683/2048 ~= 1/3 fixed point. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
1223
1224
/* Third-pel interpolation, horizontal offset 2/3:
 * dst ~= (src[x] + 2*src[x+1]) / 3, using 683/2048 ~= 1/3 fixed point. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
1234 115329f1 Diego Biurrun
1235 669ac79c Michael Niedermayer
/* Third-pel interpolation, vertical offset 1/3:
 * dst ~= (2*src[y] + src[y+1]) / 3, using 683/2048 ~= 1/3 fixed point. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
1245 115329f1 Diego Biurrun
1246 669ac79c Michael Niedermayer
/* Third-pel interpolation at (1/3, 1/3): weighted 2-D average of the four
 * neighbours (weights 4,3,3,2 sum to 12; 2731/32768 ~= 1/12). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1256
1257
/* Third-pel interpolation at (1/3, 2/3): weighted 2-D average of the four
 * neighbours (weights 3,2,4,3 sum to 12; 2731/32768 ~= 1/12). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1267
1268
/* Third-pel interpolation, vertical offset 2/3:
 * dst ~= (src[y] + 2*src[y+1]) / 3, using 683/2048 ~= 1/3 fixed point. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}
1278
1279
/* Third-pel interpolation at (2/3, 1/3): weighted 2-D average of the four
 * neighbours (weights 3,4,2,3 sum to 12; 2731/32768 ~= 1/12). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1289
1290
/* Third-pel interpolation at (2/3, 2/3): weighted 2-D average of the four
 * neighbours (weights 2,3,3,4 sum to 12; 2731/32768 ~= 1/12). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}
1300 da3b9756 Mike Melanson
1301
/* Full-pel average: dispatch to the fixed-width averaging routine. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_c (dst, src, stride, height); break;
    case 4: avg_pixels4_c (dst, src, stride, height); break;
    case 8: avg_pixels8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_c(dst, src, stride, height); break;
    }
}
1309
1310
/* Third-pel (1/3, 0) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1320
1321
/* Third-pel (2/3, 0) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1331 115329f1 Diego Biurrun
1332 da3b9756 Mike Melanson
/* Third-pel (0, 1/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1342 115329f1 Diego Biurrun
1343 da3b9756 Mike Melanson
/* Third-pel (1/3, 1/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1353
1354
/* Third-pel (1/3, 2/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1364
1365
/* Third-pel (0, 2/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1375
1376
/* Third-pel (2/3, 1/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1386
1387
/* Third-pel (2/3, 2/3) interpolation, then rounded average with existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
1397 669ac79c Michael Niedermayer
#if 0
/* NOTE(review): disabled width-specialized tpel wrappers. The stray "void"
 * before each call appears to be a leftover; kept verbatim since this block
 * is compiled out. */
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif
1418
1419 0da71265 Michael Niedermayer
/**
 * H.264-style 2D bilinear chroma motion compensation.
 * Each output pixel is a weighted average of a 2x2 source neighbourhood,
 * with eighth-pel weights derived from the fractional position (x,y):
 *   A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy  (A+B+C+D == 64).
 * The OP macro consumes the 6-bit fixed-point sum (rounding by +32, >>6
 * for put; additionally averaged with the destination for avg).
 * Generates 2-, 4- and 8-pixel-wide variants for the given OPNAME prefix.
 * Requires 0 <= x < 8 and 0 <= y < 8 (asserted).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        dst+= stride;\
        src+= stride;\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
        OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
        OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
        OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
        OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
        OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
        OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
        OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
        dst+= stride;\
        src+= stride;\
    }\
}

/* rounding variants: op_put stores the normalized sum, op_avg averages it
 * with the existing destination pixel (rounding up) */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1489
1490
/* Copy a 4-pixel-wide block of h rows, one unaligned 32-bit
 * load/store per row (LD32/ST32 handle alignment portably). */
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        dst+=dstStride;
        src+=srcStride;
    }
}
1500
1501
/* Copy an 8-pixel-wide block of h rows using two 32-bit
 * unaligned loads/stores per row. */
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst+=dstStride;
        src+=srcStride;
    }
}
1512
1513
/* Copy a 16-pixel-wide block of h rows using four 32-bit
 * unaligned loads/stores per row. */
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst+=dstStride;
        src+=srcStride;
    }
}
1526 073b013d Michael Niedermayer
1527 0c1a9eda Zdenek Kabelac
/* Copy a 17-pixel-wide block of h rows: four 32-bit stores plus the
 * trailing odd byte.  17-wide source blocks are needed by the qpel
 * interpolation filters, which read one extra column. */
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];
        dst+=dstStride;
        src+=srcStride;
    }
}
1541
1542 0c1a9eda Zdenek Kabelac
/* Copy a 9-pixel-wide block of h rows: two 32-bit stores plus the
 * trailing odd byte.  9-wide source blocks are needed by the 8-tap
 * qpel filters, which read one extra column. */
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];
        dst+=dstStride;
        src+=srcStride;
    }
}
1554
1555 826f429a Michael Niedermayer
1556 b3184779 Michael Niedermayer
#define QPEL_MC(r, OPNAME, RND, OP) \
1557 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1558
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1559 b3184779 Michael Niedermayer
    int i;\
1560
    for(i=0; i<h; i++)\
1561
    {\
1562
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1563
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1564
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1565
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1566
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1567
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1568
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1569
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1570
        dst+=dstStride;\
1571
        src+=srcStride;\
1572
    }\
1573 44eb4951 Michael Niedermayer
}\
1574
\
1575 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1576 db794953 Michael Niedermayer
    const int w=8;\
1577 0c1a9eda Zdenek Kabelac
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1578 b3184779 Michael Niedermayer
    int i;\
1579
    for(i=0; i<w; i++)\
1580
    {\
1581
        const int src0= src[0*srcStride];\
1582
        const int src1= src[1*srcStride];\
1583
        const int src2= src[2*srcStride];\
1584
        const int src3= src[3*srcStride];\
1585
        const int src4= src[4*srcStride];\
1586
        const int src5= src[5*srcStride];\
1587
        const int src6= src[6*srcStride];\
1588
        const int src7= src[7*srcStride];\
1589
        const int src8= src[8*srcStride];\
1590
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1591
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1592
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1593
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1594
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1595
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1596
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1597
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1598
        dst++;\
1599
        src++;\
1600
    }\
1601
}\
1602
\
1603 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1604
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1605 b3184779 Michael Niedermayer
    int i;\
1606 826f429a Michael Niedermayer
    \
1607 b3184779 Michael Niedermayer
    for(i=0; i<h; i++)\
1608
    {\
1609
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1610
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1611
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1612
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1613
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1614
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1615
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1616
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1617
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1618
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1619
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1620
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1621
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1622
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1623
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1624
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1625
        dst+=dstStride;\
1626
        src+=srcStride;\
1627
    }\
1628
}\
1629
\
1630 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1631
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1632 b3184779 Michael Niedermayer
    int i;\
1633 826f429a Michael Niedermayer
    const int w=16;\
1634 b3184779 Michael Niedermayer
    for(i=0; i<w; i++)\
1635
    {\
1636
        const int src0= src[0*srcStride];\
1637
        const int src1= src[1*srcStride];\
1638
        const int src2= src[2*srcStride];\
1639
        const int src3= src[3*srcStride];\
1640
        const int src4= src[4*srcStride];\
1641
        const int src5= src[5*srcStride];\
1642
        const int src6= src[6*srcStride];\
1643
        const int src7= src[7*srcStride];\
1644
        const int src8= src[8*srcStride];\
1645
        const int src9= src[9*srcStride];\
1646
        const int src10= src[10*srcStride];\
1647
        const int src11= src[11*srcStride];\
1648
        const int src12= src[12*srcStride];\
1649
        const int src13= src[13*srcStride];\
1650
        const int src14= src[14*srcStride];\
1651
        const int src15= src[15*srcStride];\
1652
        const int src16= src[16*srcStride];\
1653
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1654
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1655
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1656
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1657
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1658
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1659
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1660
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1661
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1662
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1663
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1664
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1665
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1666
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1667
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1668
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1669
        dst++;\
1670
        src++;\
1671
    }\
1672
}\
1673
\
1674 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1675 45553457 Zdenek Kabelac
    OPNAME ## pixels8_c(dst, src, stride, 8);\
1676 b3184779 Michael Niedermayer
}\
1677
\
1678 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1679
    uint8_t half[64];\
1680 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1681
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1682 44eb4951 Michael Niedermayer
}\
1683
\
1684 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1685 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1686 44eb4951 Michael Niedermayer
}\
1687
\
1688 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1689
    uint8_t half[64];\
1690 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1691
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1692 44eb4951 Michael Niedermayer
}\
1693
\
1694 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1695
    uint8_t full[16*9];\
1696
    uint8_t half[64];\
1697 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1698 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1699 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1700 44eb4951 Michael Niedermayer
}\
1701
\
1702 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1703
    uint8_t full[16*9];\
1704 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1705 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1706 44eb4951 Michael Niedermayer
}\
1707
\
1708 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1709
    uint8_t full[16*9];\
1710
    uint8_t half[64];\
1711 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1712 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1713 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1714 44eb4951 Michael Niedermayer
}\
1715 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1716
    uint8_t full[16*9];\
1717
    uint8_t halfH[72];\
1718
    uint8_t halfV[64];\
1719
    uint8_t halfHV[64];\
1720 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1721
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1722 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1723
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1724 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1725 44eb4951 Michael Niedermayer
}\
1726 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1727
    uint8_t full[16*9];\
1728
    uint8_t halfH[72];\
1729
    uint8_t halfHV[64];\
1730 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1731
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1732
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1733
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1734
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1735
}\
1736 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1737
    uint8_t full[16*9];\
1738
    uint8_t halfH[72];\
1739
    uint8_t halfV[64];\
1740
    uint8_t halfHV[64];\
1741 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1742
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1743 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1744
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1745 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1746 44eb4951 Michael Niedermayer
}\
1747 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1748
    uint8_t full[16*9];\
1749
    uint8_t halfH[72];\
1750
    uint8_t halfHV[64];\
1751 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1752
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1753
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1754
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1755
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1756
}\
1757 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1758
    uint8_t full[16*9];\
1759
    uint8_t halfH[72];\
1760
    uint8_t halfV[64];\
1761
    uint8_t halfHV[64];\
1762 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1763
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1764 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1765
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1766 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1767 44eb4951 Michael Niedermayer
}\
1768 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1769
    uint8_t full[16*9];\
1770
    uint8_t halfH[72];\
1771
    uint8_t halfHV[64];\
1772 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1773
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1774
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1775
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1776
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1777
}\
1778 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1779
    uint8_t full[16*9];\
1780
    uint8_t halfH[72];\
1781
    uint8_t halfV[64];\
1782
    uint8_t halfHV[64];\
1783 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1784
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1785 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1786
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1787 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1788 44eb4951 Michael Niedermayer
}\
1789 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1790
    uint8_t full[16*9];\
1791
    uint8_t halfH[72];\
1792
    uint8_t halfHV[64];\
1793 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1794
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1795
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1796
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1797
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1798
}\
1799 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1800
    uint8_t halfH[72];\
1801
    uint8_t halfHV[64];\
1802 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1803 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1804 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1805 44eb4951 Michael Niedermayer
}\
1806 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1807
    uint8_t halfH[72];\
1808
    uint8_t halfHV[64];\
1809 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1810 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1811 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1812 44eb4951 Michael Niedermayer
}\
1813 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1814
    uint8_t full[16*9];\
1815
    uint8_t halfH[72];\
1816
    uint8_t halfV[64];\
1817
    uint8_t halfHV[64];\
1818 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1819
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1820 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1821
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1822 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1823 44eb4951 Michael Niedermayer
}\
1824 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1825
    uint8_t full[16*9];\
1826
    uint8_t halfH[72];\
1827 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1828
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1829
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1830
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1831
}\
1832 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1833
    uint8_t full[16*9];\
1834
    uint8_t halfH[72];\
1835
    uint8_t halfV[64];\
1836
    uint8_t halfHV[64];\
1837 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1838
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1839 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1840
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1841 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1842 44eb4951 Michael Niedermayer
}\
1843 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1844
    uint8_t full[16*9];\
1845
    uint8_t halfH[72];\
1846 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1847
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1848
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1849
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1850
}\
1851 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1852
    uint8_t halfH[72];\
1853 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1854 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1855 b3184779 Michael Niedermayer
}\
1856 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1857 45553457 Zdenek Kabelac
    OPNAME ## pixels16_c(dst, src, stride, 16);\
1858 b3184779 Michael Niedermayer
}\
1859
\
1860 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1861
    uint8_t half[256];\
1862 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1863
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1864
}\
1865
\
1866 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1867 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1868 44eb4951 Michael Niedermayer
}\
1869 b3184779 Michael Niedermayer
\
1870 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1871
    uint8_t half[256];\
1872 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1873
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1874
}\
1875
\
1876 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1877
    uint8_t full[24*17];\
1878
    uint8_t half[256];\
1879 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1880 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1881 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1882
}\
1883
\
1884 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1885
    uint8_t full[24*17];\
1886 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1887 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1888 b3184779 Michael Niedermayer
}\
1889
\
1890 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1891
    uint8_t full[24*17];\
1892
    uint8_t half[256];\
1893 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1894 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1895 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1896
}\
1897 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1898
    uint8_t full[24*17];\
1899
    uint8_t halfH[272];\
1900
    uint8_t halfV[256];\
1901
    uint8_t halfHV[256];\
1902 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1903
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1904 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1905
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1906 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1907
}\
1908 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1909
    uint8_t full[24*17];\
1910
    uint8_t halfH[272];\
1911
    uint8_t halfHV[256];\
1912 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1913
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1914
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1915
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1916
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1917
}\
1918 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1919
    uint8_t full[24*17];\
1920
    uint8_t halfH[272];\
1921
    uint8_t halfV[256];\
1922
    uint8_t halfHV[256];\
1923 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1924
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1925 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1926
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1927 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1928
}\
1929 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1930
    uint8_t full[24*17];\
1931
    uint8_t halfH[272];\
1932
    uint8_t halfHV[256];\
1933 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1934
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1935
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1936
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1937
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1938
}\
1939 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1940
    uint8_t full[24*17];\
1941
    uint8_t halfH[272];\
1942
    uint8_t halfV[256];\
1943
    uint8_t halfHV[256];\
1944 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1945
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1946 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1947
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1948 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1949
}\
1950 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1951
    uint8_t full[24*17];\
1952
    uint8_t halfH[272];\
1953
    uint8_t halfHV[256];\
1954 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1955
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1956
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1957
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1958
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1959
}\
1960 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1961
    uint8_t full[24*17];\
1962
    uint8_t halfH[272];\
1963
    uint8_t halfV[256];\
1964
    uint8_t halfHV[256];\
1965 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1966
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1967 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1968
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1969 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1970
}\
1971 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1972
    uint8_t full[24*17];\
1973
    uint8_t halfH[272];\
1974
    uint8_t halfHV[256];\
1975 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1976
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1977
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1978
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1979
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1980
}\
1981 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1982
    uint8_t halfH[272];\
1983
    uint8_t halfHV[256];\
1984 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1985 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1986 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1987
}\
1988 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1989
    uint8_t halfH[272];\
1990
    uint8_t halfHV[256];\
1991 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1992 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1993 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1994
}\
1995 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1996
    uint8_t full[24*17];\
1997
    uint8_t halfH[272];\
1998
    uint8_t halfV[256];\
1999
    uint8_t halfHV[256];\
2000 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2001
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2002 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2003
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2004 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2005
}\
2006 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2007
    uint8_t full[24*17];\
2008
    uint8_t halfH[272];\
2009 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2010
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2011
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2012
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2013
}\
2014 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2015
    uint8_t full[24*17];\
2016
    uint8_t halfH[272];\
2017
    uint8_t halfV[256];\
2018
    uint8_t halfHV[256];\
2019 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2020
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2021 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2022
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2023 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2024
}\
2025 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2026
    uint8_t full[24*17];\
2027
    uint8_t halfH[272];\
2028 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2029
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2030
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2031
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2032
}\
2033 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2034
    uint8_t halfH[272];\
2035 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2036 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2037 45553457 Zdenek Kabelac
}
2038 44eb4951 Michael Niedermayer
2039 b3184779 Michael Niedermayer
/* Pixel store operations plugged into QPEL_MC as its OP argument.
 * b is a filter sum scaled by 32; cm (the clip table, in scope at the
 * expansion site) rounds and clamps it to 0..255.  The _no_rnd variants
 * round down (+15 instead of +16); the avg variants additionally average
 * the result with the existing destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the MPEG-4 quarter-pel MC function families:
 * put_, put_no_rnd_ and avg_ (avg_no_rnd is unused and stays disabled). */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
2052 44eb4951 Michael Niedermayer
2053 0da71265 Michael Niedermayer
#if 1
2054
/*
 * H264_LOWPASS generates the H.264 half-pel interpolation primitives for
 * one store operation, supplied at expansion time:
 *   OPNAME - name prefix of the generated functions
 *   OP     - store op applied after a single 6-tap pass (sum scaled by 32)
 *   OP2    - store op applied after the two-pass h+v filter (wider sum)
 * The filter is the 6-tap (1,-5,20,20,-5,1) kernel, applied:
 *   *_h_lowpass  - horizontally, one row at a time
 *   *_v_lowpass  - vertically, one column at a time
 *   *_hv_lowpass - horizontally into the int16_t tmp buffer (unclipped),
 *                  then vertically over tmp, stored via OP2
 * 4x4 and 8x8 variants are written out explicitly; the 16-wide variants
 * call the 8x8 ones on the four 8x8 quadrants.
 * cm is cropTbl+MAX_NEG_CROP, the 0..255 clipping table.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
2255
2256
/*
 * H264_MC generates the 16 quarter-pel motion-compensation functions
 * mcXY for one block size, where X/Y are the horizontal/vertical
 * quarter-pel phases (0..3).  Each function combines the half-pel
 * lowpass primitives generated by H264_LOWPASS:
 *   mc00            - plain full-pel copy
 *   mc20/mc02/mc22  - pure h / v / hv half-pel filter output
 *   all others      - average (pixels##SIZE##_l2) of two neighbouring
 *                     full-pel / half-pel planes
 * `full` holds a padded copy of the source (2 rows above, 3 below) so
 * the vertical filter has its support; full_mid points at the block's
 * first real row inside it.
 */
#define H264_MC(OPNAME, SIZE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
2392
2393
/* Store ops for the H.264 instantiations below.
 * op_*  round-and-clip a single-pass 6-tap sum (scaled by 32, >>5);
 * op2_* handle the two-pass hv sum (scaled by 1024, >>10);
 * the *_avg variants additionally average with the destination pixel. */
#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

/* Generate the put_ and avg_ lowpass primitives, then the full set of
 * quarter-pel MC functions for 4x4, 8x8 and 16x16 blocks. */
H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
2412
#endif
2413
2414 91c56db6 Michael Niedermayer
/* H.264 weighted-prediction sample ops (clip_uint8 clamps to 0..255):
 * op_scale1 - unidirectional: block[x] = (block[x]*weight + offset) >> log2_denom
 * op_scale2 - bidirectional:  dst[x] = (src[x]*weights + dst[x]*weightd
 *                                       + offset) >> (log2_denom+1) */
#define op_scale1(x)  block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
#define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
/* H264_WEIGHT(W,H) generates the weight/biweight functions for one WxH
 * block size.  The row body is unrolled for the widest case (16) and the
 * `if(W==n) continue;` guards cut it short for narrower widths; the
 * compiler folds these constant tests away per instantiation.
 * The offset pre-scaling adds the rounding term 1<<(log2_denom-1)
 * (resp. the +1 in the bidirectional case) before the shift. */
#define H264_WEIGHT(W,H) \
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
    int attribute_unused x, y; \
    offset <<= log2_denom; \
    if(log2_denom) offset += 1<<(log2_denom-1); \
    for(y=0; y<H; y++, block += stride){ \
        op_scale1(0); \
        op_scale1(1); \
        if(W==2) continue; \
        op_scale1(2); \
        op_scale1(3); \
        if(W==4) continue; \
        op_scale1(4); \
        op_scale1(5); \
        op_scale1(6); \
        op_scale1(7); \
        if(W==8) continue; \
        op_scale1(8); \
        op_scale1(9); \
        op_scale1(10); \
        op_scale1(11); \
        op_scale1(12); \
        op_scale1(13); \
        op_scale1(14); \
        op_scale1(15); \
    } \
} \
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
    int attribute_unused x, y; \
    int offset = (offsets + offsetd + 1) >> 1; \
    offset = ((offset << 1) + 1) << log2_denom; \
    for(y=0; y<H; y++, dst += stride, src += stride){ \
        op_scale2(0); \
        op_scale2(1); \
        if(W==2) continue; \
        op_scale2(2); \
        op_scale2(3); \
        if(W==4) continue; \
        op_scale2(4); \
        op_scale2(5); \
        op_scale2(6); \
        op_scale2(7); \
        if(W==8) continue; \
        op_scale2(8); \
        op_scale2(9); \
        op_scale2(10); \
        op_scale2(11); \
        op_scale2(12); \
        op_scale2(13); \
        op_scale2(14); \
        op_scale2(15); \
    } \
}

/* All block sizes used by H.264 weighted prediction. */
H264_WEIGHT(16,16)
H264_WEIGHT(16,8)
H264_WEIGHT(8,16)
H264_WEIGHT(8,8)
H264_WEIGHT(8,4)
H264_WEIGHT(4,8)
H264_WEIGHT(4,4)
H264_WEIGHT(4,2)
H264_WEIGHT(2,4)
H264_WEIGHT(2,2)

#undef op_scale1
#undef op_scale2
#undef H264_WEIGHT
2484
2485 1457ab52 Michael Niedermayer
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2486
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
2487
    int i;
2488
2489
    for(i=0; i<h; i++){
2490
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2491
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2492
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2493
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2494
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2495
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2496
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2497
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2498
        dst+=dstStride;
2499 115329f1 Diego Biurrun
        src+=srcStride;
2500 1457ab52 Michael Niedermayer
    }
2501
}
2502
2503
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2504
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
2505
    int i;
2506
2507
    for(i=0; i<w; i++){
2508
        const int src_1= src[ -srcStride];
2509
        const int src0 = src[0          ];
2510
        const int src1 = src[  srcStride];
2511
        const int src2 = src[2*srcStride];
2512
        const int src3 = src[3*srcStride];
2513
        const int src4 = src[4*srcStride];
2514
        const int src5 = src[5*srcStride];
2515
        const int src6 = src[6*srcStride];
2516
        const int src7 = src[7*srcStride];
2517
        const int src8 = src[8*srcStride];
2518
        const int src9 = src[9*srcStride];
2519
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2520
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
2521
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
2522
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
2523
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
2524
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
2525
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
2526
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
2527
        src++;
2528
        dst++;
2529
    }
2530
}
2531
2532
/* mspel MC, phase (0,0): plain 8x8 full-pel copy, no filtering. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_c(dst, src, stride, 8);
}
2535
2536
/* mspel MC, phase (1,0): quarter-pel left — average of the source block
 * and its horizontally filtered half-pel version. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2(dst, src, hbuf, stride, stride, 8, 8);
}
2541
2542
/* mspel MC, phase (2,0): horizontal half-pel, filtered straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2545
2546
/* mspel MC, phase (3,0): quarter-pel right — average of the source block
 * shifted one pixel right and the horizontal half-pel version. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, hbuf, stride, stride, 8, 8);
}
2551
2552
/* mspel MC, phase (0,2): vertical half-pel, filtered straight into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
2555
2556
/* mspel MC, phase (1,2): average of the vertical half-pel plane and the
 * combined h+v half-pel plane.  The horizontal pass covers 11 rows
 * (starting one row above the block) so the vertical pass over it has
 * full filter support; hbuf+8 skips that extra top row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf [88];
    uint8_t vbuf [64];
    uint8_t hvbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvbuf, hbuf+8, 8, 8, 8);
    put_pixels8_l2(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
2565
/* mspel MC, phase (3,2): like mc12 but the pure vertical plane is taken
 * one pixel to the right (src+1); the h+v plane is shared. */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t hbuf [88];
    uint8_t vbuf [64];
    uint8_t hvbuf[64];

    wmv2_mspel8_h_lowpass(hbuf, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(vbuf, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hvbuf, hbuf+8, 8, 8, 8);
    put_pixels8_l2(dst, vbuf, hvbuf, stride, 8, 8, 8);
}
2574
/* mspel MC, phase (2,2): horizontal filter over 11 rows (one above the
 * block for vertical support), then vertical filter of that result
 * straight into dst; halfH+8 skips the extra top row. */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
2579
2580 332f9ac4 Michael Niedermayer
/* H.263 deblocking across a horizontal block edge: for each of the 8
 * columns, filters the four pixels straddling the edge at src
 * (p0,p1 above it, p2,p3 below).  Filter strength is looked up from the
 * quantizer.  d1 follows a dead-zone ramp: proportional to the gradient d
 * for small |d|, tapering back to 0 for |d| >= 2*strength so real edges
 * are left alone; d2 applies a smaller, clamped correction to the outer
 * pixels p0/p3. */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    int x;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(x=0; x<8; x++){
        int d1, d2, ad1;
        int p0= src[x-2*stride];
        int p1= src[x-1*stride];
        int p2= src[x+0*stride];
        int p3= src[x+1*stride];
        /* edge gradient, weighted toward the two pixels next to the edge */
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clamp to 0..255: for values in -256..511 bit 8 flags out-of-range;
         * negative -> ~(-1) = 0, >255 -> ~0 = -1 = 0xFF after the byte store */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[x-1*stride] = p1;
        src[x+0*stride] = p2;

        /* outer-pixel correction, limited to half of |d1| */
        ad1= ABS(d1)>>1;

        d2= clip((p0-p3)/4, -ad1, ad1);

        src[x-2*stride] = p0 - d2;
        src[x+  stride] = p3 + d2;
    }
}
2614
2615
/* H.263 deblocking across a vertical block edge: identical filter to
 * h263_v_loop_filter_c but applied to each of the 8 rows, with
 * p0,p1 left of the edge at src and p2,p3 to its right. */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    int y;
    const int strength= ff_h263_loop_filter_strength[qscale];

    for(y=0; y<8; y++){
        int d1, d2, ad1;
        int p0= src[y*stride-2];
        int p1= src[y*stride-1];
        int p2= src[y*stride+0];
        int p3= src[y*stride+1];
        /* edge gradient, weighted toward the two pixels next to the edge */
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;

        /* dead-zone ramp: 0 outside +-2*strength, proportional inside */
        if     (d<-2*strength) d1= 0;
        else if(d<-  strength) d1=-2*strength - d;
        else if(d<   strength) d1= d;
        else if(d< 2*strength) d1= 2*strength - d;
        else                   d1= 0;

        p1 += d1;
        p2 -= d1;
        /* clamp to 0..255 (valid for the -256..511 range reached here):
         * negative -> 0, >255 -> 0xFF after the byte store */
        if(p1&256) p1= ~(p1>>31);
        if(p2&256) p2= ~(p2>>31);

        src[y*stride-1] = p1;
        src[y*stride+0] = p2;

        /* outer-pixel correction, limited to half of |d1| */
        ad1= ABS(d1)>>1;

        d2= clip((p0-p3)/4, -ad1, ad1);

        src[y*stride-2] = p0 - d2;
        src[y*stride+1] = p3 + d2;
    }
}
2649 1457ab52 Michael Niedermayer
2650 fdbbf2e0 Michael Niedermayer
/**
 * H.261 in-loop filter: separable [1 2 1]/4 smoothing of an 8x8 block.
 * Border pixels of the block are passed through unchanged; interior
 * pixels are filtered vertically then horizontally with rounding.
 */
static void h261_loop_filter_c(uint8_t *src, int stride){
    /* vertically filtered block, kept scaled by 4 so precision is only
       dropped at the final rounding shift */
    int vfilt[64];
    int i, j;

    /* vertical pass: top/bottom rows are copied (x4), interior rows get
       the [1 2 1] tap across vertical neighbours */
    for(j=0; j<8; j++){
        vfilt[j     ] = 4*src[j];
        vfilt[j + 56] = 4*src[j + 7*stride];
    }
    for(i=1; i<7; i++){
        const uint8_t *row = src + i*stride;
        for(j=0; j<8; j++)
            vfilt[8*i + j] = row[j - stride] + 2*row[j] + row[j + stride];
    }

    /* horizontal pass: left/right columns just undo the x4 scale with
       rounding; interior columns apply [1 2 1] again (total scale 16) */
    for(i=0; i<8; i++){
        uint8_t *row = src + i*stride;
        const int *t = vfilt + 8*i;
        row[0] = (t[0] + 2) >> 2;
        row[7] = (t[7] + 2) >> 2;
        for(j=1; j<7; j++)
            row[j] = (t[j-1] + 2*t[j] + t[j+1] + 8) >> 4;
    }
}
2676
2677 5cf08f23 Loren Merritt
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
2678 42251a2a Loren Merritt
{
2679
    int i, d;
2680
    for( i = 0; i < 4; i++ ) {
2681
        if( tc0[i] < 0 ) {
2682
            pix += 4*ystride;
2683
            continue;
2684
        }
2685
        for( d = 0; d < 4; d++ ) {
2686
            const int p0 = pix[-1*xstride];
2687
            const int p1 = pix[-2*xstride];
2688
            const int p2 = pix[-3*xstride];
2689
            const int q0 = pix[0];
2690
            const int q1 = pix[1*xstride];
2691
            const int q2 = pix[2*xstride];
2692 115329f1 Diego Biurrun
2693 42251a2a Loren Merritt
            if( ABS( p0 - q0 ) < alpha &&
2694
                ABS( p1 - p0 ) < beta &&
2695
                ABS( q1 - q0 ) < beta ) {
2696 115329f1 Diego Biurrun
2697 42251a2a Loren Merritt
                int tc = tc0[i];
2698
                int i_delta;
2699 115329f1 Diego Biurrun
2700 42251a2a Loren Merritt
                if( ABS( p2 - p0 ) < beta ) {
2701 bda1c56c Michael Niedermayer
                    pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
2702 42251a2a Loren Merritt
                    tc++;
2703
                }
2704
                if( ABS( q2 - q0 ) < beta ) {
2705 bda1c56c Michael Niedermayer
                    pix[   xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
2706