Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ 9f9c3229

History | View | Annotate | Download (69.9 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 ff4ec49e Fabrice Bellard
 * Copyright (c) 2000, 2001 Fabrice Bellard.
4 de6d9b64 Fabrice Bellard
 *
5 ff4ec49e Fabrice Bellard
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9 de6d9b64 Fabrice Bellard
 *
10 ff4ec49e Fabrice Bellard
 * This library is distributed in the hope that it will be useful,
11 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14 de6d9b64 Fabrice Bellard
 *
15 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 7ff037e9 Michael Niedermayer
 *
19 59fe111e Michael Niedermayer
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
20 de6d9b64 Fabrice Bellard
 */
21
#include "avcodec.h"
22
#include "dsputil.h"
23
24
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
25 9dbcbd92 Michael Niedermayer
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
26 de6d9b64 Fabrice Bellard
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
27
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
28 073b013d Michael Niedermayer
void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
29
void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy, 
30
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
31 649c00c9 Michael Niedermayer
void (*clear_blocks)(DCTELEM *blocks);
32 3aa102be Michael Niedermayer
int (*pix_sum)(UINT8 * pix, int line_size);
33
int (*pix_norm1)(UINT8 * pix, int line_size);
34 de6d9b64 Fabrice Bellard
35
op_pixels_abs_func pix_abs16x16;
36
op_pixels_abs_func pix_abs16x16_x2;
37
op_pixels_abs_func pix_abs16x16_y2;
38
op_pixels_abs_func pix_abs16x16_xy2;
39
40 ba6802de Michael Niedermayer
op_pixels_abs_func pix_abs8x8;
41
op_pixels_abs_func pix_abs8x8_x2;
42
op_pixels_abs_func pix_abs8x8_y2;
43
op_pixels_abs_func pix_abs8x8_xy2;
44
45 5596c60c Michael Niedermayer
int ff_bit_exact=0;
46
47 0cfa9713 Fabrice Bellard
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
48 de6d9b64 Fabrice Bellard
UINT32 squareTbl[512];
49
50 2ad1516a Michael Niedermayer
const UINT8 ff_zigzag_direct[64] = {
51
    0,   1,  8, 16,  9,  2,  3, 10,
52
    17, 24, 32, 25, 18, 11,  4,  5,
53 e0eac44e Fabrice Bellard
    12, 19, 26, 33, 40, 48, 41, 34,
54 2ad1516a Michael Niedermayer
    27, 20, 13,  6,  7, 14, 21, 28,
55 e0eac44e Fabrice Bellard
    35, 42, 49, 56, 57, 50, 43, 36,
56
    29, 22, 15, 23, 30, 37, 44, 51,
57
    58, 59, 52, 45, 38, 31, 39, 46,
58
    53, 60, 61, 54, 47, 55, 62, 63
59
};
60
61 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
62
UINT16 __align8 inv_zigzag_direct16[64];
63
64 2ad1516a Michael Niedermayer
const UINT8 ff_alternate_horizontal_scan[64] = {
65
    0,  1,   2,  3,  8,  9, 16, 17, 
66 e0eac44e Fabrice Bellard
    10, 11,  4,  5,  6,  7, 15, 14,
67
    13, 12, 19, 18, 24, 25, 32, 33, 
68
    26, 27, 20, 21, 22, 23, 28, 29,
69
    30, 31, 34, 35, 40, 41, 48, 49, 
70
    42, 43, 36, 37, 38, 39, 44, 45,
71
    46, 47, 50, 51, 56, 57, 58, 59, 
72
    52, 53, 54, 55, 60, 61, 62, 63,
73
};
74
75 2ad1516a Michael Niedermayer
const UINT8 ff_alternate_vertical_scan[64] = {
76
    0,  8,  16, 24,  1,  9,  2, 10, 
77 e0eac44e Fabrice Bellard
    17, 25, 32, 40, 48, 56, 57, 49,
78
    41, 33, 26, 18,  3, 11,  4, 12, 
79
    19, 27, 34, 42, 50, 58, 35, 43,
80
    51, 59, 20, 28,  5, 13,  6, 14, 
81
    21, 29, 36, 44, 52, 60, 37, 45,
82
    53, 61, 22, 30,  7, 15, 23, 31, 
83
    38, 46, 54, 62, 39, 47, 55, 63,
84
};
85
86 2f349de2 Michael Niedermayer
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
87
UINT32 inverse[256]={
88
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
89
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
90
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
91
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
92
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
93
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
94
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
95
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
96
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
97
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
98
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
99
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
100
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
101
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
102
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
103
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
104
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
105
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
106
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
107
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
108
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
109
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
110
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
111
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
112
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
113
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
114
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
115
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
116
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
117
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
118
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
119
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
120
};
121
122 3aa102be Michael Niedermayer
/*
 * Sum of all pixel values of a 16x16 block.
 *
 * pix       - top-left pixel of the block
 * line_size - offset (in UINT8 elements) from one row to the next
 *
 * Returns the sum of the 256 values read.
 */
int pix_sum_c(UINT8 * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;   /* step to the start of the next row */
    }
    return total;
}
143
144
/*
 * Sum of squareTbl[256 + p] over every pixel p of a 16x16 block.
 * NOTE(review): squareTbl is filled elsewhere; presumably entry 256 + p
 * holds p*p, making this the sum of squared pixel values - confirm.
 *
 * pix       - top-left pixel of the block
 * line_size - offset (in UINT8 elements) from one row to the next
 *
 * Returns the accumulated table values.
 */
int pix_norm1_c(UINT8 * pix, int line_size)
{
    UINT32 *square = squareTbl + 256;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += square[pix[col]];
        pix += line_size;   /* step to the start of the next row */
    }
    return total;
}
166
167
168 c13e1abd Falk Hüffner
/*
 * Copy an 8x8 block of pixels into a contiguous array of 64 DCTELEMs.
 *
 * block     - destination, written row after row (8 elements per row)
 * pixels    - top-left source pixel
 * line_size - offset (in UINT8 elements) between two source rows
 */
void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        block  += 8;
        pixels += line_size;
    }
}
186
187 c13e1abd Falk Hüffner
/*
 * Store the element-wise difference s1 - s2 of two 8x8 pixel blocks into a
 * contiguous array of 64 DCTELEMs.
 *
 * block  - destination, written row after row (8 elements per row)
 * s1, s2 - top-left pixels of the two source blocks
 * stride - offset (in UINT8 elements) between rows, shared by s1 and s2
 */
void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
                   int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        /* subtract the two sources pixel by pixel */
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        block += 8;
        s1    += stride;
        s2    += stride;
    }
}
206
207
208 c13e1abd Falk Hüffner
/*
 * Write an 8x8 block of DCTELEMs to pixels, mapping every value through
 * the cropTbl lookup (offset by MAX_NEG_CROP so negative indices are valid).
 * NOTE(review): cropTbl is filled elsewhere; presumably it clamps to the
 * UINT8 range - confirm.
 *
 * block     - 64 source values, 8 per row
 * pixels    - top-left destination pixel
 * line_size - offset (in UINT8 elements) between two destination rows
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                          int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* write the mapped values of this row */
        for (col = 0; col < 8; col++)
            pixels[col] = crop[block[col]];
        block  += 8;
        pixels += line_size;
    }
}
229
230 c13e1abd Falk Hüffner
/*
 * Add an 8x8 block of DCTELEMs to the pixels already present, mapping every
 * sum through the cropTbl lookup (offset by MAX_NEG_CROP so negative
 * indices are valid).
 * NOTE(review): cropTbl is filled elsewhere; presumably it clamps to the
 * UINT8 range - confirm.
 *
 * block     - 64 values to add, 8 per row
 * pixels    - top-left pixel of the block that is updated in place
 * line_size - offset (in UINT8 elements) between two pixel rows
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                          int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* update this row in place */
        for (col = 0; col < 8; col++)
            pixels[col] = crop[pixels[col] + block[col]];
        block  += 8;
        pixels += line_size;
    }
}
250 59fe111e Michael Niedermayer
#if 0
251

252
/*
 * PIXOP2(OPNAME, OP) - 64 bit flavour. It lives in the "#if 0" branch, so it
 * is currently not compiled.
 *
 * Generates the OPNAME_* pixel routines, each handling one 64 bit word
 * (8 packed bytes, loaded with LD64) per row for h rows and advancing both
 * pointers by line_size after every row. OP(dst, val) is the store
 * operation applied to every output word.
 *
 *   OPNAME_pixels              OP(dst, source word)
 *   OPNAME_pixels_x2 / _y2     per-byte average of the word at pixels and the
 *                              word at pixels+1 (x2) or pixels+line_size (y2),
 *                              rounded up:   (a|b) - (((a^b)&0xFE..FE)>>1)
 *   OPNAME_no_rnd_pixels_x2 / _y2
 *                              same average rounded down:
 *                              (a&b) + (((a^b)&0xFE..FE)>>1)
 *                              The 0xFE mask drops the low bit of every byte
 *                              so the shift cannot leak into the next byte.
 *   OPNAME_pixels_xy2          per-byte (a+b+c+d+2)>>2 of the 2x2
 *                              neighbourhood. The low 2 bits (l0/l1) and the
 *                              high 6 bits (h0/h1) of every byte are summed
 *                              separately so no carry crosses a byte boundary.
 *   OPNAME_no_rnd_pixels_xy2   same, with rounding constant 1 instead of 2.
 *
 * The xy2 loops reuse the sums of the previous source row and emit two
 * output rows per iteration (i += 2), so h is presumably expected to be
 * even - TODO confirm against callers.
 *
 * CALL_2X_PIXELS is defined elsewhere; presumably it builds each 16 wide
 * routine from two calls of the 8 wide one - confirm.
 *
 * Finally two non-static tables are defined, OPNAME_pixels_tab[2][4] and
 * OPNAME_no_rnd_pixels_tab[2][4]. Second index: 0 = plain, 1 = x2, 2 = y2,
 * 3 = xy2. First index here: 0 = 8 wide, 1 = 16 wide.
 * NOTE(review): the 32 bit PIXOP2 in the "#else" branch orders the first
 * index the other way round (16 wide first) and names the 8 wide routines
 * *_pixels8*; reconcile both before enabling this branch.
 */
#define PIXOP2(OPNAME, OP) \
253 b3184779 Michael Niedermayer
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
254 59fe111e Michael Niedermayer
{\
255
    int i;\
256
    for(i=0; i<h; i++){\
257
        OP(*((uint64_t*)block), LD64(pixels));\
258
        pixels+=line_size;\
259
        block +=line_size;\
260
    }\
261
}\
262
\
263 b3184779 Michael Niedermayer
static void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
264 59fe111e Michael Niedermayer
{\
265
    int i;\
266
    for(i=0; i<h; i++){\
267
        const uint64_t a= LD64(pixels  );\
268
        const uint64_t b= LD64(pixels+1);\
269
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
270
        pixels+=line_size;\
271
        block +=line_size;\
272
    }\
273
}\
274
\
275 b3184779 Michael Niedermayer
static void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
276 59fe111e Michael Niedermayer
{\
277
    int i;\
278
    for(i=0; i<h; i++){\
279
        const uint64_t a= LD64(pixels  );\
280
        const uint64_t b= LD64(pixels+1);\
281
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
282
        pixels+=line_size;\
283
        block +=line_size;\
284
    }\
285
}\
286
\
287 b3184779 Michael Niedermayer
static void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
288 59fe111e Michael Niedermayer
{\
289
    int i;\
290
    for(i=0; i<h; i++){\
291
        const uint64_t a= LD64(pixels          );\
292
        const uint64_t b= LD64(pixels+line_size);\
293
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
294
        pixels+=line_size;\
295
        block +=line_size;\
296
    }\
297
}\
298
\
299 b3184779 Michael Niedermayer
static void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
300 59fe111e Michael Niedermayer
{\
301
    int i;\
302
    for(i=0; i<h; i++){\
303
        const uint64_t a= LD64(pixels          );\
304
        const uint64_t b= LD64(pixels+line_size);\
305
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
306
        pixels+=line_size;\
307
        block +=line_size;\
308
    }\
309
}\
310
\
311 b3184779 Michael Niedermayer
static void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
312 59fe111e Michael Niedermayer
{\
313
        int i;\
314
        const uint64_t a= LD64(pixels  );\
315
        const uint64_t b= LD64(pixels+1);\
316
        uint64_t l0=  (a&0x0303030303030303ULL)\
317
                    + (b&0x0303030303030303ULL)\
318
                    + 0x0202020202020202ULL;\
319
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
320
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
321
        uint64_t l1,h1;\
322
\
323
        pixels+=line_size;\
324
        for(i=0; i<h; i+=2){\
325
            uint64_t a= LD64(pixels  );\
326
            uint64_t b= LD64(pixels+1);\
327
            l1=  (a&0x0303030303030303ULL)\
328
               + (b&0x0303030303030303ULL);\
329
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
330
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
331
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
332
            pixels+=line_size;\
333
            block +=line_size;\
334
            a= LD64(pixels  );\
335
            b= LD64(pixels+1);\
336
            l0=  (a&0x0303030303030303ULL)\
337
               + (b&0x0303030303030303ULL)\
338
               + 0x0202020202020202ULL;\
339
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
340
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
341
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
342
            pixels+=line_size;\
343
            block +=line_size;\
344
        }\
345
}\
346
\
347 b3184779 Michael Niedermayer
static void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
348 59fe111e Michael Niedermayer
{\
349
        int i;\
350
        const uint64_t a= LD64(pixels  );\
351
        const uint64_t b= LD64(pixels+1);\
352
        uint64_t l0=  (a&0x0303030303030303ULL)\
353
                    + (b&0x0303030303030303ULL)\
354
                    + 0x0101010101010101ULL;\
355
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
356
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
357
        uint64_t l1,h1;\
358
\
359
        pixels+=line_size;\
360
        for(i=0; i<h; i+=2){\
361
            uint64_t a= LD64(pixels  );\
362
            uint64_t b= LD64(pixels+1);\
363
            l1=  (a&0x0303030303030303ULL)\
364
               + (b&0x0303030303030303ULL);\
365
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
366
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
367
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
368
            pixels+=line_size;\
369
            block +=line_size;\
370
            a= LD64(pixels  );\
371
            b= LD64(pixels+1);\
372
            l0=  (a&0x0303030303030303ULL)\
373
               + (b&0x0303030303030303ULL)\
374
               + 0x0101010101010101ULL;\
375
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
376
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
377
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
378
            pixels+=line_size;\
379
            block +=line_size;\
380
        }\
381
}\
382
\
383 b3184779 Michael Niedermayer
CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels    , 8)\
384
CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels_x2 , 8)\
385
CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels_y2 , 8)\
386
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels_xy2, 8)\
387
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels_x2 , 8)\
388
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels_y2 , 8)\
389
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels_xy2, 8)\
390
\
391
void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
392
    {\
393
        OPNAME ## _pixels,\
394
        OPNAME ## _pixels_x2,\
395
        OPNAME ## _pixels_y2,\
396
        OPNAME ## _pixels_xy2},\
397
    {\
398
        OPNAME ## _pixels16,\
399
        OPNAME ## _pixels16_x2,\
400
        OPNAME ## _pixels16_y2,\
401
        OPNAME ## _pixels16_xy2}\
402 59fe111e Michael Niedermayer
};\
403
\
404 b3184779 Michael Niedermayer
void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
405
    {\
406
        OPNAME ## _pixels,\
407
        OPNAME ## _no_rnd_pixels_x2,\
408
        OPNAME ## _no_rnd_pixels_y2,\
409
        OPNAME ## _no_rnd_pixels_xy2},\
410
    {\
411
        OPNAME ## _pixels16,\
412
        OPNAME ## _no_rnd_pixels16_x2,\
413
        OPNAME ## _no_rnd_pixels16_y2,\
414
        OPNAME ## _no_rnd_pixels16_xy2}\
415 59fe111e Michael Niedermayer
};
416

417
/* op_avg(a, b): a = per-byte average of a and b, rounded up, for operands
 * holding 8 packed bytes; the 0xFE mask keeps the shift inside each byte. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
418
#else // end of the (disabled) 64 bit variant, the 32 bit variant follows
419
420
/*
 * PIXOP2(OPNAME, OP) - 32 bit flavour. It lives in the "#else" branch of an
 * "#if 0", so this is the definition that gets compiled.
 *
 * Generates the OPNAME_* pixel routines. Data is handled as 32 bit words
 * (4 packed bytes, loaded with LD32); OP(dst, val) is the store operation
 * applied to every output word and h is the number of rows.
 *
 *   OPNAME_pixels8             OP(dst, src) on two words (8 bytes) per row
 *   OPNAME_no_rnd_pixels8      forwards to OPNAME_pixels8
 *   OPNAME_pixels8_l2          per-byte average of src1 and src2, rounded up:
 *                              (a|b) - (((a^b)&0xFEFEFEFE)>>1);
 *                              dst, src1 and src2 each have their own stride
 *   OPNAME_no_rnd_pixels8_l2   same average rounded down:
 *                              (a&b) + (((a^b)&0xFEFEFEFE)>>1)
 *                              The 0xFE mask drops the low bit of every byte
 *                              so the shift cannot leak into the next byte.
 *   OPNAME_[no_rnd_]pixels16_l2
 *                              two 8 wide _l2 calls at byte offsets 0 and 8
 *   OPNAME_[no_rnd_]pixels8_x2 _l2 of (pixels, pixels+1)
 *   OPNAME_[no_rnd_]pixels8_y2 _l2 of (pixels, pixels+line_size)
 *   OPNAME_pixels8_l4          per-byte (a+b+c+d+2)>>2 of four sources. The
 *                              low 2 bits (l0/l1) and the high 6 bits (h0/h1)
 *                              of every byte are summed separately so no
 *                              carry crosses a byte boundary.
 *   OPNAME_no_rnd_pixels8_l4   same, with rounding constant 1 instead of 2
 *   OPNAME_[no_rnd_]pixels16_l4
 *                              two 8 wide _l4 calls at byte offsets 0 and 8
 *   OPNAME_pixels8_xy2         (a+b+c+d+2)>>2 of the 2x2 neighbourhood. The
 *                              j loop handles the block as two 4 byte wide
 *                              columns; inside, two output rows are produced
 *                              per iteration (i += 2), reusing the sums of
 *                              the previous source row. After a column the
 *                              pointers are rewound to the top row and moved
 *                              4 bytes to the right.
 *   OPNAME_no_rnd_pixels8_xy2  same, with rounding constant 1 instead of 2
 *
 * Because the xy2 loops step by two rows, h is presumably expected to be
 * even - TODO confirm against callers.
 *
 * CALL_2X_PIXELS is defined elsewhere; presumably it builds each 16 wide
 * routine from two calls of the 8 wide one - confirm. Note that
 * OPNAME_no_rnd_pixels16 is built from OPNAME_pixels8.
 *
 * Finally two non-static tables are defined, OPNAME_pixels_tab[2][4] and
 * OPNAME_no_rnd_pixels_tab[2][4]. First index: 0 = 16 wide, 1 = 8 wide.
 * Second index: 0 = plain, 1 = x2, 2 = y2, 3 = xy2. Slot 0 of the no_rnd
 * table points at the same plain routines as the rounding table.
 */
#define PIXOP2(OPNAME, OP) \
421 b3184779 Michael Niedermayer
static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
422 59fe111e Michael Niedermayer
    int i;\
423
    for(i=0; i<h; i++){\
424
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
425
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
426
        pixels+=line_size;\
427
        block +=line_size;\
428
    }\
429
}\
430 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
431
    OPNAME ## _pixels8(block, pixels, line_size, h);\
432
}\
433 59fe111e Michael Niedermayer
\
434 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
435
                                                int src_stride1, int src_stride2, int h){\
436 59fe111e Michael Niedermayer
    int i;\
437
    for(i=0; i<h; i++){\
438 b3184779 Michael Niedermayer
        uint32_t a,b;\
439
        a= LD32(&src1[i*src_stride1  ]);\
440
        b= LD32(&src2[i*src_stride2  ]);\
441
        OP(*((uint32_t*)&dst[i*dst_stride  ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
442
        a= LD32(&src1[i*src_stride1+4]);\
443
        b= LD32(&src2[i*src_stride2+4]);\
444
        OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
445 59fe111e Michael Niedermayer
    }\
446
}\
447
\
448 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
449
                                                int src_stride1, int src_stride2, int h){\
450 59fe111e Michael Niedermayer
    int i;\
451
    for(i=0; i<h; i++){\
452 b3184779 Michael Niedermayer
        uint32_t a,b;\
453
        a= LD32(&src1[i*src_stride1  ]);\
454
        b= LD32(&src2[i*src_stride2  ]);\
455
        OP(*((uint32_t*)&dst[i*dst_stride  ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
456
        a= LD32(&src1[i*src_stride1+4]);\
457
        b= LD32(&src2[i*src_stride2+4]);\
458
        OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
459 59fe111e Michael Niedermayer
    }\
460
}\
461
\
462 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
463
                                                int src_stride1, int src_stride2, int h){\
464
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
465
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
466
}\
467
\
468
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
469
                                                int src_stride1, int src_stride2, int h){\
470
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
471
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
472
}\
473
\
474
static inline void OPNAME ## _no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
475
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
476
}\
477
\
478
static inline void OPNAME ## _pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
479
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
480
}\
481
\
482
static inline void OPNAME ## _no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
483
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
484
}\
485
\
486
static inline void OPNAME ## _pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
487
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
488
}\
489
\
490
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
491
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
492 59fe111e Michael Niedermayer
    int i;\
493
    for(i=0; i<h; i++){\
494 b3184779 Michael Niedermayer
        uint32_t a, b, c, d, l0, l1, h0, h1;\
495
        a= LD32(&src1[i*src_stride1]);\
496
        b= LD32(&src2[i*src_stride2]);\
497
        c= LD32(&src3[i*src_stride3]);\
498
        d= LD32(&src4[i*src_stride4]);\
499
        l0=  (a&0x03030303UL)\
500
           + (b&0x03030303UL)\
501
           + 0x02020202UL;\
502
        h0= ((a&0xFCFCFCFCUL)>>2)\
503
          + ((b&0xFCFCFCFCUL)>>2);\
504
        l1=  (c&0x03030303UL)\
505
           + (d&0x03030303UL);\
506
        h1= ((c&0xFCFCFCFCUL)>>2)\
507
          + ((d&0xFCFCFCFCUL)>>2);\
508
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
509
        a= LD32(&src1[i*src_stride1+4]);\
510
        b= LD32(&src2[i*src_stride2+4]);\
511
        c= LD32(&src3[i*src_stride3+4]);\
512
        d= LD32(&src4[i*src_stride4+4]);\
513
        l0=  (a&0x03030303UL)\
514
           + (b&0x03030303UL)\
515
           + 0x02020202UL;\
516
        h0= ((a&0xFCFCFCFCUL)>>2)\
517
          + ((b&0xFCFCFCFCUL)>>2);\
518
        l1=  (c&0x03030303UL)\
519
           + (d&0x03030303UL);\
520
        h1= ((c&0xFCFCFCFCUL)>>2)\
521
          + ((d&0xFCFCFCFCUL)>>2);\
522
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
523 59fe111e Michael Niedermayer
    }\
524
}\
525 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
526
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
527 59fe111e Michael Niedermayer
    int i;\
528
    for(i=0; i<h; i++){\
529 b3184779 Michael Niedermayer
        uint32_t a, b, c, d, l0, l1, h0, h1;\
530
        a= LD32(&src1[i*src_stride1]);\
531
        b= LD32(&src2[i*src_stride2]);\
532
        c= LD32(&src3[i*src_stride3]);\
533
        d= LD32(&src4[i*src_stride4]);\
534
        l0=  (a&0x03030303UL)\
535
           + (b&0x03030303UL)\
536
           + 0x01010101UL;\
537
        h0= ((a&0xFCFCFCFCUL)>>2)\
538
          + ((b&0xFCFCFCFCUL)>>2);\
539
        l1=  (c&0x03030303UL)\
540
           + (d&0x03030303UL);\
541
        h1= ((c&0xFCFCFCFCUL)>>2)\
542
          + ((d&0xFCFCFCFCUL)>>2);\
543
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
544
        a= LD32(&src1[i*src_stride1+4]);\
545
        b= LD32(&src2[i*src_stride2+4]);\
546
        c= LD32(&src3[i*src_stride3+4]);\
547
        d= LD32(&src4[i*src_stride4+4]);\
548
        l0=  (a&0x03030303UL)\
549
           + (b&0x03030303UL)\
550
           + 0x01010101UL;\
551
        h0= ((a&0xFCFCFCFCUL)>>2)\
552
          + ((b&0xFCFCFCFCUL)>>2);\
553
        l1=  (c&0x03030303UL)\
554
           + (d&0x03030303UL);\
555
        h1= ((c&0xFCFCFCFCUL)>>2)\
556
          + ((d&0xFCFCFCFCUL)>>2);\
557
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
558 59fe111e Michael Niedermayer
    }\
559
}\
560 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
561
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
562
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
563
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
564
}\
565
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
566
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
567
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
568
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
569
}\
570 59fe111e Michael Niedermayer
\
571 b3184779 Michael Niedermayer
static inline void OPNAME ## _pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
572 59fe111e Michael Niedermayer
{\
573
    int j;\
574
    for(j=0; j<2; j++){\
575
        int i;\
576
        const uint32_t a= LD32(pixels  );\
577
        const uint32_t b= LD32(pixels+1);\
578
        uint32_t l0=  (a&0x03030303UL)\
579
                    + (b&0x03030303UL)\
580
                    + 0x02020202UL;\
581
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
582
                   + ((b&0xFCFCFCFCUL)>>2);\
583
        uint32_t l1,h1;\
584
\
585
        pixels+=line_size;\
586
        for(i=0; i<h; i+=2){\
587
            uint32_t a= LD32(pixels  );\
588
            uint32_t b= LD32(pixels+1);\
589
            l1=  (a&0x03030303UL)\
590
               + (b&0x03030303UL);\
591
            h1= ((a&0xFCFCFCFCUL)>>2)\
592
              + ((b&0xFCFCFCFCUL)>>2);\
593
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
594
            pixels+=line_size;\
595
            block +=line_size;\
596
            a= LD32(pixels  );\
597
            b= LD32(pixels+1);\
598
            l0=  (a&0x03030303UL)\
599
               + (b&0x03030303UL)\
600
               + 0x02020202UL;\
601
            h0= ((a&0xFCFCFCFCUL)>>2)\
602
              + ((b&0xFCFCFCFCUL)>>2);\
603
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
604
            pixels+=line_size;\
605
            block +=line_size;\
606
        }\
607
        pixels+=4-line_size*(h+1);\
608
        block +=4-line_size*h;\
609
    }\
610
}\
611
\
612 b3184779 Michael Niedermayer
static inline void OPNAME ## _no_rnd_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
613 59fe111e Michael Niedermayer
{\
614
    int j;\
615
    for(j=0; j<2; j++){\
616
        int i;\
617
        const uint32_t a= LD32(pixels  );\
618
        const uint32_t b= LD32(pixels+1);\
619
        uint32_t l0=  (a&0x03030303UL)\
620
                    + (b&0x03030303UL)\
621
                    + 0x01010101UL;\
622
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
623
                   + ((b&0xFCFCFCFCUL)>>2);\
624
        uint32_t l1,h1;\
625
\
626
        pixels+=line_size;\
627
        for(i=0; i<h; i+=2){\
628
            uint32_t a= LD32(pixels  );\
629
            uint32_t b= LD32(pixels+1);\
630
            l1=  (a&0x03030303UL)\
631
               + (b&0x03030303UL);\
632
            h1= ((a&0xFCFCFCFCUL)>>2)\
633
              + ((b&0xFCFCFCFCUL)>>2);\
634
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
635
            pixels+=line_size;\
636
            block +=line_size;\
637
            a= LD32(pixels  );\
638
            b= LD32(pixels+1);\
639
            l0=  (a&0x03030303UL)\
640
               + (b&0x03030303UL)\
641
               + 0x01010101UL;\
642
            h0= ((a&0xFCFCFCFCUL)>>2)\
643
              + ((b&0xFCFCFCFCUL)>>2);\
644
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
645
            pixels+=line_size;\
646
            block +=line_size;\
647
        }\
648
        pixels+=4-line_size*(h+1);\
649
        block +=4-line_size*h;\
650
    }\
651
}\
652
\
653 b3184779 Michael Niedermayer
CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels8    , 8)\
654
CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels8_x2 , 8)\
655
CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels8_y2 , 8)\
656
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels8_xy2, 8)\
657
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16    , OPNAME ## _pixels8    , 8)\
658
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels8_x2 , 8)\
659
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels8_y2 , 8)\
660
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels8_xy2, 8)\
661
\
662
void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
663
    {\
664
        OPNAME ## _pixels16,\
665
        OPNAME ## _pixels16_x2,\
666
        OPNAME ## _pixels16_y2,\
667
        OPNAME ## _pixels16_xy2},\
668
    {\
669
        OPNAME ## _pixels8,\
670
        OPNAME ## _pixels8_x2,\
671
        OPNAME ## _pixels8_y2,\
672
        OPNAME ## _pixels8_xy2},\
673 59fe111e Michael Niedermayer
};\
674
\
675 b3184779 Michael Niedermayer
void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
676
    {\
677
        OPNAME ## _pixels16,\
678
        OPNAME ## _no_rnd_pixels16_x2,\
679
        OPNAME ## _no_rnd_pixels16_y2,\
680
        OPNAME ## _no_rnd_pixels16_xy2},\
681
    {\
682
        OPNAME ## _pixels8,\
683
        OPNAME ## _no_rnd_pixels8_x2,\
684
        OPNAME ## _no_rnd_pixels8_y2,\
685
        OPNAME ## _no_rnd_pixels8_xy2},\
686 59fe111e Michael Niedermayer
};
687 b3184779 Michael Niedermayer
688 59fe111e Michael Niedermayer
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
689
#endif
690
#define op_put(a, b) a = b
691
692
PIXOP2(avg, op_avg)
693
PIXOP2(put, op_put)
694
#undef op_avg
695
#undef op_put
696
697 57060b1e Fabrice Bellard
#if 0
698 59fe111e Michael Niedermayer
/* FIXME this stuff could be removed as it's not really used anymore */
699 de6d9b64 Fabrice Bellard
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR)
 *
 * Generates four static 8-pixel-wide routines plus a table of pointers to
 * them:
 *   OPNAME_pixels      OP(dst, src)
 *   OPNAME_pixels_x2   OP(dst, avg2(src, right neighbour))
 *   OPNAME_pixels_y2   OP(dst, avg2(src, pixel one line below))
 *   OPNAME_pixels_xy2  OP(dst, avg4(of the 2x2 neighbourhood))
 *   OPNAME_pixels_tab  { _pixels, _pixels_x2, _pixels_y2, _pixels_xy2 }
 *
 * BTYPE  element type of the destination block
 * OP     two-argument statement macro OP(dst_lvalue, value)
 * INCR   amount added to the destination pointer after each row; it is
 *        expanded inside the function body, so it may name the function's
 *        own line_size parameter
 *
 * avg2()/avg4() must be defined at the point of expansion.
 * The generated functions use do { } while (--h), so h must be >= 1.
 * The x2 and xy2 variants read 9 source bytes per row; the y2 and xy2
 * variants read one source row beyond the last output row.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
 \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    BTYPE *p = block; \
    const UINT8 *pix = pixels; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) { \
            OP(p[i], pix[i]); \
        } \
        pix += line_size; \
        p += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    BTYPE *p = block; \
    const UINT8 *pix = pixels; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) { \
            OP(p[i], avg2(pix[i], pix[i + 1])); \
        } \
        pix += line_size; \
        p += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    BTYPE *p = block; \
    const UINT8 *pix = pixels; \
    const UINT8 *pix1 = pixels + line_size; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) { \
            OP(p[i], avg2(pix[i], pix1[i])); \
        } \
        pix += line_size; \
        pix1 += line_size; \
        p += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    BTYPE *p = block; \
    const UINT8 *pix = pixels; \
    const UINT8 *pix1 = pixels + line_size; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) { \
            OP(p[i], avg4(pix[i], pix[i + 1], pix1[i], pix1[i + 1])); \
        } \
        pix += line_size; \
        pix1 += line_size; \
        p += INCR; \
    } while (--h); \
} \
 \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels, \
    OPNAME ## _pixels_x2, \
    OPNAME ## _pixels_y2, \
    OPNAME ## _pixels_xy2, \
};
797

798
/* rounding primitives */
799
#define avg2(a,b) ((a+b+1)>>1)
800
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
801

802
#define op_avg(a, b) a = avg2(a, b)
803
#define op_sub(a, b) a -= b
804 3aa102be Michael Niedermayer
#define op_put(a, b) a = b
805 de6d9b64 Fabrice Bellard

806
PIXOP(DCTELEM, sub, op_sub, 8)
807 3aa102be Michael Niedermayer
PIXOP(uint8_t, avg, op_avg, line_size)
808
PIXOP(uint8_t, put, op_put, line_size)
809 de6d9b64 Fabrice Bellard

810
/* not rounding primitives */
811
#undef avg2
812
#undef avg4
813
#define avg2(a,b) ((a+b)>>1)
814
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
815

816 3aa102be Michael Niedermayer
PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
817
PIXOP(uint8_t, put_no_rnd, op_put, line_size)
818 de6d9b64 Fabrice Bellard
/* motion estimation */
819

820
#undef avg2
821
#undef avg4
822 57060b1e Fabrice Bellard
#endif
823
824 de6d9b64 Fabrice Bellard
/*
 * Rounding averages of two and of four values (halves round up).
 * Every argument is parenthesised so that expressions containing operators
 * of lower precedence than '+' (e.g. avg2(x|y, z)) are averaged as written.
 * Arguments are evaluated exactly once.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
826
827 073b013d Michael Niedermayer
828 b3184779 Michael Niedermayer
/**
 * Bilinear interpolation of an 8-pixel-wide block with 1/16-pixel weights.
 *
 * Each output pixel is the weighted sum of the 2x2 source neighbourhood
 * starting at the same position, plus rounder, shifted right by 8.
 *
 * @param dst     destination; 8 bytes are written on each of h rows
 * @param src     source; 9 bytes are read on each of h+1 rows
 * @param stride  distance in bytes between rows, used for both dst and src
 * @param h       number of rows to produce
 * @param x16     horizontal weight of the right-hand neighbours, in 1/16 units
 * @param y16     vertical weight of the lower neighbours, in 1/16 units
 * @param rounder constant added before the final >>8
 */
static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
{
    /* weights of the four neighbours; they always add up to 16*16 = 256 */
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i, x;

    for(i=0; i<h; i++)
    {
        for(x=0; x<8; x++)
            dst[x]= (A*src[x] + B*src[x+1] + C*src[stride+x] + D*src[stride+x+1] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}
850
851 073b013d Michael Niedermayer
/**
 * Affine-warped bilinear fetch of an 8-pixel-wide block (gmc: presumably
 * "global motion compensation").
 *
 * For every output pixel a source position is tracked in fixed point:
 * (vx>>16, vy>>16) gives the position with `shift` fractional bits, whose low
 * `shift` bits are the interpolation weights and whose remaining bits are
 * the integer sample coordinates.  Positions outside the picture are clamped
 * to its border.
 *
 * @param dst      destination; 8 bytes are written on each of h rows
 * @param src      source picture, indexed from its top-left sample
 * @param stride   distance in bytes between rows, used for both dst and src
 * @param h        number of rows to produce
 * @param ox,oy    source position of the first output pixel
 * @param dxx,dyx  change of the source position per output column
 * @param dxy,dyy  change of the source position per output row
 * @param shift    number of fractional bits used for interpolation
 * @param r        constant added before the final >>(2*shift)
 * @param width    source picture width in samples
 * @param height   source picture height in samples
 */
static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* from here on width/height are the index of the last column/row, so
       "coordinate < width" guarantees that coordinate+1 is still inside */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the unsigned compare rejects negative coordinates as well */
            if((unsigned)src_x < (unsigned)width){
                if((unsigned)src_y < (unsigned)height){
                    /* fully inside: 2x2 bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* row out of range: horizontal interpolation on the
                       clamped row; *s keeps the scale of the inside case */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < (unsigned)height){
                    /* column out of range: vertical interpolation on the
                       clamped column */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both out of range: nearest border sample, no rounding */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
908
909 b3184779 Michael Niedermayer
/**
 * Copy a block of h rows of 17 bytes each.
 *
 * Every row is moved as four 32-bit words through LD32()/ST32() (macros
 * defined elsewhere in the project) followed by one single byte.
 *
 * @param dst        destination of the first row
 * @param src        source of the first row
 * @param dstStride  distance in bytes between destination rows
 * @param srcStride  distance in bytes between source rows
 * @param h          number of rows to copy
 */
static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        ST32(dst+8 , LD32(src+8 ));
        ST32(dst+12, LD32(src+12));
        dst[16]= src[16];   /* 17th byte of the row */
        dst+=dstStride;
        src+=srcStride;
    }
}
923
924 b3184779 Michael Niedermayer
/**
 * Copy a block of h rows of 9 bytes each.
 *
 * Every row is moved as two 32-bit words through LD32()/ST32() (macros
 * defined elsewhere in the project) followed by one single byte.
 *
 * @param dst        destination of the first row
 * @param src        source of the first row
 * @param dstStride  distance in bytes between destination rows
 * @param srcStride  distance in bytes between source rows
 * @param h          number of rows to copy
 */
static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
{
    int i;
    for(i=0; i<h; i++)
    {
        ST32(dst   , LD32(src   ));
        ST32(dst+4 , LD32(src+4 ));
        dst[8]= src[8];     /* 9th byte of the row */
        dst+=dstStride;
        src+=srcStride;
    }
}
936
937 b3184779 Michael Niedermayer
#define QPEL_MC(r, OPNAME, RND, OP) \
938
static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
939
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
940
    int i;\
941
    for(i=0; i<h; i++)\
942
    {\
943
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
944
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
945
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
946
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
947
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
948
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
949
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
950
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
951
        dst+=dstStride;\
952
        src+=srcStride;\
953
    }\
954 44eb4951 Michael Niedermayer
}\
955
\
956 b3184779 Michael Niedermayer
static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
957
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
958
    int i;\
959
    for(i=0; i<w; i++)\
960
    {\
961
        const int src0= src[0*srcStride];\
962
        const int src1= src[1*srcStride];\
963
        const int src2= src[2*srcStride];\
964
        const int src3= src[3*srcStride];\
965
        const int src4= src[4*srcStride];\
966
        const int src5= src[5*srcStride];\
967
        const int src6= src[6*srcStride];\
968
        const int src7= src[7*srcStride];\
969
        const int src8= src[8*srcStride];\
970
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
971
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
972
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
973
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
974
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
975
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
976
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
977
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
978
        dst++;\
979
        src++;\
980
    }\
981
}\
982
\
983
static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
984
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
985
    int i;\
986
    for(i=0; i<h; i++)\
987
    {\
988
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
989
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
990
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
991
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
992
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
993
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
994
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
995
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
996
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
997
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
998
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
999
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1000
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1001
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1002
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1003
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1004
        dst+=dstStride;\
1005
        src+=srcStride;\
1006
    }\
1007
}\
1008
\
1009
static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
1010
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
1011
    int i;\
1012
    for(i=0; i<w; i++)\
1013
    {\
1014
        const int src0= src[0*srcStride];\
1015
        const int src1= src[1*srcStride];\
1016
        const int src2= src[2*srcStride];\
1017
        const int src3= src[3*srcStride];\
1018
        const int src4= src[4*srcStride];\
1019
        const int src5= src[5*srcStride];\
1020
        const int src6= src[6*srcStride];\
1021
        const int src7= src[7*srcStride];\
1022
        const int src8= src[8*srcStride];\
1023
        const int src9= src[9*srcStride];\
1024
        const int src10= src[10*srcStride];\
1025
        const int src11= src[11*srcStride];\
1026
        const int src12= src[12*srcStride];\
1027
        const int src13= src[13*srcStride];\
1028
        const int src14= src[14*srcStride];\
1029
        const int src15= src[15*srcStride];\
1030
        const int src16= src[16*srcStride];\
1031
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1032
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1033
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1034
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1035
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1036
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1037
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1038
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1039
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1040
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1041
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1042
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1043
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1044
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1045
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1046
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1047
        dst++;\
1048
        src++;\
1049
    }\
1050
}\
1051
\
1052
static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
1053
    OPNAME ## pixels8(dst, src, stride, 8);\
1054
}\
1055
\
1056
static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1057 44eb4951 Michael Niedermayer
    UINT8 half[64];\
1058 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1059
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1060 44eb4951 Michael Niedermayer
}\
1061
\
1062 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1063
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1064 44eb4951 Michael Niedermayer
}\
1065
\
1066 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1067 44eb4951 Michael Niedermayer
    UINT8 half[64];\
1068 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1069
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1070 44eb4951 Michael Niedermayer
}\
1071
\
1072 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1073
    UINT8 full[16*9];\
1074 44eb4951 Michael Niedermayer
    UINT8 half[64];\
1075 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1076
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
1077
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1078 44eb4951 Michael Niedermayer
}\
1079
\
1080 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1081
    UINT8 full[16*9];\
1082
    copy_block9(full, src, 16, stride, 9);\
1083
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 8);\
1084 44eb4951 Michael Niedermayer
}\
1085
\
1086 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1087
    UINT8 full[16*9];\
1088 44eb4951 Michael Niedermayer
    UINT8 half[64];\
1089 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1090
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
1091
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1092 44eb4951 Michael Niedermayer
}\
1093 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1094
    UINT8 full[16*9];\
1095 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1096 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1097 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1098 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1099
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1100
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1101
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1102
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1103 44eb4951 Michael Niedermayer
}\
1104 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1105
    UINT8 full[16*9];\
1106 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1107 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1108 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1109 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1110
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1111
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1112
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1113
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1114 44eb4951 Michael Niedermayer
}\
1115 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1116
    UINT8 full[16*9];\
1117 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1118 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1119 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1120 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1121
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1122
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1123
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1124
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1125 44eb4951 Michael Niedermayer
}\
1126 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1127
    UINT8 full[16*9];\
1128 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1129 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1130 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1131 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1132
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1133
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1134
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1135
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1136 44eb4951 Michael Niedermayer
}\
1137 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1138 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1139
    UINT8 halfHV[64];\
1140 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1141
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1142
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1143 44eb4951 Michael Niedermayer
}\
1144 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1145 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1146
    UINT8 halfHV[64];\
1147 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1148
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1149
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1150 44eb4951 Michael Niedermayer
}\
1151 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1152
    UINT8 full[16*9];\
1153 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1154 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1155 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1156 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1157
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1158
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
1159
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1160
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1161 44eb4951 Michael Niedermayer
}\
1162 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1163
    UINT8 full[16*9];\
1164 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1165 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
1166 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
1167 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1168
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1169
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
1170
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
1171
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1172 44eb4951 Michael Niedermayer
}\
1173 b3184779 Michael Niedermayer
static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1174 44eb4951 Michael Niedermayer
    UINT8 halfH[72];\
1175 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1176
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\
1177
}\
1178
static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
1179
    OPNAME ## pixels16(dst, src, stride, 16);\
1180
}\
1181
\
1182
static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
1183
    UINT8 half[256];\
1184
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1185
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1186
}\
1187
\
1188
static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
1189
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1190 44eb4951 Michael Niedermayer
}\
1191 b3184779 Michael Niedermayer
\
1192
static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
1193
    UINT8 half[256];\
1194
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1195
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1196
}\
1197
\
1198
static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
1199
    UINT8 full[24*17];\
1200
    UINT8 half[256];\
1201
    copy_block17(full, src, 24, stride, 17);\
1202
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
1203
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1204
}\
1205
\
1206
static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
1207
    UINT8 full[24*17];\
1208
    copy_block17(full, src, 24, stride, 17);\
1209
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24, 16);\
1210
}\
1211
\
1212
static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
1213
    UINT8 full[24*17];\
1214
    UINT8 half[256];\
1215
    copy_block17(full, src, 24, stride, 17);\
1216
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
1217
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1218
}\
1219
static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
1220
    UINT8 full[24*17];\
1221
    UINT8 halfH[272];\
1222
    UINT8 halfV[256];\
1223
    UINT8 halfHV[256];\
1224
    copy_block17(full, src, 24, stride, 17);\
1225
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1226
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1227
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1228
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1229
}\
1230
static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
1231
    UINT8 full[24*17];\
1232
    UINT8 halfH[272];\
1233
    UINT8 halfV[256];\
1234
    UINT8 halfHV[256];\
1235
    copy_block17(full, src, 24, stride, 17);\
1236
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1237
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1238
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1239
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1240
}\
1241
static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
1242
    UINT8 full[24*17];\
1243
    UINT8 halfH[272];\
1244
    UINT8 halfV[256];\
1245
    UINT8 halfHV[256];\
1246
    copy_block17(full, src, 24, stride, 17);\
1247
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1248
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1249
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1250
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1251
}\
1252
static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
1253
    UINT8 full[24*17];\
1254
    UINT8 halfH[272];\
1255
    UINT8 halfV[256];\
1256
    UINT8 halfHV[256];\
1257
    copy_block17(full, src, 24, stride, 17);\
1258
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1259
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1260
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1261
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1262
}\
1263
static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
1264
    UINT8 halfH[272];\
1265
    UINT8 halfHV[256];\
1266
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1267
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1268
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1269
}\
1270
static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
1271
    UINT8 halfH[272];\
1272
    UINT8 halfHV[256];\
1273
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1274
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1275
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1276
}\
1277
static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
1278
    UINT8 full[24*17];\
1279
    UINT8 halfH[272];\
1280
    UINT8 halfV[256];\
1281
    UINT8 halfHV[256];\
1282
    copy_block17(full, src, 24, stride, 17);\
1283
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1284
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
1285
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1286
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1287
}\
1288
static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
1289
    UINT8 full[24*17];\
1290
    UINT8 halfH[272];\
1291
    UINT8 halfV[256];\
1292
    UINT8 halfHV[256];\
1293
    copy_block17(full, src, 24, stride, 17);\
1294
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1295
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
1296
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
1297
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1298
}\
1299
static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
1300
    UINT8 halfH[272];\
1301
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1302
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16, 16);\
1303
}\
1304
qpel_mc_func OPNAME ## qpel_pixels_tab[2][16]={ \
1305
  {\
1306
    OPNAME ## qpel16_mc00_c,                                                                   \
1307
    OPNAME ## qpel16_mc10_c,                                                                   \
1308
    OPNAME ## qpel16_mc20_c,                                                                   \
1309
    OPNAME ## qpel16_mc30_c,                                                                   \
1310
    OPNAME ## qpel16_mc01_c,                                                                   \
1311
    OPNAME ## qpel16_mc11_c,                                                                   \
1312
    OPNAME ## qpel16_mc21_c,                                                                   \
1313
    OPNAME ## qpel16_mc31_c,                                                                   \
1314
    OPNAME ## qpel16_mc02_c,                                                                   \
1315
    OPNAME ## qpel16_mc12_c,                                                                   \
1316
    OPNAME ## qpel16_mc22_c,                                                                   \
1317
    OPNAME ## qpel16_mc32_c,                                                                   \
1318
    OPNAME ## qpel16_mc03_c,                                                                   \
1319
    OPNAME ## qpel16_mc13_c,                                                                   \
1320
    OPNAME ## qpel16_mc23_c,                                                                   \
1321
    OPNAME ## qpel16_mc33_c,                                                                   \
1322
  },{\
1323
    OPNAME ## qpel8_mc00_c,                                                                   \
1324
    OPNAME ## qpel8_mc10_c,                                                                   \
1325
    OPNAME ## qpel8_mc20_c,                                                                   \
1326
    OPNAME ## qpel8_mc30_c,                                                                   \
1327
    OPNAME ## qpel8_mc01_c,                                                                   \
1328
    OPNAME ## qpel8_mc11_c,                                                                   \
1329
    OPNAME ## qpel8_mc21_c,                                                                   \
1330
    OPNAME ## qpel8_mc31_c,                                                                   \
1331
    OPNAME ## qpel8_mc02_c,                                                                   \
1332
    OPNAME ## qpel8_mc12_c,                                                                   \
1333
    OPNAME ## qpel8_mc22_c,                                                                   \
1334
    OPNAME ## qpel8_mc32_c,                                                                   \
1335
    OPNAME ## qpel8_mc03_c,                                                                   \
1336
    OPNAME ## qpel8_mc13_c,                                                                   \
1337
    OPNAME ## qpel8_mc23_c,                                                                   \
1338
    OPNAME ## qpel8_mc33_c,                                                                   \
1339
  }\
1340 44eb4951 Michael Niedermayer
};
1341
1342 b3184779 Michael Niedermayer
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1343
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1344
#define op_put(a, b) a = cm[((b) + 16)>>5]
1345
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1346
1347
QPEL_MC(0, put_       , _       , op_put)
1348
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1349
QPEL_MC(0, avg_       , _       , op_avg)
1350
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
1351
#undef op_avg
1352
#undef op_avg_no_rnd
1353
#undef op_put
1354
#undef op_put_no_rnd
1355 44eb4951 Michael Niedermayer
1356 ba6802de Michael Niedermayer
/*
 * Sum of absolute differences (SAD) between two 16x16 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  distance, in elements, between the starts of two consecutive
 *             rows; the same value is used for both blocks.
 * Returns the accumulated |pix1 - pix2| over all 256 positions.
 */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1383
1384 ba6802de Michael Niedermayer
/*
 * SAD between a 16x16 block and a horizontally interpolated reference.
 *
 * Every pix1 sample is compared with avg2() of the pix2 sample at the same
 * position and its right-hand neighbour, so 17 columns of pix2 are read on
 * each of the 16 rows.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1411
1412 ba6802de Michael Niedermayer
/*
 * SAD between a 16x16 block and a vertically interpolated reference.
 *
 * Every pix1 sample is compared with avg2() of the pix2 sample at the same
 * position and the sample one row below it, so 17 rows of pix2 are read.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* row underneath the current pix2 row */
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1441
1442 ba6802de Michael Niedermayer
/*
 * SAD between a 16x16 block and a diagonally interpolated reference.
 *
 * Every pix1 sample is compared with avg4() of the 2x2 group of pix2
 * samples whose top-left corner sits at the same position, so a 17x17
 * area of pix2 is read.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* row underneath the current pix2 row */
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1471
1472 ba6802de Michael Niedermayer
/*
 * Sum of absolute differences (SAD) between two 8x8 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row pitch, identical for both blocks.
 * Returns the accumulated |pix1 - pix2| over all 64 positions.
 */
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1491
1492
/*
 * SAD between an 8x8 block and a horizontally interpolated reference.
 *
 * Every pix1 sample is compared with avg2() of the matching pix2 sample and
 * its right-hand neighbour; 9 columns of pix2 are read on each of the 8 rows.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1511
1512
/*
 * SAD between an 8x8 block and a vertically interpolated reference.
 *
 * Every pix1 sample is compared with avg2() of the matching pix2 sample and
 * the sample one row below it; 9 rows of pix2 are read.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* row underneath the current pix2 row */
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1533
1534
/*
 * SAD between an 8x8 block and a diagonally interpolated reference.
 *
 * Every pix1 sample is compared with avg4() of the 2x2 group of pix2 samples
 * whose top-left corner sits at the same position; a 9x9 area of pix2 is
 * read.
 *
 * line_size is the row pitch shared by both buffers.
 */
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* row underneath the current pix2 row */
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1555
1556 7801d21d Michael Niedermayer
/*
 * Moves the coefficients of a 64-entry block to permuted positions, in place.
 *
 * block:       coefficients to rearrange.
 * permutation: maps an original index to its destination index.
 * scantable:   scan order; only the positions scantable[0..last] are touched.
 * last:        index (in scan order) of the last entry to process; nothing
 *              is done when it is <= 0.
 */
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
{
    INT16 saved[64];
    int n;

    if (last <= 0)
        return;

    /* FIXME its ok but not clean and might fail for some perms */
    if (permutation[1] == 1)
        return;

    /* Pass 1: stash every affected coefficient and clear its old slot, so a
       destination written in pass 2 is never wiped afterwards. */
    for (n = 0; n <= last; n++) {
        const int pos = scantable[n];

        saved[pos] = block[pos];
        block[pos] = 0;
    }

    /* Pass 2: drop each stashed coefficient into its permuted slot. */
    for (n = 0; n <= last; n++) {
        const int pos = scantable[n];

        block[permutation[pos]] = saved[pos];
    }
}
1576 e0eac44e Fabrice Bellard
1577 649c00c9 Michael Niedermayer
/* Zero-fills 6 consecutive blocks of 64 coefficients, starting at blocks. */
void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
1581
1582 e0eac44e Fabrice Bellard
void dsputil_init(void)
1583
{
1584 d2975f8d Michael Niedermayer
    int i;
1585 e0eac44e Fabrice Bellard
1586 de6d9b64 Fabrice Bellard
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1587
    for(i=0;i<MAX_NEG_CROP;i++) {
1588
        cropTbl[i] = 0;
1589
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
1590
    }
1591
1592
    for(i=0;i<512;i++) {
1593
        squareTbl[i] = (i - 256) * (i - 256);
1594
    }
1595
1596
    get_pixels = get_pixels_c;
1597 9dbcbd92 Michael Niedermayer
    diff_pixels = diff_pixels_c;
1598 de6d9b64 Fabrice Bellard
    put_pixels_clamped = put_pixels_clamped_c;
1599
    add_pixels_clamped = add_pixels_clamped_c;
1600 073b013d Michael Niedermayer
    ff_gmc1= gmc1_c;
1601
    ff_gmc= gmc_c;
1602 649c00c9 Michael Niedermayer
    clear_blocks= clear_blocks_c;
1603 3aa102be Michael Niedermayer
    pix_sum= pix_sum_c;
1604
    pix_norm1= pix_norm1_c;
1605 de6d9b64 Fabrice Bellard
1606 ba6802de Michael Niedermayer
    pix_abs16x16     = pix_abs16x16_c;
1607
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
1608
    pix_abs16x16_y2  = pix_abs16x16_y2_c;
1609 de6d9b64 Fabrice Bellard
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1610 ba6802de Michael Niedermayer
    pix_abs8x8     = pix_abs8x8_c;
1611
    pix_abs8x8_x2  = pix_abs8x8_x2_c;
1612
    pix_abs8x8_y2  = pix_abs8x8_y2_c;
1613
    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1614 de6d9b64 Fabrice Bellard
1615 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
1616 de6d9b64 Fabrice Bellard
    dsputil_init_mmx();
1617
#endif
1618 3d03c0a2 Fabrice Bellard
#ifdef ARCH_ARMV4L
1619
    dsputil_init_armv4l();
1620
#endif
1621 c34270f5 Fabrice Bellard
#ifdef HAVE_MLIB
1622
    dsputil_init_mlib();
1623
#endif
1624 1e98dffb Nick Kurshev
#ifdef ARCH_ALPHA
1625
    dsputil_init_alpha();
1626
#endif
1627 59925ef2 Brian Foley
#ifdef ARCH_POWERPC
1628 ab6c65f6 Brian Foley
    dsputil_init_ppc();
1629 a43bd1d7 Heliodoro Tammaro
#endif
1630 d46aba26 Leon van Stuivenberg
#ifdef HAVE_MMI
1631
    dsputil_init_mmi();
1632
#endif
1633 c34270f5 Fabrice Bellard
1634 2ad1516a Michael Niedermayer
    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
1635 de6d9b64 Fabrice Bellard
}
1636 43f1708f Juanjo
1637 57060b1e Fabrice Bellard
/* remove any non bit exact operation (testing purpose) */
1638
void avcodec_set_bit_exact(void)
1639
{
1640 5596c60c Michael Niedermayer
    ff_bit_exact=1;
1641 57060b1e Fabrice Bellard
#ifdef HAVE_MMX
1642
    dsputil_set_bit_exact_mmx();
1643
#endif
1644
}
1645
1646 43f1708f Juanjo
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
1647
              int orig_linesize[3], int coded_linesize,
1648
              AVCodecContext *avctx)
1649
{
1650
    int quad, diff, x, y;
1651
    UINT8 *orig, *coded;
1652
    UINT32 *sq = squareTbl + 256;
1653
    
1654
    quad = 0;
1655
    diff = 0;
1656
    
1657
    /* Luminance */
1658
    orig = orig_image[0];
1659
    coded = coded_image[0];
1660
    
1661
    for (y=0;y<avctx->height;y++) {
1662
        for (x=0;x<avctx->width;x++) {
1663
            diff = *(orig + x) - *(coded + x);
1664
            quad += sq[diff];
1665
        }
1666
        orig += orig_linesize[0];
1667
        coded += coded_linesize;
1668
    }
1669
   
1670
    avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1671
    
1672
    if (avctx->psnr_y) {
1673
        avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1674
        avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); 
1675
    } else
1676
        avctx->psnr_y = 99.99;
1677
}