Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ be7109c1

History | View | Annotate | Download (53.2 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 ff4ec49e Fabrice Bellard
 * Copyright (c) 2000, 2001 Fabrice Bellard.
4 de6d9b64 Fabrice Bellard
 *
5 ff4ec49e Fabrice Bellard
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9 de6d9b64 Fabrice Bellard
 *
10 ff4ec49e Fabrice Bellard
 * This library is distributed in the hope that it will be useful,
11 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14 de6d9b64 Fabrice Bellard
 *
15 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 7ff037e9 Michael Niedermayer
 *
19 59fe111e Michael Niedermayer
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
20 de6d9b64 Fabrice Bellard
 */
21
#include "avcodec.h"
22
#include "dsputil.h"
23 d962f6fd Arpi
#include "simple_idct.h"
24 de6d9b64 Fabrice Bellard
25 4af7bcc1 Arpi
void (*ff_idct)(DCTELEM *block);
26 8ee14970 Fabrice Bellard
void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block);
27
void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block);
28 de6d9b64 Fabrice Bellard
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
29 9dbcbd92 Michael Niedermayer
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
30 de6d9b64 Fabrice Bellard
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32 44eb4951 Michael Niedermayer
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
33 649c00c9 Michael Niedermayer
void (*clear_blocks)(DCTELEM *blocks);
34 3aa102be Michael Niedermayer
int (*pix_sum)(UINT8 * pix, int line_size);
35
int (*pix_norm1)(UINT8 * pix, int line_size);
36 de6d9b64 Fabrice Bellard
37
op_pixels_abs_func pix_abs16x16;
38
op_pixels_abs_func pix_abs16x16_x2;
39
op_pixels_abs_func pix_abs16x16_y2;
40
op_pixels_abs_func pix_abs16x16_xy2;
41
42 ba6802de Michael Niedermayer
op_pixels_abs_func pix_abs8x8;
43
op_pixels_abs_func pix_abs8x8_x2;
44
op_pixels_abs_func pix_abs8x8_y2;
45
op_pixels_abs_func pix_abs8x8_xy2;
46
47 0cfa9713 Fabrice Bellard
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
48 de6d9b64 Fabrice Bellard
UINT32 squareTbl[512];
49
50 adc09b2e Marko Kreen
extern INT16 ff_mpeg1_default_intra_matrix[64];
51
extern INT16 ff_mpeg1_default_non_intra_matrix[64];
52 f0ca2e1b Zdenek Kabelac
extern INT16 ff_mpeg4_default_intra_matrix[64];
53
extern INT16 ff_mpeg4_default_non_intra_matrix[64];
54 e0eac44e Fabrice Bellard
55
/*
 * Zigzag scan order of an 8x8 block: entry i holds the raster index
 * (row * 8 + column) of the i-th coefficient in scan order.
 */
UINT8 zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
65
66 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
67
UINT16 __align8 inv_zigzag_direct16[64];
68
69
/* not permutated zigzag_direct for MMX quantizer */
70
UINT8 zigzag_direct_noperm[64];
71
72 e0eac44e Fabrice Bellard
/*
 * Alternate horizontal scan: entry i is the raster index (row * 8 + column)
 * of the i-th coefficient; the order favours walking along rows.
 */
UINT8 ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
82
83
/*
 * Alternate vertical scan: entry i is the raster index (row * 8 + column)
 * of the i-th coefficient; the order favours walking down columns.
 */
UINT8 ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
93
94 e4986da9 Juanjo
#ifdef SIMPLE_IDCT

/*
 * Coefficient order expected on input by simple_idct_mmx.
 * Only compiled when SIMPLE_IDCT is defined.
 */
static UINT8 simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

#endif
108 5a240838 Michael Niedermayer
109 2f349de2 Michael Niedermayer
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
110
UINT32 inverse[256]={
111
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
112
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
113
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
114
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
115
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
116
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
117
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
118
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
119
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
120
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
121
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
122
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
123
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
124
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
125
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
126
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
127
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
128
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
129
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
130
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
131
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
132
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
133
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
134
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
135
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
136
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
137
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
138
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
139
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
140
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
141
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
142
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
143
};
144
145 badaf88e Michael Niedermayer
/* used to skip zeros at the end */
146
UINT8 zigzag_end[64];
147
148 5a240838 Michael Niedermayer
UINT8 permutation[64];
149
//UINT8 invPermutation[64];
150
151 20695ec9 Falk Hüffner
static void build_zigzag_end(void)
152 badaf88e Michael Niedermayer
{
153
    int lastIndex;
154
    int lastIndexAfterPerm=0;
155
    for(lastIndex=0; lastIndex<64; lastIndex++)
156
    {
157
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
158
            lastIndexAfterPerm= zigzag_direct[lastIndex];
159
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
160
    }
161
}
162
163 3aa102be Michael Niedermayer
/**
 * Sum all samples of a 16x16 block.
 *
 * @param pix        top-left sample of the block
 * @param line_size  distance in bytes between the starts of two rows
 * @return the sum of the 256 samples
 */
int pix_sum_c(UINT8 * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
184
185
/**
 * Accumulate squareTbl[256 + p] over every sample p of a 16x16 block.
 * squareTbl is filled in elsewhere; presumably entry 256 + p holds p * p,
 * which would make the result the sum of squared samples -- confirm
 * against the table initialisation.
 *
 * @param pix        top-left sample of the block
 * @param line_size  distance in bytes between the starts of two rows
 * @return the accumulated table values
 */
int pix_norm1_c(UINT8 * pix, int line_size)
{
    UINT32 *square = squareTbl + 256;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += square[pix[col]];
        pix += line_size;
    }
    return total;
}
207
208
209 c13e1abd Falk Hüffner
/**
 * Copy an 8x8 area of samples into a block of 64 contiguous coefficients.
 *
 * @param block      destination, written row by row (8 entries per row)
 * @param pixels     top-left sample of the source area
 * @param line_size  distance in bytes between the starts of two source rows
 */
void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block  += 8;
    }
}
227
228 c13e1abd Falk Hüffner
/**
 * Store the sample-wise difference s1 - s2 of two 8x8 areas into a block
 * of 64 contiguous coefficients.
 *
 * @param block   destination, written row by row (8 entries per row)
 * @param s1      top-left sample of the minuend area
 * @param s2      top-left sample of the subtrahend area
 * @param stride  distance in bytes between two rows, used for both sources
 */
void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
                   int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
247
248
249 c13e1abd Falk Hüffner
/**
 * Write an 8x8 block of coefficients to the picture, mapping every value
 * through cropTbl (offset by MAX_NEG_CROP so that negative coefficients
 * index valid entries).  The table is filled in elsewhere; presumably it
 * clamps to the 0..255 sample range -- confirm.
 *
 * @param block      64 contiguous source coefficients (8 per row)
 * @param pixels     top-left sample of the destination area
 * @param line_size  distance in bytes between two destination rows
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                          int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    /* write the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[block[col]];
        pixels += line_size;
        block  += 8;
    }
}
270
271 c13e1abd Falk Hüffner
/**
 * Add an 8x8 block of coefficients to the samples already in the picture,
 * mapping every sum through cropTbl (offset by MAX_NEG_CROP so that
 * negative sums index valid entries).  The table is filled in elsewhere;
 * presumably it clamps to the 0..255 sample range -- confirm.
 *
 * @param block      64 contiguous coefficients to add (8 per row)
 * @param pixels     top-left sample of the area updated in place
 * @param line_size  distance in bytes between two picture rows
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                          int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    /* read, add and write back the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[pixels[col] + block[col]];
        pixels += line_size;
        block  += 8;
    }
}
291 59fe111e Michael Niedermayer
/*
 * PIXOP2(OPNAME, OP) generates the 8-byte-wide, h-row block routines
 *
 *   OPNAME_pixels             plain copy of the source row
 *   OPNAME_pixels_x2          average of each byte and its right neighbour
 *   OPNAME_pixels_y2          average of each byte and the byte one row below
 *   OPNAME_pixels_xy2         average of the 2x2 neighbourhood
 *   OPNAME_no_rnd_pixels_*    same as above with the smaller rounding term
 *
 * plus the tables OPNAME_pixels_tab[4] and OPNAME_no_rnd_pixels_tab[4],
 * indexed 0 = copy, 1 = x2, 2 = y2, 3 = xy2.  OP(dst, value) decides how
 * the computed word is combined with the destination.
 *
 * Several bytes are processed per machine word:
 *   (a|b) - (((a^b)&0xFE..)>>1)   per-byte (a + b + 1) >> 1
 *   (a&b) + (((a^b)&0xFE..)>>1)   per-byte (a + b) >> 1
 * The xy2 routines split each byte into its low 2 bits and high 6 bits so
 * that four bytes can be summed without carrying into the next byte; the
 * rounding term added to the low part is 2 (rounding) or 1 (no_rnd).
 * They handle two rows per loop iteration, so h is expected to be even.
 *
 * LD32/LD64 are defined elsewhere (presumably word loads from a byte
 * pointer -- confirm).  `block` is accessed through a word pointer cast.
 */
#if 0 /* 64 bit variant (disabled) */

#define PIXOP2(OPNAME, OP) \
void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        const uint64_t cur= LD64(pixels  );\
        const uint64_t nxt= LD64(pixels+1);\
        OP(*((uint64_t*)block), (cur|nxt) - (((cur^nxt)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        const uint64_t cur= LD64(pixels          );\
        const uint64_t nxt= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (cur|nxt) - (((cur^nxt)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int row;\
        const uint64_t top_l= LD64(pixels  );\
        const uint64_t top_r= LD64(pixels+1);\
        uint64_t lo_a=  (top_l&0x0303030303030303ULL)\
                      + (top_r&0x0303030303030303ULL)\
                      + 0x0202020202020202ULL;\
        uint64_t hi_a= ((top_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                     + ((top_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t lo_b,hi_b;\
\
        pixels+=line_size;\
        for(row=0; row<h; row+=2){\
            uint64_t cur_l= LD64(pixels  );\
            uint64_t cur_r= LD64(pixels+1);\
            lo_b=  (cur_l&0x0303030303030303ULL)\
                 + (cur_r&0x0303030303030303ULL);\
            hi_b= ((cur_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                + ((cur_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            cur_l= LD64(pixels  );\
            cur_r= LD64(pixels+1);\
            lo_a=  (cur_l&0x0303030303030303ULL)\
                 + (cur_r&0x0303030303030303ULL)\
                 + 0x0202020202020202ULL;\
            hi_a= ((cur_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                + ((cur_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        const uint64_t cur= LD64(pixels  );\
        const uint64_t nxt= LD64(pixels+1);\
        OP(*((uint64_t*)block), (cur&nxt) + (((cur^nxt)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        const uint64_t cur= LD64(pixels          );\
        const uint64_t nxt= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (cur&nxt) + (((cur^nxt)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int row;\
        const uint64_t top_l= LD64(pixels  );\
        const uint64_t top_r= LD64(pixels+1);\
        uint64_t lo_a=  (top_l&0x0303030303030303ULL)\
                      + (top_r&0x0303030303030303ULL)\
                      + 0x0101010101010101ULL;\
        uint64_t hi_a= ((top_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                     + ((top_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t lo_b,hi_b;\
\
        pixels+=line_size;\
        for(row=0; row<h; row+=2){\
            uint64_t cur_l= LD64(pixels  );\
            uint64_t cur_r= LD64(pixels+1);\
            lo_b=  (cur_l&0x0303030303030303ULL)\
                 + (cur_r&0x0303030303030303ULL);\
            hi_b= ((cur_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                + ((cur_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            cur_l= LD64(pixels  );\
            cur_r= LD64(pixels+1);\
            lo_a=  (cur_l&0x0303030303030303ULL)\
                 + (cur_r&0x0303030303030303ULL)\
                 + 0x0101010101010101ULL;\
            hi_a= ((cur_l&0xFCFCFCFCFCFCFCFCULL)>>2)\
                + ((cur_r&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _pixels_x2,\
    OPNAME ## _pixels_y2,\
    OPNAME ## _pixels_xy2,\
};\
\
void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _no_rnd_pixels_x2,\
    OPNAME ## _no_rnd_pixels_y2,\
    OPNAME ## _no_rnd_pixels_xy2,\
};

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )

#else /* 32 bit variant (active): every 8-byte row is handled as two 4-byte words */

#define PIXOP2(OPNAME, OP) \
void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        int off;\
        for(off=0; off<8; off+=4){\
            OP(*((uint32_t*)(block+off)), LD32(pixels+off));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        int off;\
        for(off=0; off<8; off+=4){\
            const uint32_t cur= LD32(pixels+off  );\
            const uint32_t nxt= LD32(pixels+off+1);\
            OP(*((uint32_t*)(block+off)), (cur|nxt) - (((cur^nxt)&0xFEFEFEFEUL)>>1));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        int off;\
        for(off=0; off<8; off+=4){\
            const uint32_t cur= LD32(pixels+off          );\
            const uint32_t nxt= LD32(pixels+off+line_size);\
            OP(*((uint32_t*)(block+off)), (cur|nxt) - (((cur^nxt)&0xFEFEFEFEUL)>>1));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int col;\
    for(col=0; col<2; col++){\
        int row;\
        const uint32_t top_l= LD32(pixels  );\
        const uint32_t top_r= LD32(pixels+1);\
        uint32_t lo_a=  (top_l&0x03030303UL)\
                      + (top_r&0x03030303UL)\
                      + 0x02020202UL;\
        uint32_t hi_a= ((top_l&0xFCFCFCFCUL)>>2)\
                     + ((top_r&0xFCFCFCFCUL)>>2);\
        uint32_t lo_b,hi_b;\
\
        pixels+=line_size;\
        for(row=0; row<h; row+=2){\
            uint32_t cur_l= LD32(pixels  );\
            uint32_t cur_r= LD32(pixels+1);\
            lo_b=  (cur_l&0x03030303UL)\
                 + (cur_r&0x03030303UL);\
            hi_b= ((cur_l&0xFCFCFCFCUL)>>2)\
                + ((cur_r&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            cur_l= LD32(pixels  );\
            cur_r= LD32(pixels+1);\
            lo_a=  (cur_l&0x03030303UL)\
                 + (cur_r&0x03030303UL)\
                 + 0x02020202UL;\
            hi_a= ((cur_l&0xFCFCFCFCUL)>>2)\
                + ((cur_r&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        /* rewind to the first row and step to the next 4-byte column */\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        int off;\
        for(off=0; off<8; off+=4){\
            const uint32_t cur= LD32(pixels+off  );\
            const uint32_t nxt= LD32(pixels+off+1);\
            OP(*((uint32_t*)(block+off)), (cur&nxt) + (((cur^nxt)&0xFEFEFEFEUL)>>1));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int row;\
    for(row=0; row<h; row++){\
        int off;\
        for(off=0; off<8; off+=4){\
            const uint32_t cur= LD32(pixels+off          );\
            const uint32_t nxt= LD32(pixels+off+line_size);\
            OP(*((uint32_t*)(block+off)), (cur&nxt) + (((cur^nxt)&0xFEFEFEFEUL)>>1));\
        }\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int col;\
    for(col=0; col<2; col++){\
        int row;\
        const uint32_t top_l= LD32(pixels  );\
        const uint32_t top_r= LD32(pixels+1);\
        uint32_t lo_a=  (top_l&0x03030303UL)\
                      + (top_r&0x03030303UL)\
                      + 0x01010101UL;\
        uint32_t hi_a= ((top_l&0xFCFCFCFCUL)>>2)\
                     + ((top_r&0xFCFCFCFCUL)>>2);\
        uint32_t lo_b,hi_b;\
\
        pixels+=line_size;\
        for(row=0; row<h; row+=2){\
            uint32_t cur_l= LD32(pixels  );\
            uint32_t cur_r= LD32(pixels+1);\
            lo_b=  (cur_l&0x03030303UL)\
                 + (cur_r&0x03030303UL);\
            hi_b= ((cur_l&0xFCFCFCFCUL)>>2)\
                + ((cur_r&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            cur_l= LD32(pixels  );\
            cur_r= LD32(pixels+1);\
            lo_a=  (cur_l&0x03030303UL)\
                 + (cur_r&0x03030303UL)\
                 + 0x01010101UL;\
            hi_a= ((cur_l&0xFCFCFCFCUL)>>2)\
                + ((cur_r&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), hi_a+hi_b+(((lo_a+lo_b)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        /* rewind to the first row and step to the next 4-byte column */\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
void (*OPNAME ## _pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _pixels_x2,\
    OPNAME ## _pixels_y2,\
    OPNAME ## _pixels_xy2,\
};\
\
void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    OPNAME ## _pixels,\
    OPNAME ## _no_rnd_pixels_x2,\
    OPNAME ## _no_rnd_pixels_y2,\
    OPNAME ## _no_rnd_pixels_xy2,\
};
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
#endif
618
#define op_put(a, b) a = b
619
620
PIXOP2(avg, op_avg)
621
PIXOP2(put, op_put)
622
#undef op_avg
623
#undef op_put
624
625 57060b1e Fabrice Bellard
#if 0
626 59fe111e Michael Niedermayer
/* FIXME this stuff could be removed as it's not really used anymore */
627 de6d9b64 Fabrice Bellard
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                                   \
628
                                                                                         \
629
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
630
{                                                                                        \
631
    BTYPE *p;                                                                            \
632
    const UINT8 *pix;                                                                    \
633
                                                                                         \
634
    p = block;                                                                           \
635
    pix = pixels;                                                                        \
636
    do {                                                                                 \
637
        OP(p[0], pix[0]);                                                                  \
638
        OP(p[1], pix[1]);                                                                  \
639
        OP(p[2], pix[2]);                                                                  \
640
        OP(p[3], pix[3]);                                                                  \
641
        OP(p[4], pix[4]);                                                                  \
642
        OP(p[5], pix[5]);                                                                  \
643
        OP(p[6], pix[6]);                                                                  \
644
        OP(p[7], pix[7]);                                                                  \
645
        pix += line_size;                                                                \
646
        p += INCR;                                                                       \
647
    } while (--h);;                                                                       \
648
}                                                                                        \
649
                                                                                         \
650
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
651
{                                                                                        \
652
    BTYPE *p;                                                                          \
653
    const UINT8 *pix;                                                                    \
654
                                                                                         \
655
    p = block;                                                                           \
656
    pix = pixels;                                                                        \
657
    do {                                                                   \
658
        OP(p[0], avg2(pix[0], pix[1]));                                                    \
659
        OP(p[1], avg2(pix[1], pix[2]));                                                    \
660
        OP(p[2], avg2(pix[2], pix[3]));                                                    \
661
        OP(p[3], avg2(pix[3], pix[4]));                                                    \
662
        OP(p[4], avg2(pix[4], pix[5]));                                                    \
663
        OP(p[5], avg2(pix[5], pix[6]));                                                    \
664
        OP(p[6], avg2(pix[6], pix[7]));                                                    \
665
        OP(p[7], avg2(pix[7], pix[8]));                                                    \
666
        pix += line_size;                                                                \
667
        p += INCR;                                                                       \
668
    } while (--h);                                                                        \
669
}                                                                                        \
670
                                                                                         \
671
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
672
{                                                                                        \
673
    BTYPE *p;                                                                          \
674
    const UINT8 *pix;                                                                    \
675
    const UINT8 *pix1;                                                                   \
676
                                                                                         \
677
    p = block;                                                                           \
678
    pix = pixels;                                                                        \
679
    pix1 = pixels + line_size;                                                           \
680
    do {                                                                                 \
681
        OP(p[0], avg2(pix[0], pix1[0]));                                                   \
682
        OP(p[1], avg2(pix[1], pix1[1]));                                                   \
683
        OP(p[2], avg2(pix[2], pix1[2]));                                                   \
684
        OP(p[3], avg2(pix[3], pix1[3]));                                                   \
685
        OP(p[4], avg2(pix[4], pix1[4]));                                                   \
686
        OP(p[5], avg2(pix[5], pix1[5]));                                                   \
687
        OP(p[6], avg2(pix[6], pix1[6]));                                                   \
688
        OP(p[7], avg2(pix[7], pix1[7]));                                                   \
689
        pix += line_size;                                                                \
690
        pix1 += line_size;                                                               \
691
        p += INCR;                                                                       \
692
    } while(--h);                                                                         \
693
}                                                                                        \
694
                                                                                         \
695
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
696
{                                                                                        \
697
    BTYPE *p;                                                                          \
698
    const UINT8 *pix;                                                                    \
699
    const UINT8 *pix1;                                                                   \
700
                                                                                         \
701
    p = block;                                                                           \
702
    pix = pixels;                                                                        \
703
    pix1 = pixels + line_size;                                                           \
704
    do {                                                                   \
705
        OP(p[0], avg4(pix[0], pix[1], pix1[0], pix1[1]));                                  \
706
        OP(p[1], avg4(pix[1], pix[2], pix1[1], pix1[2]));                                  \
707
        OP(p[2], avg4(pix[2], pix[3], pix1[2], pix1[3]));                                  \
708
        OP(p[3], avg4(pix[3], pix[4], pix1[3], pix1[4]));                                  \
709
        OP(p[4], avg4(pix[4], pix[5], pix1[4], pix1[5]));                                  \
710
        OP(p[5], avg4(pix[5], pix[6], pix1[5], pix1[6]));                                  \
711
        OP(p[6], avg4(pix[6], pix[7], pix1[6], pix1[7]));                                  \
712
        OP(p[7], avg4(pix[7], pix[8], pix1[7], pix1[8]));                                  \
713
        pix += line_size;                                                                \
714
        pix1 += line_size;                                                               \
715
        p += INCR;                                                                       \
716
    } while(--h);                                                                         \
717
}                                                                                        \
718
                                                                                         \
719
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
720
    OPNAME ## _pixels,                                                                   \
721
    OPNAME ## _pixels_x2,                                                                \
722
    OPNAME ## _pixels_y2,                                                                \
723
    OPNAME ## _pixels_xy2,                                                               \
724
};
725

726
/* rounding primitives */
/* Every macro argument is parenthesized so that an argument containing a
   lower-precedence operator (e.g. avg2(x, a|b)) is averaged as a whole. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)

/* Store operators passed as the OP argument of PIXOP: `a` is the destination
   lvalue, `b` the freshly computed value.  op_avg uses whichever avg2 is
   defined at the point of expansion. */
#define op_avg(a, b) (a) = avg2(a, b)
#define op_sub(a, b) (a) -= (b)
#define op_put(a, b) (a) = (b)
733 de6d9b64 Fabrice Bellard

734
/* Instantiate PIXOP(BTYPE, OPNAME, OP, INCR) three times with the avg2/avg4
   definitions in effect at this point:
     sub - subtracts from DCTELEM blocks, destination advances 8 per row
     avg - averages into uint8_t pixels, destination advances line_size
     put - stores into uint8_t pixels, destination advances line_size */
PIXOP(DCTELEM, sub, op_sub, 8)
PIXOP(uint8_t, avg, op_avg, line_size)
PIXOP(uint8_t, put, op_put, line_size)
737 de6d9b64 Fabrice Bellard

738
/* not rounding primitives */
/* Same averages as before but with a smaller bias (0 for avg2, 1 for avg4).
   Arguments are parenthesized so compound expressions expand correctly. */
#undef avg2
#undef avg4
#define avg2(a,b) (((a)+(b))>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+1)>>2)
743

744 3aa102be Michael Niedermayer
PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
745
PIXOP(uint8_t, put_no_rnd, op_put, line_size)
746 de6d9b64 Fabrice Bellard
/* motion estimation */
747

748
#undef avg2
749
#undef avg4
750 57060b1e Fabrice Bellard
#endif
751
752 de6d9b64 Fabrice Bellard
/* Rounding averages used by the pix_abs*_x2/_y2/_xy2 functions further down.
   Arguments are parenthesized so compound expressions expand correctly. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
754
755 44eb4951 Michael Niedermayer
/*
 * Bilinear interpolation of an 8-pixel-wide block.
 *
 * dst, src   destination and source; BOTH advance by srcStride per row
 * h          number of rows to produce
 * x16, y16   horizontal / vertical weights in 1/16 units; the four weights
 *            derived from them always add up to 256
 *            (presumably 0 <= x16, y16 <= 16 -- confirm against callers)
 * rounder    subtracted from the default bias of 128 applied before >>8
 *
 * Reads 9 columns and h+1 rows of src.
 */
static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left weight     */
    const int w_tr = x16 * (16 - y16);          /* top-right weight    */
    const int w_bl = (16 - x16) * y16;          /* bottom-left weight  */
    const int w_br = x16 * y16;                 /* bottom-right weight */
    const int bias = 128 - rounder;
    int row, col;

    for (row = 0; row < h; row++) {
        const UINT8 *below = src + srcStride;

        for (col = 0; col < 8; col++) {
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + bias) >> 8;
        }
        dst += srcStride;
        src += srcStride;
    }
}
778
779
/*
 * Horizontal 8-tap low-pass filter over an 8-pixel-wide block.
 *
 * Each output is  20*(a0+a1) - 6*(b0+b1) + 3*(c0+c1) - (d0+d1) + r, shifted
 * right by 5 and clamped through cropTbl.  Only src[0..8] is read per row:
 * near the block edges the outer taps reuse in-range samples, as spelled
 * out by the index lists below.
 *
 * dst/dstStride  destination and its row pitch
 * src/srcStride  source and its row pitch
 * h              number of rows
 * r              bias added before the shift
 */
#define QPEL_H_TAP(a0, a1, b0, b1, c0, c1, d0, d1)                         \
    clip[(((src[a0] + src[a1]) * 20 - (src[b0] + src[b1]) * 6 +            \
           (src[c0] + src[c1]) * 3  - (src[d0] + src[d1]) + r) >> 5)]
static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
{
    UINT8 *clip = cropTbl + MAX_NEG_CROP;
    int row;

    for (row = 0; row < h; row++) {
        dst[0] = QPEL_H_TAP(0, 1,  0, 2,  1, 3,  2, 4);
        dst[1] = QPEL_H_TAP(1, 2,  0, 3,  0, 4,  1, 5);
        dst[2] = QPEL_H_TAP(2, 3,  1, 4,  0, 5,  0, 6);
        dst[3] = QPEL_H_TAP(3, 4,  2, 5,  1, 6,  0, 7);
        dst[4] = QPEL_H_TAP(4, 5,  3, 6,  2, 7,  1, 8);
        dst[5] = QPEL_H_TAP(5, 6,  4, 7,  3, 8,  2, 8);
        dst[6] = QPEL_H_TAP(6, 7,  5, 8,  4, 8,  3, 7);
        dst[7] = QPEL_H_TAP(7, 8,  6, 8,  5, 7,  4, 6);
        dst += dstStride;
        src += srcStride;
    }
}
#undef QPEL_H_TAP
797
798
/*
 * Vertical counterpart of qpel_h_lowpass: for each of the w columns, nine
 * vertically adjacent source samples are loaded first and eight filtered,
 * clamped samples are then written down the destination column.
 *
 * dst/dstStride  destination and its row pitch
 * src/srcStride  source and its row pitch
 * w              number of columns to process
 * r              bias added before the >>5
 */
#define QPEL_V_TAP(a0, a1, b0, b1, c0, c1, d0, d1)                         \
    clip[(((s[a0] + s[a1]) * 20 - (s[b0] + s[b1]) * 6 +                    \
           (s[c0] + s[c1]) * 3  - (s[d0] + s[d1]) + r) >> 5)]
static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
{
    UINT8 *clip = cropTbl + MAX_NEG_CROP;
    int col;

    for (col = 0; col < w; col++) {
        int s[9];
        int k;

        /* fetch the whole source column before anything is written */
        for (k = 0; k < 9; k++)
            s[k] = src[k * srcStride];

        dst[0 * dstStride] = QPEL_V_TAP(0, 1,  0, 2,  1, 3,  2, 4);
        dst[1 * dstStride] = QPEL_V_TAP(1, 2,  0, 3,  0, 4,  1, 5);
        dst[2 * dstStride] = QPEL_V_TAP(2, 3,  1, 4,  0, 5,  0, 6);
        dst[3 * dstStride] = QPEL_V_TAP(3, 4,  2, 5,  1, 6,  0, 7);
        dst[4 * dstStride] = QPEL_V_TAP(4, 5,  3, 6,  2, 7,  1, 8);
        dst[5 * dstStride] = QPEL_V_TAP(5, 6,  4, 7,  3, 8,  2, 8);
        dst[6 * dstStride] = QPEL_V_TAP(6, 7,  5, 8,  4, 8,  3, 7);
        dst[7 * dstStride] = QPEL_V_TAP(7, 8,  6, 8,  5, 7,  4, 6);
        dst++;
        src++;
    }
}
#undef QPEL_V_TAP
825
826
/* Copy an 8x8 block of pixels from src (row pitch srcStride) to dst (row
   pitch dstStride), byte by byte in raster order. */
static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = src[col];
        dst += dstStride;
        src += srcStride;
    }
}
843
844
/*
 * Write the per-pixel average of two 8x8 blocks to dst.
 *
 * src1 is walked with srcStride; src2 is always walked with a pitch of 8,
 * i.e. it is treated as a packed 8-wide buffer.  r is the bias added
 * before the >>1.
 */
static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + r) >> 1;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
    }
}
862
863
/*
 * Write the per-pixel average of four 8x8 blocks to dst.
 *
 * src1 is walked with srcStride; src2, src3 and src4 are always walked with
 * a pitch of 8 (packed 8-wide buffers).  r is the bias added before the >>2.
 */
static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + src3[col] + src4[col] + r) >> 2;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
        src3 += 8;
        src4 += 8;
    }
}
883
884
#define QPEL_MC(r, name) \
885
static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
886
{\
887
    put_block(dst, src, dstStride, srcStride);\
888
}\
889
\
890
static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
891
{\
892
    UINT8 half[64];\
893 ba6802de Michael Niedermayer
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
894 44eb4951 Michael Niedermayer
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
895
}\
896
\
897
static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
898
{\
899 ba6802de Michael Niedermayer
    qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
900 44eb4951 Michael Niedermayer
}\
901
\
902
static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
903
{\
904
    UINT8 half[64];\
905 ba6802de Michael Niedermayer
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
906 44eb4951 Michael Niedermayer
    avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
907
}\
908
\
909
static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
910
{\
911
    UINT8 half[64];\
912 ba6802de Michael Niedermayer
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
913 44eb4951 Michael Niedermayer
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
914
}\
915
\
916
static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
917
{\
918 ba6802de Michael Niedermayer
    qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
919 44eb4951 Michael Niedermayer
}\
920
\
921
static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
922
{\
923
    UINT8 half[64];\
924 ba6802de Michael Niedermayer
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
925 44eb4951 Michael Niedermayer
    avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
926
}\
927
static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
928
{\
929
    UINT8 halfH[72];\
930 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
931 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
932 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
933
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
934
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
935 44eb4951 Michael Niedermayer
    avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
936
}\
937
static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
938
{\
939
    UINT8 halfH[72];\
940 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
941 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
942 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
943
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
944
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
945 44eb4951 Michael Niedermayer
    avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
946
}\
947
static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
948
{\
949
    UINT8 halfH[72];\
950 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
951 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
952 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
953
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
954
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
955 7ff037e9 Michael Niedermayer
    avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
956 44eb4951 Michael Niedermayer
}\
957
static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
958
{\
959
    UINT8 halfH[72];\
960 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
961 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
962 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
963
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
964
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
965 7ff037e9 Michael Niedermayer
    avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
966 44eb4951 Michael Niedermayer
}\
967
static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
968
{\
969
    UINT8 halfH[72];\
970
    UINT8 halfHV[64];\
971 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
972
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
973 44eb4951 Michael Niedermayer
    avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
974
}\
975
static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
976
{\
977
    UINT8 halfH[72];\
978
    UINT8 halfHV[64];\
979 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
980
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
981 44eb4951 Michael Niedermayer
    avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
982
}\
983
static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
984
{\
985
    UINT8 halfH[72];\
986 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
987 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
988 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
989
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
990
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
991 7ff037e9 Michael Niedermayer
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
992 44eb4951 Michael Niedermayer
}\
993
static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
994
{\
995
    UINT8 halfH[72];\
996 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
997 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
998 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
999
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
1000
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
1001 7ff037e9 Michael Niedermayer
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
1002 44eb4951 Michael Niedermayer
}\
1003
static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
1004
{\
1005
    UINT8 halfH[72];\
1006 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
1007
    qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
1008 44eb4951 Michael Niedermayer
}\
1009
qpel_mc_func qpel_mc ## name ## _tab[16]={ \
1010
    qpel_mc00_c ## name,                                                                   \
1011
    qpel_mc10_c ## name,                                                                   \
1012
    qpel_mc20_c ## name,                                                                   \
1013
    qpel_mc30_c ## name,                                                                   \
1014
    qpel_mc01_c ## name,                                                                   \
1015
    qpel_mc11_c ## name,                                                                   \
1016
    qpel_mc21_c ## name,                                                                   \
1017
    qpel_mc31_c ## name,                                                                   \
1018
    qpel_mc02_c ## name,                                                                   \
1019
    qpel_mc12_c ## name,                                                                   \
1020
    qpel_mc22_c ## name,                                                                   \
1021
    qpel_mc32_c ## name,                                                                   \
1022
    qpel_mc03_c ## name,                                                                   \
1023
    qpel_mc13_c ## name,                                                                   \
1024
    qpel_mc23_c ## name,                                                                   \
1025
    qpel_mc33_c ## name,                                                                   \
1026
};
1027
1028
QPEL_MC(0, _rnd)
1029
QPEL_MC(1, _no_rnd)
1030
1031 ba6802de Michael Niedermayer
/* Sum of absolute differences between two 16x16 pixel blocks.
   Both blocks use the same row pitch, line_size. */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1058
1059 ba6802de Michael Niedermayer
/* Sum of absolute differences between the 16x16 block at pix1 and the
   block at pix2 averaged (avg2) with its right-hand neighbour.
   Reads 17 columns of pix2 per row. */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1086
1087 ba6802de Michael Niedermayer
/* Sum of absolute differences between the 16x16 block at pix1 and the
   block at pix2 averaged (avg2) with the row below it.
   Reads 17 rows of pix2. */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1116
1117 ba6802de Michael Niedermayer
/* Sum of absolute differences between the 16x16 block at pix1 and the
   four-way average (avg4) of pix2 with its right, lower and lower-right
   neighbours.  Reads 17 columns and 17 rows of pix2. */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1146
1147 ba6802de Michael Niedermayer
/* Sum of absolute differences between two 8x8 pixel blocks.
   Both blocks use the same row pitch, line_size. */
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1166
1167
/* Sum of absolute differences between the 8x8 block at pix1 and the block
   at pix2 averaged (avg2) with its right-hand neighbour.
   Reads 9 columns of pix2 per row. */
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
1186
1187
/* Sum of absolute differences between the 8x8 block at pix1 and the block
   at pix2 averaged (avg2) with the row below it.  Reads 9 rows of pix2. */
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1208
1209
/* Sum of absolute differences between the 8x8 block at pix1 and the
   four-way average (avg4) of pix2 with its right, lower and lower-right
   neighbours.  Reads 9 columns and 9 rows of pix2. */
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sad;
}
1230
1231 e0eac44e Fabrice Bellard
/* permute block according so that it corresponds to the MMX idct
1232
   order */
1233 d962f6fd Arpi
#ifdef SIMPLE_IDCT
1234 5a240838 Michael Niedermayer
 /* general permutation, but perhaps slightly slower */
1235 d962f6fd Arpi
void block_permute(INT16 *block)
1236
{
1237
        int i;
1238
        INT16 temp[64];
1239
1240
        for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
1241
1242
        for(i=0; i<64; i++) block[i] = temp[i];
1243
}
1244
#else
1245
1246 e0eac44e Fabrice Bellard
void block_permute(INT16 *block)
1247 de6d9b64 Fabrice Bellard
{
1248 e0eac44e Fabrice Bellard
    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
1249 de6d9b64 Fabrice Bellard
    int i;
1250
1251 e0eac44e Fabrice Bellard
    for(i=0;i<8;i++) {
1252
        tmp1 = block[1];
1253
        tmp2 = block[2];
1254
        tmp3 = block[3];
1255
        tmp4 = block[4];
1256
        tmp5 = block[5];
1257
        tmp6 = block[6];
1258
        block[1] = tmp2;
1259
        block[2] = tmp4;
1260
        block[3] = tmp6;
1261
        block[4] = tmp1;
1262
        block[5] = tmp3;
1263
        block[6] = tmp5;
1264
        block += 8;
1265
    }
1266
}
1267 d962f6fd Arpi
#endif
1268 e0eac44e Fabrice Bellard
1269 649c00c9 Michael Niedermayer
/* Zero six consecutive blocks of 64 DCTELEM coefficients starting at blocks. */
void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(DCTELEM));
}
1273
1274 8ee14970 Fabrice Bellard
/* XXX: those functions should be suppressed ASAP when all IDCTs are
1275
   converted */
1276
void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
1277
{
1278
    ff_idct (block);
1279
    put_pixels_clamped(block, dest, line_size);
1280
}
1281
1282
/* Generic "IDCT then add": runs the selected ff_idct on block in place and
   passes the result to add_pixels_clamped together with dest. */
void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    (*ff_idct)(block);
    (*add_pixels_clamped)(block, dest, line_size);
}
1287
1288 e0eac44e Fabrice Bellard
void dsputil_init(void)
1289
{
1290
    int i, j;
1291 c34270f5 Fabrice Bellard
    int use_permuted_idct;
1292 e0eac44e Fabrice Bellard
1293 de6d9b64 Fabrice Bellard
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
1294
    for(i=0;i<MAX_NEG_CROP;i++) {
1295
        cropTbl[i] = 0;
1296
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
1297
    }
1298
1299
    for(i=0;i<512;i++) {
1300
        squareTbl[i] = (i - 256) * (i - 256);
1301
    }
1302
1303 d962f6fd Arpi
#ifdef SIMPLE_IDCT
1304 8ee14970 Fabrice Bellard
    ff_idct = NULL;
1305 d962f6fd Arpi
#else
1306 4af7bcc1 Arpi
    ff_idct = j_rev_dct;
1307 d962f6fd Arpi
#endif
1308 de6d9b64 Fabrice Bellard
    get_pixels = get_pixels_c;
1309 9dbcbd92 Michael Niedermayer
    diff_pixels = diff_pixels_c;
1310 de6d9b64 Fabrice Bellard
    put_pixels_clamped = put_pixels_clamped_c;
1311
    add_pixels_clamped = add_pixels_clamped_c;
1312 44eb4951 Michael Niedermayer
    gmc1= gmc1_c;
1313 649c00c9 Michael Niedermayer
    clear_blocks= clear_blocks_c;
1314 3aa102be Michael Niedermayer
    pix_sum= pix_sum_c;
1315
    pix_norm1= pix_norm1_c;
1316 de6d9b64 Fabrice Bellard
1317 ba6802de Michael Niedermayer
    pix_abs16x16     = pix_abs16x16_c;
1318
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
1319
    pix_abs16x16_y2  = pix_abs16x16_y2_c;
1320 de6d9b64 Fabrice Bellard
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
1321 ba6802de Michael Niedermayer
    pix_abs8x8     = pix_abs8x8_c;
1322
    pix_abs8x8_x2  = pix_abs8x8_x2_c;
1323
    pix_abs8x8_y2  = pix_abs8x8_y2_c;
1324
    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
1325 de6d9b64 Fabrice Bellard
1326 c34270f5 Fabrice Bellard
    use_permuted_idct = 1;
1327 e0eac44e Fabrice Bellard
1328 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
1329 de6d9b64 Fabrice Bellard
    dsputil_init_mmx();
1330
#endif
1331 3d03c0a2 Fabrice Bellard
#ifdef ARCH_ARMV4L
1332
    dsputil_init_armv4l();
1333
#endif
1334 c34270f5 Fabrice Bellard
#ifdef HAVE_MLIB
1335
    dsputil_init_mlib();
1336
    use_permuted_idct = 0;
1337
#endif
1338 1e98dffb Nick Kurshev
#ifdef ARCH_ALPHA
1339
    dsputil_init_alpha();
1340
    use_permuted_idct = 0;
1341
#endif
1342 59925ef2 Brian Foley
#ifdef ARCH_POWERPC
1343 ab6c65f6 Brian Foley
    dsputil_init_ppc();
1344 a43bd1d7 Heliodoro Tammaro
#endif
1345 c34270f5 Fabrice Bellard
1346 d962f6fd Arpi
#ifdef SIMPLE_IDCT
1347 8ee14970 Fabrice Bellard
    if (ff_idct == NULL) {
1348
        ff_idct_put = simple_idct_put;
1349
        ff_idct_add = simple_idct_add;
1350
        use_permuted_idct=0;
1351 fc2bb4f4 Michael Niedermayer
    }
1352
#endif
1353
    if(ff_idct != NULL) {
1354 8ee14970 Fabrice Bellard
        ff_idct_put = gen_idct_put;
1355
        ff_idct_add = gen_idct_add;
1356
    }
1357 d962f6fd Arpi
1358 5a240838 Michael Niedermayer
    if(use_permuted_idct)
1359
#ifdef SIMPLE_IDCT
1360
        for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
1361
#else
1362
        for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
1363
#endif
1364
    else
1365
        for(i=0; i<64; i++) permutation[i]=i;
1366
1367 2f349de2 Michael Niedermayer
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
1368
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
1369
    
1370 c34270f5 Fabrice Bellard
    if (use_permuted_idct) {
1371
        /* permute for IDCT */
1372
        for(i=0;i<64;i++) {
1373
            j = zigzag_direct[i];
1374
            zigzag_direct[i] = block_permute_op(j);
1375
            j = ff_alternate_horizontal_scan[i];
1376
            ff_alternate_horizontal_scan[i] = block_permute_op(j);
1377
            j = ff_alternate_vertical_scan[i];
1378
            ff_alternate_vertical_scan[i] = block_permute_op(j);
1379
        }
1380 adc09b2e Marko Kreen
        block_permute(ff_mpeg1_default_intra_matrix);
1381
        block_permute(ff_mpeg1_default_non_intra_matrix);
1382 3bf43d42 Michael Niedermayer
        block_permute(ff_mpeg4_default_intra_matrix);
1383
        block_permute(ff_mpeg4_default_non_intra_matrix);
1384 c34270f5 Fabrice Bellard
    }
1385 badaf88e Michael Niedermayer
    
1386
    build_zigzag_end();
1387 de6d9b64 Fabrice Bellard
}
1388 43f1708f Juanjo
1389 57060b1e Fabrice Bellard
/*
 * Testing aid: switch off every operation that is not bit exact.
 * Only builds with HAVE_MMX defined have anything to disable here;
 * in all other builds this function does nothing.
 */
void avcodec_set_bit_exact(void)
{
#if defined(HAVE_MMX)
    dsputil_set_bit_exact_mmx();
#endif
}
1396
1397 43f1708f Juanjo
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
1398
              int orig_linesize[3], int coded_linesize,
1399
              AVCodecContext *avctx)
1400
{
1401
    int quad, diff, x, y;
1402
    UINT8 *orig, *coded;
1403
    UINT32 *sq = squareTbl + 256;
1404
    
1405
    quad = 0;
1406
    diff = 0;
1407
    
1408
    /* Luminance */
1409
    orig = orig_image[0];
1410
    coded = coded_image[0];
1411
    
1412
    for (y=0;y<avctx->height;y++) {
1413
        for (x=0;x<avctx->width;x++) {
1414
            diff = *(orig + x) - *(coded + x);
1415
            quad += sq[diff];
1416
        }
1417
        orig += orig_linesize[0];
1418
        coded += coded_linesize;
1419
    }
1420
   
1421
    avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1422
    
1423
    if (avctx->psnr_y) {
1424
        avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1425
        avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); 
1426
    } else
1427
        avctx->psnr_y = 99.99;
1428
}