Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ 1c2a8c7f

History | View | Annotate | Download (40.8 KB)

1 de6d9b64 Fabrice Bellard
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Gerard Lantau.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
 */
21
#include <stdlib.h>
22
#include <stdio.h>
23 43f1708f Juanjo
#include <math.h>
24 de6d9b64 Fabrice Bellard
#include "avcodec.h"
25
#include "dsputil.h"
26 d962f6fd Arpi
#include "simple_idct.h"
27 de6d9b64 Fabrice Bellard
28 4af7bcc1 Arpi
void (*ff_idct)(DCTELEM *block);
29 de6d9b64 Fabrice Bellard
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30 9dbcbd92 Michael Niedermayer
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
31 de6d9b64 Fabrice Bellard
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
33 44eb4951 Michael Niedermayer
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
34 649c00c9 Michael Niedermayer
void (*clear_blocks)(DCTELEM *blocks);
35 de6d9b64 Fabrice Bellard
36
op_pixels_abs_func pix_abs16x16;
37
op_pixels_abs_func pix_abs16x16_x2;
38
op_pixels_abs_func pix_abs16x16_y2;
39
op_pixels_abs_func pix_abs16x16_xy2;
40
41 ba6802de Michael Niedermayer
op_pixels_abs_func pix_abs8x8;
42
op_pixels_abs_func pix_abs8x8_x2;
43
op_pixels_abs_func pix_abs8x8_y2;
44
op_pixels_abs_func pix_abs8x8_xy2;
45
46 0cfa9713 Fabrice Bellard
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
47 de6d9b64 Fabrice Bellard
UINT32 squareTbl[512];
48
49 e0eac44e Fabrice Bellard
extern UINT16 default_intra_matrix[64];
50
extern UINT16 default_non_intra_matrix[64];
51 3bf43d42 Michael Niedermayer
extern UINT16 ff_mpeg4_default_intra_matrix[64];
52
extern UINT16 ff_mpeg4_default_non_intra_matrix[64];
53 e0eac44e Fabrice Bellard
54
/*
 * Zigzag scan order for an 8x8 block: entry i is the raster index
 * (row * 8 + column) of the i-th coefficient in scan order.
 * NOTE(review): left non-const on purpose; the neighbouring "not permuted"
 * copies suggest this table may be permuted at run time -- confirm.
 */
UINT8 zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
64
65 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
66
UINT16 __align8 inv_zigzag_direct16[64];
67
68
/* not permutated zigzag_direct for MMX quantizer */
69
UINT8 zigzag_direct_noperm[64];
70
71 e0eac44e Fabrice Bellard
/*
 * Alternate horizontal scan order for an 8x8 block: entry i is the raster
 * index (row * 8 + column) of the i-th coefficient visited.
 */
UINT8 ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
81
82
/*
 * Alternate vertical scan order for an 8x8 block: entry i is the raster
 * index (row * 8 + column) of the i-th coefficient visited.
 */
UINT8 ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
92
93 e4986da9 Juanjo
#ifdef SIMPLE_IDCT

/*
 * Input permutation for the simple_idct_mmx.
 * Only compiled when SIMPLE_IDCT is defined; the values are a permutation
 * of 0x00..0x3F.
 */
static UINT8 simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
#endif
107 5a240838 Michael Niedermayer
108 2f349de2 Michael Niedermayer
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
109
UINT32 inverse[256]={
110
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
111
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
112
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
113
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
114
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
115
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
116
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
117
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
118
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
119
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
120
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
121
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
122
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
123
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
124
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
125
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
126
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
127
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
128
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
129
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
130
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
131
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
132
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
133
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
134
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
135
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
136
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
137
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
138
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
139
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
140
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
141
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
142
};
143
144 badaf88e Michael Niedermayer
/* used to skip zeros at the end */
145
UINT8 zigzag_end[64];
146
147 5a240838 Michael Niedermayer
UINT8 permutation[64];
148
//UINT8 invPermutation[64];
149
150 badaf88e Michael Niedermayer
static void build_zigzag_end()
151
{
152
    int lastIndex;
153
    int lastIndexAfterPerm=0;
154
    for(lastIndex=0; lastIndex<64; lastIndex++)
155
    {
156
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
157
            lastIndexAfterPerm= zigzag_direct[lastIndex];
158
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
159
    }
160
}
161
162 de6d9b64 Fabrice Bellard
/*
 * Copy an 8x8 block of pixels into a coefficient block.
 *
 * block     - destination; 64 DCTELEMs written as 8 consecutive rows of 8
 * pixels    - top-left source pixel
 * line_size - offset, in UINT8 elements, from one source row to the next
 */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block  += 8;
    }
}
184
185 9dbcbd92 Michael Niedermayer
/*
 * Store the per-pixel difference s1 - s2 of two 8x8 pixel areas.
 *
 * block  - destination; 64 DCTELEMs written as 8 consecutive rows of 8
 * s1, s2 - top-left pixels of the two source areas
 * stride - offset, in UINT8 elements, between rows; shared by s1 and s2
 */
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}
205
206
207 de6d9b64 Fabrice Bellard
/*
 * Write an 8x8 coefficient block to pixels, mapping every value through
 * the cropTbl lookup table.
 *
 * block     - source; 64 DCTELEMs read as 8 consecutive rows of 8
 * pixels    - top-left destination pixel
 * line_size - offset, in UINT8 elements, from one destination row to the next
 *
 * Each coefficient is used directly as an index relative to
 * cropTbl + MAX_NEG_CROP, so values must lie within the table's range.
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[block[col]];
        pixels += line_size;
        block  += 8;
    }
}
230
231
/*
 * Add an 8x8 coefficient block to existing pixels, mapping every sum
 * through the cropTbl lookup table.
 *
 * block     - source; 64 DCTELEMs read as 8 consecutive rows of 8
 * pixels    - top-left pixel of the area that is read and updated in place
 * line_size - offset, in UINT8 elements, from one pixel row to the next
 *
 * Each sum pixel + coefficient is used directly as an index relative to
 * cropTbl + MAX_NEG_CROP, so it must lie within the table's range.
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[pixels[col] + block[col]];
        pixels += line_size;
        block  += 8;
    }
}
254
255
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) expands to four static functions and a
 * public table of pointers to them:
 *
 *   OPNAME_pixels      OP(dst, src)
 *   OPNAME_pixels_x2   OP(dst, avg2 of src and its right neighbour)
 *   OPNAME_pixels_y2   OP(dst, avg2 of src and the pixel one row below)
 *   OPNAME_pixels_xy2  OP(dst, avg4 of the 2x2 neighbourhood)
 *   OPNAME_pixels_tab  { plain, x2, y2, xy2 }
 *
 * Each function handles 8 columns per row for h rows.  h must be >= 1: the
 * do/while loops process a row before testing --h.  The source advances by
 * line_size per row and the destination by INCR (an expression that may
 * refer to the line_size parameter).  The x2 and xy2 variants read 9 source
 * columns; y2 and xy2 read one extra source row.
 *
 * avg2 and avg4 are expanded where PIXOP is invoked, so they must be defined
 * at that point; redefining them between invocations selects the rounding.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
\
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], pixels[col]); \
        } \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg2(pixels[col], pixels[col + 1])); \
        } \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *next_row = pixels + line_size; \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg2(pixels[col], next_row[col])); \
        } \
        pixels += line_size; \
        next_row += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *next_row = pixels + line_size; \
    int col; \
\
    do { \
        for (col = 0; col < 8; col++) { \
            OP(block[col], avg4(pixels[col], pixels[col + 1], next_row[col], next_row[col + 1])); \
        } \
        pixels += line_size; \
        next_row += line_size; \
        block += INCR; \
    } while (--h); \
} \
\
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels, \
    OPNAME ## _pixels_x2, \
    OPNAME ## _pixels_y2, \
    OPNAME ## _pixels_xy2, \
};
353
354
355
/* rounding primitives */
356
#define avg2(a,b) ((a+b+1)>>1)
357
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
358
359
#define op_put(a, b) a = b
360
#define op_avg(a, b) a = avg2(a, b)
361
#define op_sub(a, b) a -= b
362
363
PIXOP(UINT8, put, op_put, line_size)
364
PIXOP(UINT8, avg, op_avg, line_size)
365
366
PIXOP(DCTELEM, sub, op_sub, 8)
367
368
/* not rounding primitives */
369
#undef avg2
370
#undef avg4
371
#define avg2(a,b) ((a+b)>>1)
372
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
373
374
PIXOP(UINT8, put_no_rnd, op_put, line_size)
375
PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
376
377
/* motion estimation */
378
379
#undef avg2
380
#undef avg4
381
#define avg2(a,b) ((a+b+1)>>1)
382
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
383
384 44eb4951 Michael Niedermayer
/*
 * Bilinear interpolation of an 8-pixel-wide, h-row area.
 *
 * dst       - destination; advanced by srcStride per row, like src
 * src       - source; columns 0..8 of rows 0..h are read
 * srcStride - row offset, in UINT8 elements, used for both src and dst
 * h         - number of rows to produce
 * x16, y16  - horizontal / vertical weights of the right / lower neighbour,
 *             in sixteenths
 * rounder   - subtracted from the bias of 128 that is added before the >>8
 *
 * The four weights always sum to 256, which is why the sum is shifted by 8.
 */
static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
{
    const int w_top_left     = (16 - x16) * (16 - y16);
    const int w_top_right    = x16 * (16 - y16);
    const int w_bottom_left  = (16 - x16) * y16;
    const int w_bottom_right = x16 * y16;
    const int bias = 128 - rounder;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++) {
            dst[col] = (w_top_left     * src[col]
                      + w_top_right    * src[col + 1]
                      + w_bottom_left  * src[srcStride + col]
                      + w_bottom_right * src[srcStride + col + 1]
                      + bias) >> 8;
        }
        dst += srcStride;
        src += srcStride;
    }
}
407
408
/*
 * One output sample of the horizontal low-pass filter: four sample pairs
 * weighted 20, -6, 3, -1, plus the rounding term r, shifted right by 5 and
 * looked up through the crop table.
 */
static inline UINT8 qpel_h_lowpass_tap(const UINT8 *crop,
                                       int a0, int a1, int b0, int b1,
                                       int c0, int c1, int d0, int d1, int r)
{
    return crop[((a0 + a1) * 20 - (b0 + b1) * 6 + (c0 + c1) * 3 - (d0 + d1) + r) >> 5];
}

/*
 * Horizontal low-pass filter: produces 8 output samples per row from the
 * 9 input samples src[0..8], for h rows.
 *
 * dst, dstStride - destination and its row offset
 * src, srcStride - source and its row offset
 * h              - number of rows
 * r              - rounding term added before the >>5
 *
 * Taps that would fall outside src[0..8] are folded back onto samples
 * inside the window, which is why the outer rows of the tap list are not
 * simple shifts of the middle ones.
 */
static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
{
    const UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row;

    for (row = 0; row < h; row++) {
        dst[0] = qpel_h_lowpass_tap(crop, src[0], src[1], src[0], src[2], src[1], src[3], src[2], src[4], r);
        dst[1] = qpel_h_lowpass_tap(crop, src[1], src[2], src[0], src[3], src[0], src[4], src[1], src[5], r);
        dst[2] = qpel_h_lowpass_tap(crop, src[2], src[3], src[1], src[4], src[0], src[5], src[0], src[6], r);
        dst[3] = qpel_h_lowpass_tap(crop, src[3], src[4], src[2], src[5], src[1], src[6], src[0], src[7], r);
        dst[4] = qpel_h_lowpass_tap(crop, src[4], src[5], src[3], src[6], src[2], src[7], src[1], src[8], r);
        dst[5] = qpel_h_lowpass_tap(crop, src[5], src[6], src[4], src[7], src[3], src[8], src[2], src[8], r);
        dst[6] = qpel_h_lowpass_tap(crop, src[6], src[7], src[5], src[8], src[4], src[8], src[3], src[7], r);
        dst[7] = qpel_h_lowpass_tap(crop, src[7], src[8], src[6], src[8], src[5], src[7], src[4], src[6], r);
        dst += dstStride;
        src += srcStride;
    }
}
426
427
/*
 * Vertical low-pass filter: for each of w columns, produces 8 output
 * samples (rows 0..7 of dst) from the 9 input samples in rows 0..8 of src.
 *
 * dst, dstStride - destination and its row offset
 * src, srcStride - source and its row offset
 * w              - number of columns
 * r              - rounding term added before the >>5
 *
 * All 9 inputs of a column are loaded before any output is written.  The
 * pairs are weighted 20, -6, 3, -1; taps that would fall outside rows 0..8
 * are folded back onto rows inside the window.  Results are looked up
 * through the crop table.
 */
static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
{
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int col, k;

    for (col = 0; col < w; col++) {
        int s[9];

        for (k = 0; k < 9; k++)
            s[k] = src[k * srcStride];

        dst[0 * dstStride] = crop[((s[0] + s[1]) * 20 - (s[0] + s[2]) * 6 + (s[1] + s[3]) * 3 - (s[2] + s[4]) + r) >> 5];
        dst[1 * dstStride] = crop[((s[1] + s[2]) * 20 - (s[0] + s[3]) * 6 + (s[0] + s[4]) * 3 - (s[1] + s[5]) + r) >> 5];
        dst[2 * dstStride] = crop[((s[2] + s[3]) * 20 - (s[1] + s[4]) * 6 + (s[0] + s[5]) * 3 - (s[0] + s[6]) + r) >> 5];
        dst[3 * dstStride] = crop[((s[3] + s[4]) * 20 - (s[2] + s[5]) * 6 + (s[1] + s[6]) * 3 - (s[0] + s[7]) + r) >> 5];
        dst[4 * dstStride] = crop[((s[4] + s[5]) * 20 - (s[3] + s[6]) * 6 + (s[2] + s[7]) * 3 - (s[1] + s[8]) + r) >> 5];
        dst[5 * dstStride] = crop[((s[5] + s[6]) * 20 - (s[4] + s[7]) * 6 + (s[3] + s[8]) * 3 - (s[2] + s[8]) + r) >> 5];
        dst[6 * dstStride] = crop[((s[6] + s[7]) * 20 - (s[5] + s[8]) * 6 + (s[4] + s[8]) * 3 - (s[3] + s[7]) + r) >> 5];
        dst[7 * dstStride] = crop[((s[7] + s[8]) * 20 - (s[6] + s[8]) * 6 + (s[5] + s[7]) * 3 - (s[4] + s[6]) + r) >> 5];

        dst++;
        src++;
    }
}
454
455
/*
 * Copy an 8x8 block of pixels.
 *
 * dst, dstStride - destination and its row offset
 * src, srcStride - source and its row offset
 */
static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = src[col];
        dst += dstStride;
        src += srcStride;
    }
}
472
473
/*
 * Average two 8x8 blocks: dst = (src1 + src2 + r) >> 1.
 *
 * dst, dstStride  - destination and its row offset
 * src1, srcStride - first source and its row offset
 * src2            - second source; always advanced by 8 per row, i.e. it is
 *                   read as a packed 8x8 buffer
 * r               - rounding term added before the shift
 */
static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + r) >> 1;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
    }
}
491
492
/*
 * Average four 8x8 blocks: dst = (src1 + src2 + src3 + src4 + r) >> 2.
 *
 * dst, dstStride   - destination and its row offset
 * src1, srcStride  - first source and its row offset
 * src2, src3, src4 - remaining sources; each is advanced by 8 per row,
 *                    i.e. read as a packed 8x8 buffer
 * r                - rounding term added before the shift
 */
static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + src3[col] + src4[col] + r) >> 2;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
        src3 += 8;
        src4 += 8;
    }
}
512
513
#define QPEL_MC(r, name) \
514
static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
515
{\
516
    put_block(dst, src, dstStride, srcStride);\
517
}\
518
\
519
static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
520
{\
521
    UINT8 half[64];\
522 ba6802de Michael Niedermayer
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
523 44eb4951 Michael Niedermayer
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
524
}\
525
\
526
static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
527
{\
528 ba6802de Michael Niedermayer
    qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
529 44eb4951 Michael Niedermayer
}\
530
\
531
static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
532
{\
533
    UINT8 half[64];\
534 ba6802de Michael Niedermayer
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
535 44eb4951 Michael Niedermayer
    avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
536
}\
537
\
538
static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
539
{\
540
    UINT8 half[64];\
541 ba6802de Michael Niedermayer
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
542 44eb4951 Michael Niedermayer
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
543
}\
544
\
545
static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
546
{\
547 ba6802de Michael Niedermayer
    qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
548 44eb4951 Michael Niedermayer
}\
549
\
550
static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
551
{\
552
    UINT8 half[64];\
553 ba6802de Michael Niedermayer
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
554 44eb4951 Michael Niedermayer
    avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
555
}\
556
static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
557
{\
558
    UINT8 halfH[72];\
559 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
560 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
561 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
562
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
563
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
564 44eb4951 Michael Niedermayer
    avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
565
}\
566
static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
567
{\
568
    UINT8 halfH[72];\
569 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
570 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
571 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
572
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
573
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
574 44eb4951 Michael Niedermayer
    avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
575
}\
576
static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
577
{\
578
    UINT8 halfH[72];\
579 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
580 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
581 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
582
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
583
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
584 7ff037e9 Michael Niedermayer
    avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
585 44eb4951 Michael Niedermayer
}\
586
static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
587
{\
588
    UINT8 halfH[72];\
589 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
590 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
591 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
592
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
593
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
594 7ff037e9 Michael Niedermayer
    avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
595 44eb4951 Michael Niedermayer
}\
596
static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
597
{\
598
    UINT8 halfH[72];\
599
    UINT8 halfHV[64];\
600 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
601
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
602 44eb4951 Michael Niedermayer
    avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
603
}\
604
static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
605
{\
606
    UINT8 halfH[72];\
607
    UINT8 halfHV[64];\
608 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
609
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
610 44eb4951 Michael Niedermayer
    avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
611
}\
612
static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
613
{\
614
    UINT8 halfH[72];\
615 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
616 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
617 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
618
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
619
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
620 7ff037e9 Michael Niedermayer
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
621 44eb4951 Michael Niedermayer
}\
622
static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
623
{\
624
    UINT8 halfH[72];\
625 7ff037e9 Michael Niedermayer
    UINT8 halfV[64];\
626 44eb4951 Michael Niedermayer
    UINT8 halfHV[64];\
627 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
628
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
629
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
630 7ff037e9 Michael Niedermayer
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
631 44eb4951 Michael Niedermayer
}\
632
static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
633
{\
634
    UINT8 halfH[72];\
635 ba6802de Michael Niedermayer
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
636
    qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
637 44eb4951 Michael Niedermayer
}\
638
qpel_mc_func qpel_mc ## name ## _tab[16]={ \
639
    qpel_mc00_c ## name,                                                                   \
640
    qpel_mc10_c ## name,                                                                   \
641
    qpel_mc20_c ## name,                                                                   \
642
    qpel_mc30_c ## name,                                                                   \
643
    qpel_mc01_c ## name,                                                                   \
644
    qpel_mc11_c ## name,                                                                   \
645
    qpel_mc21_c ## name,                                                                   \
646
    qpel_mc31_c ## name,                                                                   \
647
    qpel_mc02_c ## name,                                                                   \
648
    qpel_mc12_c ## name,                                                                   \
649
    qpel_mc22_c ## name,                                                                   \
650
    qpel_mc32_c ## name,                                                                   \
651
    qpel_mc03_c ## name,                                                                   \
652
    qpel_mc13_c ## name,                                                                   \
653
    qpel_mc23_c ## name,                                                                   \
654
    qpel_mc33_c ## name,                                                                   \
655
};
656
657
/* Expand the macro above with the suffix "_rnd": this emits the
   qpel_mcXY_c_rnd functions and the qpel_mc_rnd_tab[16] table.
   The 0 presumably binds to the macro's "r" term (16-r, 2-r, 1-r);
   the macro header is earlier in the file - confirm there. */
QPEL_MC(0, _rnd)
658
/* Expand the macro above with the suffix "_no_rnd": this emits the
   qpel_mcXY_c_no_rnd functions and the qpel_mc_no_rnd_tab[16] table.
   The 1 presumably binds to the macro's "r" term (16-r, 2-r, 1-r);
   the macro header is earlier in the file - confirm there. */
QPEL_MC(1, _no_rnd)
659
660 ba6802de Michael Niedermayer
/* Sum of absolute differences between two 16x16 blocks.
   pix1 and pix2 point at the first sample of each block; both blocks
   advance by line_size from one row to the next.
   Returns the sum of |pix1[x] - pix2[x]| over all 16 rows of 16 samples. */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
687
688 ba6802de Michael Niedermayer
/* Sum of absolute differences between a 16x16 block at pix1 and a
   horizontally combined version of the block at pix2: every pix1 sample
   is compared against avg2() of the pix2 sample in the same column and
   its right-hand neighbour (so 17 columns of pix2 are read per row).
   avg2() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int total = 0;
    int rows_left, x;

    for (rows_left = 16; rows_left > 0; rows_left--) {
        for (x = 0; x < 16; x++)
            total += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
715
716 ba6802de Michael Niedermayer
/* Sum of absolute differences between a 16x16 block at pix1 and a
   vertically combined version of the block at pix2: every pix1 sample is
   compared against avg2() of the pix2 sample in the same position and the
   sample one line_size further on (so 17 rows of pix2 are read).
   avg2() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        /* row of pix2 directly following the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 = below;
    }
    return sad;
}
745
746 ba6802de Michael Niedermayer
/* Sum of absolute differences between a 16x16 block at pix1 and a
   2x2-combined version of the block at pix2: every pix1 sample is compared
   against avg4() of the four pix2 samples at columns x and x+1 of the
   current row and of the row one line_size further on (so 17x17 samples
   of pix2 are read). avg4() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        /* row of pix2 directly following the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 = below;
    }
    return sad;
}
775
776 ba6802de Michael Niedermayer
/* Sum of absolute differences between two 8x8 blocks.
   pix1 and pix2 point at the first sample of each block; both blocks
   advance by line_size from one row to the next.
   Returns the sum of |pix1[x] - pix2[x]| over all 8 rows of 8 samples. */
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sad;
}
795
796
/* Sum of absolute differences between an 8x8 block at pix1 and a
   horizontally combined version of the block at pix2: every pix1 sample
   is compared against avg2() of the pix2 sample in the same column and
   its right-hand neighbour (so 9 columns of pix2 are read per row).
   avg2() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int total = 0;
    int rows_left, x;

    for (rows_left = 8; rows_left > 0; rows_left--) {
        for (x = 0; x < 8; x++)
            total += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
815
816
/* Sum of absolute differences between an 8x8 block at pix1 and a
   vertically combined version of the block at pix2: every pix1 sample is
   compared against avg2() of the pix2 sample in the same position and the
   sample one line_size further on (so 9 rows of pix2 are read).
   avg2() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* row of pix2 directly following the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 = below;
    }
    return sad;
}
837
838
/* Sum of absolute differences between an 8x8 block at pix1 and a
   2x2-combined version of the block at pix2: every pix1 sample is compared
   against avg4() of the four pix2 samples at columns x and x+1 of the
   current row and of the row one line_size further on (so 9x9 samples of
   pix2 are read). avg4() is defined outside this function.
   Both blocks advance by line_size from one row to the next. */
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sad = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* row of pix2 directly following the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sad += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 = below;
    }
    return sad;
}
859
860 e0eac44e Fabrice Bellard
/* permute the block so that it corresponds to the MMX idct
   order */
862 d962f6fd Arpi
#ifdef SIMPLE_IDCT
863 5a240838 Michael Niedermayer
 /* general permutation, but perhaps slightly slower */
864 d962f6fd Arpi
/* Reorder the 64 entries of block in place: the entry at index i moves
   to index block_permute_op(i). A scratch copy is used so that entries
   are not overwritten before they have been moved. */
void block_permute(INT16 *block)
{
    INT16 permuted[64];
    int src, dst;

    /* scatter every entry to its permuted position */
    for (src = 0; src < 64; src++) {
        dst = block_permute_op(src);
        permuted[dst] = block[src];
    }

    /* copy the reordered entries back over the input */
    for (dst = 0; dst < 64; dst++) {
        block[dst] = permuted[dst];
    }
}
873
#else
874
875 e0eac44e Fabrice Bellard
/* Reorder a 64-entry block in place, one group of 8 entries at a time.
   Within each group, entries 0 and 7 stay put and the others become
       new[1]=old[2]  new[2]=old[4]  new[3]=old[6]
       new[4]=old[1]  new[5]=old[3]  new[6]=old[5]
   which is done here as two independent 3-cycles. */
void block_permute(INT16 *block)
{
    INT16 *row;
    INT16 *end = block + 64;

    for (row = block; row != end; row += 8) {
        const INT16 old1 = row[1];
        const INT16 old3 = row[3];

        /* cycle 1 <- 2 <- 4 <- 1 */
        row[1] = row[2];
        row[2] = row[4];
        row[4] = old1;

        /* cycle 3 <- 6 <- 5 <- 3 */
        row[3] = row[6];
        row[6] = row[5];
        row[5] = old3;
    }
}
896 d962f6fd Arpi
#endif
897 e0eac44e Fabrice Bellard
898 649c00c9 Michael Niedermayer
/* Zero 6*64 consecutive DCTELEM values starting at blocks
   (presumably six blocks of 64 coefficients each - confirm with callers). */
void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
902
903 e0eac44e Fabrice Bellard
void dsputil_init(void)
904
{
905
    int i, j;
906 c34270f5 Fabrice Bellard
    int use_permuted_idct;
907 e0eac44e Fabrice Bellard
908 de6d9b64 Fabrice Bellard
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
909
    for(i=0;i<MAX_NEG_CROP;i++) {
910
        cropTbl[i] = 0;
911
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
912
    }
913
914
    for(i=0;i<512;i++) {
915
        squareTbl[i] = (i - 256) * (i - 256);
916
    }
917
918 d962f6fd Arpi
#ifdef SIMPLE_IDCT
919
    ff_idct = simple_idct;
920
#else
921 4af7bcc1 Arpi
    ff_idct = j_rev_dct;
922 d962f6fd Arpi
#endif
923 de6d9b64 Fabrice Bellard
    get_pixels = get_pixels_c;
924 9dbcbd92 Michael Niedermayer
    diff_pixels = diff_pixels_c;
925 de6d9b64 Fabrice Bellard
    put_pixels_clamped = put_pixels_clamped_c;
926
    add_pixels_clamped = add_pixels_clamped_c;
927 44eb4951 Michael Niedermayer
    gmc1= gmc1_c;
928 649c00c9 Michael Niedermayer
    clear_blocks= clear_blocks_c;
929 de6d9b64 Fabrice Bellard
930 ba6802de Michael Niedermayer
    pix_abs16x16     = pix_abs16x16_c;
931
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
932
    pix_abs16x16_y2  = pix_abs16x16_y2_c;
933 de6d9b64 Fabrice Bellard
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
934 ba6802de Michael Niedermayer
    pix_abs8x8     = pix_abs8x8_c;
935
    pix_abs8x8_x2  = pix_abs8x8_x2_c;
936
    pix_abs8x8_y2  = pix_abs8x8_y2_c;
937
    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
938 de6d9b64 Fabrice Bellard
    av_fdct = jpeg_fdct_ifast;
939
940 c34270f5 Fabrice Bellard
    use_permuted_idct = 1;
941 e0eac44e Fabrice Bellard
942 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
943 de6d9b64 Fabrice Bellard
    dsputil_init_mmx();
944
#endif
945 3d03c0a2 Fabrice Bellard
#ifdef ARCH_ARMV4L
946
    dsputil_init_armv4l();
947
#endif
948 c34270f5 Fabrice Bellard
#ifdef HAVE_MLIB
949
    dsputil_init_mlib();
950
    use_permuted_idct = 0;
951
#endif
952 1e98dffb Nick Kurshev
#ifdef ARCH_ALPHA
953
    dsputil_init_alpha();
954
    use_permuted_idct = 0;
955
#endif
956 c34270f5 Fabrice Bellard
957 d962f6fd Arpi
#ifdef SIMPLE_IDCT
958
    if(ff_idct == simple_idct) use_permuted_idct=0;
959
#endif
960
961 5a240838 Michael Niedermayer
    if(use_permuted_idct)
962
#ifdef SIMPLE_IDCT
963
        for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
964
#else
965
        for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
966
#endif
967
    else
968
        for(i=0; i<64; i++) permutation[i]=i;
969
970 2f349de2 Michael Niedermayer
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
971
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
972
    
973 c34270f5 Fabrice Bellard
    if (use_permuted_idct) {
974
        /* permute for IDCT */
975
        for(i=0;i<64;i++) {
976
            j = zigzag_direct[i];
977
            zigzag_direct[i] = block_permute_op(j);
978
            j = ff_alternate_horizontal_scan[i];
979
            ff_alternate_horizontal_scan[i] = block_permute_op(j);
980
            j = ff_alternate_vertical_scan[i];
981
            ff_alternate_vertical_scan[i] = block_permute_op(j);
982
        }
983
        block_permute(default_intra_matrix);
984
        block_permute(default_non_intra_matrix);
985 3bf43d42 Michael Niedermayer
        block_permute(ff_mpeg4_default_intra_matrix);
986
        block_permute(ff_mpeg4_default_non_intra_matrix);
987 c34270f5 Fabrice Bellard
    }
988 badaf88e Michael Niedermayer
    
989
    build_zigzag_end();
990 de6d9b64 Fabrice Bellard
}
991 43f1708f Juanjo
992
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
993
              int orig_linesize[3], int coded_linesize,
994
              AVCodecContext *avctx)
995
{
996
    int quad, diff, x, y;
997
    UINT8 *orig, *coded;
998
    UINT32 *sq = squareTbl + 256;
999
    
1000
    quad = 0;
1001
    diff = 0;
1002
    
1003
    /* Luminance */
1004
    orig = orig_image[0];
1005
    coded = coded_image[0];
1006
    
1007
    for (y=0;y<avctx->height;y++) {
1008
        for (x=0;x<avctx->width;x++) {
1009
            diff = *(orig + x) - *(coded + x);
1010
            quad += sq[diff];
1011
        }
1012
        orig += orig_linesize[0];
1013
        coded += coded_linesize;
1014
    }
1015
   
1016
    avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1017
    
1018
    if (avctx->psnr_y) {
1019
        avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1020
        avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); 
1021
    } else
1022
        avctx->psnr_y = 99.99;
1023
}