Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ 1c2a8c7f

History | View | Annotate | Download (40.8 KB)

1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Gerard Lantau.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 *
19
 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
20
 */
21
#include <stdlib.h>
22
#include <stdio.h>
23
#include <math.h>
24
#include "avcodec.h"
25
#include "dsputil.h"
26
#include "simple_idct.h"
27

    
28
void (*ff_idct)(DCTELEM *block);
29
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30
void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
31
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
33
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
34
void (*clear_blocks)(DCTELEM *blocks);
35

    
36
op_pixels_abs_func pix_abs16x16;
37
op_pixels_abs_func pix_abs16x16_x2;
38
op_pixels_abs_func pix_abs16x16_y2;
39
op_pixels_abs_func pix_abs16x16_xy2;
40

    
41
op_pixels_abs_func pix_abs8x8;
42
op_pixels_abs_func pix_abs8x8_x2;
43
op_pixels_abs_func pix_abs8x8_y2;
44
op_pixels_abs_func pix_abs8x8_xy2;
45

    
46
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
47
UINT32 squareTbl[512];
48

    
49
extern UINT16 default_intra_matrix[64];
50
extern UINT16 default_non_intra_matrix[64];
51
extern UINT16 ff_mpeg4_default_intra_matrix[64];
52
extern UINT16 ff_mpeg4_default_non_intra_matrix[64];
53

    
54
UINT8 zigzag_direct[64] = {
55
    0, 1, 8, 16, 9, 2, 3, 10,
56
    17, 24, 32, 25, 18, 11, 4, 5,
57
    12, 19, 26, 33, 40, 48, 41, 34,
58
    27, 20, 13, 6, 7, 14, 21, 28,
59
    35, 42, 49, 56, 57, 50, 43, 36,
60
    29, 22, 15, 23, 30, 37, 44, 51,
61
    58, 59, 52, 45, 38, 31, 39, 46,
62
    53, 60, 61, 54, 47, 55, 62, 63
63
};
64

    
65
/* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer */
66
UINT16 __align8 inv_zigzag_direct16[64];
67

    
68
/* non-permuted zigzag_direct for the MMX quantizer */
69
UINT8 zigzag_direct_noperm[64];
70

    
71
UINT8 ff_alternate_horizontal_scan[64] = {
72
    0,  1,  2,  3,  8,  9, 16, 17, 
73
    10, 11,  4,  5,  6,  7, 15, 14,
74
    13, 12, 19, 18, 24, 25, 32, 33, 
75
    26, 27, 20, 21, 22, 23, 28, 29,
76
    30, 31, 34, 35, 40, 41, 48, 49, 
77
    42, 43, 36, 37, 38, 39, 44, 45,
78
    46, 47, 50, 51, 56, 57, 58, 59, 
79
    52, 53, 54, 55, 60, 61, 62, 63,
80
};
81

    
82
UINT8 ff_alternate_vertical_scan[64] = {
83
    0,  8, 16, 24,  1,  9,  2, 10, 
84
    17, 25, 32, 40, 48, 56, 57, 49,
85
    41, 33, 26, 18,  3, 11,  4, 12, 
86
    19, 27, 34, 42, 50, 58, 35, 43,
87
    51, 59, 20, 28,  5, 13,  6, 14, 
88
    21, 29, 36, 44, 52, 60, 37, 45,
89
    53, 61, 22, 30,  7, 15, 23, 31, 
90
    38, 46, 54, 62, 39, 47, 55, 63,
91
};
92

    
93
#ifdef SIMPLE_IDCT
94

    
95
/* Input permutation for the simple_idct_mmx */
96
static UINT8 simple_mmx_permutation[64]={
97
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
98
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
99
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
100
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
101
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
102
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
103
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
104
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
105
};
106
#endif
107

    
108
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
109
UINT32 inverse[256]={
110
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
111
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
112
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
113
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
114
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
115
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
116
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
117
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
118
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
119
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
120
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
121
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
122
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
123
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
124
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
125
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
126
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
127
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
128
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
129
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
130
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
131
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
132
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
133
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
134
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
135
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
136
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
137
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
138
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
139
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
140
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
141
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
142
};
143

    
144
/* used to skip zeros at the end */
145
UINT8 zigzag_end[64];
146

    
147
UINT8 permutation[64];
148
//UINT8 invPermutation[64];
149

    
150
static void build_zigzag_end()
151
{
152
    int lastIndex;
153
    int lastIndexAfterPerm=0;
154
    for(lastIndex=0; lastIndex<64; lastIndex++)
155
    {
156
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
157
            lastIndexAfterPerm= zigzag_direct[lastIndex];
158
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
159
    }
160
}
161

    
162
/*
 * Copy an 8x8 group of pixels into a coefficient block.
 *
 * block     - destination, written as 8 consecutive rows of 8 entries
 * pixels    - first source pixel of the top row
 * line_size - offset (in elements of pixels) from one source row to the next
 */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
184

    
185
/*
 * Store the element-wise difference s1 - s2 of two 8x8 pixel areas.
 *
 * block  - destination, written as 8 consecutive rows of 8 entries
 * s1, s2 - top-left pixels of the minuend and subtrahend areas
 * stride - row-to-row offset, applied to both s1 and s2
 */
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
205

    
206

    
207
/*
 * Write an 8x8 coefficient block to pixels, mapping every value through
 * cropTbl (indexed relative to MAX_NEG_CROP).
 *
 * block     - source, read as 8 consecutive rows of 8 entries
 * pixels    - first destination pixel of the top row
 * line_size - offset from one destination row to the next
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    /* offset so that negative coefficients index the front of the table */
    UINT8 *const clamp = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = clamp[block[col]];
        pixels += line_size;
        block += 8;
    }
}
230

    
231
/*
 * Add an 8x8 coefficient block to the pixels already in place, mapping
 * every sum through cropTbl (indexed relative to MAX_NEG_CROP).
 *
 * block     - source, read as 8 consecutive rows of 8 entries
 * pixels    - first pixel of the top row; updated in place
 * line_size - offset from one pixel row to the next
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    /* offset so that negative sums index the front of the table */
    UINT8 *const clamp = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = clamp[pixels[col] + block[col]];
        pixels += line_size;
        block += 8;
    }
}
254

    
255
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions and a
 * table that points at them:
 *
 *   OPNAME_pixels      source pixel used as is
 *   OPNAME_pixels_x2   avg2 of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2 of each pixel and the pixel one row below
 *   OPNAME_pixels_xy2  avg4 of the 2x2 neighbourhood
 *   OPNAME_pixels_tab  { plain, x2, y2, xy2 }
 *
 * Each function handles rows of 8 values: OP(dest, value) is applied to
 * every column, then the source advances by line_size and the destination
 * by INCR.  avg2/avg4 are looked up where PIXOP is invoked, so the current
 * definitions of those macros select the rounding behaviour.
 *
 * The row loop tests h only after running its body, so h must be >= 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                          \
                                                                                \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels,                \
                              int line_size, int h)                             \
{                                                                               \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++) {                                               \
            OP(block[x], pixels[x]);                                            \
        }                                                                       \
        pixels += line_size;                                                    \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels,             \
                                 int line_size, int h)                          \
{                                                                               \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++) {                                               \
            OP(block[x], avg2(pixels[x], pixels[x + 1]));                       \
        }                                                                       \
        pixels += line_size;                                                    \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels,             \
                                 int line_size, int h)                          \
{                                                                               \
    const UINT8 *below = pixels + line_size;                                    \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++) {                                               \
            OP(block[x], avg2(pixels[x], below[x]));                            \
        }                                                                       \
        pixels += line_size;                                                    \
        below += line_size;                                                     \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels,            \
                                  int line_size, int h)                         \
{                                                                               \
    const UINT8 *below = pixels + line_size;                                    \
    int x;                                                                      \
                                                                                \
    do {                                                                        \
        for (x = 0; x < 8; x++) {                                               \
            OP(block[x], avg4(pixels[x], pixels[x + 1], below[x], below[x + 1])); \
        }                                                                       \
        pixels += line_size;                                                    \
        below += line_size;                                                     \
        block += INCR;                                                          \
    } while (--h);                                                              \
}                                                                               \
                                                                                \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                          \
    OPNAME ## _pixels_x2,                                                       \
    OPNAME ## _pixels_y2,                                                       \
    OPNAME ## _pixels_xy2,                                                      \
};
353

    
354

    
355
/*
 * Rounding primitives: mean of two or four values, with halves rounded up.
 * Every parameter is parenthesized so that an argument containing a
 * lower-precedence operator (e.g. a shift) is evaluated as a unit.
 */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)

/*
 * Store operations passed as the OP argument of PIXOP:
 * overwrite, average into, and subtract from the destination.
 * Note that op_avg evaluates its destination argument twice.
 */
#define op_put(a, b) (a) = (b)
#define op_avg(a, b) (a) = avg2(a, b)
#define op_sub(a, b) (a) -= (b)
362

    
363
PIXOP(UINT8, put, op_put, line_size)
364
PIXOP(UINT8, avg, op_avg, line_size)
365

    
366
PIXOP(DCTELEM, sub, op_sub, 8)
367

    
368
/*
 * Non-rounding primitives: avg2 truncates, and avg4 adds 1 instead of 2
 * before shifting.  These replace the rounding versions for the PIXOP
 * instantiations that follow.  Every parameter is parenthesized so that an
 * argument containing a lower-precedence operator is evaluated as a unit.
 */
#undef avg2
#undef avg4
#define avg2(a,b) (((a)+(b))>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+1)>>2)
373

    
374
PIXOP(UINT8, put_no_rnd, op_put, line_size)
375
PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
376

    
377
/* motion estimation */
378

    
379
/*
 * Switch avg2/avg4 back to the rounding forms (halves rounded up).
 * Every parameter is parenthesized so that an argument containing a
 * lower-precedence operator is evaluated as a unit.
 */
#undef avg2
#undef avg4
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
383

    
384
/*
 * Weighted blend of each pixel with its right, lower and lower-right
 * neighbours, for h rows of 8 pixels.
 *
 * The four weights are products of x16, y16 and their complements to 16,
 * so they always sum to 256; the result is shifted right by 8 after adding
 * (128 - rounder).
 *
 * dst       - output; advanced by srcStride per row (there is no separate
 *             destination stride)
 * src       - input; each row reads 9 pixels from it and 9 from the row below
 * srcStride - row-to-row offset used for both src and dst
 * h         - number of rows
 * x16, y16  - horizontal / vertical blend position in sixteenths
 * rounder   - value subtracted from the default bias of 128
 */
static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
{
    const int wTL = (16 - x16) * (16 - y16);    /* weight of src[x]                 */
    const int wTR = x16 * (16 - y16);           /* weight of src[x + 1]             */
    const int wBL = (16 - x16) * y16;           /* weight of src[srcStride + x]     */
    const int wBR = x16 * y16;                  /* weight of src[srcStride + x + 1] */
    const int bias = 128 - rounder;
    int row, col;

    for (row = 0; row < h; row++) {
        const UINT8 *below = src + srcStride;

        for (col = 0; col < 8; col++)
            dst[col] = (wTL*src[col] + wTR*src[col + 1] + wBL*below[col] + wBR*below[col + 1] + bias) >> 8;
        dst += srcStride;
        src += srcStride;
    }
}
407

    
408
/*
 * Horizontal 8-tap filter producing 8 outputs per row from 9 input pixels
 * (src[0] .. src[8]), for h rows.
 *
 * Every output is 20*(a0+a1) - 6*(b0+b1) + 3*(c0+c1) - (d0+d1) + r, shifted
 * right by 5 and mapped through cropTbl.  Near both ends of the row the
 * outer taps reuse indices inside 0..8 rather than reading outside that
 * range; the exact index pattern is spelled out per output below.
 *
 * dst/dstStride - output and its row-to-row offset
 * src/srcStride - input and its row-to-row offset
 * h             - number of rows
 * r             - rounding term added before the shift
 */
static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
{
/* one filtered sample; the arguments are indices into the current src row */
#define QPEL_H_TAP(a0, a1, b0, b1, c0, c1, d0, d1) \
    clip[(((src[a0]+src[a1])*20 - (src[b0]+src[b1])*6 + (src[c0]+src[c1])*3 - (src[d0]+src[d1]) + r)>>5)]

    UINT8 *const clip = cropTbl + MAX_NEG_CROP;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        dst[0] = QPEL_H_TAP(0, 1,  0, 2,  1, 3,  2, 4);
        dst[1] = QPEL_H_TAP(1, 2,  0, 3,  0, 4,  1, 5);
        dst[2] = QPEL_H_TAP(2, 3,  1, 4,  0, 5,  0, 6);
        dst[3] = QPEL_H_TAP(3, 4,  2, 5,  1, 6,  0, 7);
        dst[4] = QPEL_H_TAP(4, 5,  3, 6,  2, 7,  1, 8);
        dst[5] = QPEL_H_TAP(5, 6,  4, 7,  3, 8,  2, 8);
        dst[6] = QPEL_H_TAP(6, 7,  5, 8,  4, 8,  3, 7);
        dst[7] = QPEL_H_TAP(7, 8,  6, 8,  5, 7,  4, 6);
        dst += dstStride;
        src += srcStride;
    }

#undef QPEL_H_TAP
}
426

    
427
/*
 * Vertical counterpart of qpel_h_lowpass: for each of w columns, read 9
 * vertically adjacent input pixels and write 8 vertically adjacent outputs.
 *
 * Every output is 20*(a0+a1) - 6*(b0+b1) + 3*(c0+c1) - (d0+d1) + r, shifted
 * right by 5 and mapped through cropTbl, with the same index pattern as the
 * horizontal filter.  All 9 inputs of a column are loaded before any output
 * of that column is stored.
 *
 * dst/dstStride - output and its row-to-row offset
 * src/srcStride - input and its row-to-row offset
 * w             - number of columns
 * r             - rounding term added before the shift
 */
static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
{
/* one filtered sample; the arguments are indices into the loaded column */
#define QPEL_V_TAP(a0, a1, b0, b1, c0, c1, d0, d1) \
    clip[(((col[a0]+col[a1])*20 - (col[b0]+col[b1])*6 + (col[c0]+col[c1])*3 - (col[d0]+col[d1]) + r)>>5)]

    UINT8 *const clip = cropTbl + MAX_NEG_CROP;
    int cols_left;

    for (cols_left = w; cols_left > 0; cols_left--) {
        int col[9];
        int n;

        for (n = 0; n < 9; n++)
            col[n] = src[n*srcStride];

        dst[0*dstStride] = QPEL_V_TAP(0, 1,  0, 2,  1, 3,  2, 4);
        dst[1*dstStride] = QPEL_V_TAP(1, 2,  0, 3,  0, 4,  1, 5);
        dst[2*dstStride] = QPEL_V_TAP(2, 3,  1, 4,  0, 5,  0, 6);
        dst[3*dstStride] = QPEL_V_TAP(3, 4,  2, 5,  1, 6,  0, 7);
        dst[4*dstStride] = QPEL_V_TAP(4, 5,  3, 6,  2, 7,  1, 8);
        dst[5*dstStride] = QPEL_V_TAP(5, 6,  4, 7,  3, 8,  2, 8);
        dst[6*dstStride] = QPEL_V_TAP(6, 7,  5, 8,  4, 8,  3, 7);
        dst[7*dstStride] = QPEL_V_TAP(7, 8,  6, 8,  5, 7,  4, 6);
        dst++;
        src++;
    }

#undef QPEL_V_TAP
}
454

    
455
/*
 * Copy an 8x8 block of pixels from src to dst, each with its own
 * row-to-row offset.
 */
static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = src[col];
        dst += dstStride;
        src += srcStride;
    }
}
472

    
473
/*
 * Write (src1 + src2 + r) >> 1 for every pixel of an 8x8 block.
 *
 * dst/dstStride  - output and its row-to-row offset
 * src1/srcStride - first input and its row-to-row offset
 * src2           - second input; its rows are always 8 apart
 * r              - rounding term added before the shift
 */
static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + r) >> 1;
        dst += dstStride;
        src1 += srcStride;
        src2 += 8;      /* fixed row pitch of the second input */
    }
}
491

    
492
/*
 * Write (src1 + src2 + src3 + src4 + r) >> 2 for every pixel of an 8x8
 * block.
 *
 * dst/dstStride    - output and its row-to-row offset
 * src1/srcStride   - first input and its row-to-row offset
 * src2, src3, src4 - remaining inputs; their rows are always 8 apart
 * r                - rounding term added before the shift
 */
static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + src3[col] + src4[col] + r) >> 2;
        dst += dstStride;
        src1 += srcStride;
        /* the other three inputs share a fixed row pitch of 8 */
        src2 += 8;
        src3 += 8;
        src4 += 8;
    }
}
512

    
513
/*
 * QPEL_MC(r, name) generates sixteen static functions qpel_mcXY_c<name>
 * plus the table qpel_mc<name>_tab[16], whose entry at index X + 4*Y is
 * qpel_mcXY_c<name>.
 *
 * X selects the horizontal case and Y the vertical case:
 *   0 - source block as is
 *   1 - average of the source block and the lowpass-filtered block
 *   2 - lowpass-filtered block only
 *   3 - as 1, but using the source block shifted by one pixel / one row
 * When both X and Y are non-zero, the horizontally filtered block (halfH,
 * 9 rows so it can be filtered vertically), the vertically filtered block
 * (halfV) and the block filtered in both directions (halfHV) are combined
 * with avg2_block / avg4_block.
 *
 * r is folded into the rounding terms: 16-(r) for the lowpass filters,
 * 1-(r) for two-way and 2-(r) for four-way averages.  It is parenthesized
 * so that any constant expression can be passed safely.
 *
 * The mx and my parameters are not used by any generated function.
 */
#define QPEL_MC(r, name) \
static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    put_block(dst, src, dstStride, srcStride);\
}\
\
static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 half[64];\
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-(r));\
    avg2_block(dst, src, half, dstStride, srcStride, 1-(r));\
}\
\
static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-(r));\
}\
\
static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 half[64];\
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-(r));\
    avg2_block(dst, src+1, half, dstStride, srcStride, 1-(r));\
}\
\
static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 half[64];\
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-(r));\
    avg2_block(dst, src, half, dstStride, srcStride, 1-(r));\
}\
\
static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-(r));\
}\
\
static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 half[64];\
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-(r));\
    avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-(r));\
}\
\
static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-(r));\
}\
\
static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-(r));\
}\
\
static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-(r));\
}\
\
static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-(r));\
}\
\
static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg2_block(dst, halfH, halfHV, dstStride, 8, 1-(r));\
}\
\
static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-(r));\
}\
\
static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-(r));\
}\
\
static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-(r));\
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-(r));\
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-(r));\
}\
\
static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
{\
    UINT8 halfH[72];\
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-(r));\
    qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-(r));\
}\
\
qpel_mc_func qpel_mc ## name ## _tab[16]={ \
    qpel_mc00_c ## name,                                                                   \
    qpel_mc10_c ## name,                                                                   \
    qpel_mc20_c ## name,                                                                   \
    qpel_mc30_c ## name,                                                                   \
    qpel_mc01_c ## name,                                                                   \
    qpel_mc11_c ## name,                                                                   \
    qpel_mc21_c ## name,                                                                   \
    qpel_mc31_c ## name,                                                                   \
    qpel_mc02_c ## name,                                                                   \
    qpel_mc12_c ## name,                                                                   \
    qpel_mc22_c ## name,                                                                   \
    qpel_mc32_c ## name,                                                                   \
    qpel_mc03_c ## name,                                                                   \
    qpel_mc13_c ## name,                                                                   \
    qpel_mc23_c ## name,                                                                   \
    qpel_mc33_c ## name,                                                                   \
};
656

    
657
/* Instantiate the QPEL_MC template with arguments (0, _rnd).  Judging by the
   macro body this emits the static qpel_mcXY_c_rnd functions and the
   qpel_mc_rnd_tab[16] lookup table.
   NOTE(review): assumes the macro parameters are (r, name) in that order;
   confirm against the macro header. */
QPEL_MC(0, _rnd)
658
/* Instantiate the QPEL_MC template with arguments (1, _no_rnd).  Judging by
   the macro body this emits the static qpel_mcXY_c_no_rnd functions and the
   qpel_mc_no_rnd_tab[16] lookup table.
   NOTE(review): assumes the macro parameters are (r, name) in that order;
   confirm against the macro header. */
QPEL_MC(1, _no_rnd)
659

    
660
/*
 * Sum of absolute differences between two 16x16 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block
 * line_size:  distance in bytes between two consecutive rows; the same
 *             stride is applied to both blocks
 *
 * Returns the sum over all 256 positions of |pix1 - pix2|.
 */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
687

    
688
/*
 * Sum of absolute differences between a 16x16 block and a reference block
 * that is interpolated horizontally: each reference sample is
 * avg2(pix2[x], pix2[x + 1]).
 *
 * pix1:      block being compared
 * pix2:      reference block; 17 columns are read on every row
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
715

    
716
/*
 * Sum of absolute differences between a 16x16 block and a reference block
 * that is interpolated vertically: each reference sample is avg2() of a
 * pixel and the pixel directly below it.
 *
 * pix1:      block being compared
 * pix2:      reference block; 17 rows are read
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* reference row under pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
745

    
746
/*
 * Sum of absolute differences between a 16x16 block and a reference block
 * that is interpolated in both directions: each reference sample is avg4()
 * of a 2x2 neighbourhood (pixel, right, below, below-right).
 *
 * pix1:      block being compared
 * pix2:      reference block; 17 columns of 17 rows are read
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* reference row under pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
775

    
776
/*
 * Sum of absolute differences between two 8x8 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block
 * line_size:  distance in bytes between two consecutive rows; the same
 *             stride is applied to both blocks
 *
 * Returns the sum over all 64 positions of |pix1 - pix2|.
 */
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
795

    
796
/*
 * Sum of absolute differences between an 8x8 block and a reference block
 * that is interpolated horizontally: each reference sample is
 * avg2(pix2[x], pix2[x + 1]).
 *
 * pix1:      block being compared
 * pix2:      reference block; 9 columns are read on every row
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
815

    
816
/*
 * Sum of absolute differences between an 8x8 block and a reference block
 * that is interpolated vertically: each reference sample is avg2() of a
 * pixel and the pixel directly below it.
 *
 * pix1:      block being compared
 * pix2:      reference block; 9 rows are read
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* reference row under pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
837

    
838
/*
 * Sum of absolute differences between an 8x8 block and a reference block
 * that is interpolated in both directions: each reference sample is avg4()
 * of a 2x2 neighbourhood (pixel, right, below, below-right).
 *
 * pix1:      block being compared
 * pix2:      reference block; 9 columns of 9 rows are read
 * line_size: distance in bytes between two consecutive rows of both blocks
 *
 * Returns the accumulated absolute difference.
 */
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    UINT8 *below = pix2 + line_size;   /* reference row under pix2 */
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1  += line_size;
        pix2  += line_size;
        below += line_size;
    }
    return sum;
}
859

    
860
/* Reorder the 64 coefficients of a block in place so that they match the
   coefficient order expected by the MMX IDCT. */
#ifdef SIMPLE_IDCT
/* Generic variant: works for any mapping provided by block_permute_op(),
   at the price of a temporary copy (perhaps slightly slower). */
void block_permute(INT16 *block)
{
    INT16 shuffled[64];
    int k;

    /* scatter every coefficient to its permuted position ... */
    for (k = 0; k < 64; k++)
        shuffled[block_permute_op(k)] = block[k];

    /* ... then copy the reordered block back over the input */
    for (k = 0; k < 64; k++)
        block[k] = shuffled[k];
}
#else

/* Fixed variant: within each row of 8 coefficients, entries 0 and 7 stay
   in place and entries 1..6 are rotated along the two 3-cycles
   1 <- 2 <- 4 <- 1 and 3 <- 6 <- 5 <- 3. */
void block_permute(INT16 *block)
{
    INT16 *row;
    int saved;

    for (row = block; row != block + 64; row += 8) {
        saved  = row[1];
        row[1] = row[2];
        row[2] = row[4];
        row[4] = saved;

        saved  = row[3];
        row[3] = row[6];
        row[6] = row[5];
        row[5] = saved;
    }
}
#endif
897

    
898
/*
 * Zero 6*64 consecutive DCTELEM entries starting at blocks
 * (presumably six blocks of 64 coefficients each -- confirm with callers).
 */
void clear_blocks_c(DCTELEM *blocks)
{
    const size_t nb_coeffs = 6 * 64;

    memset(blocks, 0, nb_coeffs * sizeof(*blocks));
}
902

    
903
void dsputil_init(void)
904
{
905
    int i, j;
906
    int use_permuted_idct;
907

    
908
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
909
    for(i=0;i<MAX_NEG_CROP;i++) {
910
        cropTbl[i] = 0;
911
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
912
    }
913

    
914
    for(i=0;i<512;i++) {
915
        squareTbl[i] = (i - 256) * (i - 256);
916
    }
917

    
918
#ifdef SIMPLE_IDCT
919
    ff_idct = simple_idct;
920
#else
921
    ff_idct = j_rev_dct;
922
#endif
923
    get_pixels = get_pixels_c;
924
    diff_pixels = diff_pixels_c;
925
    put_pixels_clamped = put_pixels_clamped_c;
926
    add_pixels_clamped = add_pixels_clamped_c;
927
    gmc1= gmc1_c;
928
    clear_blocks= clear_blocks_c;
929

    
930
    pix_abs16x16     = pix_abs16x16_c;
931
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
932
    pix_abs16x16_y2  = pix_abs16x16_y2_c;
933
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
934
    pix_abs8x8     = pix_abs8x8_c;
935
    pix_abs8x8_x2  = pix_abs8x8_x2_c;
936
    pix_abs8x8_y2  = pix_abs8x8_y2_c;
937
    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
938
    av_fdct = jpeg_fdct_ifast;
939

    
940
    use_permuted_idct = 1;
941

    
942
#ifdef HAVE_MMX
943
    dsputil_init_mmx();
944
#endif
945
#ifdef ARCH_ARMV4L
946
    dsputil_init_armv4l();
947
#endif
948
#ifdef HAVE_MLIB
949
    dsputil_init_mlib();
950
    use_permuted_idct = 0;
951
#endif
952
#ifdef ARCH_ALPHA
953
    dsputil_init_alpha();
954
    use_permuted_idct = 0;
955
#endif
956

    
957
#ifdef SIMPLE_IDCT
958
    if(ff_idct == simple_idct) use_permuted_idct=0;
959
#endif
960

    
961
    if(use_permuted_idct)
962
#ifdef SIMPLE_IDCT
963
        for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
964
#else
965
        for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
966
#endif
967
    else
968
        for(i=0; i<64; i++) permutation[i]=i;
969

    
970
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
971
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
972
    
973
    if (use_permuted_idct) {
974
        /* permute for IDCT */
975
        for(i=0;i<64;i++) {
976
            j = zigzag_direct[i];
977
            zigzag_direct[i] = block_permute_op(j);
978
            j = ff_alternate_horizontal_scan[i];
979
            ff_alternate_horizontal_scan[i] = block_permute_op(j);
980
            j = ff_alternate_vertical_scan[i];
981
            ff_alternate_vertical_scan[i] = block_permute_op(j);
982
        }
983
        block_permute(default_intra_matrix);
984
        block_permute(default_non_intra_matrix);
985
        block_permute(ff_mpeg4_default_intra_matrix);
986
        block_permute(ff_mpeg4_default_non_intra_matrix);
987
    }
988
    
989
    build_zigzag_end();
990
}
991

    
992
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
993
              int orig_linesize[3], int coded_linesize,
994
              AVCodecContext *avctx)
995
{
996
    int quad, diff, x, y;
997
    UINT8 *orig, *coded;
998
    UINT32 *sq = squareTbl + 256;
999
    
1000
    quad = 0;
1001
    diff = 0;
1002
    
1003
    /* Luminance */
1004
    orig = orig_image[0];
1005
    coded = coded_image[0];
1006
    
1007
    for (y=0;y<avctx->height;y++) {
1008
        for (x=0;x<avctx->width;x++) {
1009
            diff = *(orig + x) - *(coded + x);
1010
            quad += sq[diff];
1011
        }
1012
        orig += orig_linesize[0];
1013
        coded += coded_linesize;
1014
    }
1015
   
1016
    avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
1017
    
1018
    if (avctx->psnr_y) {
1019
        avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
1020
        avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); 
1021
    } else
1022
        avctx->psnr_y = 99.99;
1023
}
1024