Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ 3bf43d42

History | View | Annotate | Download (40.1 KB)

1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Gerard Lantau.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 *
19
 * gmc & q-pel support by Michael Niedermayer <michaelni@gmx.at>
20
 */
21
#include <stdlib.h>
22
#include <stdio.h>
23
#include <math.h>
24
#include "avcodec.h"
25
#include "dsputil.h"
26
#include "simple_idct.h"
27

    
28
void (*ff_idct)(DCTELEM *block);
29
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
30
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
31
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
32
void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
33
void (*clear_blocks)(DCTELEM *blocks);
34

    
35
op_pixels_abs_func pix_abs16x16;
36
op_pixels_abs_func pix_abs16x16_x2;
37
op_pixels_abs_func pix_abs16x16_y2;
38
op_pixels_abs_func pix_abs16x16_xy2;
39

    
40
op_pixels_abs_func pix_abs8x8;
41
op_pixels_abs_func pix_abs8x8_x2;
42
op_pixels_abs_func pix_abs8x8_y2;
43
op_pixels_abs_func pix_abs8x8_xy2;
44

    
45
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
46
UINT32 squareTbl[512];
47

    
48
extern UINT16 default_intra_matrix[64];
49
extern UINT16 default_non_intra_matrix[64];
50
extern UINT16 ff_mpeg4_default_intra_matrix[64];
51
extern UINT16 ff_mpeg4_default_non_intra_matrix[64];
52

    
53
/*
 * Zigzag scan order: entry i is the raster position (row * 8 + column)
 * of the i-th coefficient in scan order.
 */
UINT8 zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
63

    
64
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
65
UINT16 __align8 inv_zigzag_direct16[64];
66

    
67
/* not permutated zigzag_direct for MMX quantizer */
68
UINT8 zigzag_direct_noperm[64];
69

    
70
/*
 * Alternate horizontal scan order: entry i is the raster position
 * (row * 8 + column) of the i-th coefficient in scan order.
 */
UINT8 ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
80

    
81
/*
 * Alternate vertical scan order: entry i is the raster position
 * (row * 8 + column) of the i-th coefficient in scan order.
 */
UINT8 ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
91

    
92
/* Input permutation for the simple_idct_mmx */
93
static UINT8 simple_mmx_permutation[64]={
94
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
95
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
96
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
97
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
98
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
99
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
100
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
101
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
102
};
103

    
104
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
105
UINT32 inverse[256]={
106
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
107
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
108
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
109
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
110
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
111
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
112
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
113
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
114
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
115
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
116
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
117
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
118
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
119
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
120
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
121
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
122
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
123
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
124
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
125
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
126
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
127
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
128
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
129
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
130
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
131
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
132
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
133
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
134
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
135
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
136
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
137
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
138
};
139

    
140
/* used to skip zeros at the end */
141
UINT8 zigzag_end[64];
142

    
143
UINT8 permutation[64];
144
//UINT8 invPermutation[64];
145

    
146
static void build_zigzag_end()
147
{
148
    int lastIndex;
149
    int lastIndexAfterPerm=0;
150
    for(lastIndex=0; lastIndex<64; lastIndex++)
151
    {
152
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
153
            lastIndexAfterPerm= zigzag_direct[lastIndex];
154
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
155
    }
156
}
157

    
158
/**
 * Copy an 8x8 area of 8-bit pixels into a block of coefficients.
 *
 * @param block     destination, 64 entries stored row after row
 * @param pixels    top-left pixel of the source area
 * @param line_size distance between the starts of two source rows
 */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block  += 8;
    }
}
180

    
181
/**
 * Store an 8x8 block of coefficients as pixels, mapping every value
 * through the crop table.
 *
 * @param block     source, 64 entries stored row after row
 * @param pixels    top-left pixel of the destination area
 * @param line_size distance between the starts of two destination rows
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    /* table centred so that negative indices are valid */
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[block[col]];
        pixels += line_size;
        block  += 8;
    }
}
204

    
205
/**
 * Add an 8x8 block of coefficients to the pixels already present,
 * mapping every sum through the crop table.
 *
 * @param block     values to add, 64 entries stored row after row
 * @param pixels    top-left pixel of the area that is updated in place
 * @param line_size distance between the starts of two pixel rows
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    /* table centred so that negative indices are valid */
    UINT8 *crop = cropTbl + MAX_NEG_CROP;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = crop[pixels[col] + block[col]];
        pixels += line_size;
        block  += 8;
    }
}
228

    
229
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions that
 * process h rows of 8 pixels each, plus a public table of them:
 *
 *   OPNAME_pixels      source pixels used as they are
 *   OPNAME_pixels_x2   avg2() of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2() of each pixel and the one below it
 *   OPNAME_pixels_xy2  avg4() of the 2x2 neighbourhood
 *
 *   OPNAME_pixels_tab[4] = { plain, x2, y2, xy2 }
 *
 * BTYPE is the element type of the destination, OP(dst, value) combines a
 * computed value with a destination element, and INCR is the destination
 * step per row (it may name the line_size parameter).  avg2/avg4 are taken
 * from whatever definitions are active where PIXOP is instantiated.
 * The rows are walked with do { } while (--h), so h is expected to be >= 1.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                                        \
                                                                                              \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)        \
{                                                                                             \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(block[i], pixels[i]);                                                          \
        pixels += line_size;                                                                  \
        block += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
{                                                                                             \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(block[i], avg2(pixels[i], pixels[i + 1]));                                     \
        pixels += line_size;                                                                  \
        block += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
{                                                                                             \
    const UINT8 *below = pixels + line_size;                                                  \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(block[i], avg2(pixels[i], below[i]));                                          \
        pixels += line_size;                                                                  \
        below += line_size;                                                                   \
        block += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
{                                                                                             \
    const UINT8 *below = pixels + line_size;                                                  \
    int i;                                                                                    \
                                                                                              \
    do {                                                                                      \
        for (i = 0; i < 8; i++)                                                               \
            OP(block[i], avg4(pixels[i], pixels[i + 1], below[i], below[i + 1]));             \
        pixels += line_size;                                                                  \
        below += line_size;                                                                   \
        block += INCR;                                                                        \
    } while (--h);                                                                            \
}                                                                                             \
                                                                                              \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                                        \
    OPNAME ## _pixels_x2,                                                                     \
    OPNAME ## _pixels_y2,                                                                     \
    OPNAME ## _pixels_xy2,                                                                    \
};
327

    
328

    
329
/* rounding primitives */
330
#define avg2(a,b) ((a+b+1)>>1)
331
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
332

    
333
#define op_put(a, b) a = b
334
#define op_avg(a, b) a = avg2(a, b)
335
#define op_sub(a, b) a -= b
336

    
337
PIXOP(UINT8, put, op_put, line_size)
338
PIXOP(UINT8, avg, op_avg, line_size)
339

    
340
PIXOP(DCTELEM, sub, op_sub, 8)
341

    
342
/* not rounding primitives */
343
#undef avg2
344
#undef avg4
345
#define avg2(a,b) ((a+b)>>1)
346
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
347

    
348
PIXOP(UINT8, put_no_rnd, op_put, line_size)
349
PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
350

    
351
/* motion estimation */
352

    
353
#undef avg2
354
#undef avg4
355
#define avg2(a,b) ((a+b+1)>>1)
356
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
357

    
358
/**
 * Bilinear interpolation of an 8-pixel wide, h rows high area at a
 * 1/16-pel offset.
 *
 * The four weights are products of (16 - x16), x16, (16 - y16) and y16 and
 * always add up to 256, hence the final shift by 8.
 *
 * @param dst       destination; advanced by srcStride per row
 * @param src       source; columns 0..8 of rows 0..h are read
 * @param srcStride row distance, used for both src and dst
 * @param h         number of rows to produce
 * @param x16       horizontal offset in 1/16 pixel
 * @param y16       vertical offset in 1/16 pixel
 * @param rounder   subtracted from the default rounding bias of 128
 */
static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);   /* top-left     */
    const int w_tr = x16 * (16 - y16);          /* top-right    */
    const int w_bl = (16 - x16) * y16;          /* bottom-left  */
    const int w_br = x16 * y16;                 /* bottom-right */
    const int bias = 128 - rounder;
    int row, col;

    for (row = 0; row < h; row++) {
        const UINT8 *below = src + srcStride;

        for (col = 0; col < 8; col++)
            dst[col] = (w_tl * src[col]   + w_tr * src[col + 1] +
                        w_bl * below[col] + w_br * below[col + 1] + bias) >> 8;
        dst += srcStride;
        src += srcStride;
    }
}
381

    
382
/*
 * One output sample of the horizontal filter: four pairs of source
 * positions weighted 20, -6, 3 and -1, plus the rounding term r, scaled
 * down by 32 and passed through the crop table.  Uses src, r and cm from
 * the enclosing function.
 */
#define QPEL_H_TAP(a0, a1, b0, b1, c0, c1, d0, d1)                      \
    cm[(((src[a0] + src[a1]) * 20 - (src[b0] + src[b1]) * 6 +          \
         (src[c0] + src[c1]) * 3  - (src[d0] + src[d1]) + r) >> 5)]

/**
 * Horizontal half-sample lowpass filter for 8-pixel wide rows.
 *
 * Each row reads src[0..8] and writes dst[0..7].  Taps that would fall
 * outside src[0..8] are replaced by positions inside that range, as
 * spelled out in the index lists below.
 *
 * @param dst       destination
 * @param src       source
 * @param dstStride row distance in dst
 * @param srcStride row distance in src
 * @param h         number of rows
 * @param r         rounding term added before the shift by 5
 */
static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
{
    UINT8 *cm = cropTbl + MAX_NEG_CROP;
    int row;

    for (row = 0; row < h; row++) {
        dst[0] = QPEL_H_TAP(0, 1,  0, 2,  1, 3,  2, 4);
        dst[1] = QPEL_H_TAP(1, 2,  0, 3,  0, 4,  1, 5);
        dst[2] = QPEL_H_TAP(2, 3,  1, 4,  0, 5,  0, 6);
        dst[3] = QPEL_H_TAP(3, 4,  2, 5,  1, 6,  0, 7);
        dst[4] = QPEL_H_TAP(4, 5,  3, 6,  2, 7,  1, 8);
        dst[5] = QPEL_H_TAP(5, 6,  4, 7,  3, 8,  2, 8);
        dst[6] = QPEL_H_TAP(6, 7,  5, 8,  4, 8,  3, 7);
        dst[7] = QPEL_H_TAP(7, 8,  6, 8,  5, 7,  4, 6);
        dst += dstStride;
        src += srcStride;
    }
}

#undef QPEL_H_TAP
400

    
401
/**
 * Vertical half-sample lowpass filter for 8-pixel high columns.
 *
 * Each column reads nine source rows and writes eight destination rows.
 * The pairs of rows are weighted 20, -6, 3 and -1; taps that would fall
 * outside rows 0..8 are replaced by rows inside that range, as spelled
 * out in the expressions below.
 *
 * @param dst       destination
 * @param src       source
 * @param dstStride row distance in dst
 * @param srcStride row distance in src
 * @param w         number of columns
 * @param r         rounding term added before the shift by 5
 */
static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
{
    UINT8 *cm = cropTbl + MAX_NEG_CROP;
    int col, k;

    for (col = 0; col < w; col++) {
        int s[9];

        /* fetch the whole column before anything is written */
        for (k = 0; k < 9; k++)
            s[k] = src[k * srcStride];

        dst[0 * dstStride] = cm[(((s[0] + s[1]) * 20 - (s[0] + s[2]) * 6 + (s[1] + s[3]) * 3 - (s[2] + s[4]) + r) >> 5)];
        dst[1 * dstStride] = cm[(((s[1] + s[2]) * 20 - (s[0] + s[3]) * 6 + (s[0] + s[4]) * 3 - (s[1] + s[5]) + r) >> 5)];
        dst[2 * dstStride] = cm[(((s[2] + s[3]) * 20 - (s[1] + s[4]) * 6 + (s[0] + s[5]) * 3 - (s[0] + s[6]) + r) >> 5)];
        dst[3 * dstStride] = cm[(((s[3] + s[4]) * 20 - (s[2] + s[5]) * 6 + (s[1] + s[6]) * 3 - (s[0] + s[7]) + r) >> 5)];
        dst[4 * dstStride] = cm[(((s[4] + s[5]) * 20 - (s[3] + s[6]) * 6 + (s[2] + s[7]) * 3 - (s[1] + s[8]) + r) >> 5)];
        dst[5 * dstStride] = cm[(((s[5] + s[6]) * 20 - (s[4] + s[7]) * 6 + (s[3] + s[8]) * 3 - (s[2] + s[8]) + r) >> 5)];
        dst[6 * dstStride] = cm[(((s[6] + s[7]) * 20 - (s[5] + s[8]) * 6 + (s[4] + s[8]) * 3 - (s[3] + s[7]) + r) >> 5)];
        dst[7 * dstStride] = cm[(((s[7] + s[8]) * 20 - (s[6] + s[8]) * 6 + (s[5] + s[7]) * 3 - (s[4] + s[6]) + r) >> 5)];
        dst++;
        src++;
    }
}
428

    
429
/**
 * Copy an 8x8 block of pixels.
 *
 * @param dst       destination
 * @param src       source
 * @param dstStride row distance in dst
 * @param srcStride row distance in src
 */
static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = src[col];
        dst += dstStride;
        src += srcStride;
    }
}
446

    
447
/**
 * Average two 8x8 blocks: dst = (src1 + src2 + r) >> 1.
 *
 * @param dst       destination
 * @param src1      first source, rows srcStride apart
 * @param src2      second source, stored as a packed 8x8 buffer (rows 8 apart)
 * @param dstStride row distance in dst
 * @param srcStride row distance in src1
 * @param r         rounding term added before the shift
 */
static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + r) >> 1;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
    }
}
465

    
466
/**
 * Average four 8x8 blocks: dst = (src1 + src2 + src3 + src4 + r) >> 2.
 *
 * @param dst       destination
 * @param src1      first source, rows srcStride apart
 * @param src2      second source, packed 8x8 buffer (rows 8 apart)
 * @param src3      third source, packed 8x8 buffer (rows 8 apart)
 * @param src4      fourth source, packed 8x8 buffer (rows 8 apart)
 * @param dstStride row distance in dst
 * @param srcStride row distance in src1
 * @param r         rounding term added before the shift
 */
static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (src1[col] + src2[col] + src3[col] + src4[col] + r) >> 2;
        dst  += dstStride;
        src1 += srcStride;
        src2 += 8;
        src3 += 8;
        src4 += 8;
    }
}
486

    
487
#define QPEL_MC(r, name) \
488
static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
489
{\
490
    put_block(dst, src, dstStride, srcStride);\
491
}\
492
\
493
static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
494
{\
495
    UINT8 half[64];\
496
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
497
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
498
}\
499
\
500
static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
501
{\
502
    qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
503
}\
504
\
505
static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
506
{\
507
    UINT8 half[64];\
508
    qpel_h_lowpass(half, src, 8, srcStride, 8, 16-r);\
509
    avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
510
}\
511
\
512
static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
513
{\
514
    UINT8 half[64];\
515
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
516
    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
517
}\
518
\
519
static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
520
{\
521
    qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 16-r);\
522
}\
523
\
524
static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
525
{\
526
    UINT8 half[64];\
527
    qpel_v_lowpass(half, src, 8, srcStride, 8, 16-r);\
528
    avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
529
}\
530
static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
531
{\
532
    UINT8 halfH[72];\
533
    UINT8 halfV[64];\
534
    UINT8 halfHV[64];\
535
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
536
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
537
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
538
    avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
539
}\
540
static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
541
{\
542
    UINT8 halfH[72];\
543
    UINT8 halfV[64];\
544
    UINT8 halfHV[64];\
545
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
546
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
547
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
548
    avg4_block(dst, src+1, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
549
}\
550
static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
551
{\
552
    UINT8 halfH[72];\
553
    UINT8 halfV[64];\
554
    UINT8 halfHV[64];\
555
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
556
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
557
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
558
    avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
559
}\
560
static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
561
{\
562
    UINT8 halfH[72];\
563
    UINT8 halfV[64];\
564
    UINT8 halfHV[64];\
565
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
566
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
567
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
568
    avg4_block(dst, src+srcStride+1, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
569
}\
570
static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
571
{\
572
    UINT8 halfH[72];\
573
    UINT8 halfHV[64];\
574
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
575
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
576
    avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
577
}\
578
static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
579
{\
580
    UINT8 halfH[72];\
581
    UINT8 halfHV[64];\
582
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
583
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
584
    avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
585
}\
586
static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
587
{\
588
    UINT8 halfH[72];\
589
    UINT8 halfV[64];\
590
    UINT8 halfHV[64];\
591
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
592
    qpel_v_lowpass(halfV, src, 8, srcStride, 8, 16-r);\
593
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
594
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
595
}\
596
static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
597
{\
598
    UINT8 halfH[72];\
599
    UINT8 halfV[64];\
600
    UINT8 halfHV[64];\
601
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
602
    qpel_v_lowpass(halfV, src+1, 8, srcStride, 8, 16-r);\
603
    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 16-r);\
604
    avg2_block(dst, halfV, halfHV, dstStride, 8, 1-r);\
605
}\
606
static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
607
{\
608
    UINT8 halfH[72];\
609
    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 16-r);\
610
    qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 16-r);\
611
}\
612
qpel_mc_func qpel_mc ## name ## _tab[16]={ \
613
    qpel_mc00_c ## name,                                                                   \
614
    qpel_mc10_c ## name,                                                                   \
615
    qpel_mc20_c ## name,                                                                   \
616
    qpel_mc30_c ## name,                                                                   \
617
    qpel_mc01_c ## name,                                                                   \
618
    qpel_mc11_c ## name,                                                                   \
619
    qpel_mc21_c ## name,                                                                   \
620
    qpel_mc31_c ## name,                                                                   \
621
    qpel_mc02_c ## name,                                                                   \
622
    qpel_mc12_c ## name,                                                                   \
623
    qpel_mc22_c ## name,                                                                   \
624
    qpel_mc32_c ## name,                                                                   \
625
    qpel_mc03_c ## name,                                                                   \
626
    qpel_mc13_c ## name,                                                                   \
627
    qpel_mc23_c ## name,                                                                   \
628
    qpel_mc33_c ## name,                                                                   \
629
};
630

    
631
/* Instantiate the qpel_mc*_c_rnd functions and the qpel_mc_rnd_tab[16]
   dispatch table; the macro argument r is 0 for this variant. */
QPEL_MC(0, _rnd)
632
/* Instantiate the qpel_mc*_c_no_rnd functions and the qpel_mc_no_rnd_tab[16]
   dispatch table; the macro argument r is 1 for this variant. */
QPEL_MC(1, _no_rnd)
633

    
634
/*
 * Sum of absolute differences (SAD) between two 16x16 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  distance, in elements, between the starts of consecutive rows;
 *             the same stride is applied to both buffers.
 *
 * Returns the accumulated sum of |pix1 - pix2| over all 256 positions.
 */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
661

    
662
/*
 * SAD between a 16x16 block at pix1 and the reference at pix2 where each
 * reference sample is avg2() of a pixel and its right-hand neighbour
 * (avg2 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that each row reads pix2[0..16], i.e. one pixel past the 16-wide block.
 */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
689

    
690
/*
 * SAD between a 16x16 block at pix1 and the reference at pix2 where each
 * reference sample is avg2() of a pixel and the pixel directly below it
 * (avg2 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that 17 reference rows are read: rows 0..16 starting at pix2.
 */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        /* the reference row immediately below the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 = below;
    }
    return sum;
}
719

    
720
/*
 * SAD between a 16x16 block at pix1 and the reference at pix2 where each
 * reference sample is avg4() of the 2x2 neighbourhood whose top-left corner
 * is the current pixel (avg4 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that a 17x17 reference area is read starting at pix2.
 */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        /* the reference row immediately below the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 = below;
    }
    return sum;
}
749

    
750
/*
 * Sum of absolute differences (SAD) between two 8x8 pixel blocks.
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  distance, in elements, between the starts of consecutive rows;
 *             the same stride is applied to both buffers.
 *
 * Returns the accumulated sum of |pix1 - pix2| over all 64 positions.
 */
int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
769

    
770
/*
 * SAD between an 8x8 block at pix1 and the reference at pix2 where each
 * reference sample is avg2() of a pixel and its right-hand neighbour
 * (avg2 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that each row reads pix2[0..8], i.e. one pixel past the 8-wide block.
 */
int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
789

    
790
/*
 * SAD between an 8x8 block at pix1 and the reference at pix2 where each
 * reference sample is avg2() of a pixel and the pixel directly below it
 * (avg2 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that 9 reference rows are read: rows 0..8 starting at pix2.
 */
int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* the reference row immediately below the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 = below;
    }
    return sum;
}
811

    
812
/*
 * SAD between an 8x8 block at pix1 and the reference at pix2 where each
 * reference sample is avg4() of the 2x2 neighbourhood whose top-left corner
 * is the current pixel (avg4 is defined elsewhere in this file).
 *
 * pix1, pix2: top-left pixel of each block.
 * line_size:  row stride, in elements, shared by both buffers.
 *
 * Note that a 9x9 reference area is read starting at pix2.
 */
int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        /* the reference row immediately below the current one */
        UINT8 *below = pix2 + line_size;

        for (col = 0; col < 8; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1], below[col], below[col + 1]));
        pix1 += line_size;
        pix2 = below;
    }
    return sum;
}
833

    
834
/* Permute the block so that its coefficient order corresponds to the order
   used by the MMX IDCT. */
836
#ifdef SIMPLE_IDCT
837
 /* general permutation, but perhaps slightly slower */
838
/*
 * Reorder the 64 coefficients of block in place: the element at index i is
 * moved to index block_permute_op(i). A scratch copy is used so that the
 * mapping may be an arbitrary permutation.
 */
void block_permute(INT16 *block)
{
    INT16 scratch[64];
    int idx;

    /* scatter every coefficient to its permuted position */
    for (idx = 0; idx < 64; idx++) {
        scratch[ block_permute_op(idx) ] = block[idx];
    }

    /* copy the permuted coefficients back over the input */
    for (idx = 0; idx < 64; idx++) {
        block[idx] = scratch[idx];
    }
}
847
#else
848

    
849
/*
 * Reorder the 64 coefficients of block in place, one row of 8 at a time.
 * Within each row, columns 0 and 7 stay put and the rest are rotated as two
 * 3-cycles: new[1]=old[2], new[2]=old[4], new[4]=old[1] and
 *           new[3]=old[6], new[6]=old[5], new[5]=old[3].
 */
void block_permute(INT16 *block)
{
    INT16 *row;
    INT16 *end = block + 64;

    for (row = block; row < end; row += 8) {
        INT16 saved1 = row[1];
        INT16 saved3 = row[3];

        /* cycle over columns 1 <- 2 <- 4 <- 1 */
        row[1] = row[2];
        row[2] = row[4];
        row[4] = saved1;

        /* cycle over columns 3 <- 6 <- 5 <- 3 */
        row[3] = row[6];
        row[6] = row[5];
        row[5] = saved3;
    }
}
870
#endif
871

    
872
/* Zero 6 consecutive blocks of 64 coefficients each, starting at blocks. */
void clear_blocks_c(DCTELEM *blocks)
{
    memset(blocks, 0, sizeof(*blocks) * 6 * 64);
}
876

    
877
void dsputil_init(void)
878
{
879
    int i, j;
880
    int use_permuted_idct;
881

    
882
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
883
    for(i=0;i<MAX_NEG_CROP;i++) {
884
        cropTbl[i] = 0;
885
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
886
    }
887

    
888
    for(i=0;i<512;i++) {
889
        squareTbl[i] = (i - 256) * (i - 256);
890
    }
891

    
892
#ifdef SIMPLE_IDCT
893
    ff_idct = simple_idct;
894
#else
895
    ff_idct = j_rev_dct;
896
#endif
897
    get_pixels = get_pixels_c;
898
    put_pixels_clamped = put_pixels_clamped_c;
899
    add_pixels_clamped = add_pixels_clamped_c;
900
    gmc1= gmc1_c;
901
    clear_blocks= clear_blocks_c;
902

    
903
    pix_abs16x16     = pix_abs16x16_c;
904
    pix_abs16x16_x2  = pix_abs16x16_x2_c;
905
    pix_abs16x16_y2  = pix_abs16x16_y2_c;
906
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
907
    pix_abs8x8     = pix_abs8x8_c;
908
    pix_abs8x8_x2  = pix_abs8x8_x2_c;
909
    pix_abs8x8_y2  = pix_abs8x8_y2_c;
910
    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
911
    av_fdct = jpeg_fdct_ifast;
912

    
913
    use_permuted_idct = 1;
914

    
915
#ifdef HAVE_MMX
916
    dsputil_init_mmx();
917
#endif
918
#ifdef ARCH_ARMV4L
919
    dsputil_init_armv4l();
920
#endif
921
#ifdef HAVE_MLIB
922
    dsputil_init_mlib();
923
    use_permuted_idct = 0;
924
#endif
925
#ifdef ARCH_ALPHA
926
    dsputil_init_alpha();
927
    use_permuted_idct = 0;
928
#endif
929

    
930
#ifdef SIMPLE_IDCT
931
    if(ff_idct == simple_idct) use_permuted_idct=0;
932
#endif
933

    
934
    if(use_permuted_idct)
935
#ifdef SIMPLE_IDCT
936
        for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
937
#else
938
        for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
939
#endif
940
    else
941
        for(i=0; i<64; i++) permutation[i]=i;
942

    
943
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
944
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
945
    
946
    if (use_permuted_idct) {
947
        /* permute for IDCT */
948
        for(i=0;i<64;i++) {
949
            j = zigzag_direct[i];
950
            zigzag_direct[i] = block_permute_op(j);
951
            j = ff_alternate_horizontal_scan[i];
952
            ff_alternate_horizontal_scan[i] = block_permute_op(j);
953
            j = ff_alternate_vertical_scan[i];
954
            ff_alternate_vertical_scan[i] = block_permute_op(j);
955
        }
956
        block_permute(default_intra_matrix);
957
        block_permute(default_non_intra_matrix);
958
        block_permute(ff_mpeg4_default_intra_matrix);
959
        block_permute(ff_mpeg4_default_non_intra_matrix);
960
    }
961
    
962
    build_zigzag_end();
963
}
964

    
965
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
966
              int orig_linesize[3], int coded_linesize,
967
              AVCodecContext *avctx)
968
{
969
    int quad, diff, x, y;
970
    UINT8 *orig, *coded;
971
    UINT32 *sq = squareTbl + 256;
972
    
973
    quad = 0;
974
    diff = 0;
975
    
976
    /* Luminance */
977
    orig = orig_image[0];
978
    coded = coded_image[0];
979
    
980
    for (y=0;y<avctx->height;y++) {
981
        for (x=0;x<avctx->width;x++) {
982
            diff = *(orig + x) - *(coded + x);
983
            quad += sq[diff];
984
        }
985
        orig += orig_linesize[0];
986
        coded += coded_linesize;
987
    }
988
   
989
    avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
990
    
991
    if (avctx->psnr_y) {
992
        avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
993
        avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y); 
994
    } else
995
        avctx->psnr_y = 99.99;
996
}
997