Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ 2f349de2

History | View | Annotate | Download (24.4 KB)

1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Gerard Lantau.
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19
#include <stdlib.h>
20
#include <stdio.h>
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "simple_idct.h"
24

    
25
/*
 * Run-time selected DSP entry points.  dsputil_init() installs the C
 * implementations from this file (or the configured IDCT) and then calls
 * whichever architecture-specific init routines are compiled in.
 */
void (*ff_idct)(DCTELEM *block);
26
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
27
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
28
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
29

    
30
/* 16-pixel-wide sum-of-absolute-differences functions (plain, and against
   x / y / xy half-sample interpolated references); set in dsputil_init() */
op_pixels_abs_func pix_abs16x16;
31
op_pixels_abs_func pix_abs16x16_x2;
32
op_pixels_abs_func pix_abs16x16_y2;
33
op_pixels_abs_func pix_abs16x16_xy2;
34

    
35
/* clamping table filled by dsputil_init(): cropTbl[MAX_NEG_CROP + v] is v
   limited to 0..255 for -MAX_NEG_CROP <= v < 256 + MAX_NEG_CROP */
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
36
/* squareTbl[i] == (i - 256) * (i - 256); filled by dsputil_init() */
UINT32 squareTbl[512];
37

    
38
/* defined in another file; dsputil_init() runs block_permute() on both
   when a permuted IDCT is in use */
extern UINT16 default_intra_matrix[64];
39
extern UINT16 default_non_intra_matrix[64];
40

    
41
/*
 * Zigzag scan order of an 8x8 block: entry i is the block index of the i-th
 * coefficient in scan order.  Not const on purpose: dsputil_init() rewrites
 * every entry through block_permute_op() when a permuted IDCT is in use
 * (the untouched order is kept in zigzag_direct_noperm).
 */
UINT8 zigzag_direct[64] = {
42
    0, 1, 8, 16, 9, 2, 3, 10,
43
    17, 24, 32, 25, 18, 11, 4, 5,
44
    12, 19, 26, 33, 40, 48, 41, 34,
45
    27, 20, 13, 6, 7, 14, 21, 28,
46
    35, 42, 49, 56, 57, 50, 43, 36,
47
    29, 22, 15, 23, 30, 37, 44, 51,
48
    58, 59, 52, 45, 38, 31, 39, 46,
49
    53, 60, 61, 54, 47, 55, 62, 63
50
};
51

    
52
/* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer;
   filled by dsputil_init() before zigzag_direct is permuted */
UINT16 __align8 inv_zigzag_direct16[64];
54

    
55
/* non-permuted copy of zigzag_direct for the MMX quantizer;
   taken by dsputil_init() before zigzag_direct is permuted */
UINT8 zigzag_direct_noperm[64];
57

    
58
/*
 * Alternate (horizontal) scan order; entry i is the block index of the i-th
 * coefficient in scan order.  Like zigzag_direct, the entries are rewritten
 * in place by dsputil_init() when a permuted IDCT is in use.
 */
UINT8 ff_alternate_horizontal_scan[64] = {
59
    0,  1,  2,  3,  8,  9, 16, 17, 
60
    10, 11,  4,  5,  6,  7, 15, 14,
61
    13, 12, 19, 18, 24, 25, 32, 33, 
62
    26, 27, 20, 21, 22, 23, 28, 29,
63
    30, 31, 34, 35, 40, 41, 48, 49, 
64
    42, 43, 36, 37, 38, 39, 44, 45,
65
    46, 47, 50, 51, 56, 57, 58, 59, 
66
    52, 53, 54, 55, 60, 61, 62, 63,
67
};
68

    
69
/*
 * Alternate (vertical) scan order; entry i is the block index of the i-th
 * coefficient in scan order.  Like zigzag_direct, the entries are rewritten
 * in place by dsputil_init() when a permuted IDCT is in use.
 */
UINT8 ff_alternate_vertical_scan[64] = {
70
    0,  8, 16, 24,  1,  9,  2, 10, 
71
    17, 25, 32, 40, 48, 56, 57, 49,
72
    41, 33, 26, 18,  3, 11,  4, 12, 
73
    19, 27, 34, 42, 50, 58, 35, 43,
74
    51, 59, 20, 28,  5, 13,  6, 14, 
75
    21, 29, 36, 44, 52, 60, 37, 45,
76
    53, 61, 22, 30,  7, 15, 23, 31, 
77
    38, 46, 54, 62, 39, 47, 55, 63,
78
};
79

    
80
/* Input permutation for the simple_idct_mmx.  dsputil_init() copies this
   table into permutation[] when SIMPLE_IDCT is defined and a permuted IDCT
   ends up being used; in this file it is not referenced otherwise. */
81
static UINT8 simple_mmx_permutation[64]={
82
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
83
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
84
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
85
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
86
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
87
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
88
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
89
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
90
};
91

    
92
/* Fixed-point reciprocal table, used to replace a division by a multiply
   and a shift: (a * inverse[b]) >> 32 == a / b for all 0 <= a <= 65536 and
   2 <= b <= 255.  The product does not fit in 32 bits, so the multiply needs
   a 64-bit intermediate.  Entries 0 and 1 are outside that guarantee. */
93
UINT32 inverse[256]={
94
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
95
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
96
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
97
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
98
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
99
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
100
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
101
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
102
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
103
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
104
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
105
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
106
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
107
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
108
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
109
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
110
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
111
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
112
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
113
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
114
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
115
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
116
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
117
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
118
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
119
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
120
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
121
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
122
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
123
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
124
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
125
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
126
};
127

    
128
/* used to skip zeros at the end; filled by build_zigzag_end(), where entry i
   is 1 + the largest zigzag_direct[] value among scan positions 0..i */
129
UINT8 zigzag_end[64];
130

    
131
/* coefficient permutation chosen by dsputil_init(): the identity unless a
   permuted IDCT is in use */
UINT8 permutation[64];
132
//UINT8 invPermutation[64];
133

    
134
static void build_zigzag_end()
135
{
136
    int lastIndex;
137
    int lastIndexAfterPerm=0;
138
    for(lastIndex=0; lastIndex<64; lastIndex++)
139
    {
140
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
141
            lastIndexAfterPerm= zigzag_direct[lastIndex];
142
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1;
143
    }
144
}
145

    
146
/*
 * Copy an 8x8 area of 8-bit pixels into a DCTELEM block.
 *
 * block     destination, 64 contiguous entries (8 per row)
 * pixels    top-left source pixel
 * line_size distance between two source rows, in pixels
 */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
168

    
169
/*
 * Store an 8x8 DCTELEM block as 8-bit pixels, clamping each value through
 * cropTbl.
 *
 * block     source, 64 contiguous entries (8 per row)
 * pixels    top-left destination pixel
 * line_size distance between two destination rows, in pixels
 *
 * Each coefficient indexes cropTbl directly (offset by MAX_NEG_CROP), so it
 * must lie within -MAX_NEG_CROP .. 255 + MAX_NEG_CROP; no range check is
 * done here.
 */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const UINT8 *clamp = cropTbl + MAX_NEG_CROP;
    int row, col;

    /* write the clamped pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = clamp[block[col]];
        pixels += line_size;
        block += 8;
    }
}
192

    
193
/*
 * Add an 8x8 DCTELEM block to the 8-bit pixels already stored at 'pixels',
 * clamping each sum through cropTbl.
 *
 * block     residual to add, 64 contiguous entries (8 per row)
 * pixels    top-left pixel of the area that is updated in place
 * line_size distance between two pixel rows, in pixels
 *
 * Each sum pixel + coefficient indexes cropTbl directly (offset by
 * MAX_NEG_CROP), so it must lie within -MAX_NEG_CROP .. 255 + MAX_NEG_CROP;
 * no range check is done here.
 */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    const UINT8 *clamp = cropTbl + MAX_NEG_CROP;
    int row, col;

    /* update the pixels in place */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = clamp[pixels[col] + block[col]];
        pixels += line_size;
        block += 8;
    }
}
216

    
217
/*
 * PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions working on
 * rows of 8 pixels, plus the public table OPNAME_pixels_tab[4] that holds
 * them in this order:
 *
 *   OPNAME_pixels      source pixels used as they are
 *   OPNAME_pixels_x2   avg2() of each pixel and its right neighbour
 *   OPNAME_pixels_y2   avg2() of each pixel and the one a row below
 *   OPNAME_pixels_xy2  avg4() of each pixel, its right neighbour and the two
 *                      pixels below them
 *
 * BTYPE is the destination element type, OP(dst, value) combines a value into
 * a destination element, and INCR is the destination row stride (it may name
 * the line_size parameter).  avg2()/avg4() are taken as defined at the point
 * where PIXOP is instantiated.
 *
 * The row loop is do { } while (--h), so h must be at least 1.  The x2 and
 * xy2 variants read 9 source pixels per row; the y2 and xy2 variants read
 * h + 1 source rows.
 */
#define PIXOP(BTYPE, OPNAME, OP, INCR)                                         \
                                                                               \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels,               \
                              int line_size, int h)                            \
{                                                                              \
    BTYPE *dst = block;                                                        \
    const UINT8 *src = pixels;                                                 \
    int x;                                                                     \
                                                                               \
    do {                                                                       \
        for (x = 0; x < 8; x++)                                                \
            OP(dst[x], src[x]);                                                \
        src += line_size;                                                      \
        dst += INCR;                                                           \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels,            \
                                 int line_size, int h)                         \
{                                                                              \
    BTYPE *dst = block;                                                        \
    const UINT8 *src = pixels;                                                 \
    int x;                                                                     \
                                                                               \
    do {                                                                       \
        for (x = 0; x < 8; x++)                                                \
            OP(dst[x], avg2(src[x], src[x + 1]));                              \
        src += line_size;                                                      \
        dst += INCR;                                                           \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels,            \
                                 int line_size, int h)                         \
{                                                                              \
    BTYPE *dst = block;                                                        \
    const UINT8 *src = pixels;                                                 \
    const UINT8 *below = pixels + line_size;                                   \
    int x;                                                                     \
                                                                               \
    do {                                                                       \
        for (x = 0; x < 8; x++)                                                \
            OP(dst[x], avg2(src[x], below[x]));                                \
        src += line_size;                                                      \
        below += line_size;                                                    \
        dst += INCR;                                                           \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels,           \
                                  int line_size, int h)                        \
{                                                                              \
    BTYPE *dst = block;                                                        \
    const UINT8 *src = pixels;                                                 \
    const UINT8 *below = pixels + line_size;                                   \
    int x;                                                                     \
                                                                               \
    do {                                                                       \
        for (x = 0; x < 8; x++)                                                \
            OP(dst[x], avg4(src[x], src[x + 1], below[x], below[x + 1]));      \
        src += line_size;                                                      \
        below += line_size;                                                    \
        dst += INCR;                                                           \
    } while (--h);                                                             \
}                                                                              \
                                                                               \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels,                                                         \
    OPNAME ## _pixels_x2,                                                      \
    OPNAME ## _pixels_y2,                                                      \
    OPNAME ## _pixels_xy2,                                                     \
};
315

    
316

    
317
/* rounding primitives: averages are rounded to nearest, halves going up.
   Every argument is parenthesised so that an operand containing a
   lower-precedence operator (|, &, ?:, ...) is still averaged as a whole. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)

/* destination update operations handed to PIXOP as OP(dst, value);
   note that op_avg evaluates its destination argument twice */
#define op_put(a, b) ((a) = (b))
#define op_avg(a, b) ((a) = avg2(a, b))
#define op_sub(a, b) ((a) -= (b))
324

    
325
/* put: overwrite the UINT8 destination; its rows are line_size apart.
   Generates put_pixels, put_pixels_x2, put_pixels_y2, put_pixels_xy2 and
   put_pixels_tab[], using the rounding avg2()/avg4(). */
PIXOP(UINT8, put, op_put, line_size)
326
/* avg: average the result with what the destination already holds */
PIXOP(UINT8, avg, op_avg, line_size)
327

    
328
/* sub: subtract the result from a DCTELEM destination whose rows are
   8 entries apart */
PIXOP(DCTELEM, sub, op_sub, 8)
329

    
330
/* non-rounding primitives: avg2 truncates and avg4 adds 1 instead of 2
   before the shift.  Every argument is parenthesised so that an operand
   containing a lower-precedence operator is still averaged as a whole. */
#undef avg2
#undef avg4
#define avg2(a,b) (((a)+(b))>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+1)>>2)
335

    
336
/* Same operations as put/avg, instantiated while the non-rounding
   avg2()/avg4() are in effect.  op_avg expands avg2() as well, so the
   averaging with the destination in avg_no_rnd is non-rounding too. */
PIXOP(UINT8, put_no_rnd, op_put, line_size)
337
PIXOP(UINT8, avg_no_rnd, op_avg, line_size)
338

    
339
/* motion estimation */

/* The comparison functions below use the rounding averages again.  Every
   argument is parenthesised so that an operand containing a lower-precedence
   operator is still averaged as a whole. */
#undef avg2
#undef avg4
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
345

    
346
/*
 * Sum of absolute differences between two 16-pixel-wide areas.
 *
 * pix1, pix2  top-left pixels of the two areas
 * line_size   distance between two rows, in pixels (same for both areas)
 * h           number of rows to compare
 *
 * Returns the accumulated |pix1 - pix2| over 16 * h pixels.
 */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - pix2[col]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
373

    
374
/*
 * Sum of absolute differences between a 16-pixel-wide area and a reference
 * interpolated horizontally: every reference sample is avg2() of a pix2
 * pixel and its right neighbour, so 17 pixels are read per pix2 row.
 *
 * pix1       top-left pixel of the area being matched
 * pix2       top-left pixel of the reference area
 * line_size  distance between two rows, in pixels (same for both areas)
 * h          number of rows to compare
 */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], pix2[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
401

    
402
/*
 * Sum of absolute differences between a 16-pixel-wide area and a reference
 * interpolated vertically: every reference sample is avg2() of a pix2 pixel
 * and the pixel one row below it, so h + 1 rows of pix2 are read.
 *
 * pix1       top-left pixel of the area being matched
 * pix2       top-left pixel of the reference area
 * line_size  distance between two rows, in pixels (same for both areas)
 * h          number of rows to compare
 */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    UINT8 *below = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg2(pix2[col], below[col]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
431

    
432
/*
 * Sum of absolute differences between a 16-pixel-wide area and a reference
 * interpolated in both directions: every reference sample is avg4() of a
 * pix2 pixel, its right neighbour and the two pixels below them.  17 pixels
 * per row and h + 1 rows of pix2 are read.
 *
 * pix1       top-left pixel of the area being matched
 * pix2       top-left pixel of the reference area
 * line_size  distance between two rows, in pixels (same for both areas)
 * h          number of rows to compare
 */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    UINT8 *below = pix2 + line_size;
    int sum = 0;
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 16; col++)
            sum += abs(pix1[col] - avg4(pix2[col], pix2[col + 1],
                                        below[col], below[col + 1]));
        pix1 += line_size;
        pix2 += line_size;
        below += line_size;
    }
    return sum;
}
461

    
462
/* permute block according so that it corresponds to the MMX idct
463
   order */
464
#ifdef SIMPLE_IDCT
465
 /* general permutation, but perhaps slightly slower */
466
void block_permute(INT16 *block)
467
{
468
        int i;
469
        INT16 temp[64];
470

    
471
        for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i];
472

    
473
        for(i=0; i<64; i++) block[i] = temp[i];
474
}
475
#else
476

    
477
void block_permute(INT16 *block)
478
{
479
    int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
480
    int i;
481

    
482
    for(i=0;i<8;i++) {
483
        tmp1 = block[1];
484
        tmp2 = block[2];
485
        tmp3 = block[3];
486
        tmp4 = block[4];
487
        tmp5 = block[5];
488
        tmp6 = block[6];
489
        block[1] = tmp2;
490
        block[2] = tmp4;
491
        block[3] = tmp6;
492
        block[4] = tmp1;
493
        block[5] = tmp3;
494
        block[6] = tmp5;
495
        block += 8;
496
    }
497
}
498
#endif
499

    
500
void dsputil_init(void)
501
{
502
    int i, j;
503
    int use_permuted_idct;
504

    
505
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
506
    for(i=0;i<MAX_NEG_CROP;i++) {
507
        cropTbl[i] = 0;
508
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
509
    }
510

    
511
    for(i=0;i<512;i++) {
512
        squareTbl[i] = (i - 256) * (i - 256);
513
    }
514

    
515
#ifdef SIMPLE_IDCT
516
    ff_idct = simple_idct;
517
#else
518
    ff_idct = j_rev_dct;
519
#endif
520
    get_pixels = get_pixels_c;
521
    put_pixels_clamped = put_pixels_clamped_c;
522
    add_pixels_clamped = add_pixels_clamped_c;
523

    
524
    pix_abs16x16 = pix_abs16x16_c;
525
    pix_abs16x16_x2 = pix_abs16x16_x2_c;
526
    pix_abs16x16_y2 = pix_abs16x16_y2_c;
527
    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
528
    av_fdct = jpeg_fdct_ifast;
529

    
530
    use_permuted_idct = 1;
531

    
532
#ifdef HAVE_MMX
533
    dsputil_init_mmx();
534
#endif
535
#ifdef ARCH_ARMV4L
536
    dsputil_init_armv4l();
537
#endif
538
#ifdef HAVE_MLIB
539
    dsputil_init_mlib();
540
    use_permuted_idct = 0;
541
#endif
542
#ifdef ARCH_ALPHA
543
    dsputil_init_alpha();
544
    use_permuted_idct = 0;
545
#endif
546

    
547
#ifdef SIMPLE_IDCT
548
    if(ff_idct == simple_idct) use_permuted_idct=0;
549
#endif
550

    
551
    if(use_permuted_idct)
552
#ifdef SIMPLE_IDCT
553
        for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i];
554
#else
555
        for(i=0; i<64; i++) permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
556
#endif
557
    else
558
        for(i=0; i<64; i++) permutation[i]=i;
559

    
560
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
561
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
562
    
563
    if (use_permuted_idct) {
564
        /* permute for IDCT */
565
        for(i=0;i<64;i++) {
566
            j = zigzag_direct[i];
567
            zigzag_direct[i] = block_permute_op(j);
568
            j = ff_alternate_horizontal_scan[i];
569
            ff_alternate_horizontal_scan[i] = block_permute_op(j);
570
            j = ff_alternate_vertical_scan[i];
571
            ff_alternate_vertical_scan[i] = block_permute_op(j);
572
        }
573
        block_permute(default_intra_matrix);
574
        block_permute(default_non_intra_matrix);
575
    }
576
    
577
    build_zigzag_end();
578
}