ffmpeg / libavcodec / dsputil.c @ 2f349de2
History  View  Annotate  Download (24.4 KB)
1 
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Gerard Lantau.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

19 
#include <stdlib.h> 
20 
#include <stdio.h> 
21 
#include "avcodec.h" 
22 
#include "dsputil.h" 
23 
#include "simple_idct.h" 
24  
25 
void (*ff_idct)(DCTELEM *block);

26 
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); 
27 
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 
28 
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); 
29  
30 
op_pixels_abs_func pix_abs16x16; 
31 
op_pixels_abs_func pix_abs16x16_x2; 
32 
op_pixels_abs_func pix_abs16x16_y2; 
33 
op_pixels_abs_func pix_abs16x16_xy2; 
34  
35 
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 
36 
UINT32 squareTbl[512];

37  
38 
extern UINT16 default_intra_matrix[64]; 
39 
extern UINT16 default_non_intra_matrix[64]; 
40  
41 
/* Zigzag scan: entry i is the index inside the 8x8 block of the i-th
   coefficient in scan order. dsputil_init() rewrites the entries in
   place when a permuted IDCT is in use. */
UINT8 zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
51  
52 
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */

53 
UINT16 __align8 inv_zigzag_direct16[64];

54  
55 
/* not permutated zigzag_direct for MMX quantizer */

56 
UINT8 zigzag_direct_noperm[64];

57  
58 
/* Alternate horizontal scan order (same layout as zigzag_direct).
   dsputil_init() rewrites the entries in place when a permuted IDCT
   is in use. */
UINT8 ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
68  
69 
/* Alternate vertical scan order (same layout as zigzag_direct).
   dsputil_init() rewrites the entries in place when a permuted IDCT
   is in use. */
UINT8 ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
79  
80 
/* Input permutation for the simple_idct_mmx */

81 
static UINT8 simple_mmx_permutation[64]={ 
82 
0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
83 
0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
84 
0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
85 
0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
86 
0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
87 
0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
88 
0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
89 
0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, 
90 
}; 
91  
92 
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */

93 
UINT32 inverse[256]={

94 
0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, 
95 
536870912, 477218589, 429496730, 390451573, 357913942, 330382100, 306783379, 286331154, 
96 
268435456, 252645136, 238609295, 226050911, 214748365, 204522253, 195225787, 186737709, 
97 
178956971, 171798692, 165191050, 159072863, 153391690, 148102321, 143165577, 138547333, 
98 
134217728, 130150525, 126322568, 122713352, 119304648, 116080198, 113025456, 110127367, 
99 
107374183, 104755300, 102261127, 99882961, 97612894, 95443718, 93368855, 91382283, 
100 
89478486, 87652394, 85899346, 84215046, 82595525, 81037119, 79536432, 78090315, 
101 
76695845, 75350304, 74051161, 72796056, 71582789, 70409300, 69273667, 68174085, 
102 
67108864, 66076420, 65075263, 64103990, 63161284, 62245903, 61356676, 60492498, 
103 
59652324, 58835169, 58040099, 57266231, 56512728, 55778797, 55063684, 54366675, 
104 
53687092, 53024288, 52377650, 51746594, 51130564, 50529028, 49941481, 49367441, 
105 
48806447, 48258060, 47721859, 47197443, 46684428, 46182445, 45691142, 45210183, 
106 
44739243, 44278014, 43826197, 43383509, 42949673, 42524429, 42107523, 41698712, 
107 
41297763, 40904451, 40518560, 40139882, 39768216, 39403370, 39045158, 38693400, 
108 
38347923, 38008561, 37675152, 37347542, 37025581, 36709123, 36398028, 36092163, 
109 
35791395, 35495598, 35204650, 34918434, 34636834, 34359739, 34087043, 33818641, 
110 
33554432, 33294321, 33038210, 32786010, 32537632, 32292988, 32051995, 31814573, 
111 
31580642, 31350127, 31122952, 30899046, 30678338, 30460761, 30246249, 30034737, 
112 
29826162, 29620465, 29417585, 29217465, 29020050, 28825284, 28633116, 28443493, 
113 
28256364, 28071682, 27889399, 27709467, 27531842, 27356480, 27183338, 27012373, 
114 
26843546, 26676816, 26512144, 26349493, 26188825, 26030105, 25873297, 25718368, 
115 
25565282, 25414008, 25264514, 25116768, 24970741, 24826401, 24683721, 24542671, 
116 
24403224, 24265352, 24129030, 23994231, 23860930, 23729102, 23598722, 23469767, 
117 
23342214, 23216040, 23091223, 22967740, 22845571, 22724695, 22605092, 22486740, 
118 
22369622, 22253717, 22139007, 22025474, 21913099, 21801865, 21691755, 21582751, 
119 
21474837, 21367997, 21262215, 21157475, 21053762, 20951060, 20849356, 20748635, 
120 
20648882, 20550083, 20452226, 20355296, 20259280, 20164166, 20069941, 19976593, 
121 
19884108, 19792477, 19701685, 19611723, 19522579, 19434242, 19346700, 19259944, 
122 
19173962, 19088744, 19004281, 18920561, 18837576, 18755316, 18673771, 18592933, 
123 
18512791, 18433337, 18354562, 18276457, 18199014, 18122225, 18046082, 17970575, 
124 
17895698, 17821442, 17747799, 17674763, 17602325, 17530479, 17459217, 17388532, 
125 
17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, 
126 
}; 
127  
128 
/* used to skip zeros at the end */

129 
UINT8 zigzag_end[64];

130  
131 
UINT8 permutation[64];

132 
//UINT8 invPermutation[64];

133  
134 
static void build_zigzag_end() 
135 
{ 
136 
int lastIndex;

137 
int lastIndexAfterPerm=0; 
138 
for(lastIndex=0; lastIndex<64; lastIndex++) 
139 
{ 
140 
if(zigzag_direct[lastIndex] > lastIndexAfterPerm)

141 
lastIndexAfterPerm= zigzag_direct[lastIndex]; 
142 
zigzag_end[lastIndex]= lastIndexAfterPerm + 1;

143 
} 
144 
} 
145  
146 
/* Copy an 8x8 area of pixels into a block of coefficients.
   block:     destination, 64 contiguous elements (8 per row)
   pixels:    top left source pixel
   line_size: distance between two source rows, in bytes */
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
168  
169 
/* Store an 8x8 block of coefficients as pixels, mapping every value
   through the clamping table.
   block:     source, 64 contiguous elements (8 per row)
   pixels:    top left destination pixel
   line_size: distance between two destination rows, in bytes
   Values must stay within MAX_NEG_CROP of the 0..255 range, otherwise
   the lookup leaves cropTbl[]. */
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    int row, col;
    UINT8 *cm = cropTbl + MAX_NEG_CROP;

    /* write the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = cm[block[col]];
        pixels += line_size;
        block += 8;
    }
}
192  
193 
/* Add an 8x8 block of coefficients to the pixels already present,
   mapping every sum through the clamping table.
   block:     values to add, 64 contiguous elements (8 per row)
   pixels:    top left pixel, updated in place
   line_size: distance between two pixel rows, in bytes
   Sums must stay within MAX_NEG_CROP of the 0..255 range, otherwise
   the lookup leaves cropTbl[]. */
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size)
{
    int row, col;
    UINT8 *cm = cropTbl + MAX_NEG_CROP;

    /* update the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = cm[pixels[col] + block[col]];
        pixels += line_size;
        block += 8;
    }
}
216  
217 
/* PIXOP(BTYPE, OPNAME, OP, INCR) generates four static functions working
   on rows of 8 elements, plus the table OPNAME_pixels_tab[4] holding them
   in the order: plain, x2, y2, xy2.
     BTYPE   element type of the destination block
     OPNAME  prefix of the generated identifiers
     OP      statement macro OP(dst, value) applied to every element
     INCR    distance between two destination rows (may be the
             line_size parameter)
   The x2 variant averages each pixel with its right neighbour (reads 9
   pixels per row), y2 with the pixel one line below, xy2 averages the
   2x2 neighbourhood. avg2()/avg4() are taken from whatever definition is
   active where PIXOP is instantiated.
   h is the number of rows and must be at least 1: the loops are
   do { } while (--h). */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
 \
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) \
            OP(block[i], pixels[i]); \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) \
            OP(block[i], avg2(pixels[i], pixels[i + 1])); \
        pixels += line_size; \
        block += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *below = pixels + line_size; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) \
            OP(block[i], avg2(pixels[i], below[i])); \
        pixels += line_size; \
        below += line_size; \
        block += INCR; \
    } while (--h); \
} \
 \
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h) \
{ \
    const UINT8 *below = pixels + line_size; \
    int i; \
 \
    do { \
        for (i = 0; i < 8; i++) \
            OP(block[i], avg4(pixels[i], pixels[i + 1], below[i], below[i + 1])); \
        pixels += line_size; \
        below += line_size; \
        block += INCR; \
    } while (--h); \
} \
 \
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels, \
    OPNAME ## _pixels_x2, \
    OPNAME ## _pixels_y2, \
    OPNAME ## _pixels_xy2, \
};
315  
316  
317 
/* rounding primitives */

318 
#define avg2(a,b) ((a+b+1)>>1) 
319 
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 
320  
321 
#define op_put(a, b) a = b

322 
#define op_avg(a, b) a = avg2(a, b)

323 
#define op_sub(a, b) a = b

324  
325 
PIXOP(UINT8, put, op_put, line_size) 
326 
PIXOP(UINT8, avg, op_avg, line_size) 
327  
328 
PIXOP(DCTELEM, sub, op_sub, 8)

329  
330 
/* not rounding primitives */

331 
#undef avg2

332 
#undef avg4

333 
#define avg2(a,b) ((a+b)>>1) 
334 
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2) 
335  
336 
PIXOP(UINT8, put_no_rnd, op_put, line_size) 
337 
PIXOP(UINT8, avg_no_rnd, op_avg, line_size) 
338  
339 
/* motion estimation */

340  
341 
#undef avg2

342 
#undef avg4

343 
#define avg2(a,b) ((a+b+1)>>1) 
344 
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) 
345  
346 
/* Sum of absolute differences between two areas 16 pixels wide and
   h lines high.
   pix1, pix2: top left pixel of each area
   line_size:  distance between two rows (same for both areas)
   Returns the accumulated sum. */
int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
373  
374 
/* Sum of absolute differences between pix1 and pix2 interpolated
   horizontally: every pix2 pixel is averaged (avg2) with its right
   neighbour, so 17 pixels are read on each pix2 line.
   Areas are 16 pixels wide and h lines high; line_size is the distance
   between two rows. Returns the accumulated sum. */
int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return sum;
}
401  
402 
/* Sum of absolute differences between pix1 and pix2 interpolated
   vertically: every pix2 pixel is averaged (avg2) with the pixel one
   line below, so h + 1 lines of pix2 are read.
   Areas are 16 pixels wide and h lines high; line_size is the distance
   between two rows. Returns the accumulated sum. */
int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    UINT8 *pix3 = pix2 + line_size;
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
431  
432 
/* Sum of absolute differences between pix1 and pix2 interpolated in
   both directions: every pix2 pixel is averaged (avg4) with its right,
   lower and lower right neighbours, so 17 pixels on h + 1 lines of pix2
   are read.
   Areas are 16 pixels wide and h lines high; line_size is the distance
   between two rows. Returns the accumulated sum. */
int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
{
    UINT8 *pix3 = pix2 + line_size;
    int sum = 0;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 16; x++)
            sum += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], pix3[x], pix3[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return sum;
}
461  
462 
/* permute block according so that it corresponds to the MMX idct

463 
order */

464 
#ifdef SIMPLE_IDCT

465 
/* general permutation, but perhaps slightly slower */

466 
void block_permute(INT16 *block)

467 
{ 
468 
int i;

469 
INT16 temp[64];

470  
471 
for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; 
472  
473 
for(i=0; i<64; i++) block[i] = temp[i]; 
474 
} 
475 
#else

476  
477 
void block_permute(INT16 *block)

478 
{ 
479 
int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

480 
int i;

481  
482 
for(i=0;i<8;i++) { 
483 
tmp1 = block[1];

484 
tmp2 = block[2];

485 
tmp3 = block[3];

486 
tmp4 = block[4];

487 
tmp5 = block[5];

488 
tmp6 = block[6];

489 
block[1] = tmp2;

490 
block[2] = tmp4;

491 
block[3] = tmp6;

492 
block[4] = tmp1;

493 
block[5] = tmp3;

494 
block[6] = tmp5;

495 
block += 8;

496 
} 
497 
} 
498 
#endif

499  
500 
void dsputil_init(void) 
501 
{ 
502 
int i, j;

503 
int use_permuted_idct;

504  
505 
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; 
506 
for(i=0;i<MAX_NEG_CROP;i++) { 
507 
cropTbl[i] = 0;

508 
cropTbl[i + MAX_NEG_CROP + 256] = 255; 
509 
} 
510  
511 
for(i=0;i<512;i++) { 
512 
squareTbl[i] = (i  256) * (i  256); 
513 
} 
514  
515 
#ifdef SIMPLE_IDCT

516 
ff_idct = simple_idct; 
517 
#else

518 
ff_idct = j_rev_dct; 
519 
#endif

520 
get_pixels = get_pixels_c; 
521 
put_pixels_clamped = put_pixels_clamped_c; 
522 
add_pixels_clamped = add_pixels_clamped_c; 
523  
524 
pix_abs16x16 = pix_abs16x16_c; 
525 
pix_abs16x16_x2 = pix_abs16x16_x2_c; 
526 
pix_abs16x16_y2 = pix_abs16x16_y2_c; 
527 
pix_abs16x16_xy2 = pix_abs16x16_xy2_c; 
528 
av_fdct = jpeg_fdct_ifast; 
529  
530 
use_permuted_idct = 1;

531  
532 
#ifdef HAVE_MMX

533 
dsputil_init_mmx(); 
534 
#endif

535 
#ifdef ARCH_ARMV4L

536 
dsputil_init_armv4l(); 
537 
#endif

538 
#ifdef HAVE_MLIB

539 
dsputil_init_mlib(); 
540 
use_permuted_idct = 0;

541 
#endif

542 
#ifdef ARCH_ALPHA

543 
dsputil_init_alpha(); 
544 
use_permuted_idct = 0;

545 
#endif

546  
547 
#ifdef SIMPLE_IDCT

548 
if(ff_idct == simple_idct) use_permuted_idct=0; 
549 
#endif

550  
551 
if(use_permuted_idct)

552 
#ifdef SIMPLE_IDCT

553 
for(i=0; i<64; i++) permutation[i]= simple_mmx_permutation[i]; 
554 
#else

555 
for(i=0; i<64; i++) permutation[i]= (i & 0x38)  ((i & 6) >> 1)  ((i & 1) << 2); 
556 
#endif

557 
else

558 
for(i=0; i<64; i++) permutation[i]=i; 
559  
560 
for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; 
561 
for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; 
562 

563 
if (use_permuted_idct) {

564 
/* permute for IDCT */

565 
for(i=0;i<64;i++) { 
566 
j = zigzag_direct[i]; 
567 
zigzag_direct[i] = block_permute_op(j); 
568 
j = ff_alternate_horizontal_scan[i]; 
569 
ff_alternate_horizontal_scan[i] = block_permute_op(j); 
570 
j = ff_alternate_vertical_scan[i]; 
571 
ff_alternate_vertical_scan[i] = block_permute_op(j); 
572 
} 
573 
block_permute(default_intra_matrix); 
574 
block_permute(default_non_intra_matrix); 
575 
} 
576 

577 
build_zigzag_end(); 
578 
} 