ffmpeg / libavcodec / simple_idct.c @ d36a2466
History  View  Annotate  Download (12.4 KB)
1 
/*


2 
* Simple IDCT

3 
*

4 
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>

5 
*

6 
* This library is free software; you can redistribute it and/or

7 
* modify it under the terms of the GNU Lesser General Public

8 
* License as published by the Free Software Foundation; either

9 
* version 2 of the License, or (at your option) any later version.

10 
*

11 
* This library is distributed in the hope that it will be useful,

12 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

13 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 
* Lesser General Public License for more details.

15 
*

16 
* You should have received a copy of the GNU Lesser General Public

17 
* License along with this library; if not, write to the Free Software

18 
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

19 
*/

20 
/*

21 
based upon some commented-out C code from mpeg2dec (idct_mmx.c

22 
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)

23 
*/

24 
#include "avcodec.h" 
25 
#include "dsputil.h" 
26 
#include "simple_idct.h" 
27  
/*
 * Fixed-point IDCT coefficients W1..W7 and the right shifts applied after
 * the row pass (ROW_SHIFT) and after the column pass (COL_SHIFT).
 *
 * The "#if 0" branch keeps an alternative set of constants scaled by 2048;
 * it is disabled, so the 1<<14 scaled set in the "#else" branch is the one
 * actually used.
 *
 * NOTE(review): for i = 4 the formula in the comment evaluates to 16384,
 * but the code uses 16383 -- presumably intentional, confirm before changing.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif

49  
/* Alpha builds use the 64-bit word paths guarded by FAST_64BIT. */
#ifdef ARCH_ALPHA
#define FAST_64BIT
#endif

/*
 * MUL16(rt, ra, rb): rt  = ra * rb
 * MAC16(rt, ra, rb): rt += ra * rb
 *
 * On ARCH_POWERPC_405 both are implemented with a single inline-asm
 * instruction; every other target uses plain C arithmetic.  Each macro
 * expands to a statement that assigns to rt, so rt must be an lvalue.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) rt += (ra) * (rb)

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) rt = (ra) * (rb)

#endif

73  
74 
#ifdef ARCH_ALPHA

75 
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */

76 
static inline int idctRowCondDC(int16_t *row) 
77 
{ 
78 
int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; 
79 
uint64_t *lrow = (uint64_t *) row; 
80  
81 
if (lrow[1] == 0) { 
82 
if (lrow[0] == 0) 
83 
return 0; 
84 
if ((lrow[0] & ~0xffffULL) == 0) { 
85 
uint64_t v; 
86  
87 
a0 = W4 * row[0];

88 
a0 += 1 << (ROW_SHIFT  1); 
89 
a0 >>= ROW_SHIFT; 
90 
v = (uint16_t) a0; 
91 
v += v << 16;

92 
v += v << 32;

93 
lrow[0] = v;

94 
lrow[1] = v;

95  
96 
return 1; 
97 
} 
98 
} 
99  
100 
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT  1)); 
101 
a1 = a0; 
102 
a2 = a0; 
103 
a3 = a0; 
104  
105 
if (row[2]) { 
106 
a0 += W2 * row[2];

107 
a1 += W6 * row[2];

108 
a2 = W6 * row[2];

109 
a3 = W2 * row[2];

110 
} 
111  
112 
if (row[4]) { 
113 
a0 += W4 * row[4];

114 
a1 = W4 * row[4];

115 
a2 = W4 * row[4];

116 
a3 += W4 * row[4];

117 
} 
118  
119 
if (row[6]) { 
120 
a0 += W6 * row[6];

121 
a1 = W2 * row[6];

122 
a2 += W2 * row[6];

123 
a3 = W6 * row[6];

124 
} 
125  
126 
if (row[1]) { 
127 
b0 = W1 * row[1];

128 
b1 = W3 * row[1];

129 
b2 = W5 * row[1];

130 
b3 = W7 * row[1];

131 
} else {

132 
b0 = 0;

133 
b1 = 0;

134 
b2 = 0;

135 
b3 = 0;

136 
} 
137  
138 
if (row[3]) { 
139 
b0 += W3 * row[3];

140 
b1 = W7 * row[3];

141 
b2 = W1 * row[3];

142 
b3 = W5 * row[3];

143 
} 
144  
145 
if (row[5]) { 
146 
b0 += W5 * row[5];

147 
b1 = W1 * row[5];

148 
b2 += W7 * row[5];

149 
b3 += W3 * row[5];

150 
} 
151  
152 
if (row[7]) { 
153 
b0 += W7 * row[7];

154 
b1 = W5 * row[7];

155 
b2 += W3 * row[7];

156 
b3 = W1 * row[7];

157 
} 
158  
159 
row[0] = (a0 + b0) >> ROW_SHIFT;

160 
row[1] = (a1 + b1) >> ROW_SHIFT;

161 
row[2] = (a2 + b2) >> ROW_SHIFT;

162 
row[3] = (a3 + b3) >> ROW_SHIFT;

163 
row[4] = (a3  b3) >> ROW_SHIFT;

164 
row[5] = (a2  b2) >> ROW_SHIFT;

165 
row[6] = (a1  b1) >> ROW_SHIFT;

166 
row[7] = (a0  b0) >> ROW_SHIFT;

167  
168 
return 2; 
169 
} 
170 
#else /* not ARCH_ALPHA */ 
171  
172 
static inline void idctRowCondDC (int16_t * row) 
173 
{ 
174 
int a0, a1, a2, a3, b0, b1, b2, b3;

175 
#ifdef FAST_64BIT

176 
uint64_t temp; 
177 
#else

178 
uint32_t temp; 
179 
#endif

180  
181 
#ifdef FAST_64BIT

182 
#ifdef WORDS_BIGENDIAN

183 
#define ROW0_MASK 0xffff000000000000LL 
184 
#else

185 
#define ROW0_MASK 0xffffLL 
186 
#endif

187 
if ( ((((uint64_t *)row)[0] & ~ROW0_MASK)  
188 
((uint64_t *)row)[1]) == 0) { 
189 
temp = (row[0] << 3) & 0xffff; 
190 
temp += temp << 16;

191 
temp += temp << 32;

192 
((uint64_t *)row)[0] = temp;

193 
((uint64_t *)row)[1] = temp;

194 
return;

195 
} 
196 
#else

197 
if (!(((uint32_t*)row)[1]  
198 
((uint32_t*)row)[2] 

199 
((uint32_t*)row)[3] 

200 
row[1])) {

201 
temp = (row[0] << 3) & 0xffff; 
202 
temp += temp << 16;

203 
((uint32_t*)row)[0]=((uint32_t*)row)[1] = 
204 
((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; 
205 
return;

206 
} 
207 
#endif

208  
209 
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT  1)); 
210 
a1 = a0; 
211 
a2 = a0; 
212 
a3 = a0; 
213  
214 
/* no need to optimize : gcc does it */

215 
a0 += W2 * row[2];

216 
a1 += W6 * row[2];

217 
a2 = W6 * row[2];

218 
a3 = W2 * row[2];

219  
220 
MUL16(b0, W1, row[1]);

221 
MAC16(b0, W3, row[3]);

222 
MUL16(b1, W3, row[1]);

223 
MAC16(b1, W7, row[3]);

224 
MUL16(b2, W5, row[1]);

225 
MAC16(b2, W1, row[3]);

226 
MUL16(b3, W7, row[1]);

227 
MAC16(b3, W5, row[3]);

228  
229 
#ifdef FAST_64BIT

230 
temp = ((uint64_t*)row)[1];

231 
#else

232 
temp = ((uint32_t*)row)[2]  ((uint32_t*)row)[3]; 
233 
#endif

234 
if (temp != 0) { 
235 
a0 += W4*row[4] + W6*row[6]; 
236 
a1 +=  W4*row[4]  W2*row[6]; 
237 
a2 +=  W4*row[4] + W2*row[6]; 
238 
a3 += W4*row[4]  W6*row[6]; 
239  
240 
MAC16(b0, W5, row[5]);

241 
MAC16(b0, W7, row[7]);

242 

243 
MAC16(b1, W1, row[5]);

244 
MAC16(b1, W5, row[7]);

245 

246 
MAC16(b2, W7, row[5]);

247 
MAC16(b2, W3, row[7]);

248 

249 
MAC16(b3, W3, row[5]);

250 
MAC16(b3, W1, row[7]);

251 
} 
252  
253 
row[0] = (a0 + b0) >> ROW_SHIFT;

254 
row[7] = (a0  b0) >> ROW_SHIFT;

255 
row[1] = (a1 + b1) >> ROW_SHIFT;

256 
row[6] = (a1  b1) >> ROW_SHIFT;

257 
row[2] = (a2 + b2) >> ROW_SHIFT;

258 
row[5] = (a2  b2) >> ROW_SHIFT;

259 
row[3] = (a3 + b3) >> ROW_SHIFT;

260 
row[4] = (a3  b3) >> ROW_SHIFT;

261 
} 
262 
#endif /* not ARCH_ALPHA */ 
263  
264 
static inline void idctSparseColPut (UINT8 *dest, int line_size, 
265 
int16_t * col) 
266 
{ 
267 
int a0, a1, a2, a3, b0, b1, b2, b3;

268 
UINT8 *cm = cropTbl + MAX_NEG_CROP; 
269  
270 
/* XXX: I did that only to give same values as previous code */

271 
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT1))/W4)); 
272 
a1 = a0; 
273 
a2 = a0; 
274 
a3 = a0; 
275  
276 
a0 += + W2*col[8*2]; 
277 
a1 += + W6*col[8*2]; 
278 
a2 +=  W6*col[8*2]; 
279 
a3 +=  W2*col[8*2]; 
280  
281 
MUL16(b0, W1, col[8*1]); 
282 
MUL16(b1, W3, col[8*1]); 
283 
MUL16(b2, W5, col[8*1]); 
284 
MUL16(b3, W7, col[8*1]); 
285  
286 
MAC16(b0, + W3, col[8*3]); 
287 
MAC16(b1,  W7, col[8*3]); 
288 
MAC16(b2,  W1, col[8*3]); 
289 
MAC16(b3,  W5, col[8*3]); 
290  
291 
if(col[8*4]){ 
292 
a0 += + W4*col[8*4]; 
293 
a1 +=  W4*col[8*4]; 
294 
a2 +=  W4*col[8*4]; 
295 
a3 += + W4*col[8*4]; 
296 
} 
297  
298 
if (col[8*5]) { 
299 
MAC16(b0, + W5, col[8*5]); 
300 
MAC16(b1,  W1, col[8*5]); 
301 
MAC16(b2, + W7, col[8*5]); 
302 
MAC16(b3, + W3, col[8*5]); 
303 
} 
304  
305 
if(col[8*6]){ 
306 
a0 += + W6*col[8*6]; 
307 
a1 +=  W2*col[8*6]; 
308 
a2 += + W2*col[8*6]; 
309 
a3 +=  W6*col[8*6]; 
310 
} 
311  
312 
if (col[8*7]) { 
313 
MAC16(b0, + W7, col[8*7]); 
314 
MAC16(b1,  W5, col[8*7]); 
315 
MAC16(b2, + W3, col[8*7]); 
316 
MAC16(b3,  W1, col[8*7]); 
317 
} 
318  
319 
dest[0] = cm[(a0 + b0) >> COL_SHIFT];

320 
dest += line_size; 
321 
dest[0] = cm[(a1 + b1) >> COL_SHIFT];

322 
dest += line_size; 
323 
dest[0] = cm[(a2 + b2) >> COL_SHIFT];

324 
dest += line_size; 
325 
dest[0] = cm[(a3 + b3) >> COL_SHIFT];

326 
dest += line_size; 
327 
dest[0] = cm[(a3  b3) >> COL_SHIFT];

328 
dest += line_size; 
329 
dest[0] = cm[(a2  b2) >> COL_SHIFT];

330 
dest += line_size; 
331 
dest[0] = cm[(a1  b1) >> COL_SHIFT];

332 
dest += line_size; 
333 
dest[0] = cm[(a0  b0) >> COL_SHIFT];

334 
} 
335  
336 
static inline void idctSparseColAdd (UINT8 *dest, int line_size, 
337 
int16_t * col) 
338 
{ 
339 
int a0, a1, a2, a3, b0, b1, b2, b3;

340 
UINT8 *cm = cropTbl + MAX_NEG_CROP; 
341  
342 
/* XXX: I did that only to give same values as previous code */

343 
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT1))/W4)); 
344 
a1 = a0; 
345 
a2 = a0; 
346 
a3 = a0; 
347  
348 
a0 += + W2*col[8*2]; 
349 
a1 += + W6*col[8*2]; 
350 
a2 +=  W6*col[8*2]; 
351 
a3 +=  W2*col[8*2]; 
352  
353 
MUL16(b0, W1, col[8*1]); 
354 
MUL16(b1, W3, col[8*1]); 
355 
MUL16(b2, W5, col[8*1]); 
356 
MUL16(b3, W7, col[8*1]); 
357  
358 
MAC16(b0, + W3, col[8*3]); 
359 
MAC16(b1,  W7, col[8*3]); 
360 
MAC16(b2,  W1, col[8*3]); 
361 
MAC16(b3,  W5, col[8*3]); 
362  
363 
if(col[8*4]){ 
364 
a0 += + W4*col[8*4]; 
365 
a1 +=  W4*col[8*4]; 
366 
a2 +=  W4*col[8*4]; 
367 
a3 += + W4*col[8*4]; 
368 
} 
369  
370 
if (col[8*5]) { 
371 
MAC16(b0, + W5, col[8*5]); 
372 
MAC16(b1,  W1, col[8*5]); 
373 
MAC16(b2, + W7, col[8*5]); 
374 
MAC16(b3, + W3, col[8*5]); 
375 
} 
376  
377 
if(col[8*6]){ 
378 
a0 += + W6*col[8*6]; 
379 
a1 +=  W2*col[8*6]; 
380 
a2 += + W2*col[8*6]; 
381 
a3 +=  W6*col[8*6]; 
382 
} 
383  
384 
if (col[8*7]) { 
385 
MAC16(b0, + W7, col[8*7]); 
386 
MAC16(b1,  W5, col[8*7]); 
387 
MAC16(b2, + W3, col[8*7]); 
388 
MAC16(b3,  W1, col[8*7]); 
389 
} 
390  
391 
dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; 
392 
dest += line_size; 
393 
dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; 
394 
dest += line_size; 
395 
dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; 
396 
dest += line_size; 
397 
dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; 
398 
dest += line_size; 
399 
dest[0] = cm[dest[0] + ((a3  b3) >> COL_SHIFT)]; 
400 
dest += line_size; 
401 
dest[0] = cm[dest[0] + ((a2  b2) >> COL_SHIFT)]; 
402 
dest += line_size; 
403 
dest[0] = cm[dest[0] + ((a1  b1) >> COL_SHIFT)]; 
404 
dest += line_size; 
405 
dest[0] = cm[dest[0] + ((a0  b0) >> COL_SHIFT)]; 
406 
} 
407  
408 
#ifdef ARCH_ALPHA

409 
/* If all rows but the first one are zero after row transformation,

410 
all rows will be identical after column transformation. */

411 
static inline void idctCol2(int16_t *col) 
412 
{ 
413 
int i;

414 
uint64_t l, r; 
415 
uint64_t *lcol = (uint64_t *) col; 
416  
417 
for (i = 0; i < 8; ++i) { 
418 
int a0 = col[0] + (1 << (COL_SHIFT  1)) / W4; 
419  
420 
a0 *= W4; 
421 
col[0] = a0 >> COL_SHIFT;

422 
++col; 
423 
} 
424  
425 
l = lcol[0];

426 
r = lcol[1];

427 
lcol[ 2] = l; lcol[ 3] = r; 
428 
lcol[ 4] = l; lcol[ 5] = r; 
429 
lcol[ 6] = l; lcol[ 7] = r; 
430 
lcol[ 8] = l; lcol[ 9] = r; 
431 
lcol[10] = l; lcol[11] = r; 
432 
lcol[12] = l; lcol[13] = r; 
433 
lcol[14] = l; lcol[15] = r; 
434 
} 
435  
/**
 * In-place 2-D inverse DCT of an 8x8 block (Alpha variant).
 *
 * Runs idctRowCondDC on each row and uses its return value to pick the
 * cheapest column pass:
 *  - rows 1..7 all zero: idctCol2;
 *  - every row constant: transform column 0 only, then replicate each
 *    row's first value across that row;
 *  - otherwise: transform all eight columns.
 *
 * NOTE(review): idctSparseCol is not defined in the part of the file
 * visible here (only idctSparseColPut / idctSparseColAdd are) -- confirm
 * that this path still builds with ARCH_ALPHA.
 *
 * @param block  64 coefficients in row-major order, replaced by the result
 */
void simple_idct (short *block)
{

    int i;
    int rowsZero = 1;       /* all rows except row 0 zero */
    int rowsConstant = 1;   /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idctRowCondDC(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idctCol2(block);
    } else if (rowsConstant) {
        uint64_t *lblock = (uint64_t *) block;

        idctSparseCol(block);
        for (i = 0; i < 8; i++) {
            /* Broadcast the row's first value into all eight lanes. */
            uint64_t v = (uint16_t) block[i * 8];

            v += v << 16;
            v += v << 32;
            lblock[0] = v;
            lblock[1] = v;
            lblock += 2;
        }
    } else {
        for (i = 0; i < 8; i++)
            idctSparseCol(block + i);
    }
}
472  
473 
/* XXX: suppress this mess */

474 
void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block) 
475 
{ 
476 
simple_idct(block); 
477 
put_pixels_clamped(block, dest, line_size); 
478 
} 
479  
480 
void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block) 
481 
{ 
482 
simple_idct(block); 
483 
add_pixels_clamped(block, dest, line_size); 
484 
} 
485  
486 
#else

487  
488 
void simple_idct_put(UINT8 *dest, int line_size, INT16 *block) 
489 
{ 
490 
int i;

491 
for(i=0; i<8; i++) 
492 
idctRowCondDC(block + i*8);

493 

494 
for(i=0; i<8; i++) 
495 
idctSparseColPut(dest + i, line_size, block + i); 
496 
} 
497  
498 
void simple_idct_add(UINT8 *dest, int line_size, INT16 *block) 
499 
{ 
500 
int i;

501 
for(i=0; i<8; i++) 
502 
idctRowCondDC(block + i*8);

503 

504 
for(i=0; i<8; i++) 
505 
idctSparseColAdd(dest + i, line_size, block + i); 
506 
} 
507  
508 
#endif

509  
510 
#undef COL_SHIFT
