ffmpeg / libavcodec / simple_idct.c @ 412ba501
History  View  Annotate  Download (9.46 KB)
1 
/*


2 
* Simple IDCT

3 
*

4 
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>

5 
*

6 
* This library is free software; you can redistribute it and/or

7 
* modify it under the terms of the GNU Lesser General Public

8 
* License as published by the Free Software Foundation; either

9 
* version 2 of the License, or (at your option) any later version.

10 
*

11 
* This library is distributed in the hope that it will be useful,

12 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

13 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 
* Lesser General Public License for more details.

15 
*

16 
* You should have received a copy of the GNU Lesser General Public

17 
* License along with this library; if not, write to the Free Software

18 
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

19 
*/

20 
/*

21 
based upon some commented-out C code from mpeg2dec (idct_mmx.c

22 
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)

23 
*/

24 
#include "avcodec.h" 
25  
26 
#include "simple_idct.h" 
27  
28 
/*
 * Fixed-point IDCT weights and post-pass shifts.
 *
 * Wi is cos(i*pi/16)*sqrt(2) scaled by a power of two; ROW_SHIFT and
 * COL_SHIFT are the right-shifts applied after the row and column passes.
 * The first set (scale 2048) is disabled; the second set (scale 1<<14) is
 * the one in use.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
/* NOTE(review): the formula gives 16384 for i=4; 16383 looks deliberate --
 * confirm before "correcting" it. */
#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W6 8867  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define W7 4520  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif

49  
50 
/* Enable the FAST_64BIT code paths when building for Alpha. */
#ifdef ARCH_ALPHA
#define FAST_64BIT
#endif

/*
 * MUL16(rt, ra, rb): rt  = ra * rb
 * MAC16(rt, ra, rb): rt += ra * rb
 *
 * rt must be an lvalue. The PowerPC 405 build uses dedicated instructions;
 * every other build uses plain C arithmetic.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif

73  
74 
#ifdef ARCH_ALPHA

75 
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */

76 
static inline int idctRowCondDC(int16_t *row) 
77 
{ 
78 
int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; 
79 
uint64_t *lrow = (uint64_t *) row; 
80  
81 
if (lrow[1] == 0) { 
82 
if (lrow[0] == 0) 
83 
return 0; 
84 
if ((lrow[0] & ~0xffffULL) == 0) { 
85 
uint64_t v; 
86  
87 
a0 = W4 * row[0];

88 
a0 += 1 << (ROW_SHIFT  1); 
89 
a0 >>= ROW_SHIFT; 
90 
v = (uint16_t) a0; 
91 
v += v << 16;

92 
v += v << 32;

93 
lrow[0] = v;

94 
lrow[1] = v;

95  
96 
return 1; 
97 
} 
98 
} 
99  
100 
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT  1)); 
101 
a1 = a0; 
102 
a2 = a0; 
103 
a3 = a0; 
104  
105 
if (row[2]) { 
106 
a0 += W2 * row[2];

107 
a1 += W6 * row[2];

108 
a2 = W6 * row[2];

109 
a3 = W2 * row[2];

110 
} 
111  
112 
if (row[4]) { 
113 
a0 += W4 * row[4];

114 
a1 = W4 * row[4];

115 
a2 = W4 * row[4];

116 
a3 += W4 * row[4];

117 
} 
118  
119 
if (row[6]) { 
120 
a0 += W6 * row[6];

121 
a1 = W2 * row[6];

122 
a2 += W2 * row[6];

123 
a3 = W6 * row[6];

124 
} 
125  
126 
if (row[1]) { 
127 
b0 = W1 * row[1];

128 
b1 = W3 * row[1];

129 
b2 = W5 * row[1];

130 
b3 = W7 * row[1];

131 
} else {

132 
b0 = 0;

133 
b1 = 0;

134 
b2 = 0;

135 
b3 = 0;

136 
} 
137  
138 
if (row[3]) { 
139 
b0 += W3 * row[3];

140 
b1 = W7 * row[3];

141 
b2 = W1 * row[3];

142 
b3 = W5 * row[3];

143 
} 
144  
145 
if (row[5]) { 
146 
b0 += W5 * row[5];

147 
b1 = W1 * row[5];

148 
b2 += W7 * row[5];

149 
b3 += W3 * row[5];

150 
} 
151  
152 
if (row[7]) { 
153 
b0 += W7 * row[7];

154 
b1 = W5 * row[7];

155 
b2 += W3 * row[7];

156 
b3 = W1 * row[7];

157 
} 
158  
159 
row[0] = (a0 + b0) >> ROW_SHIFT;

160 
row[1] = (a1 + b1) >> ROW_SHIFT;

161 
row[2] = (a2 + b2) >> ROW_SHIFT;

162 
row[3] = (a3 + b3) >> ROW_SHIFT;

163 
row[4] = (a3  b3) >> ROW_SHIFT;

164 
row[5] = (a2  b2) >> ROW_SHIFT;

165 
row[6] = (a1  b1) >> ROW_SHIFT;

166 
row[7] = (a0  b0) >> ROW_SHIFT;

167  
168 
return 2; 
169 
} 
170 
#else /* not ARCH_ALPHA */ 
171  
172 
static inline void idctRowCondDC (int16_t * row) 
173 
{ 
174 
int a0, a1, a2, a3, b0, b1, b2, b3;

175 
#ifdef FAST_64BIT

176 
uint64_t temp; 
177 
#else

178 
uint32_t temp; 
179 
#endif

180  
181 
#ifdef FAST_64BIT

182 
#ifdef WORDS_BIGENDIAN

183 
#define ROW0_MASK 0xffff000000000000LL 
184 
#else

185 
#define ROW0_MASK 0xffffLL 
186 
#endif

187 
if ( ((((uint64_t *)row)[0] & ~ROW0_MASK)  
188 
((uint64_t *)row)[1]) == 0) { 
189 
temp = (row[0] << 3) & 0xffff; 
190 
temp += temp << 16;

191 
temp += temp << 32;

192 
((uint64_t *)row)[0] = temp;

193 
((uint64_t *)row)[1] = temp;

194 
return;

195 
} 
196 
#else

197 
if (!(((uint32_t*)row)[1]  
198 
((uint32_t*)row)[2] 

199 
((uint32_t*)row)[3] 

200 
row[1])) {

201 
temp = (row[0] << 3) & 0xffff; 
202 
temp += temp << 16;

203 
((uint32_t*)row)[0]=((uint32_t*)row)[1] = 
204 
((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; 
205 
return;

206 
} 
207 
#endif

208  
209 
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT  1)); 
210 
a1 = a0; 
211 
a2 = a0; 
212 
a3 = a0; 
213  
214 
/* no need to optimize : gcc does it */

215 
a0 += W2 * row[2];

216 
a1 += W6 * row[2];

217 
a2 = W6 * row[2];

218 
a3 = W2 * row[2];

219  
220 
MUL16(b0, W1, row[1]);

221 
MAC16(b0, W3, row[3]);

222 
MUL16(b1, W3, row[1]);

223 
MAC16(b1, W7, row[3]);

224 
MUL16(b2, W5, row[1]);

225 
MAC16(b2, W1, row[3]);

226 
MUL16(b3, W7, row[1]);

227 
MAC16(b3, W5, row[3]);

228  
229 
#ifdef FAST_64BIT

230 
temp = ((uint64_t*)row)[1];

231 
#else

232 
temp = ((uint32_t*)row)[2]  ((uint32_t*)row)[3]; 
233 
#endif

234 
if (temp != 0) { 
235 
a0 += W4*row[4] + W6*row[6]; 
236 
a1 +=  W4*row[4]  W2*row[6]; 
237 
a2 +=  W4*row[4] + W2*row[6]; 
238 
a3 += W4*row[4]  W6*row[6]; 
239  
240 
MAC16(b0, W5, row[5]);

241 
MAC16(b0, W7, row[7]);

242 

243 
MAC16(b1, W1, row[5]);

244 
MAC16(b1, W5, row[7]);

245 

246 
MAC16(b2, W7, row[5]);

247 
MAC16(b2, W3, row[7]);

248 

249 
MAC16(b3, W3, row[5]);

250 
MAC16(b3, W1, row[7]);

251 
} 
252  
253 
row[0] = (a0 + b0) >> ROW_SHIFT;

254 
row[7] = (a0  b0) >> ROW_SHIFT;

255 
row[1] = (a1 + b1) >> ROW_SHIFT;

256 
row[6] = (a1  b1) >> ROW_SHIFT;

257 
row[2] = (a2 + b2) >> ROW_SHIFT;

258 
row[5] = (a2  b2) >> ROW_SHIFT;

259 
row[3] = (a3 + b3) >> ROW_SHIFT;

260 
row[4] = (a3  b3) >> ROW_SHIFT;

261 
} 
262 
#endif /* not ARCH_ALPHA */ 
263  
264 
static inline void idctSparseCol (int16_t * col) 
265 
{ 
266 
int a0, a1, a2, a3, b0, b1, b2, b3;

267  
268 
/* XXX: I did that only to give same values as previous code */

269 
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT1))/W4)); 
270 
a1 = a0; 
271 
a2 = a0; 
272 
a3 = a0; 
273  
274 
a0 += + W2*col[8*2]; 
275 
a1 += + W6*col[8*2]; 
276 
a2 +=  W6*col[8*2]; 
277 
a3 +=  W2*col[8*2]; 
278  
279 
MUL16(b0, W1, col[8*1]); 
280 
MUL16(b1, W3, col[8*1]); 
281 
MUL16(b2, W5, col[8*1]); 
282 
MUL16(b3, W7, col[8*1]); 
283  
284 
MAC16(b0, + W3, col[8*3]); 
285 
MAC16(b1,  W7, col[8*3]); 
286 
MAC16(b2,  W1, col[8*3]); 
287 
MAC16(b3,  W5, col[8*3]); 
288  
289 
if(col[8*4]){ 
290 
a0 += + W4*col[8*4]; 
291 
a1 +=  W4*col[8*4]; 
292 
a2 +=  W4*col[8*4]; 
293 
a3 += + W4*col[8*4]; 
294 
} 
295  
296 
if (col[8*5]) { 
297 
MAC16(b0, + W5, col[8*5]); 
298 
MAC16(b1,  W1, col[8*5]); 
299 
MAC16(b2, + W7, col[8*5]); 
300 
MAC16(b3, + W3, col[8*5]); 
301 
} 
302  
303 
if(col[8*6]){ 
304 
a0 += + W6*col[8*6]; 
305 
a1 +=  W2*col[8*6]; 
306 
a2 += + W2*col[8*6]; 
307 
a3 +=  W6*col[8*6]; 
308 
} 
309  
310 
if (col[8*7]) { 
311 
MAC16(b0, + W7, col[8*7]); 
312 
MAC16(b1,  W5, col[8*7]); 
313 
MAC16(b2, + W3, col[8*7]); 
314 
MAC16(b3,  W1, col[8*7]); 
315 
} 
316  
317 
col[8*0] = (a0 + b0) >> COL_SHIFT; 
318 
col[8*7] = (a0  b0) >> COL_SHIFT; 
319 
col[8*1] = (a1 + b1) >> COL_SHIFT; 
320 
col[8*6] = (a1  b1) >> COL_SHIFT; 
321 
col[8*2] = (a2 + b2) >> COL_SHIFT; 
322 
col[8*5] = (a2  b2) >> COL_SHIFT; 
323 
col[8*3] = (a3 + b3) >> COL_SHIFT; 
324 
col[8*4] = (a3  b3) >> COL_SHIFT; 
325 
} 
326  
327 
#ifdef ARCH_ALPHA

328 
/* If all rows but the first one are zero after row transformation,

329 
all rows will be identical after column transformation. */

330 
static inline void idctCol2(int16_t *col) 
331 
{ 
332 
int i;

333 
uint64_t l, r; 
334 
uint64_t *lcol = (uint64_t *) col; 
335  
336 
for (i = 0; i < 8; ++i) { 
337 
int a0 = col[0] + (1 << (COL_SHIFT  1)) / W4; 
338  
339 
a0 *= W4; 
340 
col[0] = a0 >> COL_SHIFT;

341 
++col; 
342 
} 
343  
344 
l = lcol[0];

345 
r = lcol[1];

346 
lcol[ 2] = l; lcol[ 3] = r; 
347 
lcol[ 4] = l; lcol[ 5] = r; 
348 
lcol[ 6] = l; lcol[ 7] = r; 
349 
lcol[ 8] = l; lcol[ 9] = r; 
350 
lcol[10] = l; lcol[11] = r; 
351 
lcol[12] = l; lcol[13] = r; 
352 
lcol[14] = l; lcol[15] = r; 
353 
} 
354  
355 
/**
 * In-place 2-D inverse DCT of an 8x8 block (Alpha variant).
 *
 * Runs the row pass on all 8 rows and uses the sparseness codes returned by
 * idctRowCondDC() to choose the cheapest column pass:
 *  - rows 1..7 all zero: idctCol2();
 *  - every row zero or DC-only (each row is constant after the row pass):
 *    transform column 0 only and replicate each result across its row;
 *  - otherwise: full column pass on all 8 columns.
 *
 * @param block  64 coefficients, row i starting at block + 8*i
 */
void simple_idct (short *block)
{
    int i;
    int rowsZero = 1;       /* all rows except row 0 zero */
    int rowsConstant = 1;   /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idctRowCondDC(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idctCol2(block);
    } else if (rowsConstant) {
        uint64_t *lblock = (uint64_t *) block;

        idctSparseCol(block);
        for (i = 0; i < 8; i++) {
            /* Replicate the column-0 result into all 8 entries of row i. */
            uint64_t v = (uint16_t) block[i * 8];

            v += v << 16;
            v += v << 32;
            lblock[0] = v;
            lblock[1] = v;
            lblock += 2;
        }
    } else {
        for (i = 0; i < 8; i++)
            idctSparseCol(block + i);
    }
}
391  
392 
#else

393  
394 
/**
 * In-place 2-D inverse DCT of an 8x8 block.
 *
 * Applies the row transform to each of the 8 rows, then the column
 * transform to each of the 8 columns.
 *
 * @param block  64 coefficients, row i starting at block + 8*i
 */
void simple_idct (short *block)
{
    int i;

    for (i = 0; i < 8; i++) {
        idctRowCondDC(block + i*8);
    }

    for (i = 0; i < 8; i++) {
        idctSparseCol(block + i);
    }
}
403  
404 
#endif

405  
406 
#undef COL_SHIFT
