ffmpeg / libavcodec / jrevdct.c @ ea937d01
History  View  Annotate  Download (33.1 KB)
1 
/*
 * jrevdct.c
 *
 * Copyright (C) 1991, 1992, Thomas G. Lane.
 * This file is part of the Independent JPEG Group's software.
 * For conditions of distribution and use, see the accompanying README file.
 *
 * This file contains the basic inverse-DCT transformation subroutine.
 *
 * This implementation is based on an algorithm described in
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
 * The primary algorithm described there uses 11 multiplies and 29 adds.
 * We use their alternate method with 12 multiplies and 32 adds.
 * The advantage of this method is that no data path contains more than one
 * multiplication; this allows a very simple and accurate implementation in
 * scaled fixed-point arithmetic, with a minimal number of shifts.
 *
 * I've made lots of modifications to attempt to take advantage of the
 * sparse nature of the DCT matrices we're getting.  Although the logic
 * is cumbersome, it's straightforward and the resulting code is much
 * faster.
 *
 * A better way to do this would be to pass in the DCT block as a sparse
 * matrix, perhaps with the difference cases encoded.
 */

28 
#include "common.h" 
29 
#include "dsputil.h" 
30  
31 
#define EIGHT_BIT_SAMPLES

32  
33 
#define DCTSIZE 8 
34 
#define DCTSIZE2 64 
35  
36 
#define GLOBAL

37  
38 
#define RIGHT_SHIFT(x, n) ((x) >> (n))

39  
40 
typedef DCTELEM DCTBLOCK[DCTSIZE2];

41  
42 
#define CONST_BITS 13 
43  
44 
/*

45 
* This routine is specialized to the case DCTSIZE = 8.

46 
*/

47  
48 
#if DCTSIZE != 8 
49 
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 
50 
#endif

51  
52  
53 
/*

54 
* A 2D IDCT can be done by 1D IDCT on each row followed by 1D IDCT

55 
* on each column. Direct algorithms are also available, but they are

56 
* much more complex and seem not to be any faster when reduced to code.

57 
*

58 
* The poop on this scaling stuff is as follows:

59 
*

60 
* Each 1D IDCT step produces outputs which are a factor of sqrt(N)

61 
* larger than the true IDCT outputs. The final outputs are therefore

62 
* a factor of N larger than desired; since N=8 this can be cured by

63 
* a simple right shift at the end of the algorithm. The advantage of

64 
* this arrangement is that we save two multiplications per 1D IDCT,

65 
* because the y0 and y4 inputs need not be divided by sqrt(N).

66 
*

67 
* We have to do addition and subtraction of the integer inputs, which

68 
* is no problem, and multiplication by fractional constants, which is

69 
* a problem to do in integer arithmetic. We multiply all the constants

70 
* by CONST_SCALE and convert them to integer constants (thus retaining

71 
* CONST_BITS bits of precision in the constants). After doing a

72 
* multiplication we have to divide the product by CONST_SCALE, with proper

73 
* rounding, to produce the correct output. This division can be done

74 
* cheaply as a right shift of CONST_BITS bits. We postpone shifting

75 
* as long as possible so that partial sums can be added together with

76 
* full fractional precision.

77 
*

78 
* The outputs of the first pass are scaled up by PASS1_BITS bits so that

79 
* they are represented to betterthanintegral precision. These outputs

80 
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16bit word

81 
* with the recommended scaling. (To scale up 12bit sample data further, an

82 
* intermediate int32 array would be needed.)

83 
*

84 
* To avoid overflow of the 32bit intermediate results in pass 2, we must

85 
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis

86 
* shows that the values given below are the most effective.

87 
*/

88  
89 
#ifdef EIGHT_BIT_SAMPLES

90 
#define PASS1_BITS 2 
91 
#else

92 
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 
93 
#endif

94  
95 
#define ONE ((INT32) 1) 
96  
97 
#define CONST_SCALE (ONE << CONST_BITS)

98  
99 
/* Convert a positive real constant to an integer scaled by CONST_SCALE.

100 
* IMPORTANT: if your compiler doesn't do this arithmetic at compile time,

101 
* you will pay a significant penalty in run time. In that case, figure

102 
* the correct integer constant values and insert them by hand.

103 
*/

104  
105 
/* Actually FIX is no longer used, we precomputed them all */

106 
#define FIX(x) ((INT32) ((x) * CONST_SCALE + 0.5)) 
107  
108 
/* Descale and correctly round an INT32 value that's scaled by N bits.

109 
* We assume RIGHT_SHIFT rounds towards minus infinity, so adding

110 
* the fudge factor is correct for either sign of X.

111 
*/

112  
113 
#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)1)), n) 
114  
115 
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.

116 
* For 8bit samples with the recommended scaling, all the variable

117 
* and constant values involved are no more than 16 bits wide, so a

118 
* 16x16>32 bit multiply can be used instead of a full 32x32 multiply;

119 
* this provides a useful speedup on many machines.

120 
* There is no way to specify a 16x16>32 multiply in portable C, but

121 
* some C compilers will do the right thing if you provide the correct

122 
* combination of casts.

123 
* NB: for 12bit samples, a full 32bit multiplication will be needed.

124 
*/

125  
126 
#ifdef EIGHT_BIT_SAMPLES

127 
#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */ 
128 
#define MULTIPLY(var,const) (((INT16) (var)) * ((INT16) (const))) 
129 
#endif

130 
#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */ 
131 
#define MULTIPLY(var,const) (((INT16) (var)) * ((INT32) (const))) 
132 
#endif

133 
#endif

134  
135 
#ifndef MULTIPLY /* default definition */ 
136 
#define MULTIPLY(var,const) ((var) * (const)) 
137 
#endif

138  
139  
140 
/*

141 
Unlike our decoder where we approximate the FIXes, we need to use exact

142 
ones here or successive Pframes will drift too much with Reference frame coding

143 
*/

144 
#define FIX_0_211164243 1730 
145 
#define FIX_0_275899380 2260 
146 
#define FIX_0_298631336 2446 
147 
#define FIX_0_390180644 3196 
148 
#define FIX_0_509795579 4176 
149 
#define FIX_0_541196100 4433 
150 
#define FIX_0_601344887 4926 
151 
#define FIX_0_765366865 6270 
152 
#define FIX_0_785694958 6436 
153 
#define FIX_0_899976223 7373 
154 
#define FIX_1_061594337 8697 
155 
#define FIX_1_111140466 9102 
156 
#define FIX_1_175875602 9633 
157 
#define FIX_1_306562965 10703 
158 
#define FIX_1_387039845 11363 
159 
#define FIX_1_451774981 11893 
160 
#define FIX_1_501321110 12299 
161 
#define FIX_1_662939225 13623 
162 
#define FIX_1_847759065 15137 
163 
#define FIX_1_961570560 16069 
164 
#define FIX_2_053119869 16819 
165 
#define FIX_2_172734803 17799 
166 
#define FIX_2_562915447 20995 
167 
#define FIX_3_072711026 25172 
168  
169 
/*

170 
* Perform the inverse DCT on one block of coefficients.

171 
*/

172  
173 
void j_rev_dct(DCTBLOCK data)

174 
{ 
175 
INT32 tmp0, tmp1, tmp2, tmp3; 
176 
INT32 tmp10, tmp11, tmp12, tmp13; 
177 
INT32 z1, z2, z3, z4, z5; 
178 
INT32 d0, d1, d2, d3, d4, d5, d6, d7; 
179 
register DCTELEM *dataptr;

180 
int rowctr;

181 

182 
/* Pass 1: process rows. */

183 
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */

184 
/* furthermore, we scale the results by 2**PASS1_BITS. */

185  
186 
dataptr = data; 
187  
188 
for (rowctr = DCTSIZE1; rowctr >= 0; rowctr) { 
189 
/* Due to quantization, we will usually find that many of the input

190 
* coefficients are zero, especially the AC terms. We can exploit this

191 
* by shortcircuiting the IDCT calculation for any row in which all

192 
* the AC terms are zero. In that case each output is equal to the

193 
* DC coefficient (with scale factor as needed).

194 
* With typical images and quantization tables, half or more of the

195 
* row DCT calculations can be simplified this way.

196 
*/

197  
198 
register int *idataptr = (int*)dataptr; 
199  
200 
/* WARNING: we do the same permutation as MMX idct to simplify the

201 
video core */

202 
d0 = dataptr[0];

203 
d2 = dataptr[1];

204 
d4 = dataptr[2];

205 
d6 = dataptr[3];

206 
d1 = dataptr[4];

207 
d3 = dataptr[5];

208 
d5 = dataptr[6];

209 
d7 = dataptr[7];

210  
211 
if ((d1  d2  d3  d4  d5  d6  d7) == 0) { 
212 
/* AC terms all zero */

213 
if (d0) {

214 
/* Compute a 32 bit value to assign. */

215 
DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 
216 
register int v = (dcval & 0xffff)  ((dcval << 16) & 0xffff0000); 
217 

218 
idataptr[0] = v;

219 
idataptr[1] = v;

220 
idataptr[2] = v;

221 
idataptr[3] = v;

222 
} 
223 

224 
dataptr += DCTSIZE; /* advance pointer to next row */

225 
continue;

226 
} 
227  
228 
/* Even part: reverse the even part of the forward DCT. */

229 
/* The rotator is sqrt(2)*c(6). */

230 
{ 
231 
if (d6) {

232 
if (d4) {

233 
if (d2) {

234 
if (d0) {

235 
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */

236 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
237 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
238 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
239  
240 
tmp0 = (d0 + d4) << CONST_BITS; 
241 
tmp1 = (d0  d4) << CONST_BITS; 
242  
243 
tmp10 = tmp0 + tmp3; 
244 
tmp13 = tmp0  tmp3; 
245 
tmp11 = tmp1 + tmp2; 
246 
tmp12 = tmp1  tmp2; 
247 
} else {

248 
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */

249 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
250 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
251 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
252  
253 
tmp0 = d4 << CONST_BITS; 
254  
255 
tmp10 = tmp0 + tmp3; 
256 
tmp13 = tmp0  tmp3; 
257 
tmp11 = tmp2  tmp0; 
258 
tmp12 = (tmp0 + tmp2); 
259 
} 
260 
} else {

261 
if (d0) {

262 
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */

263 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
264 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
265  
266 
tmp0 = (d0 + d4) << CONST_BITS; 
267 
tmp1 = (d0  d4) << CONST_BITS; 
268  
269 
tmp10 = tmp0 + tmp3; 
270 
tmp13 = tmp0  tmp3; 
271 
tmp11 = tmp1 + tmp2; 
272 
tmp12 = tmp1  tmp2; 
273 
} else {

274 
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */

275 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
276 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
277  
278 
tmp0 = d4 << CONST_BITS; 
279  
280 
tmp10 = tmp0 + tmp3; 
281 
tmp13 = tmp0  tmp3; 
282 
tmp11 = tmp2  tmp0; 
283 
tmp12 = (tmp0 + tmp2); 
284 
} 
285 
} 
286 
} else {

287 
if (d2) {

288 
if (d0) {

289 
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */

290 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
291 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
292 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
293  
294 
tmp0 = d0 << CONST_BITS; 
295  
296 
tmp10 = tmp0 + tmp3; 
297 
tmp13 = tmp0  tmp3; 
298 
tmp11 = tmp0 + tmp2; 
299 
tmp12 = tmp0  tmp2; 
300 
} else {

301 
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */

302 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
303 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
304 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
305  
306 
tmp10 = tmp3; 
307 
tmp13 = tmp3; 
308 
tmp11 = tmp2; 
309 
tmp12 = tmp2; 
310 
} 
311 
} else {

312 
if (d0) {

313 
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */

314 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
315 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
316  
317 
tmp0 = d0 << CONST_BITS; 
318  
319 
tmp10 = tmp0 + tmp3; 
320 
tmp13 = tmp0  tmp3; 
321 
tmp11 = tmp0 + tmp2; 
322 
tmp12 = tmp0  tmp2; 
323 
} else {

324 
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */

325 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
326 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
327  
328 
tmp10 = tmp3; 
329 
tmp13 = tmp3; 
330 
tmp11 = tmp2; 
331 
tmp12 = tmp2; 
332 
} 
333 
} 
334 
} 
335 
} else {

336 
if (d4) {

337 
if (d2) {

338 
if (d0) {

339 
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */

340 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
341 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
342  
343 
tmp0 = (d0 + d4) << CONST_BITS; 
344 
tmp1 = (d0  d4) << CONST_BITS; 
345  
346 
tmp10 = tmp0 + tmp3; 
347 
tmp13 = tmp0  tmp3; 
348 
tmp11 = tmp1 + tmp2; 
349 
tmp12 = tmp1  tmp2; 
350 
} else {

351 
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */

352 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
353 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
354  
355 
tmp0 = d4 << CONST_BITS; 
356  
357 
tmp10 = tmp0 + tmp3; 
358 
tmp13 = tmp0  tmp3; 
359 
tmp11 = tmp2  tmp0; 
360 
tmp12 = (tmp0 + tmp2); 
361 
} 
362 
} else {

363 
if (d0) {

364 
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */

365 
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 
366 
tmp11 = tmp12 = (d0  d4) << CONST_BITS; 
367 
} else {

368 
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */

369 
tmp10 = tmp13 = d4 << CONST_BITS; 
370 
tmp11 = tmp12 = tmp10; 
371 
} 
372 
} 
373 
} else {

374 
if (d2) {

375 
if (d0) {

376 
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */

377 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
378 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
379  
380 
tmp0 = d0 << CONST_BITS; 
381  
382 
tmp10 = tmp0 + tmp3; 
383 
tmp13 = tmp0  tmp3; 
384 
tmp11 = tmp0 + tmp2; 
385 
tmp12 = tmp0  tmp2; 
386 
} else {

387 
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */

388 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
389 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
390  
391 
tmp10 = tmp3; 
392 
tmp13 = tmp3; 
393 
tmp11 = tmp2; 
394 
tmp12 = tmp2; 
395 
} 
396 
} else {

397 
if (d0) {

398 
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */

399 
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; 
400 
} else {

401 
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */

402 
tmp10 = tmp13 = tmp11 = tmp12 = 0;

403 
} 
404 
} 
405 
} 
406 
} 
407  
408 
/* Odd part per figure 8; the matrix is unitary and hence its

409 
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.

410 
*/

411  
412 
if (d7) {

413 
if (d5) {

414 
if (d3) {

415 
if (d1) {

416 
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */

417 
z1 = d7 + d1; 
418 
z2 = d5 + d3; 
419 
z3 = d7 + d3; 
420 
z4 = d5 + d1; 
421 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
422 

423 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
424 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
425 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
426 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
427 
z1 = MULTIPLY(z1, FIX_0_899976223); 
428 
z2 = MULTIPLY(z2, FIX_2_562915447); 
429 
z3 = MULTIPLY(z3, FIX_1_961570560); 
430 
z4 = MULTIPLY(z4, FIX_0_390180644); 
431 

432 
z3 += z5; 
433 
z4 += z5; 
434 

435 
tmp0 += z1 + z3; 
436 
tmp1 += z2 + z4; 
437 
tmp2 += z2 + z3; 
438 
tmp3 += z1 + z4; 
439 
} else {

440 
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */

441 
z2 = d5 + d3; 
442 
z3 = d7 + d3; 
443 
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 
444 

445 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
446 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
447 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
448 
z1 = MULTIPLY(d7, FIX_0_899976223); 
449 
z2 = MULTIPLY(z2, FIX_2_562915447); 
450 
z3 = MULTIPLY(z3, FIX_1_961570560); 
451 
z4 = MULTIPLY(d5, FIX_0_390180644); 
452 

453 
z3 += z5; 
454 
z4 += z5; 
455 

456 
tmp0 += z1 + z3; 
457 
tmp1 += z2 + z4; 
458 
tmp2 += z2 + z3; 
459 
tmp3 = z1 + z4; 
460 
} 
461 
} else {

462 
if (d1) {

463 
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */

464 
z1 = d7 + d1; 
465 
z4 = d5 + d1; 
466 
z5 = MULTIPLY(d7 + z4, FIX_1_175875602); 
467 

468 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
469 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
470 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
471 
z1 = MULTIPLY(z1, FIX_0_899976223); 
472 
z2 = MULTIPLY(d5, FIX_2_562915447); 
473 
z3 = MULTIPLY(d7, FIX_1_961570560); 
474 
z4 = MULTIPLY(z4, FIX_0_390180644); 
475 

476 
z3 += z5; 
477 
z4 += z5; 
478 

479 
tmp0 += z1 + z3; 
480 
tmp1 += z2 + z4; 
481 
tmp2 = z2 + z3; 
482 
tmp3 += z1 + z4; 
483 
} else {

484 
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */

485 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
486 
z1 = MULTIPLY(d7, FIX_0_899976223); 
487 
z3 = MULTIPLY(d7, FIX_1_961570560); 
488 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
489 
z2 = MULTIPLY(d5, FIX_2_562915447); 
490 
z4 = MULTIPLY(d5, FIX_0_390180644); 
491 
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 
492 

493 
z3 += z5; 
494 
z4 += z5; 
495 

496 
tmp0 += z3; 
497 
tmp1 += z4; 
498 
tmp2 = z2 + z3; 
499 
tmp3 = z1 + z4; 
500 
} 
501 
} 
502 
} else {

503 
if (d3) {

504 
if (d1) {

505 
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */

506 
z1 = d7 + d1; 
507 
z3 = d7 + d3; 
508 
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 
509 

510 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
511 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
512 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
513 
z1 = MULTIPLY(z1, FIX_0_899976223); 
514 
z2 = MULTIPLY(d3, FIX_2_562915447); 
515 
z3 = MULTIPLY(z3, FIX_1_961570560); 
516 
z4 = MULTIPLY(d1, FIX_0_390180644); 
517 

518 
z3 += z5; 
519 
z4 += z5; 
520 

521 
tmp0 += z1 + z3; 
522 
tmp1 = z2 + z4; 
523 
tmp2 += z2 + z3; 
524 
tmp3 += z1 + z4; 
525 
} else {

526 
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */

527 
z3 = d7 + d3; 
528 

529 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
530 
z1 = MULTIPLY(d7, FIX_0_899976223); 
531 
tmp2 = MULTIPLY(d3, FIX_0_509795579); 
532 
z2 = MULTIPLY(d3, FIX_2_562915447); 
533 
z5 = MULTIPLY(z3, FIX_1_175875602); 
534 
z3 = MULTIPLY(z3, FIX_0_785694958); 
535 

536 
tmp0 += z3; 
537 
tmp1 = z2 + z5; 
538 
tmp2 += z3; 
539 
tmp3 = z1 + z5; 
540 
} 
541 
} else {

542 
if (d1) {

543 
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */

544 
z1 = d7 + d1; 
545 
z5 = MULTIPLY(z1, FIX_1_175875602); 
546  
547 
z1 = MULTIPLY(z1, FIX_0_275899380); 
548 
z3 = MULTIPLY(d7, FIX_1_961570560); 
549 
tmp0 = MULTIPLY(d7, FIX_1_662939225); 
550 
z4 = MULTIPLY(d1, FIX_0_390180644); 
551 
tmp3 = MULTIPLY(d1, FIX_1_111140466); 
552  
553 
tmp0 += z1; 
554 
tmp1 = z4 + z5; 
555 
tmp2 = z3 + z5; 
556 
tmp3 += z1; 
557 
} else {

558 
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */

559 
tmp0 = MULTIPLY(d7, FIX_1_387039845); 
560 
tmp1 = MULTIPLY(d7, FIX_1_175875602); 
561 
tmp2 = MULTIPLY(d7, FIX_0_785694958); 
562 
tmp3 = MULTIPLY(d7, FIX_0_275899380); 
563 
} 
564 
} 
565 
} 
566 
} else {

567 
if (d5) {

568 
if (d3) {

569 
if (d1) {

570 
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */

571 
z2 = d5 + d3; 
572 
z4 = d5 + d1; 
573 
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 
574 

575 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
576 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
577 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
578 
z1 = MULTIPLY(d1, FIX_0_899976223); 
579 
z2 = MULTIPLY(z2, FIX_2_562915447); 
580 
z3 = MULTIPLY(d3, FIX_1_961570560); 
581 
z4 = MULTIPLY(z4, FIX_0_390180644); 
582 

583 
z3 += z5; 
584 
z4 += z5; 
585 

586 
tmp0 = z1 + z3; 
587 
tmp1 += z2 + z4; 
588 
tmp2 += z2 + z3; 
589 
tmp3 += z1 + z4; 
590 
} else {

591 
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */

592 
z2 = d5 + d3; 
593 

594 
z5 = MULTIPLY(z2, FIX_1_175875602); 
595 
tmp1 = MULTIPLY(d5, FIX_1_662939225); 
596 
z4 = MULTIPLY(d5, FIX_0_390180644); 
597 
z2 = MULTIPLY(z2, FIX_1_387039845); 
598 
tmp2 = MULTIPLY(d3, FIX_1_111140466); 
599 
z3 = MULTIPLY(d3, FIX_1_961570560); 
600 

601 
tmp0 = z3 + z5; 
602 
tmp1 += z2; 
603 
tmp2 += z2; 
604 
tmp3 = z4 + z5; 
605 
} 
606 
} else {

607 
if (d1) {

608 
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */

609 
z4 = d5 + d1; 
610 

611 
z5 = MULTIPLY(z4, FIX_1_175875602); 
612 
z1 = MULTIPLY(d1, FIX_0_899976223); 
613 
tmp3 = MULTIPLY(d1, FIX_0_601344887); 
614 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
615 
z2 = MULTIPLY(d5, FIX_2_562915447); 
616 
z4 = MULTIPLY(z4, FIX_0_785694958); 
617 

618 
tmp0 = z1 + z5; 
619 
tmp1 += z4; 
620 
tmp2 = z2 + z5; 
621 
tmp3 += z4; 
622 
} else {

623 
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */

624 
tmp0 = MULTIPLY(d5, FIX_1_175875602); 
625 
tmp1 = MULTIPLY(d5, FIX_0_275899380); 
626 
tmp2 = MULTIPLY(d5, FIX_1_387039845); 
627 
tmp3 = MULTIPLY(d5, FIX_0_785694958); 
628 
} 
629 
} 
630 
} else {

631 
if (d3) {

632 
if (d1) {

633 
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */

634 
z5 = d1 + d3; 
635 
tmp3 = MULTIPLY(d1, FIX_0_211164243); 
636 
tmp2 = MULTIPLY(d3, FIX_1_451774981); 
637 
z1 = MULTIPLY(d1, FIX_1_061594337); 
638 
z2 = MULTIPLY(d3, FIX_2_172734803); 
639 
z4 = MULTIPLY(z5, FIX_0_785694958); 
640 
z5 = MULTIPLY(z5, FIX_1_175875602); 
641 

642 
tmp0 = z1  z4; 
643 
tmp1 = z2 + z4; 
644 
tmp2 += z5; 
645 
tmp3 += z5; 
646 
} else {

647 
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */

648 
tmp0 = MULTIPLY(d3, FIX_0_785694958); 
649 
tmp1 = MULTIPLY(d3, FIX_1_387039845); 
650 
tmp2 = MULTIPLY(d3, FIX_0_275899380); 
651 
tmp3 = MULTIPLY(d3, FIX_1_175875602); 
652 
} 
653 
} else {

654 
if (d1) {

655 
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */

656 
tmp0 = MULTIPLY(d1, FIX_0_275899380); 
657 
tmp1 = MULTIPLY(d1, FIX_0_785694958); 
658 
tmp2 = MULTIPLY(d1, FIX_1_175875602); 
659 
tmp3 = MULTIPLY(d1, FIX_1_387039845); 
660 
} else {

661 
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */

662 
tmp0 = tmp1 = tmp2 = tmp3 = 0;

663 
} 
664 
} 
665 
} 
666 
} 
667 
} 
668 
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

669  
670 
dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITSPASS1_BITS);

671 
dataptr[7] = (DCTELEM) DESCALE(tmp10  tmp3, CONST_BITSPASS1_BITS);

672 
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITSPASS1_BITS);

673 
dataptr[6] = (DCTELEM) DESCALE(tmp11  tmp2, CONST_BITSPASS1_BITS);

674 
dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITSPASS1_BITS);

675 
dataptr[5] = (DCTELEM) DESCALE(tmp12  tmp1, CONST_BITSPASS1_BITS);

676 
dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITSPASS1_BITS);

677 
dataptr[4] = (DCTELEM) DESCALE(tmp13  tmp0, CONST_BITSPASS1_BITS);

678  
679 
dataptr += DCTSIZE; /* advance pointer to next row */

680 
} 
681  
682 
/* Pass 2: process columns. */

683 
/* Note that we must descale the results by a factor of 8 == 2**3, */

684 
/* and also undo the PASS1_BITS scaling. */

685  
686 
dataptr = data; 
687 
for (rowctr = DCTSIZE1; rowctr >= 0; rowctr) { 
688 
/* Columns of zeroes can be exploited in the same way as we did with rows.

689 
* However, the row calculation has created many nonzero AC terms, so the

690 
* simplification applies less often (typically 5% to 10% of the time).

691 
* On machines with very fast multiplication, it's possible that the

692 
* test takes more time than it's worth. In that case this section

693 
* may be commented out.

694 
*/

695  
696 
d0 = dataptr[DCTSIZE*0];

697 
d1 = dataptr[DCTSIZE*1];

698 
d2 = dataptr[DCTSIZE*2];

699 
d3 = dataptr[DCTSIZE*3];

700 
d4 = dataptr[DCTSIZE*4];

701 
d5 = dataptr[DCTSIZE*5];

702 
d6 = dataptr[DCTSIZE*6];

703 
d7 = dataptr[DCTSIZE*7];

704  
705 
/* Even part: reverse the even part of the forward DCT. */

706 
/* The rotator is sqrt(2)*c(6). */

707 
if (d6) {

708 
if (d4) {

709 
if (d2) {

710 
if (d0) {

711 
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */

712 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
713 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
714 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
715  
716 
tmp0 = (d0 + d4) << CONST_BITS; 
717 
tmp1 = (d0  d4) << CONST_BITS; 
718  
719 
tmp10 = tmp0 + tmp3; 
720 
tmp13 = tmp0  tmp3; 
721 
tmp11 = tmp1 + tmp2; 
722 
tmp12 = tmp1  tmp2; 
723 
} else {

724 
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */

725 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
726 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
727 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
728  
729 
tmp0 = d4 << CONST_BITS; 
730  
731 
tmp10 = tmp0 + tmp3; 
732 
tmp13 = tmp0  tmp3; 
733 
tmp11 = tmp2  tmp0; 
734 
tmp12 = (tmp0 + tmp2); 
735 
} 
736 
} else {

737 
if (d0) {

738 
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */

739 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
740 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
741  
742 
tmp0 = (d0 + d4) << CONST_BITS; 
743 
tmp1 = (d0  d4) << CONST_BITS; 
744  
745 
tmp10 = tmp0 + tmp3; 
746 
tmp13 = tmp0  tmp3; 
747 
tmp11 = tmp1 + tmp2; 
748 
tmp12 = tmp1  tmp2; 
749 
} else {

750 
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */

751 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
752 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
753  
754 
tmp0 = d4 << CONST_BITS; 
755  
756 
tmp10 = tmp0 + tmp3; 
757 
tmp13 = tmp0  tmp3; 
758 
tmp11 = tmp2  tmp0; 
759 
tmp12 = (tmp0 + tmp2); 
760 
} 
761 
} 
762 
} else {

763 
if (d2) {

764 
if (d0) {

765 
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */

766 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
767 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
768 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
769  
770 
tmp0 = d0 << CONST_BITS; 
771  
772 
tmp10 = tmp0 + tmp3; 
773 
tmp13 = tmp0  tmp3; 
774 
tmp11 = tmp0 + tmp2; 
775 
tmp12 = tmp0  tmp2; 
776 
} else {

777 
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */

778 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
779 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
780 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
781  
782 
tmp10 = tmp3; 
783 
tmp13 = tmp3; 
784 
tmp11 = tmp2; 
785 
tmp12 = tmp2; 
786 
} 
787 
} else {

788 
if (d0) {

789 
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */

790 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
791 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
792  
793 
tmp0 = d0 << CONST_BITS; 
794  
795 
tmp10 = tmp0 + tmp3; 
796 
tmp13 = tmp0  tmp3; 
797 
tmp11 = tmp0 + tmp2; 
798 
tmp12 = tmp0  tmp2; 
799 
} else {

800 
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */

801 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
802 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
803  
804 
tmp10 = tmp3; 
805 
tmp13 = tmp3; 
806 
tmp11 = tmp2; 
807 
tmp12 = tmp2; 
808 
} 
809 
} 
810 
} 
811 
} else {

812 
if (d4) {

813 
if (d2) {

814 
if (d0) {

815 
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */

816 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
817 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
818  
819 
tmp0 = (d0 + d4) << CONST_BITS; 
820 
tmp1 = (d0  d4) << CONST_BITS; 
821  
822 
tmp10 = tmp0 + tmp3; 
823 
tmp13 = tmp0  tmp3; 
824 
tmp11 = tmp1 + tmp2; 
825 
tmp12 = tmp1  tmp2; 
826 
} else {

827 
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */

828 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
829 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
830  
831 
tmp0 = d4 << CONST_BITS; 
832  
833 
tmp10 = tmp0 + tmp3; 
834 
tmp13 = tmp0  tmp3; 
835 
tmp11 = tmp2  tmp0; 
836 
tmp12 = (tmp0 + tmp2); 
837 
} 
838 
} else {

839 
if (d0) {

840 
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */

841 
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 
842 
tmp11 = tmp12 = (d0  d4) << CONST_BITS; 
843 
} else {

844 
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */

845 
tmp10 = tmp13 = d4 << CONST_BITS; 
846 
tmp11 = tmp12 = tmp10; 
847 
} 
848 
} 
849 
} else {

850 
if (d2) {

851 
if (d0) {

852 
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */

853 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
854 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
855  
856 
tmp0 = d0 << CONST_BITS; 
857  
858 
tmp10 = tmp0 + tmp3; 
859 
tmp13 = tmp0  tmp3; 
860 
tmp11 = tmp0 + tmp2; 
861 
tmp12 = tmp0  tmp2; 
862 
} else {

863 
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */

864 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
865 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
866  
867 
tmp10 = tmp3; 
868 
tmp13 = tmp3; 
869 
tmp11 = tmp2; 
870 
tmp12 = tmp2; 
871 
} 
872 
} else {

873 
if (d0) {

874 
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */

875 
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; 
876 
} else {

877 
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */

878 
tmp10 = tmp13 = tmp11 = tmp12 = 0;

879 
} 
880 
} 
881 
} 
882 
} 
883  
884 
/* Odd part per figure 8; the matrix is unitary and hence its

885 
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.

886 
*/

887 
if (d7) {

888 
if (d5) {

889 
if (d3) {

890 
if (d1) {

891 
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */

892 
z1 = d7 + d1; 
893 
z2 = d5 + d3; 
894 
z3 = d7 + d3; 
895 
z4 = d5 + d1; 
896 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
897 

898 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
899 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
900 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
901 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
902 
z1 = MULTIPLY(z1, FIX_0_899976223); 
903 
z2 = MULTIPLY(z2, FIX_2_562915447); 
904 
z3 = MULTIPLY(z3, FIX_1_961570560); 
905 
z4 = MULTIPLY(z4, FIX_0_390180644); 
906 

907 
z3 += z5; 
908 
z4 += z5; 
909 

910 
tmp0 += z1 + z3; 
911 
tmp1 += z2 + z4; 
912 
tmp2 += z2 + z3; 
913 
tmp3 += z1 + z4; 
914 
} else {

915 
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */

916 
z1 = d7; 
917 
z2 = d5 + d3; 
918 
z3 = d7 + d3; 
919 
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 
920 

921 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
922 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
923 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
924 
z1 = MULTIPLY(d7, FIX_0_899976223); 
925 
z2 = MULTIPLY(z2, FIX_2_562915447); 
926 
z3 = MULTIPLY(z3, FIX_1_961570560); 
927 
z4 = MULTIPLY(d5, FIX_0_390180644); 
928 

929 
z3 += z5; 
930 
z4 += z5; 
931 

932 
tmp0 += z1 + z3; 
933 
tmp1 += z2 + z4; 
934 
tmp2 += z2 + z3; 
935 
tmp3 = z1 + z4; 
936 
} 
937 
} else {

938 
if (d1) {

939 
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */

940 
z1 = d7 + d1; 
941 
z2 = d5; 
942 
z3 = d7; 
943 
z4 = d5 + d1; 
944 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
945 

946 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
947 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
948 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
949 
z1 = MULTIPLY(z1, FIX_0_899976223); 
950 
z2 = MULTIPLY(d5, FIX_2_562915447); 
951 
z3 = MULTIPLY(d7, FIX_1_961570560); 
952 
z4 = MULTIPLY(z4, FIX_0_390180644); 
953 

954 
z3 += z5; 
955 
z4 += z5; 
956 

957 
tmp0 += z1 + z3; 
958 
tmp1 += z2 + z4; 
959 
tmp2 = z2 + z3; 
960 
tmp3 += z1 + z4; 
961 
} else {

962 
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */

963 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
964 
z1 = MULTIPLY(d7, FIX_0_899976223); 
965 
z3 = MULTIPLY(d7, FIX_1_961570560); 
966 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
967 
z2 = MULTIPLY(d5, FIX_2_562915447); 
968 
z4 = MULTIPLY(d5, FIX_0_390180644); 
969 
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 
970 

971 
z3 += z5; 
972 
z4 += z5; 
973 

974 
tmp0 += z3; 
975 
tmp1 += z4; 
976 
tmp2 = z2 + z3; 
977 
tmp3 = z1 + z4; 
978 
} 
979 
} 
980 
} else {

981 
if (d3) {

982 
if (d1) {

983 
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */

984 
z1 = d7 + d1; 
985 
z3 = d7 + d3; 
986 
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 
987 

988 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
989 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
990 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
991 
z1 = MULTIPLY(z1, FIX_0_899976223); 
992 
z2 = MULTIPLY(d3, FIX_2_562915447); 
993 
z3 = MULTIPLY(z3, FIX_1_961570560); 
994 
z4 = MULTIPLY(d1, FIX_0_390180644); 
995 

996 
z3 += z5; 
997 
z4 += z5; 
998 

999 
tmp0 += z1 + z3; 
1000 
tmp1 = z2 + z4; 
1001 
tmp2 += z2 + z3; 
1002 
tmp3 += z1 + z4; 
1003 
} else {

1004 
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */

1005 
z3 = d7 + d3; 
1006 

1007 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
1008 
z1 = MULTIPLY(d7, FIX_0_899976223); 
1009 
tmp2 = MULTIPLY(d3, FIX_0_509795579); 
1010 
z2 = MULTIPLY(d3, FIX_2_562915447); 
1011 
z5 = MULTIPLY(z3, FIX_1_175875602); 
1012 
z3 = MULTIPLY(z3, FIX_0_785694958); 
1013 

1014 
tmp0 += z3; 
1015 
tmp1 = z2 + z5; 
1016 
tmp2 += z3; 
1017 
tmp3 = z1 + z5; 
1018 
} 
1019 
} else {

1020 
if (d1) {

1021 
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */

1022 
z1 = d7 + d1; 
1023 
z5 = MULTIPLY(z1, FIX_1_175875602); 
1024  
1025 
z1 = MULTIPLY(z1, FIX_0_275899380); 
1026 
z3 = MULTIPLY(d7, FIX_1_961570560); 
1027 
tmp0 = MULTIPLY(d7, FIX_1_662939225); 
1028 
z4 = MULTIPLY(d1, FIX_0_390180644); 
1029 
tmp3 = MULTIPLY(d1, FIX_1_111140466); 
1030  
1031 
tmp0 += z1; 
1032 
tmp1 = z4 + z5; 
1033 
tmp2 = z3 + z5; 
1034 
tmp3 += z1; 
1035 
} else {

1036 
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */

1037 
tmp0 = MULTIPLY(d7, FIX_1_387039845); 
1038 
tmp1 = MULTIPLY(d7, FIX_1_175875602); 
1039 
tmp2 = MULTIPLY(d7, FIX_0_785694958); 
1040 
tmp3 = MULTIPLY(d7, FIX_0_275899380); 
1041 
} 
1042 
} 
1043 
} 
1044 
} else {

1045 
if (d5) {

1046 
if (d3) {

1047 
if (d1) {

1048 
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */

1049 
z2 = d5 + d3; 
1050 
z4 = d5 + d1; 
1051 
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 
1052 

1053 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
1054 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
1055 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
1056 
z1 = MULTIPLY(d1, FIX_0_899976223); 
1057 
z2 = MULTIPLY(z2, FIX_2_562915447); 
1058 
z3 = MULTIPLY(d3, FIX_1_961570560); 
1059 
z4 = MULTIPLY(z4, FIX_0_390180644); 
1060 

1061 
z3 += z5; 
1062 
z4 += z5; 
1063 

1064 
tmp0 = z1 + z3; 
1065 
tmp1 += z2 + z4; 
1066 
tmp2 += z2 + z3; 
1067 
tmp3 += z1 + z4; 
1068 
} else {

1069 
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */

1070 
z2 = d5 + d3; 
1071 

1072 
z5 = MULTIPLY(z2, FIX_1_175875602); 
1073 
tmp1 = MULTIPLY(d5, FIX_1_662939225); 
1074 
z4 = MULTIPLY(d5, FIX_0_390180644); 
1075 
z2 = MULTIPLY(z2, FIX_1_387039845); 
1076 
tmp2 = MULTIPLY(d3, FIX_1_111140466); 
1077 
z3 = MULTIPLY(d3, FIX_1_961570560); 
1078 

1079 
tmp0 = z3 + z5; 
1080 
tmp1 += z2; 
1081 
tmp2 += z2; 
1082 
tmp3 = z4 + z5; 
1083 
} 
1084 
} else {

1085 
if (d1) {

1086 
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */

1087 
z4 = d5 + d1; 
1088 

1089 
z5 = MULTIPLY(z4, FIX_1_175875602); 
1090 
z1 = MULTIPLY(d1, FIX_0_899976223); 
1091 
tmp3 = MULTIPLY(d1, FIX_0_601344887); 
1092 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
1093 
z2 = MULTIPLY(d5, FIX_2_562915447); 
1094 
z4 = MULTIPLY(z4, FIX_0_785694958); 
1095 

1096 
tmp0 = z1 + z5; 
1097 
tmp1 += z4; 
1098 
tmp2 = z2 + z5; 
1099 
tmp3 += z4; 
1100 
} else {

1101 
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */

1102 
tmp0 = MULTIPLY(d5, FIX_1_175875602); 
1103 
tmp1 = MULTIPLY(d5, FIX_0_275899380); 
1104 
tmp2 = MULTIPLY(d5, FIX_1_387039845); 
1105 
tmp3 = MULTIPLY(d5, FIX_0_785694958); 
1106 
} 
1107 
} 
1108 
} else {

1109 
if (d3) {

1110 
if (d1) {

1111 
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */

1112 
z5 = d1 + d3; 
1113 
tmp3 = MULTIPLY(d1, FIX_0_211164243); 
1114 
tmp2 = MULTIPLY(d3, FIX_1_451774981); 
1115 
z1 = MULTIPLY(d1, FIX_1_061594337); 
1116 
z2 = MULTIPLY(d3, FIX_2_172734803); 
1117 
z4 = MULTIPLY(z5, FIX_0_785694958); 
1118 
z5 = MULTIPLY(z5, FIX_1_175875602); 
1119 

1120 
tmp0 = z1  z4; 
1121 
tmp1 = z2 + z4; 
1122 
tmp2 += z5; 
1123 
tmp3 += z5; 
1124 
} else {

1125 
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */

1126 
tmp0 = MULTIPLY(d3, FIX_0_785694958); 
1127 
tmp1 = MULTIPLY(d3, FIX_1_387039845); 
1128 
tmp2 = MULTIPLY(d3, FIX_0_275899380); 
1129 
tmp3 = MULTIPLY(d3, FIX_1_175875602); 
1130 
} 
1131 
} else {

1132 
if (d1) {

1133 
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */

1134 
tmp0 = MULTIPLY(d1, FIX_0_275899380); 
1135 
tmp1 = MULTIPLY(d1, FIX_0_785694958); 
1136 
tmp2 = MULTIPLY(d1, FIX_1_175875602); 
1137 
tmp3 = MULTIPLY(d1, FIX_1_387039845); 
1138 
} else {

1139 
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */

1140 
tmp0 = tmp1 = tmp2 = tmp3 = 0;

1141 
} 
1142 
} 
1143 
} 
1144 
} 
1145  
1146 
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

1147  
1148 
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,

1149 
CONST_BITS+PASS1_BITS+3);

1150 
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10  tmp3,

1151 
CONST_BITS+PASS1_BITS+3);

1152 
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,

1153 
CONST_BITS+PASS1_BITS+3);

1154 
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11  tmp2,

1155 
CONST_BITS+PASS1_BITS+3);

1156 
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,

1157 
CONST_BITS+PASS1_BITS+3);

1158 
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12  tmp1,

1159 
CONST_BITS+PASS1_BITS+3);

1160 
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,

1161 
CONST_BITS+PASS1_BITS+3);

1162 
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13  tmp0,

1163 
CONST_BITS+PASS1_BITS+3);

1164 

1165 
dataptr++; /* advance pointer to next column */

1166 
} 
1167 
} 
1168  
1169 
#undef FIX

1170 
#undef CONST_BITS
