ffmpeg / libavcodec / jrevdct.c @ 983e3246
History  View  Annotate  Download (33.2 KB)
1 
/*


2 
* jrevdct.c

3 
*

4 
* Copyright (C) 1991, 1992, Thomas G. Lane.

5 
* This file is part of the Independent JPEG Group's software.

6 
* For conditions of distribution and use, see the accompanying README file.

7 
*

8 
* This file contains the basic inverse-DCT transformation subroutine.

9 
*

10 
* This implementation is based on an algorithm described in

11 
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1D DCT

12 
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,

13 
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.

14 
* The primary algorithm described there uses 11 multiplies and 29 adds.

15 
* We use their alternate method with 12 multiplies and 32 adds.

16 
* The advantage of this method is that no data path contains more than one

17 
* multiplication; this allows a very simple and accurate implementation in

18 
* scaled fixed-point arithmetic, with a minimal number of shifts.

19 
*

20 
* I've made lots of modifications to attempt to take advantage of the

21 
* sparse nature of the DCT matrices we're getting. Although the logic

22 
* is cumbersome, it's straightforward and the resulting code is much

23 
* faster.

24 
*

25 
* A better way to do this would be to pass in the DCT block as a sparse

26 
* matrix, perhaps with the difference cases encoded.

27 
*/

28 

29 
/**

30 
* @file jrevdct.c

31 
* Independent JPEG Group's LLM idct.

32 
*/

33 

34 
#include "common.h" 
35 
#include "dsputil.h" 
36  
37 
/* Build for 8-bit samples; this selects the PASS1_BITS value further down. */
#define EIGHT_BIT_SAMPLES

/* Edge length of a DCT block and the number of coefficients it holds. */
#define DCTSIZE 8
#define DCTSIZE2 64

/* Expands to nothing; not referenced in the visible part of this file. */
#define GLOBAL

/* Plain right shift; DESCALE assumes it rounds towards minus infinity. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))

45  
46 
/* One block of DCTSIZE2 coefficients, stored as a flat array of DCTELEM. */
typedef DCTELEM DCTBLOCK[DCTSIZE2];

47  
48 
#define CONST_BITS 13 
49  
50 
/*

51 
* This routine is specialized to the case DCTSIZE = 8.

52 
*/

53  
54 
#if DCTSIZE != 8 
55 
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */ 
56 
#endif

57  
58  
59 
/*

60 
* A 2D IDCT can be done by 1D IDCT on each row followed by 1D IDCT

61 
* on each column. Direct algorithms are also available, but they are

62 
* much more complex and seem not to be any faster when reduced to code.

63 
*

64 
* The poop on this scaling stuff is as follows:

65 
*

66 
* Each 1D IDCT step produces outputs which are a factor of sqrt(N)

67 
* larger than the true IDCT outputs. The final outputs are therefore

68 
* a factor of N larger than desired; since N=8 this can be cured by

69 
* a simple right shift at the end of the algorithm. The advantage of

70 
* this arrangement is that we save two multiplications per 1D IDCT,

71 
* because the y0 and y4 inputs need not be divided by sqrt(N).

72 
*

73 
* We have to do addition and subtraction of the integer inputs, which

74 
* is no problem, and multiplication by fractional constants, which is

75 
* a problem to do in integer arithmetic. We multiply all the constants

76 
* by CONST_SCALE and convert them to integer constants (thus retaining

77 
* CONST_BITS bits of precision in the constants). After doing a

78 
* multiplication we have to divide the product by CONST_SCALE, with proper

79 
* rounding, to produce the correct output. This division can be done

80 
* cheaply as a right shift of CONST_BITS bits. We postpone shifting

81 
* as long as possible so that partial sums can be added together with

82 
* full fractional precision.

83 
*

84 
* The outputs of the first pass are scaled up by PASS1_BITS bits so that

85 
* they are represented to better-than-integral precision. These outputs

86 
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word

87 
* with the recommended scaling. (To scale up 12-bit sample data further, an

88 
* intermediate int32 array would be needed.)

89 
*

90 
* To avoid overflow of the 32-bit intermediate results in pass 2, we must

91 
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis

92 
* shows that the values given below are the most effective.

93 
*/

94  
95 
/* Extra fractional bits kept in the outputs of the first (row) pass. */
#ifdef EIGHT_BIT_SAMPLES
#define PASS1_BITS 2
#else
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif

/* The value 1 as an int32_t, so that shifts of it are done in 32 bits. */
#define ONE ((int32_t) 1)

/* Factor by which the real-valued constants are scaled: 2**CONST_BITS. */
#define CONST_SCALE (ONE << CONST_BITS)

104  
105 
/* Convert a positive real constant to an integer scaled by CONST_SCALE.
 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
 * you will pay a significant penalty in run time. In that case, figure
 * the correct integer constant values and insert them by hand.
 *
 * Actually FIX is no longer used here: all constants are precomputed as the
 * FIX_* values defined below.
 */
#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5))
113  
114 
/* Descale and correctly round an int32_t value that's scaled by N bits.
 * Half of the divisor, 2**(N-1), is added before shifting so the result is
 * rounded to nearest rather than truncated.
 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
 * the fudge factor is correct for either sign of X.
 */
#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
120  
121 
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
 * For 8-bit samples with the recommended scaling, all the variable
 * and constant values involved are no more than 16 bits wide, so a
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
 * this provides a useful speedup on many machines.
 * There is no way to specify a 16x16->32 multiply in portable C, but
 * some C compilers will do the right thing if you provide the correct
 * combination of casts.
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
 */

#ifdef EIGHT_BIT_SAMPLES
#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
#define MULTIPLY(var,coef) (((int16_t) (var)) * ((int16_t) (coef)))
#endif
#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
#define MULTIPLY(var,coef) (((int16_t) (var)) * ((int32_t) (coef)))
#endif
#endif

#ifndef MULTIPLY /* default definition */
#define MULTIPLY(var,coef) ((var) * (coef))
#endif

144  
145  
146 
/*
 * Unlike our decoder where we approximate the FIXes, we need to use exact
 * ones here or successive P-frames will drift too much with Reference frame coding.
 *
 * Each FIX_a_bbbbbbbbb is the real constant a.bbbbbbbbb scaled by
 * 2**CONST_BITS (8192) and rounded to the nearest integer.
 */
#define FIX_0_211164243 1730
#define FIX_0_275899380 2260
#define FIX_0_298631336 2446
#define FIX_0_390180644 3196
#define FIX_0_509795579 4176
#define FIX_0_541196100 4433
#define FIX_0_601344887 4926
#define FIX_0_765366865 6270
#define FIX_0_785694958 6436
#define FIX_0_899976223 7373
#define FIX_1_061594337 8697
#define FIX_1_111140466 9102
#define FIX_1_175875602 9633
#define FIX_1_306562965 10703
#define FIX_1_387039845 11363
#define FIX_1_451774981 11893
#define FIX_1_501321110 12299
#define FIX_1_662939225 13623
#define FIX_1_847759065 15137
#define FIX_1_961570560 16069
#define FIX_2_053119869 16819
#define FIX_2_172734803 17799
#define FIX_2_562915447 20995
#define FIX_3_072711026 25172
174  
175 
/*

176 
* Perform the inverse DCT on one block of coefficients.

177 
*/

178  
179 
void j_rev_dct(DCTBLOCK data)

180 
{ 
181 
int32_t tmp0, tmp1, tmp2, tmp3; 
182 
int32_t tmp10, tmp11, tmp12, tmp13; 
183 
int32_t z1, z2, z3, z4, z5; 
184 
int32_t d0, d1, d2, d3, d4, d5, d6, d7; 
185 
register DCTELEM *dataptr;

186 
int rowctr;

187 

188 
/* Pass 1: process rows. */

189 
/* Note results are scaled up by sqrt(8) compared to a true IDCT; */

190 
/* furthermore, we scale the results by 2**PASS1_BITS. */

191  
192 
dataptr = data; 
193  
194 
for (rowctr = DCTSIZE1; rowctr >= 0; rowctr) { 
195 
/* Due to quantization, we will usually find that many of the input

196 
* coefficients are zero, especially the AC terms. We can exploit this

197 
* by shortcircuiting the IDCT calculation for any row in which all

198 
* the AC terms are zero. In that case each output is equal to the

199 
* DC coefficient (with scale factor as needed).

200 
* With typical images and quantization tables, half or more of the

201 
* row DCT calculations can be simplified this way.

202 
*/

203  
204 
register int *idataptr = (int*)dataptr; 
205  
206 
/* WARNING: we do the same permutation as MMX idct to simplify the

207 
video core */

208 
d0 = dataptr[0];

209 
d2 = dataptr[1];

210 
d4 = dataptr[2];

211 
d6 = dataptr[3];

212 
d1 = dataptr[4];

213 
d3 = dataptr[5];

214 
d5 = dataptr[6];

215 
d7 = dataptr[7];

216  
217 
if ((d1  d2  d3  d4  d5  d6  d7) == 0) { 
218 
/* AC terms all zero */

219 
if (d0) {

220 
/* Compute a 32 bit value to assign. */

221 
DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS); 
222 
register int v = (dcval & 0xffff)  ((dcval << 16) & 0xffff0000); 
223 

224 
idataptr[0] = v;

225 
idataptr[1] = v;

226 
idataptr[2] = v;

227 
idataptr[3] = v;

228 
} 
229 

230 
dataptr += DCTSIZE; /* advance pointer to next row */

231 
continue;

232 
} 
233  
234 
/* Even part: reverse the even part of the forward DCT. */

235 
/* The rotator is sqrt(2)*c(6). */

236 
{ 
237 
if (d6) {

238 
if (d4) {

239 
if (d2) {

240 
if (d0) {

241 
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */

242 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
243 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
244 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
245  
246 
tmp0 = (d0 + d4) << CONST_BITS; 
247 
tmp1 = (d0  d4) << CONST_BITS; 
248  
249 
tmp10 = tmp0 + tmp3; 
250 
tmp13 = tmp0  tmp3; 
251 
tmp11 = tmp1 + tmp2; 
252 
tmp12 = tmp1  tmp2; 
253 
} else {

254 
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */

255 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
256 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
257 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
258  
259 
tmp0 = d4 << CONST_BITS; 
260  
261 
tmp10 = tmp0 + tmp3; 
262 
tmp13 = tmp0  tmp3; 
263 
tmp11 = tmp2  tmp0; 
264 
tmp12 = (tmp0 + tmp2); 
265 
} 
266 
} else {

267 
if (d0) {

268 
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */

269 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
270 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
271  
272 
tmp0 = (d0 + d4) << CONST_BITS; 
273 
tmp1 = (d0  d4) << CONST_BITS; 
274  
275 
tmp10 = tmp0 + tmp3; 
276 
tmp13 = tmp0  tmp3; 
277 
tmp11 = tmp1 + tmp2; 
278 
tmp12 = tmp1  tmp2; 
279 
} else {

280 
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */

281 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
282 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
283  
284 
tmp0 = d4 << CONST_BITS; 
285  
286 
tmp10 = tmp0 + tmp3; 
287 
tmp13 = tmp0  tmp3; 
288 
tmp11 = tmp2  tmp0; 
289 
tmp12 = (tmp0 + tmp2); 
290 
} 
291 
} 
292 
} else {

293 
if (d2) {

294 
if (d0) {

295 
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */

296 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
297 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
298 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
299  
300 
tmp0 = d0 << CONST_BITS; 
301  
302 
tmp10 = tmp0 + tmp3; 
303 
tmp13 = tmp0  tmp3; 
304 
tmp11 = tmp0 + tmp2; 
305 
tmp12 = tmp0  tmp2; 
306 
} else {

307 
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */

308 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
309 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
310 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
311  
312 
tmp10 = tmp3; 
313 
tmp13 = tmp3; 
314 
tmp11 = tmp2; 
315 
tmp12 = tmp2; 
316 
} 
317 
} else {

318 
if (d0) {

319 
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */

320 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
321 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
322  
323 
tmp0 = d0 << CONST_BITS; 
324  
325 
tmp10 = tmp0 + tmp3; 
326 
tmp13 = tmp0  tmp3; 
327 
tmp11 = tmp0 + tmp2; 
328 
tmp12 = tmp0  tmp2; 
329 
} else {

330 
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */

331 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
332 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
333  
334 
tmp10 = tmp3; 
335 
tmp13 = tmp3; 
336 
tmp11 = tmp2; 
337 
tmp12 = tmp2; 
338 
} 
339 
} 
340 
} 
341 
} else {

342 
if (d4) {

343 
if (d2) {

344 
if (d0) {

345 
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */

346 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
347 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
348  
349 
tmp0 = (d0 + d4) << CONST_BITS; 
350 
tmp1 = (d0  d4) << CONST_BITS; 
351  
352 
tmp10 = tmp0 + tmp3; 
353 
tmp13 = tmp0  tmp3; 
354 
tmp11 = tmp1 + tmp2; 
355 
tmp12 = tmp1  tmp2; 
356 
} else {

357 
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */

358 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
359 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
360  
361 
tmp0 = d4 << CONST_BITS; 
362  
363 
tmp10 = tmp0 + tmp3; 
364 
tmp13 = tmp0  tmp3; 
365 
tmp11 = tmp2  tmp0; 
366 
tmp12 = (tmp0 + tmp2); 
367 
} 
368 
} else {

369 
if (d0) {

370 
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */

371 
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 
372 
tmp11 = tmp12 = (d0  d4) << CONST_BITS; 
373 
} else {

374 
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */

375 
tmp10 = tmp13 = d4 << CONST_BITS; 
376 
tmp11 = tmp12 = tmp10; 
377 
} 
378 
} 
379 
} else {

380 
if (d2) {

381 
if (d0) {

382 
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */

383 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
384 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
385  
386 
tmp0 = d0 << CONST_BITS; 
387  
388 
tmp10 = tmp0 + tmp3; 
389 
tmp13 = tmp0  tmp3; 
390 
tmp11 = tmp0 + tmp2; 
391 
tmp12 = tmp0  tmp2; 
392 
} else {

393 
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */

394 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
395 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
396  
397 
tmp10 = tmp3; 
398 
tmp13 = tmp3; 
399 
tmp11 = tmp2; 
400 
tmp12 = tmp2; 
401 
} 
402 
} else {

403 
if (d0) {

404 
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */

405 
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; 
406 
} else {

407 
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */

408 
tmp10 = tmp13 = tmp11 = tmp12 = 0;

409 
} 
410 
} 
411 
} 
412 
} 
413  
414 
/* Odd part per figure 8; the matrix is unitary and hence its

415 
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.

416 
*/

417  
418 
if (d7) {

419 
if (d5) {

420 
if (d3) {

421 
if (d1) {

422 
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */

423 
z1 = d7 + d1; 
424 
z2 = d5 + d3; 
425 
z3 = d7 + d3; 
426 
z4 = d5 + d1; 
427 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
428 

429 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
430 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
431 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
432 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
433 
z1 = MULTIPLY(z1, FIX_0_899976223); 
434 
z2 = MULTIPLY(z2, FIX_2_562915447); 
435 
z3 = MULTIPLY(z3, FIX_1_961570560); 
436 
z4 = MULTIPLY(z4, FIX_0_390180644); 
437 

438 
z3 += z5; 
439 
z4 += z5; 
440 

441 
tmp0 += z1 + z3; 
442 
tmp1 += z2 + z4; 
443 
tmp2 += z2 + z3; 
444 
tmp3 += z1 + z4; 
445 
} else {

446 
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */

447 
z2 = d5 + d3; 
448 
z3 = d7 + d3; 
449 
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 
450 

451 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
452 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
453 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
454 
z1 = MULTIPLY(d7, FIX_0_899976223); 
455 
z2 = MULTIPLY(z2, FIX_2_562915447); 
456 
z3 = MULTIPLY(z3, FIX_1_961570560); 
457 
z4 = MULTIPLY(d5, FIX_0_390180644); 
458 

459 
z3 += z5; 
460 
z4 += z5; 
461 

462 
tmp0 += z1 + z3; 
463 
tmp1 += z2 + z4; 
464 
tmp2 += z2 + z3; 
465 
tmp3 = z1 + z4; 
466 
} 
467 
} else {

468 
if (d1) {

469 
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */

470 
z1 = d7 + d1; 
471 
z4 = d5 + d1; 
472 
z5 = MULTIPLY(d7 + z4, FIX_1_175875602); 
473 

474 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
475 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
476 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
477 
z1 = MULTIPLY(z1, FIX_0_899976223); 
478 
z2 = MULTIPLY(d5, FIX_2_562915447); 
479 
z3 = MULTIPLY(d7, FIX_1_961570560); 
480 
z4 = MULTIPLY(z4, FIX_0_390180644); 
481 

482 
z3 += z5; 
483 
z4 += z5; 
484 

485 
tmp0 += z1 + z3; 
486 
tmp1 += z2 + z4; 
487 
tmp2 = z2 + z3; 
488 
tmp3 += z1 + z4; 
489 
} else {

490 
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */

491 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
492 
z1 = MULTIPLY(d7, FIX_0_899976223); 
493 
z3 = MULTIPLY(d7, FIX_1_961570560); 
494 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
495 
z2 = MULTIPLY(d5, FIX_2_562915447); 
496 
z4 = MULTIPLY(d5, FIX_0_390180644); 
497 
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 
498 

499 
z3 += z5; 
500 
z4 += z5; 
501 

502 
tmp0 += z3; 
503 
tmp1 += z4; 
504 
tmp2 = z2 + z3; 
505 
tmp3 = z1 + z4; 
506 
} 
507 
} 
508 
} else {

509 
if (d3) {

510 
if (d1) {

511 
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */

512 
z1 = d7 + d1; 
513 
z3 = d7 + d3; 
514 
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 
515 

516 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
517 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
518 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
519 
z1 = MULTIPLY(z1, FIX_0_899976223); 
520 
z2 = MULTIPLY(d3, FIX_2_562915447); 
521 
z3 = MULTIPLY(z3, FIX_1_961570560); 
522 
z4 = MULTIPLY(d1, FIX_0_390180644); 
523 

524 
z3 += z5; 
525 
z4 += z5; 
526 

527 
tmp0 += z1 + z3; 
528 
tmp1 = z2 + z4; 
529 
tmp2 += z2 + z3; 
530 
tmp3 += z1 + z4; 
531 
} else {

532 
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */

533 
z3 = d7 + d3; 
534 

535 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
536 
z1 = MULTIPLY(d7, FIX_0_899976223); 
537 
tmp2 = MULTIPLY(d3, FIX_0_509795579); 
538 
z2 = MULTIPLY(d3, FIX_2_562915447); 
539 
z5 = MULTIPLY(z3, FIX_1_175875602); 
540 
z3 = MULTIPLY(z3, FIX_0_785694958); 
541 

542 
tmp0 += z3; 
543 
tmp1 = z2 + z5; 
544 
tmp2 += z3; 
545 
tmp3 = z1 + z5; 
546 
} 
547 
} else {

548 
if (d1) {

549 
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */

550 
z1 = d7 + d1; 
551 
z5 = MULTIPLY(z1, FIX_1_175875602); 
552  
553 
z1 = MULTIPLY(z1, FIX_0_275899380); 
554 
z3 = MULTIPLY(d7, FIX_1_961570560); 
555 
tmp0 = MULTIPLY(d7, FIX_1_662939225); 
556 
z4 = MULTIPLY(d1, FIX_0_390180644); 
557 
tmp3 = MULTIPLY(d1, FIX_1_111140466); 
558  
559 
tmp0 += z1; 
560 
tmp1 = z4 + z5; 
561 
tmp2 = z3 + z5; 
562 
tmp3 += z1; 
563 
} else {

564 
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */

565 
tmp0 = MULTIPLY(d7, FIX_1_387039845); 
566 
tmp1 = MULTIPLY(d7, FIX_1_175875602); 
567 
tmp2 = MULTIPLY(d7, FIX_0_785694958); 
568 
tmp3 = MULTIPLY(d7, FIX_0_275899380); 
569 
} 
570 
} 
571 
} 
572 
} else {

573 
if (d5) {

574 
if (d3) {

575 
if (d1) {

576 
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */

577 
z2 = d5 + d3; 
578 
z4 = d5 + d1; 
579 
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 
580 

581 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
582 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
583 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
584 
z1 = MULTIPLY(d1, FIX_0_899976223); 
585 
z2 = MULTIPLY(z2, FIX_2_562915447); 
586 
z3 = MULTIPLY(d3, FIX_1_961570560); 
587 
z4 = MULTIPLY(z4, FIX_0_390180644); 
588 

589 
z3 += z5; 
590 
z4 += z5; 
591 

592 
tmp0 = z1 + z3; 
593 
tmp1 += z2 + z4; 
594 
tmp2 += z2 + z3; 
595 
tmp3 += z1 + z4; 
596 
} else {

597 
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */

598 
z2 = d5 + d3; 
599 

600 
z5 = MULTIPLY(z2, FIX_1_175875602); 
601 
tmp1 = MULTIPLY(d5, FIX_1_662939225); 
602 
z4 = MULTIPLY(d5, FIX_0_390180644); 
603 
z2 = MULTIPLY(z2, FIX_1_387039845); 
604 
tmp2 = MULTIPLY(d3, FIX_1_111140466); 
605 
z3 = MULTIPLY(d3, FIX_1_961570560); 
606 

607 
tmp0 = z3 + z5; 
608 
tmp1 += z2; 
609 
tmp2 += z2; 
610 
tmp3 = z4 + z5; 
611 
} 
612 
} else {

613 
if (d1) {

614 
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */

615 
z4 = d5 + d1; 
616 

617 
z5 = MULTIPLY(z4, FIX_1_175875602); 
618 
z1 = MULTIPLY(d1, FIX_0_899976223); 
619 
tmp3 = MULTIPLY(d1, FIX_0_601344887); 
620 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
621 
z2 = MULTIPLY(d5, FIX_2_562915447); 
622 
z4 = MULTIPLY(z4, FIX_0_785694958); 
623 

624 
tmp0 = z1 + z5; 
625 
tmp1 += z4; 
626 
tmp2 = z2 + z5; 
627 
tmp3 += z4; 
628 
} else {

629 
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */

630 
tmp0 = MULTIPLY(d5, FIX_1_175875602); 
631 
tmp1 = MULTIPLY(d5, FIX_0_275899380); 
632 
tmp2 = MULTIPLY(d5, FIX_1_387039845); 
633 
tmp3 = MULTIPLY(d5, FIX_0_785694958); 
634 
} 
635 
} 
636 
} else {

637 
if (d3) {

638 
if (d1) {

639 
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */

640 
z5 = d1 + d3; 
641 
tmp3 = MULTIPLY(d1, FIX_0_211164243); 
642 
tmp2 = MULTIPLY(d3, FIX_1_451774981); 
643 
z1 = MULTIPLY(d1, FIX_1_061594337); 
644 
z2 = MULTIPLY(d3, FIX_2_172734803); 
645 
z4 = MULTIPLY(z5, FIX_0_785694958); 
646 
z5 = MULTIPLY(z5, FIX_1_175875602); 
647 

648 
tmp0 = z1  z4; 
649 
tmp1 = z2 + z4; 
650 
tmp2 += z5; 
651 
tmp3 += z5; 
652 
} else {

653 
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */

654 
tmp0 = MULTIPLY(d3, FIX_0_785694958); 
655 
tmp1 = MULTIPLY(d3, FIX_1_387039845); 
656 
tmp2 = MULTIPLY(d3, FIX_0_275899380); 
657 
tmp3 = MULTIPLY(d3, FIX_1_175875602); 
658 
} 
659 
} else {

660 
if (d1) {

661 
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */

662 
tmp0 = MULTIPLY(d1, FIX_0_275899380); 
663 
tmp1 = MULTIPLY(d1, FIX_0_785694958); 
664 
tmp2 = MULTIPLY(d1, FIX_1_175875602); 
665 
tmp3 = MULTIPLY(d1, FIX_1_387039845); 
666 
} else {

667 
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */

668 
tmp0 = tmp1 = tmp2 = tmp3 = 0;

669 
} 
670 
} 
671 
} 
672 
} 
673 
} 
674 
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

675  
676 
dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITSPASS1_BITS);

677 
dataptr[7] = (DCTELEM) DESCALE(tmp10  tmp3, CONST_BITSPASS1_BITS);

678 
dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITSPASS1_BITS);

679 
dataptr[6] = (DCTELEM) DESCALE(tmp11  tmp2, CONST_BITSPASS1_BITS);

680 
dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITSPASS1_BITS);

681 
dataptr[5] = (DCTELEM) DESCALE(tmp12  tmp1, CONST_BITSPASS1_BITS);

682 
dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITSPASS1_BITS);

683 
dataptr[4] = (DCTELEM) DESCALE(tmp13  tmp0, CONST_BITSPASS1_BITS);

684  
685 
dataptr += DCTSIZE; /* advance pointer to next row */

686 
} 
687  
688 
/* Pass 2: process columns. */

689 
/* Note that we must descale the results by a factor of 8 == 2**3, */

690 
/* and also undo the PASS1_BITS scaling. */

691  
692 
dataptr = data; 
693 
for (rowctr = DCTSIZE1; rowctr >= 0; rowctr) { 
694 
/* Columns of zeroes can be exploited in the same way as we did with rows.

695 
* However, the row calculation has created many nonzero AC terms, so the

696 
* simplification applies less often (typically 5% to 10% of the time).

697 
* On machines with very fast multiplication, it's possible that the

698 
* test takes more time than it's worth. In that case this section

699 
* may be commented out.

700 
*/

701  
702 
d0 = dataptr[DCTSIZE*0];

703 
d1 = dataptr[DCTSIZE*1];

704 
d2 = dataptr[DCTSIZE*2];

705 
d3 = dataptr[DCTSIZE*3];

706 
d4 = dataptr[DCTSIZE*4];

707 
d5 = dataptr[DCTSIZE*5];

708 
d6 = dataptr[DCTSIZE*6];

709 
d7 = dataptr[DCTSIZE*7];

710  
711 
/* Even part: reverse the even part of the forward DCT. */

712 
/* The rotator is sqrt(2)*c(6). */

713 
if (d6) {

714 
if (d4) {

715 
if (d2) {

716 
if (d0) {

717 
/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */

718 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
719 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
720 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
721  
722 
tmp0 = (d0 + d4) << CONST_BITS; 
723 
tmp1 = (d0  d4) << CONST_BITS; 
724  
725 
tmp10 = tmp0 + tmp3; 
726 
tmp13 = tmp0  tmp3; 
727 
tmp11 = tmp1 + tmp2; 
728 
tmp12 = tmp1  tmp2; 
729 
} else {

730 
/* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */

731 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
732 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
733 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
734  
735 
tmp0 = d4 << CONST_BITS; 
736  
737 
tmp10 = tmp0 + tmp3; 
738 
tmp13 = tmp0  tmp3; 
739 
tmp11 = tmp2  tmp0; 
740 
tmp12 = (tmp0 + tmp2); 
741 
} 
742 
} else {

743 
if (d0) {

744 
/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */

745 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
746 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
747  
748 
tmp0 = (d0 + d4) << CONST_BITS; 
749 
tmp1 = (d0  d4) << CONST_BITS; 
750  
751 
tmp10 = tmp0 + tmp3; 
752 
tmp13 = tmp0  tmp3; 
753 
tmp11 = tmp1 + tmp2; 
754 
tmp12 = tmp1  tmp2; 
755 
} else {

756 
/* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */

757 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
758 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
759  
760 
tmp0 = d4 << CONST_BITS; 
761  
762 
tmp10 = tmp0 + tmp3; 
763 
tmp13 = tmp0  tmp3; 
764 
tmp11 = tmp2  tmp0; 
765 
tmp12 = (tmp0 + tmp2); 
766 
} 
767 
} 
768 
} else {

769 
if (d2) {

770 
if (d0) {

771 
/* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */

772 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
773 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
774 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
775  
776 
tmp0 = d0 << CONST_BITS; 
777  
778 
tmp10 = tmp0 + tmp3; 
779 
tmp13 = tmp0  tmp3; 
780 
tmp11 = tmp0 + tmp2; 
781 
tmp12 = tmp0  tmp2; 
782 
} else {

783 
/* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */

784 
z1 = MULTIPLY(d2 + d6, FIX_0_541196100); 
785 
tmp2 = z1 + MULTIPLY(d6, FIX_1_847759065); 
786 
tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865); 
787  
788 
tmp10 = tmp3; 
789 
tmp13 = tmp3; 
790 
tmp11 = tmp2; 
791 
tmp12 = tmp2; 
792 
} 
793 
} else {

794 
if (d0) {

795 
/* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */

796 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
797 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
798  
799 
tmp0 = d0 << CONST_BITS; 
800  
801 
tmp10 = tmp0 + tmp3; 
802 
tmp13 = tmp0  tmp3; 
803 
tmp11 = tmp0 + tmp2; 
804 
tmp12 = tmp0  tmp2; 
805 
} else {

806 
/* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */

807 
tmp2 = MULTIPLY(d6, FIX_1_306562965); 
808 
tmp3 = MULTIPLY(d6, FIX_0_541196100); 
809  
810 
tmp10 = tmp3; 
811 
tmp13 = tmp3; 
812 
tmp11 = tmp2; 
813 
tmp12 = tmp2; 
814 
} 
815 
} 
816 
} 
817 
} else {

818 
if (d4) {

819 
if (d2) {

820 
if (d0) {

821 
/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */

822 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
823 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
824  
825 
tmp0 = (d0 + d4) << CONST_BITS; 
826 
tmp1 = (d0  d4) << CONST_BITS; 
827  
828 
tmp10 = tmp0 + tmp3; 
829 
tmp13 = tmp0  tmp3; 
830 
tmp11 = tmp1 + tmp2; 
831 
tmp12 = tmp1  tmp2; 
832 
} else {

833 
/* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */

834 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
835 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
836  
837 
tmp0 = d4 << CONST_BITS; 
838  
839 
tmp10 = tmp0 + tmp3; 
840 
tmp13 = tmp0  tmp3; 
841 
tmp11 = tmp2  tmp0; 
842 
tmp12 = (tmp0 + tmp2); 
843 
} 
844 
} else {

845 
if (d0) {

846 
/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */

847 
tmp10 = tmp13 = (d0 + d4) << CONST_BITS; 
848 
tmp11 = tmp12 = (d0  d4) << CONST_BITS; 
849 
} else {

850 
/* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */

851 
tmp10 = tmp13 = d4 << CONST_BITS; 
852 
tmp11 = tmp12 = tmp10; 
853 
} 
854 
} 
855 
} else {

856 
if (d2) {

857 
if (d0) {

858 
/* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */

859 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
860 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
861  
862 
tmp0 = d0 << CONST_BITS; 
863  
864 
tmp10 = tmp0 + tmp3; 
865 
tmp13 = tmp0  tmp3; 
866 
tmp11 = tmp0 + tmp2; 
867 
tmp12 = tmp0  tmp2; 
868 
} else {

869 
/* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */

870 
tmp2 = MULTIPLY(d2, FIX_0_541196100); 
871 
tmp3 = MULTIPLY(d2, FIX_1_306562965); 
872  
873 
tmp10 = tmp3; 
874 
tmp13 = tmp3; 
875 
tmp11 = tmp2; 
876 
tmp12 = tmp2; 
877 
} 
878 
} else {

879 
if (d0) {

880 
/* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */

881 
tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS; 
882 
} else {

883 
/* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */

884 
tmp10 = tmp13 = tmp11 = tmp12 = 0;

885 
} 
886 
} 
887 
} 
888 
} 
889  
890 
/* Odd part per figure 8; the matrix is unitary and hence its

891 
* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.

892 
*/

893 
if (d7) {

894 
if (d5) {

895 
if (d3) {

896 
if (d1) {

897 
/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */

898 
z1 = d7 + d1; 
899 
z2 = d5 + d3; 
900 
z3 = d7 + d3; 
901 
z4 = d5 + d1; 
902 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
903 

904 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
905 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
906 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
907 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
908 
z1 = MULTIPLY(z1, FIX_0_899976223); 
909 
z2 = MULTIPLY(z2, FIX_2_562915447); 
910 
z3 = MULTIPLY(z3, FIX_1_961570560); 
911 
z4 = MULTIPLY(z4, FIX_0_390180644); 
912 

913 
z3 += z5; 
914 
z4 += z5; 
915 

916 
tmp0 += z1 + z3; 
917 
tmp1 += z2 + z4; 
918 
tmp2 += z2 + z3; 
919 
tmp3 += z1 + z4; 
920 
} else {

921 
/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */

922 
z1 = d7; 
923 
z2 = d5 + d3; 
924 
z3 = d7 + d3; 
925 
z5 = MULTIPLY(z3 + d5, FIX_1_175875602); 
926 

927 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
928 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
929 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
930 
z1 = MULTIPLY(d7, FIX_0_899976223); 
931 
z2 = MULTIPLY(z2, FIX_2_562915447); 
932 
z3 = MULTIPLY(z3, FIX_1_961570560); 
933 
z4 = MULTIPLY(d5, FIX_0_390180644); 
934 

935 
z3 += z5; 
936 
z4 += z5; 
937 

938 
tmp0 += z1 + z3; 
939 
tmp1 += z2 + z4; 
940 
tmp2 += z2 + z3; 
941 
tmp3 = z1 + z4; 
942 
} 
943 
} else {

944 
if (d1) {

945 
/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */

946 
z1 = d7 + d1; 
947 
z2 = d5; 
948 
z3 = d7; 
949 
z4 = d5 + d1; 
950 
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); 
951 

952 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
953 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
954 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
955 
z1 = MULTIPLY(z1, FIX_0_899976223); 
956 
z2 = MULTIPLY(d5, FIX_2_562915447); 
957 
z3 = MULTIPLY(d7, FIX_1_961570560); 
958 
z4 = MULTIPLY(z4, FIX_0_390180644); 
959 

960 
z3 += z5; 
961 
z4 += z5; 
962 

963 
tmp0 += z1 + z3; 
964 
tmp1 += z2 + z4; 
965 
tmp2 = z2 + z3; 
966 
tmp3 += z1 + z4; 
967 
} else {

968 
/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */

969 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
970 
z1 = MULTIPLY(d7, FIX_0_899976223); 
971 
z3 = MULTIPLY(d7, FIX_1_961570560); 
972 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
973 
z2 = MULTIPLY(d5, FIX_2_562915447); 
974 
z4 = MULTIPLY(d5, FIX_0_390180644); 
975 
z5 = MULTIPLY(d5 + d7, FIX_1_175875602); 
976 

977 
z3 += z5; 
978 
z4 += z5; 
979 

980 
tmp0 += z3; 
981 
tmp1 += z4; 
982 
tmp2 = z2 + z3; 
983 
tmp3 = z1 + z4; 
984 
} 
985 
} 
986 
} else {

987 
if (d3) {

988 
if (d1) {

989 
/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */

990 
z1 = d7 + d1; 
991 
z3 = d7 + d3; 
992 
z5 = MULTIPLY(z3 + d1, FIX_1_175875602); 
993 

994 
tmp0 = MULTIPLY(d7, FIX_0_298631336); 
995 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
996 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
997 
z1 = MULTIPLY(z1, FIX_0_899976223); 
998 
z2 = MULTIPLY(d3, FIX_2_562915447); 
999 
z3 = MULTIPLY(z3, FIX_1_961570560); 
1000 
z4 = MULTIPLY(d1, FIX_0_390180644); 
1001 

1002 
z3 += z5; 
1003 
z4 += z5; 
1004 

1005 
tmp0 += z1 + z3; 
1006 
tmp1 = z2 + z4; 
1007 
tmp2 += z2 + z3; 
1008 
tmp3 += z1 + z4; 
1009 
} else {

1010 
/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */

1011 
z3 = d7 + d3; 
1012 

1013 
tmp0 = MULTIPLY(d7, FIX_0_601344887); 
1014 
z1 = MULTIPLY(d7, FIX_0_899976223); 
1015 
tmp2 = MULTIPLY(d3, FIX_0_509795579); 
1016 
z2 = MULTIPLY(d3, FIX_2_562915447); 
1017 
z5 = MULTIPLY(z3, FIX_1_175875602); 
1018 
z3 = MULTIPLY(z3, FIX_0_785694958); 
1019 

1020 
tmp0 += z3; 
1021 
tmp1 = z2 + z5; 
1022 
tmp2 += z3; 
1023 
tmp3 = z1 + z5; 
1024 
} 
1025 
} else {

1026 
if (d1) {

1027 
/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */

1028 
z1 = d7 + d1; 
1029 
z5 = MULTIPLY(z1, FIX_1_175875602); 
1030  
1031 
z1 = MULTIPLY(z1, FIX_0_275899380); 
1032 
z3 = MULTIPLY(d7, FIX_1_961570560); 
1033 
tmp0 = MULTIPLY(d7, FIX_1_662939225); 
1034 
z4 = MULTIPLY(d1, FIX_0_390180644); 
1035 
tmp3 = MULTIPLY(d1, FIX_1_111140466); 
1036  
1037 
tmp0 += z1; 
1038 
tmp1 = z4 + z5; 
1039 
tmp2 = z3 + z5; 
1040 
tmp3 += z1; 
1041 
} else {

1042 
/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */

1043 
tmp0 = MULTIPLY(d7, FIX_1_387039845); 
1044 
tmp1 = MULTIPLY(d7, FIX_1_175875602); 
1045 
tmp2 = MULTIPLY(d7, FIX_0_785694958); 
1046 
tmp3 = MULTIPLY(d7, FIX_0_275899380); 
1047 
} 
1048 
} 
1049 
} 
1050 
} else {

1051 
if (d5) {

1052 
if (d3) {

1053 
if (d1) {

1054 
/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */

1055 
z2 = d5 + d3; 
1056 
z4 = d5 + d1; 
1057 
z5 = MULTIPLY(d3 + z4, FIX_1_175875602); 
1058 

1059 
tmp1 = MULTIPLY(d5, FIX_2_053119869); 
1060 
tmp2 = MULTIPLY(d3, FIX_3_072711026); 
1061 
tmp3 = MULTIPLY(d1, FIX_1_501321110); 
1062 
z1 = MULTIPLY(d1, FIX_0_899976223); 
1063 
z2 = MULTIPLY(z2, FIX_2_562915447); 
1064 
z3 = MULTIPLY(d3, FIX_1_961570560); 
1065 
z4 = MULTIPLY(z4, FIX_0_390180644); 
1066 

1067 
z3 += z5; 
1068 
z4 += z5; 
1069 

1070 
tmp0 = z1 + z3; 
1071 
tmp1 += z2 + z4; 
1072 
tmp2 += z2 + z3; 
1073 
tmp3 += z1 + z4; 
1074 
} else {

1075 
/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */

1076 
z2 = d5 + d3; 
1077 

1078 
z5 = MULTIPLY(z2, FIX_1_175875602); 
1079 
tmp1 = MULTIPLY(d5, FIX_1_662939225); 
1080 
z4 = MULTIPLY(d5, FIX_0_390180644); 
1081 
z2 = MULTIPLY(z2, FIX_1_387039845); 
1082 
tmp2 = MULTIPLY(d3, FIX_1_111140466); 
1083 
z3 = MULTIPLY(d3, FIX_1_961570560); 
1084 

1085 
tmp0 = z3 + z5; 
1086 
tmp1 += z2; 
1087 
tmp2 += z2; 
1088 
tmp3 = z4 + z5; 
1089 
} 
1090 
} else {

1091 
if (d1) {

1092 
/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */

1093 
z4 = d5 + d1; 
1094 

1095 
z5 = MULTIPLY(z4, FIX_1_175875602); 
1096 
z1 = MULTIPLY(d1, FIX_0_899976223); 
1097 
tmp3 = MULTIPLY(d1, FIX_0_601344887); 
1098 
tmp1 = MULTIPLY(d5, FIX_0_509795579); 
1099 
z2 = MULTIPLY(d5, FIX_2_562915447); 
1100 
z4 = MULTIPLY(z4, FIX_0_785694958); 
1101 

1102 
tmp0 = z1 + z5; 
1103 
tmp1 += z4; 
1104 
tmp2 = z2 + z5; 
1105 
tmp3 += z4; 
1106 
} else {

1107 
/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */

1108 
tmp0 = MULTIPLY(d5, FIX_1_175875602); 
1109 
tmp1 = MULTIPLY(d5, FIX_0_275899380); 
1110 
tmp2 = MULTIPLY(d5, FIX_1_387039845); 
1111 
tmp3 = MULTIPLY(d5, FIX_0_785694958); 
1112 
} 
1113 
} 
1114 
} else {

1115 
if (d3) {

1116 
if (d1) {

1117 
/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */

1118 
z5 = d1 + d3; 
1119 
tmp3 = MULTIPLY(d1, FIX_0_211164243); 
1120 
tmp2 = MULTIPLY(d3, FIX_1_451774981); 
1121 
z1 = MULTIPLY(d1, FIX_1_061594337); 
1122 
z2 = MULTIPLY(d3, FIX_2_172734803); 
1123 
z4 = MULTIPLY(z5, FIX_0_785694958); 
1124 
z5 = MULTIPLY(z5, FIX_1_175875602); 
1125 

1126 
tmp0 = z1  z4; 
1127 
tmp1 = z2 + z4; 
1128 
tmp2 += z5; 
1129 
tmp3 += z5; 
1130 
} else {

1131 
/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */

1132 
tmp0 = MULTIPLY(d3, FIX_0_785694958); 
1133 
tmp1 = MULTIPLY(d3, FIX_1_387039845); 
1134 
tmp2 = MULTIPLY(d3, FIX_0_275899380); 
1135 
tmp3 = MULTIPLY(d3, FIX_1_175875602); 
1136 
} 
1137 
} else {

1138 
if (d1) {

1139 
/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */

1140 
tmp0 = MULTIPLY(d1, FIX_0_275899380); 
1141 
tmp1 = MULTIPLY(d1, FIX_0_785694958); 
1142 
tmp2 = MULTIPLY(d1, FIX_1_175875602); 
1143 
tmp3 = MULTIPLY(d1, FIX_1_387039845); 
1144 
} else {

1145 
/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */

1146 
tmp0 = tmp1 = tmp2 = tmp3 = 0;

1147 
} 
1148 
} 
1149 
} 
1150 
} 
1151  
1152 
/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */

1153  
1154 
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,

1155 
CONST_BITS+PASS1_BITS+3);

1156 
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10  tmp3,

1157 
CONST_BITS+PASS1_BITS+3);

1158 
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,

1159 
CONST_BITS+PASS1_BITS+3);

1160 
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11  tmp2,

1161 
CONST_BITS+PASS1_BITS+3);

1162 
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,

1163 
CONST_BITS+PASS1_BITS+3);

1164 
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12  tmp1,

1165 
CONST_BITS+PASS1_BITS+3);

1166 
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,

1167 
CONST_BITS+PASS1_BITS+3);

1168 
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13  tmp0,

1169 
CONST_BITS+PASS1_BITS+3);

1170 

1171 
dataptr++; /* advance pointer to next column */

1172 
} 
1173 
} 
1174  
1175 
/* Remove the helper macros FIX and CONST_BITS defined earlier in this file. */
#undef FIX
#undef CONST_BITS
