Statistics
| Branch: | Revision:

ffmpeg / libavcodec / jrevdct.c @ 4cc281d9

History | View | Annotate | Download (33 KB)

1
/*
2
 * jrevdct.c
3
 *
4
 * Copyright (C) 1991, 1992, Thomas G. Lane.
5
 * This file is part of the Independent JPEG Group's software.
6
 * For conditions of distribution and use, see the accompanying README file.
7
 *
8
 * This file contains the basic inverse-DCT transformation subroutine.
9
 *
10
 * This implementation is based on an algorithm described in
11
 *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
12
 *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
13
 *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
14
 * The primary algorithm described there uses 11 multiplies and 29 adds.
15
 * We use their alternate method with 12 multiplies and 32 adds.
16
 * The advantage of this method is that no data path contains more than one
17
 * multiplication; this allows a very simple and accurate implementation in
18
 * scaled fixed-point arithmetic, with a minimal number of shifts.
19
 * 
20
 * I've made lots of modifications to attempt to take advantage of the
21
 * sparse nature of the DCT matrices we're getting.  Although the logic
22
 * is cumbersome, it's straightforward and the resulting code is much
23
 * faster.
24
 *
25
 * A better way to do this would be to pass in the DCT block as a sparse
26
 * matrix, perhaps with the difference cases encoded.
27
 */
28
#include "common.h"
29
#include "dsputil.h"
30

    
31
/* Build for 8-bit samples: this selects PASS1_BITS = 2 and enables the
 * optional 16-bit MULTIPLY variants further down in this file. */
#define EIGHT_BIT_SAMPLES
32

    
33
/* Edge length of a DCT block; the code in this file requires it to be 8. */
#define DCTSIZE 8
34
/* Number of coefficients in one block (DCTSIZE * DCTSIZE). */
#define DCTSIZE2 64
35

    
36
/* Expands to nothing.  NOTE(review): presumably a leftover linkage marker
 * from the IJG sources; no use is visible in this part of the file. */
#define GLOBAL
37

    
38
/* Plain >> shift.  DESCALE relies on this rounding toward minus infinity
 * for negative x, i.e. on the compiler using an arithmetic right shift. */
#define RIGHT_SHIFT(x, n) ((x) >> (n))
39

    
40
/* One block of DCTSIZE2 coefficients; j_rev_dct() reads its input from and
 * writes its results back into such a block. */
typedef DCTELEM DCTBLOCK[DCTSIZE2];
41

    
42
/* Number of fractional bits in the fixed-point multiplier constants
 * (the FIX_* values are scaled by 1 << CONST_BITS). */
#define CONST_BITS 13
43

    
44
/*
45
 * This routine is specialized to the case DCTSIZE = 8.
46
 */
47

    
48
/* Compile-time guard: when DCTSIZE is not 8 the text below is compiled and,
 * not being valid C, stops the build. */
#if DCTSIZE != 8
49
  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
50
#endif
51

    
52

    
53
/*
54
 * A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
55
 * on each column.  Direct algorithms are also available, but they are
56
 * much more complex and seem not to be any faster when reduced to code.
57
 *
58
 * The poop on this scaling stuff is as follows:
59
 *
60
 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
61
 * larger than the true IDCT outputs.  The final outputs are therefore
62
 * a factor of N larger than desired; since N=8 this can be cured by
63
 * a simple right shift at the end of the algorithm.  The advantage of
64
 * this arrangement is that we save two multiplications per 1-D IDCT,
65
 * because the y0 and y4 inputs need not be divided by sqrt(N).
66
 *
67
 * We have to do addition and subtraction of the integer inputs, which
68
 * is no problem, and multiplication by fractional constants, which is
69
 * a problem to do in integer arithmetic.  We multiply all the constants
70
 * by CONST_SCALE and convert them to integer constants (thus retaining
71
 * CONST_BITS bits of precision in the constants).  After doing a
72
 * multiplication we have to divide the product by CONST_SCALE, with proper
73
 * rounding, to produce the correct output.  This division can be done
74
 * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
75
 * as long as possible so that partial sums can be added together with
76
 * full fractional precision.
77
 *
78
 * The outputs of the first pass are scaled up by PASS1_BITS bits so that
79
 * they are represented to better-than-integral precision.  These outputs
80
 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
81
 * with the recommended scaling.  (To scale up 12-bit sample data further, an
82
 * intermediate int32 array would be needed.)
83
 *
84
 * To avoid overflow of the 32-bit intermediate results in pass 2, we must
85
 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
86
 * shows that the values given below are the most effective.
87
 */
88

    
89
/* Extra fractional bits kept in the pass 1 (row) results.  With 8-bit
 * samples, 8 + CONST_BITS + 2 = 23 stays inside the 26-bit budget described
 * in the scaling notes; the other setting gives up one bit of precision. */
#ifdef EIGHT_BIT_SAMPLES
90
#define PASS1_BITS  2
91
#else
92
#define PASS1_BITS  1                /* lose a little precision to avoid overflow */
93
#endif
94

    
95
/* The constant 1 cast to INT32; it is the base of the shifts in CONST_SCALE
 * and DESCALE. */
#define ONE        ((INT32) 1)
96

    
97
/* Fixed-point scale factor: 2**CONST_BITS (8192 when CONST_BITS is 13). */
#define CONST_SCALE (ONE << CONST_BITS)
98

    
99
/* Convert a positive real constant to an integer scaled by CONST_SCALE.
100
 * IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
101
 * you will pay a significant penalty in run time.  In that case, figure
102
 * the correct integer constant values and insert them by hand.
103
 */
104

    
105
/* FIX is no longer used; all of the constants have been precomputed. */
106
/* Adds 0.5 before the (INT32) cast so that the scaled constant is rounded to
 * the nearest integer instead of being truncated. */
#define FIX(x)        ((INT32) ((x) * CONST_SCALE + 0.5)) 
107

    
108
/* Descale and correctly round an INT32 value that's scaled by N bits.
109
 * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
110
 * the fudge factor is correct for either sign of X.
111
 */
112

    
113
/* Adds half of 2**n before shifting right by n.  n must be at least 1,
 * because the rounding term is ONE << (n - 1). */
#define DESCALE(x,n)  RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
114

    
115
/* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
116
 * For 8-bit samples with the recommended scaling, all the variable
117
 * and constant values involved are no more than 16 bits wide, so a
118
 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
119
 * this provides a useful speedup on many machines.
120
 * There is no way to specify a 16x16->32 multiply in portable C, but
121
 * some C compilers will do the right thing if you provide the correct
122
 * combination of casts.
123
 * NB: for 12-bit samples, a full 32-bit multiplication will be needed.
124
 */
125

    
126
/* Optional narrow multiplies, only considered for 8-bit samples.  Define at
 * most one of SHORTxSHORT_32 / SHORTxLCONST_32: with both defined, MULTIPLY
 * would be defined twice with different bodies. */
#ifdef EIGHT_BIT_SAMPLES
127
#ifdef SHORTxSHORT_32                /* may work if 'int' is 32 bits */
128
#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT16) (const)))
129
#endif
130
#ifdef SHORTxLCONST_32                /* known to work with Microsoft C 6.0 */
131
#define MULTIPLY(var,const)  (((INT16) (var)) * ((INT32) (const)))
132
#endif
133
#endif
134

    
135
/* Fallback used when no narrow variant was selected earlier: an ordinary
 * multiplication with no casts applied to either operand. */
#ifndef MULTIPLY                /* default definition */
136
#define MULTIPLY(var,const)  ((var) * (const))
137
#endif
138

    
139

    
140
/* 
141
  Unlike our decoder where we approximate the FIXes, we need to use exact
142
ones here, or successive P-frames will drift too much with reference-frame coding.
143
*/
144
/* Precomputed fixed-point constants.  Each name encodes the real value x and
 * each number is x * 2**CONST_BITS rounded to the nearest integer with
 * CONST_BITS = 13, e.g. FIX_0_541196100 = round(0.541196100 * 8192) = 4433. */
#define FIX_0_211164243 1730
145
#define FIX_0_275899380 2260
146
#define FIX_0_298631336 2446
147
#define FIX_0_390180644 3196
148
#define FIX_0_509795579 4176
149
#define FIX_0_541196100 4433
150
#define FIX_0_601344887 4926
151
#define FIX_0_765366865 6270
152
#define FIX_0_785694958 6436
153
#define FIX_0_899976223 7373
154
#define FIX_1_061594337 8697
155
#define FIX_1_111140466 9102
156
#define FIX_1_175875602 9633
157
#define FIX_1_306562965 10703
158
#define FIX_1_387039845 11363
159
#define FIX_1_451774981 11893
160
#define FIX_1_501321110 12299
161
#define FIX_1_662939225 13623
162
#define FIX_1_847759065 15137
163
#define FIX_1_961570560 16069
164
#define FIX_2_053119869 16819
165
#define FIX_2_172734803 17799
166
#define FIX_2_562915447 20995
167
#define FIX_3_072711026 25172
168

    
169
/*
170
 * Perform the inverse DCT on one block of coefficients.
171
 */
172

    
173
void j_rev_dct(DCTBLOCK data)
174
{
175
  INT32 tmp0, tmp1, tmp2, tmp3;
176
  INT32 tmp10, tmp11, tmp12, tmp13;
177
  INT32 z1, z2, z3, z4, z5;
178
  INT32 d0, d1, d2, d3, d4, d5, d6, d7;
179
  register DCTELEM *dataptr;
180
  int rowctr;
181
   
182
  /* Pass 1: process rows. */
183
  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
184
  /* furthermore, we scale the results by 2**PASS1_BITS. */
185

    
186
  dataptr = data;
187

    
188
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
189
    /* Due to quantization, we will usually find that many of the input
190
     * coefficients are zero, especially the AC terms.  We can exploit this
191
     * by short-circuiting the IDCT calculation for any row in which all
192
     * the AC terms are zero.  In that case each output is equal to the
193
     * DC coefficient (with scale factor as needed).
194
     * With typical images and quantization tables, half or more of the
195
     * row DCT calculations can be simplified this way.
196
     */
197

    
198
    register int *idataptr = (int*)dataptr;
199

    
200
    /* WARNING: we do the same permutation as MMX idct to simplify the
201
       video core */
202
    d0 = dataptr[0];
203
    d2 = dataptr[1];
204
    d4 = dataptr[2];
205
    d6 = dataptr[3];
206
    d1 = dataptr[4];
207
    d3 = dataptr[5];
208
    d5 = dataptr[6];
209
    d7 = dataptr[7];
210

    
211
    if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
212
      /* AC terms all zero */
213
      if (d0) {
214
          /* Compute a 32 bit value to assign. */
215
          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
216
          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
217
          
218
          idataptr[0] = v;
219
          idataptr[1] = v;
220
          idataptr[2] = v;
221
          idataptr[3] = v;
222
      }
223
      
224
      dataptr += DCTSIZE;        /* advance pointer to next row */
225
      continue;
226
    }
227

    
228
    /* Even part: reverse the even part of the forward DCT. */
229
    /* The rotator is sqrt(2)*c(-6). */
230
{
231
    if (d6) {
232
        if (d4) {
233
            if (d2) {
234
                if (d0) {
235
                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
236
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
237
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
238
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
239

    
240
                    tmp0 = (d0 + d4) << CONST_BITS;
241
                    tmp1 = (d0 - d4) << CONST_BITS;
242

    
243
                    tmp10 = tmp0 + tmp3;
244
                    tmp13 = tmp0 - tmp3;
245
                    tmp11 = tmp1 + tmp2;
246
                    tmp12 = tmp1 - tmp2;
247
                } else {
248
                    /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
249
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
250
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
251
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
252

    
253
                    tmp0 = d4 << CONST_BITS;
254

    
255
                    tmp10 = tmp0 + tmp3;
256
                    tmp13 = tmp0 - tmp3;
257
                    tmp11 = tmp2 - tmp0;
258
                    tmp12 = -(tmp0 + tmp2);
259
                }
260
            } else {
261
                if (d0) {
262
                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
263
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
264
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
265

    
266
                    tmp0 = (d0 + d4) << CONST_BITS;
267
                    tmp1 = (d0 - d4) << CONST_BITS;
268

    
269
                    tmp10 = tmp0 + tmp3;
270
                    tmp13 = tmp0 - tmp3;
271
                    tmp11 = tmp1 + tmp2;
272
                    tmp12 = tmp1 - tmp2;
273
                } else {
274
                    /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
275
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
276
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
277

    
278
                    tmp0 = d4 << CONST_BITS;
279

    
280
                    tmp10 = tmp0 + tmp3;
281
                    tmp13 = tmp0 - tmp3;
282
                    tmp11 = tmp2 - tmp0;
283
                    tmp12 = -(tmp0 + tmp2);
284
                }
285
            }
286
        } else {
287
            if (d2) {
288
                if (d0) {
289
                    /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
290
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
291
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
292
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
293

    
294
                    tmp0 = d0 << CONST_BITS;
295

    
296
                    tmp10 = tmp0 + tmp3;
297
                    tmp13 = tmp0 - tmp3;
298
                    tmp11 = tmp0 + tmp2;
299
                    tmp12 = tmp0 - tmp2;
300
                } else {
301
                    /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
302
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
303
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
304
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
305

    
306
                    tmp10 = tmp3;
307
                    tmp13 = -tmp3;
308
                    tmp11 = tmp2;
309
                    tmp12 = -tmp2;
310
                }
311
            } else {
312
                if (d0) {
313
                    /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
314
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
315
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
316

    
317
                    tmp0 = d0 << CONST_BITS;
318

    
319
                    tmp10 = tmp0 + tmp3;
320
                    tmp13 = tmp0 - tmp3;
321
                    tmp11 = tmp0 + tmp2;
322
                    tmp12 = tmp0 - tmp2;
323
                } else {
324
                    /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
325
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
326
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
327

    
328
                    tmp10 = tmp3;
329
                    tmp13 = -tmp3;
330
                    tmp11 = tmp2;
331
                    tmp12 = -tmp2;
332
                }
333
            }
334
        }
335
    } else {
336
        if (d4) {
337
            if (d2) {
338
                if (d0) {
339
                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
340
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
341
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
342

    
343
                    tmp0 = (d0 + d4) << CONST_BITS;
344
                    tmp1 = (d0 - d4) << CONST_BITS;
345

    
346
                    tmp10 = tmp0 + tmp3;
347
                    tmp13 = tmp0 - tmp3;
348
                    tmp11 = tmp1 + tmp2;
349
                    tmp12 = tmp1 - tmp2;
350
                } else {
351
                    /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
352
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
353
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
354

    
355
                    tmp0 = d4 << CONST_BITS;
356

    
357
                    tmp10 = tmp0 + tmp3;
358
                    tmp13 = tmp0 - tmp3;
359
                    tmp11 = tmp2 - tmp0;
360
                    tmp12 = -(tmp0 + tmp2);
361
                }
362
            } else {
363
                if (d0) {
364
                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
365
                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
366
                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
367
                } else {
368
                    /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
369
                    tmp10 = tmp13 = d4 << CONST_BITS;
370
                    tmp11 = tmp12 = -tmp10;
371
                }
372
            }
373
        } else {
374
            if (d2) {
375
                if (d0) {
376
                    /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
377
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
378
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
379

    
380
                    tmp0 = d0 << CONST_BITS;
381

    
382
                    tmp10 = tmp0 + tmp3;
383
                    tmp13 = tmp0 - tmp3;
384
                    tmp11 = tmp0 + tmp2;
385
                    tmp12 = tmp0 - tmp2;
386
                } else {
387
                    /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
388
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
389
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
390

    
391
                    tmp10 = tmp3;
392
                    tmp13 = -tmp3;
393
                    tmp11 = tmp2;
394
                    tmp12 = -tmp2;
395
                }
396
            } else {
397
                if (d0) {
398
                    /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
399
                    tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
400
                } else {
401
                    /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
402
                    tmp10 = tmp13 = tmp11 = tmp12 = 0;
403
                }
404
            }
405
        }
406
      }
407

    
408
    /* Odd part per figure 8; the matrix is unitary and hence its
409
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
410
     */
411

    
412
    if (d7) {
413
        if (d5) {
414
            if (d3) {
415
                if (d1) {
416
                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
417
                    z1 = d7 + d1;
418
                    z2 = d5 + d3;
419
                    z3 = d7 + d3;
420
                    z4 = d5 + d1;
421
                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
422
                    
423
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
424
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
425
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
426
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
427
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
428
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
429
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
430
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
431
                    
432
                    z3 += z5;
433
                    z4 += z5;
434
                    
435
                    tmp0 += z1 + z3;
436
                    tmp1 += z2 + z4;
437
                    tmp2 += z2 + z3;
438
                    tmp3 += z1 + z4;
439
                } else {
440
                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
441
                    z2 = d5 + d3;
442
                    z3 = d7 + d3;
443
                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
444
                    
445
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
446
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
447
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
448
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
449
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
450
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
451
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
452
                    
453
                    z3 += z5;
454
                    z4 += z5;
455
                    
456
                    tmp0 += z1 + z3;
457
                    tmp1 += z2 + z4;
458
                    tmp2 += z2 + z3;
459
                    tmp3 = z1 + z4;
460
                }
461
            } else {
462
                if (d1) {
463
                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
464
                    z1 = d7 + d1;
465
                    z4 = d5 + d1;
466
                    z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
467
                    
468
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
469
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
470
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
471
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
472
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
473
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
474
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
475
                    
476
                    z3 += z5;
477
                    z4 += z5;
478
                    
479
                    tmp0 += z1 + z3;
480
                    tmp1 += z2 + z4;
481
                    tmp2 = z2 + z3;
482
                    tmp3 += z1 + z4;
483
                } else {
484
                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
485
                    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
486
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
487
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
488
                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
489
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
490
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
491
                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
492
                    
493
                    z3 += z5;
494
                    z4 += z5;
495
                    
496
                    tmp0 += z3;
497
                    tmp1 += z4;
498
                    tmp2 = z2 + z3;
499
                    tmp3 = z1 + z4;
500
                }
501
            }
502
        } else {
503
            if (d3) {
504
                if (d1) {
505
                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
506
                    z1 = d7 + d1;
507
                    z3 = d7 + d3;
508
                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
509
                    
510
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
511
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
512
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
513
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
514
                    z2 = MULTIPLY(-d3, FIX_2_562915447);
515
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
516
                    z4 = MULTIPLY(-d1, FIX_0_390180644);
517
                    
518
                    z3 += z5;
519
                    z4 += z5;
520
                    
521
                    tmp0 += z1 + z3;
522
                    tmp1 = z2 + z4;
523
                    tmp2 += z2 + z3;
524
                    tmp3 += z1 + z4;
525
                } else {
526
                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
527
                    z3 = d7 + d3;
528
                    
529
                    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
530
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
531
                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
532
                    z2 = MULTIPLY(-d3, FIX_2_562915447);
533
                    z5 = MULTIPLY(z3, FIX_1_175875602);
534
                    z3 = MULTIPLY(-z3, FIX_0_785694958);
535
                    
536
                    tmp0 += z3;
537
                    tmp1 = z2 + z5;
538
                    tmp2 += z3;
539
                    tmp3 = z1 + z5;
540
                }
541
            } else {
542
                if (d1) {
543
                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
544
                    z1 = d7 + d1;
545
                    z5 = MULTIPLY(z1, FIX_1_175875602);
546

    
547
                    z1 = MULTIPLY(z1, FIX_0_275899380);
548
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
549
                    tmp0 = MULTIPLY(-d7, FIX_1_662939225); 
550
                    z4 = MULTIPLY(-d1, FIX_0_390180644);
551
                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
552

    
553
                    tmp0 += z1;
554
                    tmp1 = z4 + z5;
555
                    tmp2 = z3 + z5;
556
                    tmp3 += z1;
557
                } else {
558
                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
559
                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
560
                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
561
                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
562
                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
563
                }
564
            }
565
        }
566
    } else {
567
        if (d5) {
568
            if (d3) {
569
                if (d1) {
570
                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
571
                    z2 = d5 + d3;
572
                    z4 = d5 + d1;
573
                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
574
                    
575
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
576
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
577
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
578
                    z1 = MULTIPLY(-d1, FIX_0_899976223);
579
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
580
                    z3 = MULTIPLY(-d3, FIX_1_961570560);
581
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
582
                    
583
                    z3 += z5;
584
                    z4 += z5;
585
                    
586
                    tmp0 = z1 + z3;
587
                    tmp1 += z2 + z4;
588
                    tmp2 += z2 + z3;
589
                    tmp3 += z1 + z4;
590
                } else {
591
                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
592
                    z2 = d5 + d3;
593
                    
594
                    z5 = MULTIPLY(z2, FIX_1_175875602);
595
                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
596
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
597
                    z2 = MULTIPLY(-z2, FIX_1_387039845);
598
                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
599
                    z3 = MULTIPLY(-d3, FIX_1_961570560);
600
                    
601
                    tmp0 = z3 + z5;
602
                    tmp1 += z2;
603
                    tmp2 += z2;
604
                    tmp3 = z4 + z5;
605
                }
606
            } else {
607
                if (d1) {
608
                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
609
                    z4 = d5 + d1;
610
                    
611
                    z5 = MULTIPLY(z4, FIX_1_175875602);
612
                    z1 = MULTIPLY(-d1, FIX_0_899976223);
613
                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
614
                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
615
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
616
                    z4 = MULTIPLY(z4, FIX_0_785694958);
617
                    
618
                    tmp0 = z1 + z5;
619
                    tmp1 += z4;
620
                    tmp2 = z2 + z5;
621
                    tmp3 += z4;
622
                } else {
623
                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
624
                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
625
                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
626
                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
627
                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
628
                }
629
            }
630
        } else {
631
            if (d3) {
632
                if (d1) {
633
                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
634
                    z5 = d1 + d3;
635
                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
636
                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
637
                    z1 = MULTIPLY(d1, FIX_1_061594337);
638
                    z2 = MULTIPLY(-d3, FIX_2_172734803);
639
                    z4 = MULTIPLY(z5, FIX_0_785694958);
640
                    z5 = MULTIPLY(z5, FIX_1_175875602);
641
                    
642
                    tmp0 = z1 - z4;
643
                    tmp1 = z2 + z4;
644
                    tmp2 += z5;
645
                    tmp3 += z5;
646
                } else {
647
                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
648
                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
649
                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
650
                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
651
                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
652
                }
653
            } else {
654
                if (d1) {
655
                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
656
                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
657
                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
658
                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
659
                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
660
                } else {
661
                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
662
                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
663
                }
664
            }
665
        }
666
    }
667
}
668
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
669

    
670
    dataptr[0] = (DCTELEM) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
671
    dataptr[7] = (DCTELEM) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
672
    dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
673
    dataptr[6] = (DCTELEM) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
674
    dataptr[2] = (DCTELEM) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
675
    dataptr[5] = (DCTELEM) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
676
    dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
677
    dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
678

    
679
    dataptr += DCTSIZE;                /* advance pointer to next row */
680
  }
681

    
682
  /* Pass 2: process columns. */
683
  /* Note that we must descale the results by a factor of 8 == 2**3, */
684
  /* and also undo the PASS1_BITS scaling. */
685

    
686
  dataptr = data;
687
  for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
688
    /* Columns of zeroes can be exploited in the same way as we did with rows.
689
     * However, the row calculation has created many nonzero AC terms, so the
690
     * simplification applies less often (typically 5% to 10% of the time).
691
     * On machines with very fast multiplication, it's possible that the
692
     * test takes more time than it's worth.  In that case this section
693
     * may be commented out.
694
     */
695

    
696
    d0 = dataptr[DCTSIZE*0];
697
    d1 = dataptr[DCTSIZE*1];
698
    d2 = dataptr[DCTSIZE*2];
699
    d3 = dataptr[DCTSIZE*3];
700
    d4 = dataptr[DCTSIZE*4];
701
    d5 = dataptr[DCTSIZE*5];
702
    d6 = dataptr[DCTSIZE*6];
703
    d7 = dataptr[DCTSIZE*7];
704

    
705
    /* Even part: reverse the even part of the forward DCT. */
706
    /* The rotator is sqrt(2)*c(-6). */
707
    if (d6) {
708
        if (d4) {
709
            if (d2) {
710
                if (d0) {
711
                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
712
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
713
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
714
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
715

    
716
                    tmp0 = (d0 + d4) << CONST_BITS;
717
                    tmp1 = (d0 - d4) << CONST_BITS;
718

    
719
                    tmp10 = tmp0 + tmp3;
720
                    tmp13 = tmp0 - tmp3;
721
                    tmp11 = tmp1 + tmp2;
722
                    tmp12 = tmp1 - tmp2;
723
                } else {
724
                    /* d0 == 0, d2 != 0, d4 != 0, d6 != 0 */
725
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
726
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
727
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
728

    
729
                    tmp0 = d4 << CONST_BITS;
730

    
731
                    tmp10 = tmp0 + tmp3;
732
                    tmp13 = tmp0 - tmp3;
733
                    tmp11 = tmp2 - tmp0;
734
                    tmp12 = -(tmp0 + tmp2);
735
                }
736
            } else {
737
                if (d0) {
738
                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
739
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
740
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
741

    
742
                    tmp0 = (d0 + d4) << CONST_BITS;
743
                    tmp1 = (d0 - d4) << CONST_BITS;
744

    
745
                    tmp10 = tmp0 + tmp3;
746
                    tmp13 = tmp0 - tmp3;
747
                    tmp11 = tmp1 + tmp2;
748
                    tmp12 = tmp1 - tmp2;
749
                } else {
750
                    /* d0 == 0, d2 == 0, d4 != 0, d6 != 0 */
751
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
752
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
753

    
754
                    tmp0 = d4 << CONST_BITS;
755

    
756
                    tmp10 = tmp0 + tmp3;
757
                    tmp13 = tmp0 - tmp3;
758
                    tmp11 = tmp2 - tmp0;
759
                    tmp12 = -(tmp0 + tmp2);
760
                }
761
            }
762
        } else {
763
            if (d2) {
764
                if (d0) {
765
                    /* d0 != 0, d2 != 0, d4 == 0, d6 != 0 */
766
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
767
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
768
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
769

    
770
                    tmp0 = d0 << CONST_BITS;
771

    
772
                    tmp10 = tmp0 + tmp3;
773
                    tmp13 = tmp0 - tmp3;
774
                    tmp11 = tmp0 + tmp2;
775
                    tmp12 = tmp0 - tmp2;
776
                } else {
777
                    /* d0 == 0, d2 != 0, d4 == 0, d6 != 0 */
778
                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
779
                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
780
                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
781

    
782
                    tmp10 = tmp3;
783
                    tmp13 = -tmp3;
784
                    tmp11 = tmp2;
785
                    tmp12 = -tmp2;
786
                }
787
            } else {
788
                if (d0) {
789
                    /* d0 != 0, d2 == 0, d4 == 0, d6 != 0 */
790
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
791
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
792

    
793
                    tmp0 = d0 << CONST_BITS;
794

    
795
                    tmp10 = tmp0 + tmp3;
796
                    tmp13 = tmp0 - tmp3;
797
                    tmp11 = tmp0 + tmp2;
798
                    tmp12 = tmp0 - tmp2;
799
                } else {
800
                    /* d0 == 0, d2 == 0, d4 == 0, d6 != 0 */
801
                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
802
                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
803

    
804
                    tmp10 = tmp3;
805
                    tmp13 = -tmp3;
806
                    tmp11 = tmp2;
807
                    tmp12 = -tmp2;
808
                }
809
            }
810
        }
811
    } else {
812
        if (d4) {
813
            if (d2) {
814
                if (d0) {
815
                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
816
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
817
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
818

    
819
                    tmp0 = (d0 + d4) << CONST_BITS;
820
                    tmp1 = (d0 - d4) << CONST_BITS;
821

    
822
                    tmp10 = tmp0 + tmp3;
823
                    tmp13 = tmp0 - tmp3;
824
                    tmp11 = tmp1 + tmp2;
825
                    tmp12 = tmp1 - tmp2;
826
                } else {
827
                    /* d0 == 0, d2 != 0, d4 != 0, d6 == 0 */
828
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
829
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
830

    
831
                    tmp0 = d4 << CONST_BITS;
832

    
833
                    tmp10 = tmp0 + tmp3;
834
                    tmp13 = tmp0 - tmp3;
835
                    tmp11 = tmp2 - tmp0;
836
                    tmp12 = -(tmp0 + tmp2);
837
                }
838
            } else {
839
                if (d0) {
840
                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
841
                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
842
                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
843
                } else {
844
                    /* d0 == 0, d2 == 0, d4 != 0, d6 == 0 */
845
                    tmp10 = tmp13 = d4 << CONST_BITS;
846
                    tmp11 = tmp12 = -tmp10;
847
                }
848
            }
849
        } else {
850
            if (d2) {
851
                if (d0) {
852
                    /* d0 != 0, d2 != 0, d4 == 0, d6 == 0 */
853
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
854
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
855

    
856
                    tmp0 = d0 << CONST_BITS;
857

    
858
                    tmp10 = tmp0 + tmp3;
859
                    tmp13 = tmp0 - tmp3;
860
                    tmp11 = tmp0 + tmp2;
861
                    tmp12 = tmp0 - tmp2;
862
                } else {
863
                    /* d0 == 0, d2 != 0, d4 == 0, d6 == 0 */
864
                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
865
                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
866

    
867
                    tmp10 = tmp3;
868
                    tmp13 = -tmp3;
869
                    tmp11 = tmp2;
870
                    tmp12 = -tmp2;
871
                }
872
            } else {
873
                if (d0) {
874
                    /* d0 != 0, d2 == 0, d4 == 0, d6 == 0 */
875
                    tmp10 = tmp13 = tmp11 = tmp12 = d0 << CONST_BITS;
876
                } else {
877
                    /* d0 == 0, d2 == 0, d4 == 0, d6 == 0 */
878
                    tmp10 = tmp13 = tmp11 = tmp12 = 0;
879
                }
880
            }
881
        }
882
    }
883

    
884
    /* Odd part per figure 8; the matrix is unitary and hence its
885
     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
886
     */
887
    if (d7) {
888
        if (d5) {
889
            if (d3) {
890
                if (d1) {
891
                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
892
                    z1 = d7 + d1;
893
                    z2 = d5 + d3;
894
                    z3 = d7 + d3;
895
                    z4 = d5 + d1;
896
                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
897
                    
898
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
899
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
900
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
901
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
902
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
903
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
904
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
905
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
906
                    
907
                    z3 += z5;
908
                    z4 += z5;
909
                    
910
                    tmp0 += z1 + z3;
911
                    tmp1 += z2 + z4;
912
                    tmp2 += z2 + z3;
913
                    tmp3 += z1 + z4;
914
                } else {
915
                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
916
                    z1 = d7;
917
                    z2 = d5 + d3;
918
                    z3 = d7 + d3;
919
                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
920
                    
921
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
922
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
923
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
924
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
925
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
926
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
927
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
928
                    
929
                    z3 += z5;
930
                    z4 += z5;
931
                    
932
                    tmp0 += z1 + z3;
933
                    tmp1 += z2 + z4;
934
                    tmp2 += z2 + z3;
935
                    tmp3 = z1 + z4;
936
                }
937
            } else {
938
                if (d1) {
939
                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
940
                    z1 = d7 + d1;
941
                    z2 = d5;
942
                    z3 = d7;
943
                    z4 = d5 + d1;
944
                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
945
                    
946
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
947
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
948
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
949
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
950
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
951
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
952
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
953
                    
954
                    z3 += z5;
955
                    z4 += z5;
956
                    
957
                    tmp0 += z1 + z3;
958
                    tmp1 += z2 + z4;
959
                    tmp2 = z2 + z3;
960
                    tmp3 += z1 + z4;
961
                } else {
962
                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
963
                    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
964
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
965
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
966
                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
967
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
968
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
969
                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
970
                    
971
                    z3 += z5;
972
                    z4 += z5;
973
                    
974
                    tmp0 += z3;
975
                    tmp1 += z4;
976
                    tmp2 = z2 + z3;
977
                    tmp3 = z1 + z4;
978
                }
979
            }
980
        } else {
981
            if (d3) {
982
                if (d1) {
983
                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
984
                    z1 = d7 + d1;
985
                    z3 = d7 + d3;
986
                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
987
                    
988
                    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
989
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
990
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
991
                    z1 = MULTIPLY(-z1, FIX_0_899976223);
992
                    z2 = MULTIPLY(-d3, FIX_2_562915447);
993
                    z3 = MULTIPLY(-z3, FIX_1_961570560);
994
                    z4 = MULTIPLY(-d1, FIX_0_390180644);
995
                    
996
                    z3 += z5;
997
                    z4 += z5;
998
                    
999
                    tmp0 += z1 + z3;
1000
                    tmp1 = z2 + z4;
1001
                    tmp2 += z2 + z3;
1002
                    tmp3 += z1 + z4;
1003
                } else {
1004
                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
1005
                    z3 = d7 + d3;
1006
                    
1007
                    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
1008
                    z1 = MULTIPLY(-d7, FIX_0_899976223);
1009
                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
1010
                    z2 = MULTIPLY(-d3, FIX_2_562915447);
1011
                    z5 = MULTIPLY(z3, FIX_1_175875602);
1012
                    z3 = MULTIPLY(-z3, FIX_0_785694958);
1013
                    
1014
                    tmp0 += z3;
1015
                    tmp1 = z2 + z5;
1016
                    tmp2 += z3;
1017
                    tmp3 = z1 + z5;
1018
                }
1019
            } else {
1020
                if (d1) {
1021
                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
1022
                    z1 = d7 + d1;
1023
                    z5 = MULTIPLY(z1, FIX_1_175875602);
1024

    
1025
                    z1 = MULTIPLY(z1, FIX_0_275899380);
1026
                    z3 = MULTIPLY(-d7, FIX_1_961570560);
1027
                    tmp0 = MULTIPLY(-d7, FIX_1_662939225); 
1028
                    z4 = MULTIPLY(-d1, FIX_0_390180644);
1029
                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
1030

    
1031
                    tmp0 += z1;
1032
                    tmp1 = z4 + z5;
1033
                    tmp2 = z3 + z5;
1034
                    tmp3 += z1;
1035
                } else {
1036
                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
1037
                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
1038
                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
1039
                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
1040
                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
1041
                }
1042
            }
1043
        }
1044
    } else {
1045
        if (d5) {
1046
            if (d3) {
1047
                if (d1) {
1048
                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
1049
                    z2 = d5 + d3;
1050
                    z4 = d5 + d1;
1051
                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
1052
                    
1053
                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
1054
                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
1055
                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
1056
                    z1 = MULTIPLY(-d1, FIX_0_899976223);
1057
                    z2 = MULTIPLY(-z2, FIX_2_562915447);
1058
                    z3 = MULTIPLY(-d3, FIX_1_961570560);
1059
                    z4 = MULTIPLY(-z4, FIX_0_390180644);
1060
                    
1061
                    z3 += z5;
1062
                    z4 += z5;
1063
                    
1064
                    tmp0 = z1 + z3;
1065
                    tmp1 += z2 + z4;
1066
                    tmp2 += z2 + z3;
1067
                    tmp3 += z1 + z4;
1068
                } else {
1069
                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
1070
                    z2 = d5 + d3;
1071
                    
1072
                    z5 = MULTIPLY(z2, FIX_1_175875602);
1073
                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
1074
                    z4 = MULTIPLY(-d5, FIX_0_390180644);
1075
                    z2 = MULTIPLY(-z2, FIX_1_387039845);
1076
                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
1077
                    z3 = MULTIPLY(-d3, FIX_1_961570560);
1078
                    
1079
                    tmp0 = z3 + z5;
1080
                    tmp1 += z2;
1081
                    tmp2 += z2;
1082
                    tmp3 = z4 + z5;
1083
                }
1084
            } else {
1085
                if (d1) {
1086
                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
1087
                    z4 = d5 + d1;
1088
                    
1089
                    z5 = MULTIPLY(z4, FIX_1_175875602);
1090
                    z1 = MULTIPLY(-d1, FIX_0_899976223);
1091
                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
1092
                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
1093
                    z2 = MULTIPLY(-d5, FIX_2_562915447);
1094
                    z4 = MULTIPLY(z4, FIX_0_785694958);
1095
                    
1096
                    tmp0 = z1 + z5;
1097
                    tmp1 += z4;
1098
                    tmp2 = z2 + z5;
1099
                    tmp3 += z4;
1100
                } else {
1101
                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
1102
                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
1103
                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
1104
                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
1105
                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
1106
                }
1107
            }
1108
        } else {
1109
            if (d3) {
1110
                if (d1) {
1111
                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
1112
                    z5 = d1 + d3;
1113
                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
1114
                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
1115
                    z1 = MULTIPLY(d1, FIX_1_061594337);
1116
                    z2 = MULTIPLY(-d3, FIX_2_172734803);
1117
                    z4 = MULTIPLY(z5, FIX_0_785694958);
1118
                    z5 = MULTIPLY(z5, FIX_1_175875602);
1119
                    
1120
                    tmp0 = z1 - z4;
1121
                    tmp1 = z2 + z4;
1122
                    tmp2 += z5;
1123
                    tmp3 += z5;
1124
                } else {
1125
                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
1126
                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
1127
                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
1128
                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
1129
                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
1130
                }
1131
            } else {
1132
                if (d1) {
1133
                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
1134
                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
1135
                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
1136
                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
1137
                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
1138
                } else {
1139
                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
1140
                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
1141
                }
1142
            }
1143
        }
1144
    }
1145

    
1146
    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
1147

    
1148
    dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
1149
                                           CONST_BITS+PASS1_BITS+3);
1150
    dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
1151
                                           CONST_BITS+PASS1_BITS+3);
1152
    dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
1153
                                           CONST_BITS+PASS1_BITS+3);
1154
    dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
1155
                                           CONST_BITS+PASS1_BITS+3);
1156
    dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
1157
                                           CONST_BITS+PASS1_BITS+3);
1158
    dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
1159
                                           CONST_BITS+PASS1_BITS+3);
1160
    dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
1161
                                           CONST_BITS+PASS1_BITS+3);
1162
    dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
1163
                                           CONST_BITS+PASS1_BITS+3);
1164
    
1165
    dataptr++;                        /* advance pointer to next column */
1166
  }
1167
}
1168

    
1169