Statistics
| Branch: | Revision:

ffmpeg / libavcodec / simple_idct.c @ d36a2466

History | View | Annotate | Download (12.4 KB)

1
/*
2
 * Simple IDCT
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
 */
20
/*
21
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
22
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) 
23
 */
24
#include "avcodec.h"
25
#include "dsputil.h"
26
#include "simple_idct.h"
27

    
28
/*
 * Fixed-point IDCT weights W1..W7 and the right shifts applied at the end of
 * the row pass (ROW_SHIFT) and the column pass (COL_SHIFT).
 * Two alternative sets are given: the first, scaled by 2048, is compiled out
 * by "#if 0"; the second, scaled by 1<<14, is the one actually used.
 */
#if 0
29
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
30
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
31
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
32
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
33
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
34
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
35
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
36
#define ROW_SHIFT 8
37
#define COL_SHIFT 17
38
#else
39
#define W1  22725  // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
40
#define W2  21407  // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
41
#define W3  19266  // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
42
#define W4  16383  // cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 evaluates to 16384; this value is one less. NOTE(review): presumably deliberate -- confirm before "correcting" it
43
#define W5  12873  // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
44
#define W6  8867   // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
45
#define W7  4520   // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
46
#define ROW_SHIFT 11
47
#define COL_SHIFT 20 // 6
48
#endif
49

    
50
/*
 * FAST_64BIT makes idctRowCondDC() test and fill a row through 64-bit words
 * instead of 32-bit ones.  It is switched on here for Alpha builds.
 * NOTE(review): in the visible part of this file the only code that tests
 * FAST_64BIT sits in the non-ARCH_ALPHA branch, so this definition alone does
 * not appear to reach it -- confirm whether that is intended.
 */
#ifdef ARCH_ALPHA
51
#define FAST_64BIT
52
#endif
53

    
54
/*
 * 16x16 -> 32 bit multiply helpers used by the row and column passes.
 *
 *   MUL16(rt, ra, rb)   rt  = ra * rb
 *   MAC16(rt, ra, rb)   rt += ra * rb
 *
 * rt must be a plain variable.  Neither macro carries its own trailing
 * semicolon, so both behave as one statement when written "MAC16(...);",
 * including inside an unbraced if/else.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
73

    
74
#ifdef ARCH_ALPHA
75
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
76
static inline int idctRowCondDC(int16_t *row)
77
{
78
        int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
79
        uint64_t *lrow = (uint64_t *) row;
80

    
81
        if (lrow[1] == 0) {
82
                if (lrow[0] == 0)
83
                        return 0;
84
                if ((lrow[0] & ~0xffffULL) == 0) {
85
                        uint64_t v;
86

    
87
                        a0 = W4 * row[0];
88
                        a0 += 1 << (ROW_SHIFT - 1);
89
                        a0 >>= ROW_SHIFT;
90
                        v = (uint16_t) a0;
91
                        v += v << 16;
92
                        v += v << 32;
93
                        lrow[0] = v;
94
                        lrow[1] = v;
95

    
96
                        return 1;
97
                }
98
        }
99

    
100
        a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
101
        a1 = a0;
102
        a2 = a0;
103
        a3 = a0;
104

    
105
        if (row[2]) {
106
                a0 += W2 * row[2];
107
                a1 += W6 * row[2];
108
                a2 -= W6 * row[2];
109
                a3 -= W2 * row[2];
110
        }
111

    
112
        if (row[4]) {
113
                a0 += W4 * row[4];
114
                a1 -= W4 * row[4];
115
                a2 -= W4 * row[4];
116
                a3 += W4 * row[4];
117
        }
118

    
119
        if (row[6]) {
120
                a0 += W6 * row[6];
121
                a1 -= W2 * row[6];
122
                a2 += W2 * row[6];
123
                a3 -= W6 * row[6];
124
        }
125

    
126
        if (row[1]) {
127
                b0 = W1 * row[1];
128
                b1 = W3 * row[1];
129
                b2 = W5 * row[1];
130
                b3 = W7 * row[1];
131
        } else {
132
                b0 = 0;
133
                b1 = 0;
134
                b2 = 0;
135
                b3 = 0;
136
        }
137

    
138
        if (row[3]) {
139
                b0 += W3 * row[3];
140
                b1 -= W7 * row[3];
141
                b2 -= W1 * row[3];
142
                b3 -= W5 * row[3];
143
        }
144

    
145
        if (row[5]) {
146
                b0 += W5 * row[5];
147
                b1 -= W1 * row[5];
148
                b2 += W7 * row[5];
149
                b3 += W3 * row[5];
150
        }
151

    
152
        if (row[7]) {
153
                b0 += W7 * row[7];
154
                b1 -= W5 * row[7];
155
                b2 += W3 * row[7];
156
                b3 -= W1 * row[7];
157
        }
158

    
159
        row[0] = (a0 + b0) >> ROW_SHIFT;
160
        row[1] = (a1 + b1) >> ROW_SHIFT;
161
        row[2] = (a2 + b2) >> ROW_SHIFT;
162
        row[3] = (a3 + b3) >> ROW_SHIFT;
163
        row[4] = (a3 - b3) >> ROW_SHIFT;
164
        row[5] = (a2 - b2) >> ROW_SHIFT;
165
        row[6] = (a1 - b1) >> ROW_SHIFT;
166
        row[7] = (a0 - b0) >> ROW_SHIFT;
167

    
168
        return 2;
169
}
170
#else  /* not ARCH_ALPHA */
171

    
172
static inline void idctRowCondDC (int16_t * row)
173
{
174
        int a0, a1, a2, a3, b0, b1, b2, b3;
175
#ifdef FAST_64BIT
176
        uint64_t temp;
177
#else
178
        uint32_t temp;
179
#endif
180

    
181
#ifdef FAST_64BIT
182
#ifdef WORDS_BIGENDIAN
183
#define ROW0_MASK 0xffff000000000000LL
184
#else
185
#define ROW0_MASK 0xffffLL
186
#endif
187
        if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | 
188
              ((uint64_t *)row)[1]) == 0) {
189
            temp = (row[0] << 3) & 0xffff;
190
            temp += temp << 16;
191
            temp += temp << 32;
192
            ((uint64_t *)row)[0] = temp;
193
            ((uint64_t *)row)[1] = temp;
194
            return;
195
        }
196
#else
197
        if (!(((uint32_t*)row)[1] |
198
              ((uint32_t*)row)[2] |
199
              ((uint32_t*)row)[3] | 
200
              row[1])) {
201
            temp = (row[0] << 3) & 0xffff;
202
            temp += temp << 16;
203
            ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
204
                ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
205
                return;
206
        }
207
#endif
208

    
209
        a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
210
        a1 = a0;
211
        a2 = a0;
212
        a3 = a0;
213

    
214
        /* no need to optimize : gcc does it */
215
        a0 += W2 * row[2];
216
        a1 += W6 * row[2];
217
        a2 -= W6 * row[2];
218
        a3 -= W2 * row[2];
219

    
220
        MUL16(b0, W1, row[1]);
221
        MAC16(b0, W3, row[3]);
222
        MUL16(b1, W3, row[1]);
223
        MAC16(b1, -W7, row[3]);
224
        MUL16(b2, W5, row[1]);
225
        MAC16(b2, -W1, row[3]);
226
        MUL16(b3, W7, row[1]);
227
        MAC16(b3, -W5, row[3]);
228

    
229
#ifdef FAST_64BIT
230
        temp = ((uint64_t*)row)[1];
231
#else
232
        temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
233
#endif
234
        if (temp != 0) {
235
            a0 += W4*row[4] + W6*row[6];
236
            a1 += - W4*row[4] - W2*row[6];
237
            a2 += - W4*row[4] + W2*row[6];
238
            a3 += W4*row[4] - W6*row[6];
239

    
240
            MAC16(b0, W5, row[5]);
241
            MAC16(b0, W7, row[7]);
242
            
243
            MAC16(b1, -W1, row[5]);
244
            MAC16(b1, -W5, row[7]);
245
            
246
            MAC16(b2, W7, row[5]);
247
            MAC16(b2, W3, row[7]);
248
            
249
            MAC16(b3, W3, row[5]);
250
            MAC16(b3, -W1, row[7]);
251
        }
252

    
253
        row[0] = (a0 + b0) >> ROW_SHIFT;
254
        row[7] = (a0 - b0) >> ROW_SHIFT;
255
        row[1] = (a1 + b1) >> ROW_SHIFT;
256
        row[6] = (a1 - b1) >> ROW_SHIFT;
257
        row[2] = (a2 + b2) >> ROW_SHIFT;
258
        row[5] = (a2 - b2) >> ROW_SHIFT;
259
        row[3] = (a3 + b3) >> ROW_SHIFT;
260
        row[4] = (a3 - b3) >> ROW_SHIFT;
261
}
262
#endif /* not ARCH_ALPHA */
263

    
264
static inline void idctSparseColPut (UINT8 *dest, int line_size, 
265
                                     int16_t * col)
266
{
267
        int a0, a1, a2, a3, b0, b1, b2, b3;
268
        UINT8 *cm = cropTbl + MAX_NEG_CROP;
269

    
270
        /* XXX: I did that only to give same values as previous code */
271
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
272
        a1 = a0;
273
        a2 = a0;
274
        a3 = a0;
275

    
276
        a0 +=  + W2*col[8*2];
277
        a1 +=  + W6*col[8*2];
278
        a2 +=  - W6*col[8*2];
279
        a3 +=  - W2*col[8*2];
280

    
281
        MUL16(b0, W1, col[8*1]);
282
        MUL16(b1, W3, col[8*1]);
283
        MUL16(b2, W5, col[8*1]);
284
        MUL16(b3, W7, col[8*1]);
285

    
286
        MAC16(b0, + W3, col[8*3]);
287
        MAC16(b1, - W7, col[8*3]);
288
        MAC16(b2, - W1, col[8*3]);
289
        MAC16(b3, - W5, col[8*3]);
290

    
291
        if(col[8*4]){
292
            a0 += + W4*col[8*4];
293
            a1 += - W4*col[8*4];
294
            a2 += - W4*col[8*4];
295
            a3 += + W4*col[8*4];
296
        }
297

    
298
        if (col[8*5]) {
299
            MAC16(b0, + W5, col[8*5]);
300
            MAC16(b1, - W1, col[8*5]);
301
            MAC16(b2, + W7, col[8*5]);
302
            MAC16(b3, + W3, col[8*5]);
303
        }
304

    
305
        if(col[8*6]){
306
            a0 += + W6*col[8*6];
307
            a1 += - W2*col[8*6];
308
            a2 += + W2*col[8*6];
309
            a3 += - W6*col[8*6];
310
        }
311

    
312
        if (col[8*7]) {
313
            MAC16(b0, + W7, col[8*7]);
314
            MAC16(b1, - W5, col[8*7]);
315
            MAC16(b2, + W3, col[8*7]);
316
            MAC16(b3, - W1, col[8*7]);
317
        }
318

    
319
        dest[0] = cm[(a0 + b0) >> COL_SHIFT];
320
        dest += line_size;
321
        dest[0] = cm[(a1 + b1) >> COL_SHIFT];
322
        dest += line_size;
323
        dest[0] = cm[(a2 + b2) >> COL_SHIFT];
324
        dest += line_size;
325
        dest[0] = cm[(a3 + b3) >> COL_SHIFT];
326
        dest += line_size;
327
        dest[0] = cm[(a3 - b3) >> COL_SHIFT];
328
        dest += line_size;
329
        dest[0] = cm[(a2 - b2) >> COL_SHIFT];
330
        dest += line_size;
331
        dest[0] = cm[(a1 - b1) >> COL_SHIFT];
332
        dest += line_size;
333
        dest[0] = cm[(a0 - b0) >> COL_SHIFT];
334
}
335

    
336
static inline void idctSparseColAdd (UINT8 *dest, int line_size, 
337
                                     int16_t * col)
338
{
339
        int a0, a1, a2, a3, b0, b1, b2, b3;
340
        UINT8 *cm = cropTbl + MAX_NEG_CROP;
341

    
342
        /* XXX: I did that only to give same values as previous code */
343
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
344
        a1 = a0;
345
        a2 = a0;
346
        a3 = a0;
347

    
348
        a0 +=  + W2*col[8*2];
349
        a1 +=  + W6*col[8*2];
350
        a2 +=  - W6*col[8*2];
351
        a3 +=  - W2*col[8*2];
352

    
353
        MUL16(b0, W1, col[8*1]);
354
        MUL16(b1, W3, col[8*1]);
355
        MUL16(b2, W5, col[8*1]);
356
        MUL16(b3, W7, col[8*1]);
357

    
358
        MAC16(b0, + W3, col[8*3]);
359
        MAC16(b1, - W7, col[8*3]);
360
        MAC16(b2, - W1, col[8*3]);
361
        MAC16(b3, - W5, col[8*3]);
362

    
363
        if(col[8*4]){
364
            a0 += + W4*col[8*4];
365
            a1 += - W4*col[8*4];
366
            a2 += - W4*col[8*4];
367
            a3 += + W4*col[8*4];
368
        }
369

    
370
        if (col[8*5]) {
371
            MAC16(b0, + W5, col[8*5]);
372
            MAC16(b1, - W1, col[8*5]);
373
            MAC16(b2, + W7, col[8*5]);
374
            MAC16(b3, + W3, col[8*5]);
375
        }
376

    
377
        if(col[8*6]){
378
            a0 += + W6*col[8*6];
379
            a1 += - W2*col[8*6];
380
            a2 += + W2*col[8*6];
381
            a3 += - W6*col[8*6];
382
        }
383

    
384
        if (col[8*7]) {
385
            MAC16(b0, + W7, col[8*7]);
386
            MAC16(b1, - W5, col[8*7]);
387
            MAC16(b2, + W3, col[8*7]);
388
            MAC16(b3, - W1, col[8*7]);
389
        }
390

    
391
        dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
392
        dest += line_size;
393
        dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
394
        dest += line_size;
395
        dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
396
        dest += line_size;
397
        dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
398
        dest += line_size;
399
        dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
400
        dest += line_size;
401
        dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
402
        dest += line_size;
403
        dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
404
        dest += line_size;
405
        dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
406
}
407

    
408
#ifdef ARCH_ALPHA
409
/* If all rows but the first one are zero after row transformation,
410
   all rows will be identical after column transformation.  */
411
static inline void idctCol2(int16_t *col)
412
{
413
        int i;
414
        uint64_t l, r;
415
        uint64_t *lcol = (uint64_t *) col;
416

    
417
        for (i = 0; i < 8; ++i) {
418
                int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
419

    
420
                a0 *= W4;
421
                col[0] = a0 >> COL_SHIFT;
422
                ++col;
423
        }
424

    
425
        l = lcol[0];
426
        r = lcol[1];
427
        lcol[ 2] = l; lcol[ 3] = r;
428
        lcol[ 4] = l; lcol[ 5] = r;
429
        lcol[ 6] = l; lcol[ 7] = r;
430
        lcol[ 8] = l; lcol[ 9] = r;
431
        lcol[10] = l; lcol[11] = r;
432
        lcol[12] = l; lcol[13] = r;
433
        lcol[14] = l; lcol[15] = r;
434
}
435

    
436
void simple_idct (short *block)
437
{
438

    
439
        int i;
440
        int rowsZero = 1;       /* all rows except row 0 zero */
441
        int rowsConstant = 1;        /* all rows consist of a constant value */
442

    
443
        for (i = 0; i < 8; i++) {
444
                int sparseness = idctRowCondDC(block + 8 * i);
445

    
446
                if (i > 0 && sparseness > 0)
447
                        rowsZero = 0;
448
                if (sparseness == 2)
449
                        rowsConstant = 0;
450
        }
451

    
452
        if (rowsZero) {
453
                idctCol2(block);
454
        } else if (rowsConstant) {
455
                uint64_t *lblock = (uint64_t *) block;
456

    
457
                idctSparseCol(block);
458
                for (i = 0; i < 8; i++) {
459
                        uint64_t v = (uint16_t) block[i * 8];
460

    
461
                        v += v << 16;
462
                        v += v << 32;
463
                        lblock[0] = v;
464
                        lblock[1] = v;
465
                        lblock += 2;
466
                }
467
        } else {
468
                for (i = 0; i < 8; i++)
469
                        idctSparseCol(block + i);
470
        }
471
}
472

    
473
/* XXX: suppress this mess */
474
/*
 * Alpha variant: runs the in-place IDCT on block, then passes the
 * transformed block to put_pixels_clamped() together with dest and
 * line_size (that routine is defined elsewhere; presumably it stores the
 * clamped samples to dest).
 */
void simple_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    simple_idct(block);

    put_pixels_clamped(block, dest, line_size);
}
479

    
480
/*
 * Alpha variant: runs the in-place IDCT on block, then passes the
 * transformed block to add_pixels_clamped() together with dest and
 * line_size (that routine is defined elsewhere; presumably it adds the
 * samples to dest with clamping).
 */
void simple_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    simple_idct(block);

    add_pixels_clamped(block, dest, line_size);
}
485

    
486
#else
487

    
488
/*
 * 8x8 IDCT with store: transforms the 8 rows of block in place, then
 * transforms each of the 8 columns and writes column c of the result to
 * dest + c, stepping line_size bytes between rows.
 */
void simple_idct_put(UINT8 *dest, int line_size, INT16 *block)
{
    int r, c;

    /* the row pass must finish before any column is read */
    for (r = 0; r < 8; r++) {
        idctRowCondDC(block + 8 * r);
    }

    for (c = 0; c < 8; c++) {
        idctSparseColPut(dest + c, line_size, block + c);
    }
}
497

    
498
/*
 * 8x8 IDCT with accumulate: transforms the 8 rows of block in place, then
 * transforms each of the 8 columns and adds column c of the result to the
 * bytes at dest + c, stepping line_size bytes between rows.
 */
void simple_idct_add(UINT8 *dest, int line_size, INT16 *block)
{
    int r, c;

    /* the row pass must finish before any column is read */
    for (r = 0; r < 8; r++) {
        idctRowCondDC(block + 8 * r);
    }

    for (c = 0; c < 8; c++) {
        idctSparseColAdd(dest + c, line_size, block + c);
    }
}
507

    
508
#endif
509

    
510
#undef COL_SHIFT