Statistics
| Branch: | Revision:

ffmpeg / libavcodec / simple_idct.c @ 412ba501

History | View | Annotate | Download (9.46 KB)

1
/*
2
 * Simple IDCT
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
 */
20
/*
21
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
22
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) 
23
 */
24
#include "avcodec.h"
25

    
26
#include "simple_idct.h"
27

    
28
/*
 * Fixed-point IDCT coefficients and the shifts that remove their scaling.
 *
 *     Wi = cos(i*M_PI/16) * sqrt(2) * (1 << FRAC_BITS) + 0.5
 *
 * The disabled set uses 11 fractional bits, the active set uses 14.
 * ROW_SHIFT is applied after the row pass and COL_SHIFT after the
 * column pass.
 */
#if 0
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
#define W1  22725  /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407  /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266  /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
/*
 * NOTE(review): the formula yields 16384 for i = 4; the value used here is
 * one less. Presumably intentional -- confirm before "correcting" it, since
 * changing it alters the output of every transform.
 */
#define W4  16383  /* cos(4*M_PI/16)*sqrt(2)*(1<<14) - 1 */
#define W5  12873  /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867   /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520   /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11
#define COL_SHIFT 20 /* original annotation: "6" (meaning not evident here) */
#endif
49

    
50
/* Alpha builds enable the code paths guarded by FAST_64BIT. */
#ifdef ARCH_ALPHA
#define FAST_64BIT
#endif

/*
 * MUL16(rt, ra, rb): rt  = ra * rb
 * MAC16(rt, ra, rb): rt += ra * rb
 *
 * Both are meant to be used as statements, terminated by the caller's own
 * semicolon. The macros themselves carry no trailing semicolon so that they
 * remain safe inside unbraced if/else bodies.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
73

    
74
#ifdef ARCH_ALPHA
75
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
76
static inline int idctRowCondDC(int16_t *row)
77
{
78
        int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
79
        uint64_t *lrow = (uint64_t *) row;
80

    
81
        if (lrow[1] == 0) {
82
                if (lrow[0] == 0)
83
                        return 0;
84
                if ((lrow[0] & ~0xffffULL) == 0) {
85
                        uint64_t v;
86

    
87
                        a0 = W4 * row[0];
88
                        a0 += 1 << (ROW_SHIFT - 1);
89
                        a0 >>= ROW_SHIFT;
90
                        v = (uint16_t) a0;
91
                        v += v << 16;
92
                        v += v << 32;
93
                        lrow[0] = v;
94
                        lrow[1] = v;
95

    
96
                        return 1;
97
                }
98
        }
99

    
100
        a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
101
        a1 = a0;
102
        a2 = a0;
103
        a3 = a0;
104

    
105
        if (row[2]) {
106
                a0 += W2 * row[2];
107
                a1 += W6 * row[2];
108
                a2 -= W6 * row[2];
109
                a3 -= W2 * row[2];
110
        }
111

    
112
        if (row[4]) {
113
                a0 += W4 * row[4];
114
                a1 -= W4 * row[4];
115
                a2 -= W4 * row[4];
116
                a3 += W4 * row[4];
117
        }
118

    
119
        if (row[6]) {
120
                a0 += W6 * row[6];
121
                a1 -= W2 * row[6];
122
                a2 += W2 * row[6];
123
                a3 -= W6 * row[6];
124
        }
125

    
126
        if (row[1]) {
127
                b0 = W1 * row[1];
128
                b1 = W3 * row[1];
129
                b2 = W5 * row[1];
130
                b3 = W7 * row[1];
131
        } else {
132
                b0 = 0;
133
                b1 = 0;
134
                b2 = 0;
135
                b3 = 0;
136
        }
137

    
138
        if (row[3]) {
139
                b0 += W3 * row[3];
140
                b1 -= W7 * row[3];
141
                b2 -= W1 * row[3];
142
                b3 -= W5 * row[3];
143
        }
144

    
145
        if (row[5]) {
146
                b0 += W5 * row[5];
147
                b1 -= W1 * row[5];
148
                b2 += W7 * row[5];
149
                b3 += W3 * row[5];
150
        }
151

    
152
        if (row[7]) {
153
                b0 += W7 * row[7];
154
                b1 -= W5 * row[7];
155
                b2 += W3 * row[7];
156
                b3 -= W1 * row[7];
157
        }
158

    
159
        row[0] = (a0 + b0) >> ROW_SHIFT;
160
        row[1] = (a1 + b1) >> ROW_SHIFT;
161
        row[2] = (a2 + b2) >> ROW_SHIFT;
162
        row[3] = (a3 + b3) >> ROW_SHIFT;
163
        row[4] = (a3 - b3) >> ROW_SHIFT;
164
        row[5] = (a2 - b2) >> ROW_SHIFT;
165
        row[6] = (a1 - b1) >> ROW_SHIFT;
166
        row[7] = (a0 - b0) >> ROW_SHIFT;
167

    
168
        return 2;
169
}
170
#else  /* not ARCH_ALPHA */
171

    
172
static inline void idctRowCondDC (int16_t * row)
173
{
174
        int a0, a1, a2, a3, b0, b1, b2, b3;
175
#ifdef FAST_64BIT
176
        uint64_t temp;
177
#else
178
        uint32_t temp;
179
#endif
180

    
181
#ifdef FAST_64BIT
182
#ifdef WORDS_BIGENDIAN
183
#define ROW0_MASK 0xffff000000000000LL
184
#else
185
#define ROW0_MASK 0xffffLL
186
#endif
187
        if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | 
188
              ((uint64_t *)row)[1]) == 0) {
189
            temp = (row[0] << 3) & 0xffff;
190
            temp += temp << 16;
191
            temp += temp << 32;
192
            ((uint64_t *)row)[0] = temp;
193
            ((uint64_t *)row)[1] = temp;
194
            return;
195
        }
196
#else
197
        if (!(((uint32_t*)row)[1] |
198
              ((uint32_t*)row)[2] |
199
              ((uint32_t*)row)[3] | 
200
              row[1])) {
201
            temp = (row[0] << 3) & 0xffff;
202
            temp += temp << 16;
203
            ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
204
                ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
205
                return;
206
        }
207
#endif
208

    
209
        a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
210
        a1 = a0;
211
        a2 = a0;
212
        a3 = a0;
213

    
214
        /* no need to optimize : gcc does it */
215
        a0 += W2 * row[2];
216
        a1 += W6 * row[2];
217
        a2 -= W6 * row[2];
218
        a3 -= W2 * row[2];
219

    
220
        MUL16(b0, W1, row[1]);
221
        MAC16(b0, W3, row[3]);
222
        MUL16(b1, W3, row[1]);
223
        MAC16(b1, -W7, row[3]);
224
        MUL16(b2, W5, row[1]);
225
        MAC16(b2, -W1, row[3]);
226
        MUL16(b3, W7, row[1]);
227
        MAC16(b3, -W5, row[3]);
228

    
229
#ifdef FAST_64BIT
230
        temp = ((uint64_t*)row)[1];
231
#else
232
        temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
233
#endif
234
        if (temp != 0) {
235
            a0 += W4*row[4] + W6*row[6];
236
            a1 += - W4*row[4] - W2*row[6];
237
            a2 += - W4*row[4] + W2*row[6];
238
            a3 += W4*row[4] - W6*row[6];
239

    
240
            MAC16(b0, W5, row[5]);
241
            MAC16(b0, W7, row[7]);
242
            
243
            MAC16(b1, -W1, row[5]);
244
            MAC16(b1, -W5, row[7]);
245
            
246
            MAC16(b2, W7, row[5]);
247
            MAC16(b2, W3, row[7]);
248
            
249
            MAC16(b3, W3, row[5]);
250
            MAC16(b3, -W1, row[7]);
251
        }
252

    
253
        row[0] = (a0 + b0) >> ROW_SHIFT;
254
        row[7] = (a0 - b0) >> ROW_SHIFT;
255
        row[1] = (a1 + b1) >> ROW_SHIFT;
256
        row[6] = (a1 - b1) >> ROW_SHIFT;
257
        row[2] = (a2 + b2) >> ROW_SHIFT;
258
        row[5] = (a2 - b2) >> ROW_SHIFT;
259
        row[3] = (a3 + b3) >> ROW_SHIFT;
260
        row[4] = (a3 - b3) >> ROW_SHIFT;
261
}
262
#endif /* not ARCH_ALPHA */
263

    
264
static inline void idctSparseCol (int16_t * col)
265
{
266
        int a0, a1, a2, a3, b0, b1, b2, b3;
267

    
268
        /* XXX: I did that only to give same values as previous code */
269
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
270
        a1 = a0;
271
        a2 = a0;
272
        a3 = a0;
273

    
274
        a0 +=  + W2*col[8*2];
275
        a1 +=  + W6*col[8*2];
276
        a2 +=  - W6*col[8*2];
277
        a3 +=  - W2*col[8*2];
278

    
279
        MUL16(b0, W1, col[8*1]);
280
        MUL16(b1, W3, col[8*1]);
281
        MUL16(b2, W5, col[8*1]);
282
        MUL16(b3, W7, col[8*1]);
283

    
284
        MAC16(b0, + W3, col[8*3]);
285
        MAC16(b1, - W7, col[8*3]);
286
        MAC16(b2, - W1, col[8*3]);
287
        MAC16(b3, - W5, col[8*3]);
288

    
289
        if(col[8*4]){
290
            a0 += + W4*col[8*4];
291
            a1 += - W4*col[8*4];
292
            a2 += - W4*col[8*4];
293
            a3 += + W4*col[8*4];
294
        }
295

    
296
        if (col[8*5]) {
297
            MAC16(b0, + W5, col[8*5]);
298
            MAC16(b1, - W1, col[8*5]);
299
            MAC16(b2, + W7, col[8*5]);
300
            MAC16(b3, + W3, col[8*5]);
301
        }
302

    
303
        if(col[8*6]){
304
            a0 += + W6*col[8*6];
305
            a1 += - W2*col[8*6];
306
            a2 += + W2*col[8*6];
307
            a3 += - W6*col[8*6];
308
        }
309

    
310
        if (col[8*7]) {
311
            MAC16(b0, + W7, col[8*7]);
312
            MAC16(b1, - W5, col[8*7]);
313
            MAC16(b2, + W3, col[8*7]);
314
            MAC16(b3, - W1, col[8*7]);
315
        }
316

    
317
        col[8*0] = (a0 + b0) >> COL_SHIFT;
318
        col[8*7] = (a0 - b0) >> COL_SHIFT;
319
        col[8*1] = (a1 + b1) >> COL_SHIFT;
320
        col[8*6] = (a1 - b1) >> COL_SHIFT;
321
        col[8*2] = (a2 + b2) >> COL_SHIFT;
322
        col[8*5] = (a2 - b2) >> COL_SHIFT;
323
        col[8*3] = (a3 + b3) >> COL_SHIFT;
324
        col[8*4] = (a3 - b3) >> COL_SHIFT;
325
}
326

    
327
#ifdef ARCH_ALPHA
328
/* If all rows but the first one are zero after row transformation,
329
   all rows will be identical after column transformation.  */
330
static inline void idctCol2(int16_t *col)
331
{
332
        int i;
333
        uint64_t l, r;
334
        uint64_t *lcol = (uint64_t *) col;
335

    
336
        for (i = 0; i < 8; ++i) {
337
                int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
338

    
339
                a0 *= W4;
340
                col[0] = a0 >> COL_SHIFT;
341
                ++col;
342
        }
343

    
344
        l = lcol[0];
345
        r = lcol[1];
346
        lcol[ 2] = l; lcol[ 3] = r;
347
        lcol[ 4] = l; lcol[ 5] = r;
348
        lcol[ 6] = l; lcol[ 7] = r;
349
        lcol[ 8] = l; lcol[ 9] = r;
350
        lcol[10] = l; lcol[11] = r;
351
        lcol[12] = l; lcol[13] = r;
352
        lcol[14] = l; lcol[15] = r;
353
}
354

    
355
/*
 * In-place IDCT of an 8x8 block of 16-bit coefficients (Alpha variant).
 *
 * Runs the row pass on all eight rows and uses the sparseness codes it
 * returns to pick the cheapest column pass:
 *   - only row 0 was nonzero: scale row 0 and copy it to every row;
 *   - every row is constant: transform column 0 only and replicate each
 *     result across its row;
 *   - otherwise: transform all eight columns.
 */
void simple_idct (short *block)
{
    int i;
    int rowsZero = 1;       /* all rows except row 0 zero */
    int rowsConstant = 1;   /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idctRowCondDC(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idctCol2(block);
    } else if (rowsConstant) {
        uint64_t *lblock = (uint64_t *) block;

        /* all columns are identical, so one column pass is enough */
        idctSparseCol(block);
        for (i = 0; i < 8; i++) {
            uint64_t v = (uint16_t) block[i * 8];

            /* replicate the 16-bit value into all four lanes */
            v += v << 16;
            v += v << 32;
            lblock[0] = v;
            lblock[1] = v;
            lblock += 2;
        }
    } else {
        for (i = 0; i < 8; i++)
            idctSparseCol(block + i);
    }
}
391

    
392
#else
393

    
394
/*
 * In-place IDCT of an 8x8 block of 16-bit coefficients: the row pass is
 * applied to each of the eight rows, then the column pass to each of the
 * eight columns.
 */
void simple_idct (short *block)
{
    int i;

    for (i = 0; i < 8; i++) {
        idctRowCondDC(block + i*8);
    }

    for (i = 0; i < 8; i++) {
        idctSparseCol(block + i);
    }
}
403

    
404
#endif
405

    
406
#undef COL_SHIFT