Statistics
| Branch: | Revision:

ffmpeg / libavcodec / alpha / simple_idct_alpha.c @ 186447f8

History | View | Annotate | Download (7.63 KB)

1 8b313a47 Falk Hüffner
/*
2
 * Simple IDCT (Alpha optimized)
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
 *
20
 * based upon some commented-out C code from mpeg2dec (idct_mmx.c
21
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
22
 *
23
 * Alpha optimizations by Måns Rullgård <mru@users.sourceforge.net>
24
 *                     and Falk Hueffner <falk@debian.org>
25
 */
26
27
#include "asm.h"
28
#include "../dsputil.h"
29
30 3354b0c9 Falk Hüffner
/* Function pointers defined outside this file.  simple_idct_put_axp() and
   simple_idct_add_axp() call through them once the block has been
   transformed.  */
extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block,
                                        uint8_t *pixels, int line_size);
extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block,
                                        uint8_t *pixels, int line_size);
34
35 8b313a47 Falk Hüffner
// Fixed-point IDCT coefficients:
//     Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
// W4 is the one exception: its exact value is 16384, but 16383 is used
// because that works around accumulating rounding errors for some encoders.
#define W1 ((int_fast32_t) 22725)
#define W2 ((int_fast32_t) 21407)
#define W3 ((int_fast32_t) 19266)
#define W4 ((int_fast32_t) 16383)
#define W5 ((int_fast32_t) 12873)
#define W6 ((int_fast32_t)  8867)
#define W7 ((int_fast32_t)  4520)

// Right-shift amounts applied to the results of the row pass and of the
// column pass respectively.
#define ROW_SHIFT 11
#define COL_SHIFT 20
47
48
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
49
static inline int idct_row(DCTELEM *row)
50
{
51
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
52 df081b2f Michael Niedermayer
    uint64_t l, r, t2;
53 8b313a47 Falk Hüffner
    l = ldq(row);
54
    r = ldq(row + 4);
55
56
    if (l == 0 && r == 0)
57
        return 0;
58
    
59
    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
60
61
    if (((l & ~0xffffUL) | r) == 0) {
62
        a0 >>= ROW_SHIFT;
63 df081b2f Michael Niedermayer
        t2 = (uint16_t) a0;
64
        t2 |= t2 << 16;
65
        t2 |= t2 << 32;
66 8b313a47 Falk Hüffner
        
67 df081b2f Michael Niedermayer
        stq(t2, row);
68
        stq(t2, row + 4);
69 8b313a47 Falk Hüffner
        return 1;
70
    }
71
72
    a1 = a0;
73
    a2 = a0;
74
    a3 = a0;
75
76
    t = extwl(l, 4);            /* row[2] */
77
    if (t != 0) {
78
        t = sextw(t);
79
        a0 += W2 * t;
80
        a1 += W6 * t;
81
        a2 -= W6 * t;
82
        a3 -= W2 * t;
83
    }
84
85
    t = extwl(r, 0);            /* row[4] */
86
    if (t != 0) {
87
        t = sextw(t);
88
        a0 += W4 * t;
89
        a1 -= W4 * t;
90
        a2 -= W4 * t;
91
        a3 += W4 * t;
92
    }
93
94
    t = extwl(r, 4);            /* row[6] */
95
    if (t != 0) {
96
        t = sextw(t);
97
        a0 += W6 * t;
98
        a1 -= W2 * t;
99
        a2 += W2 * t;
100
        a3 -= W6 * t;
101
    }
102
103
    t = extwl(l, 2);            /* row[1] */
104
    if (t != 0) {
105
        t = sextw(t);
106
        b0 = W1 * t;
107
        b1 = W3 * t;
108
        b2 = W5 * t;
109
        b3 = W7 * t;
110
    } else {
111
        b0 = 0;
112
        b1 = 0;
113
        b2 = 0;
114
        b3 = 0;
115
    }
116
117
    t = extwl(l, 6);            /* row[3] */
118
    if (t) {
119
        t = sextw(t);
120
        b0 += W3 * t;
121
        b1 -= W7 * t;
122
        b2 -= W1 * t;
123
        b3 -= W5 * t;
124
    }
125
126
    
127
    t = extwl(r, 2);            /* row[5] */
128
    if (t) {
129
        t = sextw(t);
130
        b0 += W5 * t;
131
        b1 -= W1 * t;
132
        b2 += W7 * t;
133
        b3 += W3 * t;
134
    }
135
136
    t = extwl(r, 6);            /* row[7] */
137
    if (t) {
138
        t = sextw(t);
139
        b0 += W7 * t;
140
        b1 -= W5 * t;
141
        b2 += W3 * t;
142
        b3 -= W1 * t;
143
    }
144
145
    row[0] = (a0 + b0) >> ROW_SHIFT;
146
    row[1] = (a1 + b1) >> ROW_SHIFT;
147
    row[2] = (a2 + b2) >> ROW_SHIFT;
148
    row[3] = (a3 + b3) >> ROW_SHIFT;
149
    row[4] = (a3 - b3) >> ROW_SHIFT;
150
    row[5] = (a2 - b2) >> ROW_SHIFT;
151
    row[6] = (a1 - b1) >> ROW_SHIFT;
152
    row[7] = (a0 - b0) >> ROW_SHIFT;
153
154
    return 2;
155
}
156
157
/*
 * In-place 1-D inverse DCT over one column.  The eight inputs are read from
 * col[8 * k], k = 0..7, and the eight results are written back to the same
 * slots.
 */
static inline void idct_col(DCTELEM *col)
{
    int_fast32_t e0, e1, e2, e3;                    /* even-index terms */
    int_fast32_t o0 = 0, o1 = 0, o2 = 0, o3 = 0;    /* odd-index terms */
    int_fast32_t in1, in2, in3, in4, in5, in6, in7;

    /* Add the rounding bias for the final shift to the first entry, divided
       by W4 because that entry gets multiplied by W4 right below.  The sum
       is stored back into the DCTELEM slot before it is used.  */
    col[8 * 0] += (1 << (COL_SHIFT - 1)) / W4;

    in1 = col[8 * 1];
    in2 = col[8 * 2];
    in3 = col[8 * 3];
    in4 = col[8 * 4];
    in5 = col[8 * 5];
    in6 = col[8 * 6];
    in7 = col[8 * 7];

    e0 = e1 = e2 = e3 = W4 * col[8 * 0];

    /* Even-index inputs; zero inputs are skipped. */
    if (in2) {
        e0 += W2 * in2;
        e1 += W6 * in2;
        e2 -= W6 * in2;
        e3 -= W2 * in2;
    }

    if (in4) {
        e0 += W4 * in4;
        e1 -= W4 * in4;
        e2 -= W4 * in4;
        e3 += W4 * in4;
    }

    if (in6) {
        e0 += W6 * in6;
        e1 -= W2 * in6;
        e2 += W2 * in6;
        e3 -= W6 * in6;
    }

    /* Odd-index inputs; the accumulators start out at zero. */
    if (in1) {
        o0 += W1 * in1;
        o1 += W3 * in1;
        o2 += W5 * in1;
        o3 += W7 * in1;
    }

    if (in3) {
        o0 += W3 * in3;
        o1 -= W7 * in3;
        o2 -= W1 * in3;
        o3 -= W5 * in3;
    }

    if (in5) {
        o0 += W5 * in5;
        o1 -= W1 * in5;
        o2 += W7 * in5;
        o3 += W3 * in5;
    }

    if (in7) {
        o0 += W7 * in7;
        o1 -= W5 * in7;
        o2 += W3 * in7;
        o3 -= W1 * in7;
    }

    /* Butterfly: outputs k and 7 - k share the same even/odd pair. */
    col[8 * 0] = (e0 + o0) >> COL_SHIFT;
    col[8 * 1] = (e1 + o1) >> COL_SHIFT;
    col[8 * 2] = (e2 + o2) >> COL_SHIFT;
    col[8 * 3] = (e3 + o3) >> COL_SHIFT;
    col[8 * 4] = (e3 - o3) >> COL_SHIFT;
    col[8 * 5] = (e2 - o2) >> COL_SHIFT;
    col[8 * 6] = (e1 - o1) >> COL_SHIFT;
    col[8 * 7] = (e0 - o0) >> COL_SHIFT;
}
231
232
/* If all rows but the first one are zero after row transformation,
233
   all rows will be identical after column transformation.  */
234
static inline void idct_col2(DCTELEM *col)
235
{
236
    int i;
237
    uint64_t l, r;
238
    uint64_t *lcol = (uint64_t *) col;
239
240
    for (i = 0; i < 8; ++i) {
241
        int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
242
243
        a0 *= W4;
244
        col[0] = a0 >> COL_SHIFT;
245
        ++col;
246
    }
247
248
    l = lcol[0];
249
    r = lcol[1];
250
    lcol[ 2] = l; lcol[ 3] = r;
251
    lcol[ 4] = l; lcol[ 5] = r;
252
    lcol[ 6] = l; lcol[ 7] = r;
253
    lcol[ 8] = l; lcol[ 9] = r;
254
    lcol[10] = l; lcol[11] = r;
255
    lcol[12] = l; lcol[13] = r;
256
    lcol[14] = l; lcol[15] = r;
257
}
258
259
/*
 * In-place 2-D inverse DCT of the 8x8 coefficient block at block.
 *
 * The row pass runs over all eight rows first.  The sparseness classes
 * reported by idct_row() then select the cheapest column pass that applies.
 */
void simple_idct_axp(DCTELEM *block)
{
    int only_first_row = 1;     /* rows 1..7 were all zero */
    int flat_rows = 1;          /* no row needed the full row transform */
    uint64_t *quad;
    int i;

    for (i = 0; i < 8; i++) {
        const int kind = idct_row(block + 8 * i);

        if (kind == 2)
            flat_rows = 0;
        if (kind > 0 && i > 0)
            only_first_row = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!flat_rows) {
        /* General case: transform every column. */
        for (i = 0; i < 8; i++)
            idct_col(block + i);
        return;
    }

    /* Every row holds a single repeated value, so all columns are equal:
       transform column 0 only and spread each result across its row.  */
    idct_col(block);

    quad = (uint64_t *) block;
    for (i = 0; i < 8; i++) {
        uint64_t v = (uint16_t) block[8 * i];

        v |= v << 16;
        v |= v << 32;
        quad[2 * i]     = v;
        quad[2 * i + 1] = v;
    }
}
300
301
/*
 * Inverse-transform block in place, then pass the result together with dest
 * and line_size to the routine behind put_pixels_clamped_axp_p.
 */
void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*put_pixels_clamped_axp_p)(block, dest, line_size);
}
306
307
/*
 * Inverse-transform block in place, then pass the result together with dest
 * and line_size to the routine behind add_pixels_clamped_axp_p.
 */
void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*add_pixels_clamped_axp_p)(block, dest, line_size);
}