Statistics
| Branch: | Revision:

ffmpeg / libavcodec / alpha / simple_idct_alpha.c @ 5509bffa

History | View | Annotate | Download (7.66 KB)

1
/*
2
 * Simple IDCT (Alpha optimized)
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * based upon some commented-out C code from mpeg2dec (idct_mmx.c
21
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
22
 *
23
 * Alpha optimizations by Måns Rullgård <mru@users.sourceforge.net>
24
 *                     and Falk Hueffner <falk@debian.org>
25
 */
26

    
27
#include "asm.h"
28
#include "../dsputil.h"
29

    
30
/*
 * Function pointers defined outside this file.  The put/add wrappers at the
 * end of this file call through them once the IDCT has been applied to the
 * block.
 */
extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block,
                                        uint8_t *pixels, int line_size);
extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block,
                                        uint8_t *pixels, int line_size);
34

    
35
/*
 * Fixed-point IDCT weights: Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14).
 *
 * W4 is actually exactly 16384; 16383 is used deliberately because it
 * works around accumulating rounding errors for some encoders.
 */
#define W1 ((int_fast32_t) 22725)
#define W2 ((int_fast32_t) 21407)
#define W3 ((int_fast32_t) 19266)
#define W4 ((int_fast32_t) 16383)
#define W5 ((int_fast32_t) 12873)
#define W6 ((int_fast32_t)  8867)
#define W7 ((int_fast32_t)  4520)

/* Right shifts applied to the results of the row pass and the column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20
47

    
48
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
49
static inline int idct_row(DCTELEM *row)
50
{
51
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
52
    uint64_t l, r, t2;
53
    l = ldq(row);
54
    r = ldq(row + 4);
55

    
56
    if (l == 0 && r == 0)
57
        return 0;
58

    
59
    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
60

    
61
    if (((l & ~0xffffUL) | r) == 0) {
62
        a0 >>= ROW_SHIFT;
63
        t2 = (uint16_t) a0;
64
        t2 |= t2 << 16;
65
        t2 |= t2 << 32;
66

    
67
        stq(t2, row);
68
        stq(t2, row + 4);
69
        return 1;
70
    }
71

    
72
    a1 = a0;
73
    a2 = a0;
74
    a3 = a0;
75

    
76
    t = extwl(l, 4);            /* row[2] */
77
    if (t != 0) {
78
        t = sextw(t);
79
        a0 += W2 * t;
80
        a1 += W6 * t;
81
        a2 -= W6 * t;
82
        a3 -= W2 * t;
83
    }
84

    
85
    t = extwl(r, 0);            /* row[4] */
86
    if (t != 0) {
87
        t = sextw(t);
88
        a0 += W4 * t;
89
        a1 -= W4 * t;
90
        a2 -= W4 * t;
91
        a3 += W4 * t;
92
    }
93

    
94
    t = extwl(r, 4);            /* row[6] */
95
    if (t != 0) {
96
        t = sextw(t);
97
        a0 += W6 * t;
98
        a1 -= W2 * t;
99
        a2 += W2 * t;
100
        a3 -= W6 * t;
101
    }
102

    
103
    t = extwl(l, 2);            /* row[1] */
104
    if (t != 0) {
105
        t = sextw(t);
106
        b0 = W1 * t;
107
        b1 = W3 * t;
108
        b2 = W5 * t;
109
        b3 = W7 * t;
110
    } else {
111
        b0 = 0;
112
        b1 = 0;
113
        b2 = 0;
114
        b3 = 0;
115
    }
116

    
117
    t = extwl(l, 6);            /* row[3] */
118
    if (t) {
119
        t = sextw(t);
120
        b0 += W3 * t;
121
        b1 -= W7 * t;
122
        b2 -= W1 * t;
123
        b3 -= W5 * t;
124
    }
125

    
126

    
127
    t = extwl(r, 2);            /* row[5] */
128
    if (t) {
129
        t = sextw(t);
130
        b0 += W5 * t;
131
        b1 -= W1 * t;
132
        b2 += W7 * t;
133
        b3 += W3 * t;
134
    }
135

    
136
    t = extwl(r, 6);            /* row[7] */
137
    if (t) {
138
        t = sextw(t);
139
        b0 += W7 * t;
140
        b1 -= W5 * t;
141
        b2 += W3 * t;
142
        b3 -= W1 * t;
143
    }
144

    
145
    row[0] = (a0 + b0) >> ROW_SHIFT;
146
    row[1] = (a1 + b1) >> ROW_SHIFT;
147
    row[2] = (a2 + b2) >> ROW_SHIFT;
148
    row[3] = (a3 + b3) >> ROW_SHIFT;
149
    row[4] = (a3 - b3) >> ROW_SHIFT;
150
    row[5] = (a2 - b2) >> ROW_SHIFT;
151
    row[6] = (a1 - b1) >> ROW_SHIFT;
152
    row[7] = (a0 - b0) >> ROW_SHIFT;
153

    
154
    return 2;
155
}
156

    
157
/*
 * In-place column pass of the IDCT.  col points at the first element of a
 * column; the eight elements of the column are read and written with a
 * stride of 8 (col[8 * 0] .. col[8 * 7]).
 */
static inline void idct_col(DCTELEM *col)
{
    int_fast32_t even0, even1, even2, even3;
    int_fast32_t odd0 = 0, odd1 = 0, odd2 = 0, odd3 = 0;
    int_fast32_t c;

    /* Add the rounding bias for the final shift to the first element,
       pre-divided by W4 because that element is multiplied by W4 below. */
    col[8 * 0] += (1 << (COL_SHIFT - 1)) / W4;

    even0 = even1 = even2 = even3 = W4 * col[8 * 0];

    /* Even-indexed inputs feed the even accumulators. */
    c = col[8 * 2];
    if (c) {
        even0 += W2 * c;
        even1 += W6 * c;
        even2 -= W6 * c;
        even3 -= W2 * c;
    }

    c = col[8 * 4];
    if (c) {
        even0 += W4 * c;
        even1 -= W4 * c;
        even2 -= W4 * c;
        even3 += W4 * c;
    }

    c = col[8 * 6];
    if (c) {
        even0 += W6 * c;
        even1 -= W2 * c;
        even2 += W2 * c;
        even3 -= W6 * c;
    }

    /* Odd-indexed inputs feed the odd accumulators, which start at zero. */
    c = col[8 * 1];
    if (c) {
        odd0 += W1 * c;
        odd1 += W3 * c;
        odd2 += W5 * c;
        odd3 += W7 * c;
    }

    c = col[8 * 3];
    if (c) {
        odd0 += W3 * c;
        odd1 -= W7 * c;
        odd2 -= W1 * c;
        odd3 -= W5 * c;
    }

    c = col[8 * 5];
    if (c) {
        odd0 += W5 * c;
        odd1 -= W1 * c;
        odd2 += W7 * c;
        odd3 += W3 * c;
    }

    c = col[8 * 7];
    if (c) {
        odd0 += W7 * c;
        odd1 -= W5 * c;
        odd2 += W3 * c;
        odd3 -= W1 * c;
    }

    /* All inputs have been consumed; store the eight results in order. */
    col[8 * 0] = (even0 + odd0) >> COL_SHIFT;
    col[8 * 1] = (even1 + odd1) >> COL_SHIFT;
    col[8 * 2] = (even2 + odd2) >> COL_SHIFT;
    col[8 * 3] = (even3 + odd3) >> COL_SHIFT;
    col[8 * 4] = (even3 - odd3) >> COL_SHIFT;
    col[8 * 5] = (even2 - odd2) >> COL_SHIFT;
    col[8 * 6] = (even1 - odd1) >> COL_SHIFT;
    col[8 * 7] = (even0 - odd0) >> COL_SHIFT;
}
231

    
232
/* If all rows but the first one are zero after row transformation,
233
   all rows will be identical after column transformation.  */
234
static inline void idct_col2(DCTELEM *col)
235
{
236
    int i;
237
    uint64_t l, r;
238

    
239
    for (i = 0; i < 8; ++i) {
240
        int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
241

    
242
        a0 *= W4;
243
        col[i] = a0 >> COL_SHIFT;
244
    }
245

    
246
    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
247
    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
248
    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
249
    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
250
    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
251
    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
252
    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
253
    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
254
}
255

    
256
/*
 * In-place 8x8 IDCT of block (eight rows of eight coefficients).
 *
 * The row pass runs first and reports how sparse each row was; that
 * information selects the cheapest applicable column pass.
 */
void simple_idct_axp(DCTELEM *block)
{
    int only_first_row = 1;     /* every row except row 0 was all zero */
    int rows_constant  = 1;     /* every row holds one repeated value */
    int i;

    for (i = 0; i < 8; i++) {
        int sparseness = idct_row(block + 8 * i);

        if (sparseness == 2)
            rows_constant = 0;
        if (sparseness > 0 && i > 0)
            only_first_row = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!rows_constant) {
        /* General case: transform each of the eight columns. */
        for (i = 0; i < 8; i++)
            idct_col(block + i);
        return;
    }

    /* Each row is constant, so all columns are equal: transform the first
       column only and spread each result across its row. */
    idct_col(block);
    for (i = 0; i < 8; i++) {
        DCTELEM *line = block + 8 * i;
        uint64_t fill = (uint16_t) line[0];

        fill |= fill << 16;
        fill |= fill << 32;
        stq(fill, line);
        stq(fill, line + 4);
    }
}
295

    
296
/*
 * Apply simple_idct_axp() to block in place, then pass the transformed
 * block, dest and line_size to the routine that put_pixels_clamped_axp_p
 * points to.
 */
void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*put_pixels_clamped_axp_p)(block, dest, line_size);
}
301

    
302
/*
 * Apply simple_idct_axp() to block in place, then pass the transformed
 * block, dest and line_size to the routine that add_pixels_clamped_axp_p
 * points to.
 */
void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*add_pixels_clamped_axp_p)(block, dest, line_size);
}