Statistics
| Branch: | Revision:

ffmpeg / libavcodec / alpha / simple_idct_alpha.c @ b550bfaa

History | View | Annotate | Download (7.68 KB)

1
/*
2
 * Simple IDCT (Alpha optimized)
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 *
22
 * based upon some commented-out C code from mpeg2dec (idct_mmx.c
23
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
24
 *
25
 * Alpha optimizations by Måns Rullgård <mru@users.sourceforge.net>
26
 *                     and Falk Hueffner <falk@debian.org>
27
 */
28

    
29
#include "asm.h"
30
#include "dsputil.h"
31

    
32
extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
33
                                        int line_size);
34
extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
35
                                        int line_size);
36

    
37
/*
 * Fixed-point transform coefficients:
 *     Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
 *
 * W4 is mathematically exactly 16384; 16383 is used on purpose because it
 * works around accumulating rounding errors for some encoders.
 */
#define W1 ((int_fast32_t) 22725)
#define W2 ((int_fast32_t) 21407)
#define W3 ((int_fast32_t) 19266)
#define W4 ((int_fast32_t) 16383)
#define W5 ((int_fast32_t) 12873)
#define W6 ((int_fast32_t)  8867)
#define W7 ((int_fast32_t)  4520)

/* Right shifts applied to the results of the row pass and the column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20
49

    
50
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
51
static inline int idct_row(DCTELEM *row)
52
{
53
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t;
54
    uint64_t l, r, t2;
55
    l = ldq(row);
56
    r = ldq(row + 4);
57

    
58
    if (l == 0 && r == 0)
59
        return 0;
60

    
61
    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
62

    
63
    if (((l & ~0xffffUL) | r) == 0) {
64
        a0 >>= ROW_SHIFT;
65
        t2 = (uint16_t) a0;
66
        t2 |= t2 << 16;
67
        t2 |= t2 << 32;
68

    
69
        stq(t2, row);
70
        stq(t2, row + 4);
71
        return 1;
72
    }
73

    
74
    a1 = a0;
75
    a2 = a0;
76
    a3 = a0;
77

    
78
    t = extwl(l, 4);            /* row[2] */
79
    if (t != 0) {
80
        t = sextw(t);
81
        a0 += W2 * t;
82
        a1 += W6 * t;
83
        a2 -= W6 * t;
84
        a3 -= W2 * t;
85
    }
86

    
87
    t = extwl(r, 0);            /* row[4] */
88
    if (t != 0) {
89
        t = sextw(t);
90
        a0 += W4 * t;
91
        a1 -= W4 * t;
92
        a2 -= W4 * t;
93
        a3 += W4 * t;
94
    }
95

    
96
    t = extwl(r, 4);            /* row[6] */
97
    if (t != 0) {
98
        t = sextw(t);
99
        a0 += W6 * t;
100
        a1 -= W2 * t;
101
        a2 += W2 * t;
102
        a3 -= W6 * t;
103
    }
104

    
105
    t = extwl(l, 2);            /* row[1] */
106
    if (t != 0) {
107
        t = sextw(t);
108
        b0 = W1 * t;
109
        b1 = W3 * t;
110
        b2 = W5 * t;
111
        b3 = W7 * t;
112
    } else {
113
        b0 = 0;
114
        b1 = 0;
115
        b2 = 0;
116
        b3 = 0;
117
    }
118

    
119
    t = extwl(l, 6);            /* row[3] */
120
    if (t) {
121
        t = sextw(t);
122
        b0 += W3 * t;
123
        b1 -= W7 * t;
124
        b2 -= W1 * t;
125
        b3 -= W5 * t;
126
    }
127

    
128

    
129
    t = extwl(r, 2);            /* row[5] */
130
    if (t) {
131
        t = sextw(t);
132
        b0 += W5 * t;
133
        b1 -= W1 * t;
134
        b2 += W7 * t;
135
        b3 += W3 * t;
136
    }
137

    
138
    t = extwl(r, 6);            /* row[7] */
139
    if (t) {
140
        t = sextw(t);
141
        b0 += W7 * t;
142
        b1 -= W5 * t;
143
        b2 += W3 * t;
144
        b3 -= W1 * t;
145
    }
146

    
147
    row[0] = (a0 + b0) >> ROW_SHIFT;
148
    row[1] = (a1 + b1) >> ROW_SHIFT;
149
    row[2] = (a2 + b2) >> ROW_SHIFT;
150
    row[3] = (a3 + b3) >> ROW_SHIFT;
151
    row[4] = (a3 - b3) >> ROW_SHIFT;
152
    row[5] = (a2 - b2) >> ROW_SHIFT;
153
    row[6] = (a1 - b1) >> ROW_SHIFT;
154
    row[7] = (a0 - b0) >> ROW_SHIFT;
155

    
156
    return 2;
157
}
158

    
159
/**
 * Inverse-transform one column in place.
 *
 * The eight inputs are read at a stride of 8 elements (col[0], col[8], ...,
 * col[56]) and the eight results are written back to the same positions.
 *
 * @param col pointer to the top element of the column
 */
static inline void idct_col(DCTELEM *col)
{
    int_fast32_t even0, even1, even2, even3;
    int_fast32_t odd0 = 0, odd1 = 0, odd2 = 0, odd3 = 0;
    int_fast32_t c;

    /* Fold the rounding term for the final shift into the first entry
       before it is multiplied by W4. */
    col[8 * 0] += (1 << (COL_SHIFT - 1)) / W4;

    even0 = even1 = even2 = even3 = W4 * col[8 * 0];

    /* Even-indexed inputs; zero inputs skip their multiplies. */
    c = col[8 * 2];
    if (c) {
        even0 += W2 * c;
        even1 += W6 * c;
        even2 -= W6 * c;
        even3 -= W2 * c;
    }

    c = col[8 * 4];
    if (c) {
        even0 += W4 * c;
        even1 -= W4 * c;
        even2 -= W4 * c;
        even3 += W4 * c;
    }

    c = col[8 * 6];
    if (c) {
        even0 += W6 * c;
        even1 -= W2 * c;
        even2 += W2 * c;
        even3 -= W6 * c;
    }

    /* Odd-indexed inputs accumulate onto the zero-initialised odd terms. */
    c = col[8 * 1];
    if (c) {
        odd0 += W1 * c;
        odd1 += W3 * c;
        odd2 += W5 * c;
        odd3 += W7 * c;
    }

    c = col[8 * 3];
    if (c) {
        odd0 += W3 * c;
        odd1 -= W7 * c;
        odd2 -= W1 * c;
        odd3 -= W5 * c;
    }

    c = col[8 * 5];
    if (c) {
        odd0 += W5 * c;
        odd1 -= W1 * c;
        odd2 += W7 * c;
        odd3 += W3 * c;
    }

    c = col[8 * 7];
    if (c) {
        odd0 += W7 * c;
        odd1 -= W5 * c;
        odd2 += W3 * c;
        odd3 -= W1 * c;
    }

    /* Outputs k and 7 - k are the sum and difference of the same pair. */
    col[8 * 0] = (even0 + odd0) >> COL_SHIFT;
    col[8 * 1] = (even1 + odd1) >> COL_SHIFT;
    col[8 * 2] = (even2 + odd2) >> COL_SHIFT;
    col[8 * 3] = (even3 + odd3) >> COL_SHIFT;
    col[8 * 4] = (even3 - odd3) >> COL_SHIFT;
    col[8 * 5] = (even2 - odd2) >> COL_SHIFT;
    col[8 * 6] = (even1 - odd1) >> COL_SHIFT;
    col[8 * 7] = (even0 - odd0) >> COL_SHIFT;
}
233

    
234
/* If all rows but the first one are zero after row transformation,
235
   all rows will be identical after column transformation.  */
236
static inline void idct_col2(DCTELEM *col)
237
{
238
    int i;
239
    uint64_t l, r;
240

    
241
    for (i = 0; i < 8; ++i) {
242
        int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
243

    
244
        a0 *= W4;
245
        col[i] = a0 >> COL_SHIFT;
246
    }
247

    
248
    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
249
    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
250
    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
251
    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
252
    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
253
    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
254
    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
255
    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
256
}
257

    
258
/**
 * In-place inverse DCT of an 8x8 block.
 *
 * All eight rows are transformed first.  The codes returned by idct_row()
 * then select the cheapest column pass:
 *  - only row 0 was nonzero: idct_col2() scales row 0 and copies it to the
 *    other rows;
 *  - every row is either zero or one constant value: all columns are equal,
 *    so a single column is transformed and each result is spread across
 *    its row;
 *  - otherwise: all eight columns are transformed individually.
 *
 * @param block the 64 coefficients, row after row; overwritten with the result
 */
void simple_idct_axp(DCTELEM *block)
{
    int n;
    int only_first_row = 1;     /* rows 1..7 were all zero */
    int rows_are_flat  = 1;     /* no row needed the full transform */

    for (n = 0; n < 8; n++) {
        int kind = idct_row(block + 8 * n);

        if (kind > 0 && n > 0)
            only_first_row = 0;
        if (kind == 2)
            rows_are_flat = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!rows_are_flat) {
        for (n = 0; n < 8; n++)
            idct_col(block + n);
        return;
    }

    /* Every row holds one repeated value, so column 0 stands for all
       columns: transform it once, then replicate each of its results into
       the four 16-bit lanes of a 64-bit word and store that word over both
       halves of the corresponding row. */
    idct_col(block);
    for (n = 0; n < 8; n++) {
        DCTELEM *line = block + 8 * n;
        uint64_t fill = (uint16_t) line[0];

        fill |= fill << 16;
        fill |= fill << 32;
        stq(fill, line);
        stq(fill, line + 4);
    }
}
297

    
298
/**
 * Inverse-transform block in place with simple_idct_axp(), then pass the
 * result to the routine installed in put_pixels_clamped_axp_p together with
 * the destination and its line size.
 *
 * @param dest      destination pixels, forwarded unchanged
 * @param line_size forwarded unchanged
 * @param block     64 coefficients; overwritten by the transform
 */
void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*put_pixels_clamped_axp_p)(block, dest, line_size);
}
303

    
304
/**
 * Inverse-transform block in place with simple_idct_axp(), then pass the
 * result to the routine installed in add_pixels_clamped_axp_p together with
 * the destination and its line size.
 *
 * @param dest      destination pixels, forwarded unchanged
 * @param line_size forwarded unchanged
 * @param block     64 coefficients; overwritten by the transform
 */
void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    (*add_pixels_clamped_axp_p)(block, dest, line_size);
}