ffmpeg / libavcodec / alpha / simple_idct_alpha.c @ 5509bffa
History  View  Annotate  Download (7.66 KB)
1 
/*


2 
* Simple IDCT (Alpha optimized)

3 
*

4 
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>

5 
*

6 
* This library is free software; you can redistribute it and/or

7 
* modify it under the terms of the GNU Lesser General Public

8 
* License as published by the Free Software Foundation; either

9 
* version 2 of the License, or (at your option) any later version.

10 
*

11 
* This library is distributed in the hope that it will be useful,

12 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

13 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 
* Lesser General Public License for more details.

15 
*

16 
* You should have received a copy of the GNU Lesser General Public

17 
* License along with this library; if not, write to the Free Software

18 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

19 
*

20 
* based upon some outcommented c code from mpeg2dec (idct_mmx.c

21 
* written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)

22 
*

23 
* Alpha optimizations by Måns Rullgård <mru@users.sourceforge.net>

24 
* and Falk Hueffner <falk@debian.org>

25 
*/

26  
27 
#include "asm.h" 
28 
#include "../dsputil.h" 
29  
30 
/*
 * Function pointers defined elsewhere; the put/add IDCT entry points in this
 * file call through them after transforming a block.
 * NOTE(review): presumably these clamp the coefficients to pixel range and
 * store/add them into `pixels` with a row stride of `line_size` -- confirm
 * against their definitions.
 */
extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                        int line_size);
extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                        int line_size);

34  
35 
// Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
// W4 is actually exactly 16384, but using 16383 works around
// accumulating rounding errors for some encoders
#define W1 ((int_fast32_t) 22725)
#define W2 ((int_fast32_t) 21407)
#define W3 ((int_fast32_t) 19266)
#define W4 ((int_fast32_t) 16383)
#define W5 ((int_fast32_t) 12873)
#define W6 ((int_fast32_t)  8867)
#define W7 ((int_fast32_t)  4520)
// Right shifts applied to the accumulators after the row and column passes.
#define ROW_SHIFT 11
#define COL_SHIFT 20
47  
48 
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */

49 
static inline int idct_row(DCTELEM *row) 
50 
{ 
51 
int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t; 
52 
uint64_t l, r, t2; 
53 
l = ldq(row); 
54 
r = ldq(row + 4);

55  
56 
if (l == 0 && r == 0) 
57 
return 0; 
58  
59 
a0 = W4 * sextw(l) + (1 << (ROW_SHIFT  1)); 
60  
61 
if (((l & ~0xffffUL)  r) == 0) { 
62 
a0 >>= ROW_SHIFT; 
63 
t2 = (uint16_t) a0; 
64 
t2 = t2 << 16;

65 
t2 = t2 << 32;

66  
67 
stq(t2, row); 
68 
stq(t2, row + 4);

69 
return 1; 
70 
} 
71  
72 
a1 = a0; 
73 
a2 = a0; 
74 
a3 = a0; 
75  
76 
t = extwl(l, 4); /* row[2] */ 
77 
if (t != 0) { 
78 
t = sextw(t); 
79 
a0 += W2 * t; 
80 
a1 += W6 * t; 
81 
a2 = W6 * t; 
82 
a3 = W2 * t; 
83 
} 
84  
85 
t = extwl(r, 0); /* row[4] */ 
86 
if (t != 0) { 
87 
t = sextw(t); 
88 
a0 += W4 * t; 
89 
a1 = W4 * t; 
90 
a2 = W4 * t; 
91 
a3 += W4 * t; 
92 
} 
93  
94 
t = extwl(r, 4); /* row[6] */ 
95 
if (t != 0) { 
96 
t = sextw(t); 
97 
a0 += W6 * t; 
98 
a1 = W2 * t; 
99 
a2 += W2 * t; 
100 
a3 = W6 * t; 
101 
} 
102  
103 
t = extwl(l, 2); /* row[1] */ 
104 
if (t != 0) { 
105 
t = sextw(t); 
106 
b0 = W1 * t; 
107 
b1 = W3 * t; 
108 
b2 = W5 * t; 
109 
b3 = W7 * t; 
110 
} else {

111 
b0 = 0;

112 
b1 = 0;

113 
b2 = 0;

114 
b3 = 0;

115 
} 
116  
117 
t = extwl(l, 6); /* row[3] */ 
118 
if (t) {

119 
t = sextw(t); 
120 
b0 += W3 * t; 
121 
b1 = W7 * t; 
122 
b2 = W1 * t; 
123 
b3 = W5 * t; 
124 
} 
125  
126  
127 
t = extwl(r, 2); /* row[5] */ 
128 
if (t) {

129 
t = sextw(t); 
130 
b0 += W5 * t; 
131 
b1 = W1 * t; 
132 
b2 += W7 * t; 
133 
b3 += W3 * t; 
134 
} 
135  
136 
t = extwl(r, 6); /* row[7] */ 
137 
if (t) {

138 
t = sextw(t); 
139 
b0 += W7 * t; 
140 
b1 = W5 * t; 
141 
b2 += W3 * t; 
142 
b3 = W1 * t; 
143 
} 
144  
145 
row[0] = (a0 + b0) >> ROW_SHIFT;

146 
row[1] = (a1 + b1) >> ROW_SHIFT;

147 
row[2] = (a2 + b2) >> ROW_SHIFT;

148 
row[3] = (a3 + b3) >> ROW_SHIFT;

149 
row[4] = (a3  b3) >> ROW_SHIFT;

150 
row[5] = (a2  b2) >> ROW_SHIFT;

151 
row[6] = (a1  b1) >> ROW_SHIFT;

152 
row[7] = (a0  b0) >> ROW_SHIFT;

153  
154 
return 2; 
155 
} 
156  
157 
/*
 * One-dimensional column pass of the IDCT, performed in place on the eight
 * coefficients col[8 * 0], col[8 * 1], ..., col[8 * 7] (stride of 8
 * elements, i.e. one column of an 8x8 block stored row by row).
 */
static inline void idct_col(DCTELEM *col)
{
    int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;

    /* Fold the rounding bias for the final >> COL_SHIFT into the DC input:
       after the multiplication by W4 below it becomes roughly
       1 << (COL_SHIFT - 1). */
    col[0] += (1 << (COL_SHIFT - 1)) / W4;

    a0 = W4 * col[8 * 0];
    a1 = W4 * col[8 * 0];
    a2 = W4 * col[8 * 0];
    a3 = W4 * col[8 * 0];

    /* Even part: contributions of entries 2, 4 and 6 (skipped when zero). */
    if (col[8 * 2]) {
        a0 += W2 * col[8 * 2];
        a1 += W6 * col[8 * 2];
        a2 -= W6 * col[8 * 2];
        a3 -= W2 * col[8 * 2];
    }

    if (col[8 * 4]) {
        a0 += W4 * col[8 * 4];
        a1 -= W4 * col[8 * 4];
        a2 -= W4 * col[8 * 4];
        a3 += W4 * col[8 * 4];
    }

    if (col[8 * 6]) {
        a0 += W6 * col[8 * 6];
        a1 -= W2 * col[8 * 6];
        a2 += W2 * col[8 * 6];
        a3 -= W6 * col[8 * 6];
    }

    /* Odd part: contributions of entries 1, 3, 5 and 7. */
    if (col[8 * 1]) {
        b0 = W1 * col[8 * 1];
        b1 = W3 * col[8 * 1];
        b2 = W5 * col[8 * 1];
        b3 = W7 * col[8 * 1];
    } else {
        b0 = 0;
        b1 = 0;
        b2 = 0;
        b3 = 0;
    }

    if (col[8 * 3]) {
        b0 += W3 * col[8 * 3];
        b1 -= W7 * col[8 * 3];
        b2 -= W1 * col[8 * 3];
        b3 -= W5 * col[8 * 3];
    }

    if (col[8 * 5]) {
        b0 += W5 * col[8 * 5];
        b1 -= W1 * col[8 * 5];
        b2 += W7 * col[8 * 5];
        b3 += W3 * col[8 * 5];
    }

    if (col[8 * 7]) {
        b0 += W7 * col[8 * 7];
        b1 -= W5 * col[8 * 7];
        b2 += W3 * col[8 * 7];
        b3 -= W1 * col[8 * 7];
    }

    /* Final butterfly: outputs k and 7-k are a_k + b_k and a_k - b_k. */
    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
}
231  
232 
/* If all rows but the first one are zero after row transformation,

233 
all rows will be identical after column transformation. */

234 
static inline void idct_col2(DCTELEM *col) 
235 
{ 
236 
int i;

237 
uint64_t l, r; 
238  
239 
for (i = 0; i < 8; ++i) { 
240 
int_fast32_t a0 = col[i] + (1 << (COL_SHIFT  1)) / W4; 
241  
242 
a0 *= W4; 
243 
col[i] = a0 >> COL_SHIFT; 
244 
} 
245  
246 
l = ldq(col + 0 * 4); r = ldq(col + 1 * 4); 
247 
stq(l, col + 2 * 4); stq(r, col + 3 * 4); 
248 
stq(l, col + 4 * 4); stq(r, col + 5 * 4); 
249 
stq(l, col + 6 * 4); stq(r, col + 7 * 4); 
250 
stq(l, col + 8 * 4); stq(r, col + 9 * 4); 
251 
stq(l, col + 10 * 4); stq(r, col + 11 * 4); 
252 
stq(l, col + 12 * 4); stq(r, col + 13 * 4); 
253 
stq(l, col + 14 * 4); stq(r, col + 15 * 4); 
254 
} 
255  
256 
/*
 * In-place 8x8 inverse DCT on `block` (64 coefficients, row by row).
 *
 * The row pass runs on all eight rows and records how sparse the input was,
 * which selects one of three column passes:
 *   - only row 0 was nonzero: idct_col2() computes row 0 and copies it down;
 *   - every row came out constant: all columns are identical, so only
 *     column 0 is transformed and each result is replicated across its row;
 *   - otherwise: the full column transform on each of the eight columns.
 */
void simple_idct_axp(DCTELEM *block)
{
    int i;
    int rowsZero = 1;           /* all rows except row 0 zero */
    int rowsConstant = 1;       /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idct_row(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idct_col2(block);
    } else if (rowsConstant) {
        idct_col(block);
        /* Two rows per iteration: block[0] and block[8] are the column-0
           results of the current row pair. */
        for (i = 0; i < 8; i += 2) {
            uint64_t v = (uint16_t) block[0];
            uint64_t w = (uint16_t) block[8];

            /* Replicate each 16-bit value into all four lanes. */
            v |= v << 16;
            w |= w << 16;
            v |= v << 32;
            w |= w << 32;
            stq(v, block + 0 * 4);
            stq(v, block + 1 * 4);
            stq(w, block + 2 * 4);
            stq(w, block + 3 * 4);
            block += 4 * 4;     /* advance by two rows */
        }
    } else {
        for (i = 0; i < 8; i++)
            idct_col(block + i);
    }
}
295  
296 
/*
 * Runs simple_idct_axp() on `block` in place, then passes the transformed
 * block, `dest` and `line_size` to put_pixels_clamped_axp_p.
 * NOTE(review): presumably that stores the clamped result into `dest` with
 * a row stride of `line_size` -- confirm against its definition.
 */
void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
301  
302 
/*
 * Runs simple_idct_axp() on `block` in place, then passes the transformed
 * block, `dest` and `line_size` to add_pixels_clamped_axp_p.
 * NOTE(review): presumably that adds the result to the pixels already in
 * `dest` (row stride `line_size`) with clamping -- confirm against its
 * definition.
 */
void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}