ffmpeg / libavcodec / alpha / simple_idct_alpha.c @ ad1862d6
History  View  Annotate  Download (7.31 KB)
1 
/*
 * Simple IDCT (Alpha optimized)
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * based upon some outcommented C code from mpeg2dec (idct_mmx.c
 * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
 *
 * Alpha optimizations by Måns Rullgård <mans@mansr.com>
 *                     and Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

28  
29 
#include "libavcodec/dsputil.h" 
30 
#include "dsputil_alpha.h" 
31 
#include "asm.h" 
32  
33 
// Fixed-point coefficients: Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
// W4 is actually exactly 16384, but using 16383 works around
// accumulating rounding errors for some encoders
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
// Right shifts applied to the accumulated sums of the row and column passes
#define ROW_SHIFT 11
#define COL_SHIFT 20
45  
46 
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */

47 
static inline int idct_row(DCTELEM *row) 
48 
{ 
49 
int a0, a1, a2, a3, b0, b1, b2, b3, t;

50 
uint64_t l, r, t2; 
51 
l = ldq(row); 
52 
r = ldq(row + 4);

53  
54 
if (l == 0 && r == 0) 
55 
return 0; 
56  
57 
a0 = W4 * sextw(l) + (1 << (ROW_SHIFT  1)); 
58  
59 
if (((l & ~0xffffUL)  r) == 0) { 
60 
a0 >>= ROW_SHIFT; 
61 
t2 = (uint16_t) a0; 
62 
t2 = t2 << 16;

63 
t2 = t2 << 32;

64  
65 
stq(t2, row); 
66 
stq(t2, row + 4);

67 
return 1; 
68 
} 
69  
70 
a1 = a0; 
71 
a2 = a0; 
72 
a3 = a0; 
73  
74 
t = extwl(l, 4); /* row[2] */ 
75 
if (t != 0) { 
76 
t = sextw(t); 
77 
a0 += W2 * t; 
78 
a1 += W6 * t; 
79 
a2 = W6 * t; 
80 
a3 = W2 * t; 
81 
} 
82  
83 
t = extwl(r, 0); /* row[4] */ 
84 
if (t != 0) { 
85 
t = sextw(t); 
86 
a0 += W4 * t; 
87 
a1 = W4 * t; 
88 
a2 = W4 * t; 
89 
a3 += W4 * t; 
90 
} 
91  
92 
t = extwl(r, 4); /* row[6] */ 
93 
if (t != 0) { 
94 
t = sextw(t); 
95 
a0 += W6 * t; 
96 
a1 = W2 * t; 
97 
a2 += W2 * t; 
98 
a3 = W6 * t; 
99 
} 
100  
101 
t = extwl(l, 2); /* row[1] */ 
102 
if (t != 0) { 
103 
t = sextw(t); 
104 
b0 = W1 * t; 
105 
b1 = W3 * t; 
106 
b2 = W5 * t; 
107 
b3 = W7 * t; 
108 
} else {

109 
b0 = 0;

110 
b1 = 0;

111 
b2 = 0;

112 
b3 = 0;

113 
} 
114  
115 
t = extwl(l, 6); /* row[3] */ 
116 
if (t) {

117 
t = sextw(t); 
118 
b0 += W3 * t; 
119 
b1 = W7 * t; 
120 
b2 = W1 * t; 
121 
b3 = W5 * t; 
122 
} 
123  
124  
125 
t = extwl(r, 2); /* row[5] */ 
126 
if (t) {

127 
t = sextw(t); 
128 
b0 += W5 * t; 
129 
b1 = W1 * t; 
130 
b2 += W7 * t; 
131 
b3 += W3 * t; 
132 
} 
133  
134 
t = extwl(r, 6); /* row[7] */ 
135 
if (t) {

136 
t = sextw(t); 
137 
b0 += W7 * t; 
138 
b1 = W5 * t; 
139 
b2 += W3 * t; 
140 
b3 = W1 * t; 
141 
} 
142  
143 
row[0] = (a0 + b0) >> ROW_SHIFT;

144 
row[1] = (a1 + b1) >> ROW_SHIFT;

145 
row[2] = (a2 + b2) >> ROW_SHIFT;

146 
row[3] = (a3 + b3) >> ROW_SHIFT;

147 
row[4] = (a3  b3) >> ROW_SHIFT;

148 
row[5] = (a2  b2) >> ROW_SHIFT;

149 
row[6] = (a1  b1) >> ROW_SHIFT;

150 
row[7] = (a0  b0) >> ROW_SHIFT;

151  
152 
return 2; 
153 
} 
154  
155 
/*
 * Column pass of the IDCT, done in place on one column of an 8x8 block.
 * col points at the column's entry in the first row; successive entries
 * are 8 elements apart.  Note that col[0] is modified (rounding bias
 * added) before it is read.
 */
static inline void idct_col(DCTELEM *col)
{
    int a0, a1, a2, a3, b0, b1, b2, b3;

    /* Fold the rounding constant for the final shift into the first
       entry; dividing by W4 compensates for the multiply just below. */
    col[0] += (1 << (COL_SHIFT - 1)) / W4;

    a0 = W4 * col[8 * 0];
    a1 = W4 * col[8 * 0];
    a2 = W4 * col[8 * 0];
    a3 = W4 * col[8 * 0];

    /* Even-indexed inputs accumulate into a0..a3; zero inputs are skipped. */
    if (col[8 * 2]) {
        a0 += W2 * col[8 * 2];
        a1 += W6 * col[8 * 2];
        a2 -= W6 * col[8 * 2];
        a3 -= W2 * col[8 * 2];
    }

    if (col[8 * 4]) {
        a0 += W4 * col[8 * 4];
        a1 -= W4 * col[8 * 4];
        a2 -= W4 * col[8 * 4];
        a3 += W4 * col[8 * 4];
    }

    if (col[8 * 6]) {
        a0 += W6 * col[8 * 6];
        a1 -= W2 * col[8 * 6];
        a2 += W2 * col[8 * 6];
        a3 -= W6 * col[8 * 6];
    }

    /* Odd-indexed inputs accumulate into b0..b3; entry 1 initializes them. */
    if (col[8 * 1]) {
        b0 = W1 * col[8 * 1];
        b1 = W3 * col[8 * 1];
        b2 = W5 * col[8 * 1];
        b3 = W7 * col[8 * 1];
    } else {
        b0 = 0;
        b1 = 0;
        b2 = 0;
        b3 = 0;
    }

    if (col[8 * 3]) {
        b0 += W3 * col[8 * 3];
        b1 -= W7 * col[8 * 3];
        b2 -= W1 * col[8 * 3];
        b3 -= W5 * col[8 * 3];
    }

    if (col[8 * 5]) {
        b0 += W5 * col[8 * 5];
        b1 -= W1 * col[8 * 5];
        b2 += W7 * col[8 * 5];
        b3 += W3 * col[8 * 5];
    }

    if (col[8 * 7]) {
        b0 += W7 * col[8 * 7];
        b1 -= W5 * col[8 * 7];
        b2 += W3 * col[8 * 7];
        b3 -= W1 * col[8 * 7];
    }

    /* Final butterfly: outputs k and 7 - k are the sum and difference
       of the matching even/odd accumulators. */
    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
}
229  
230 
/* If all rows but the first one are zero after row transformation,

231 
all rows will be identical after column transformation. */

232 
static inline void idct_col2(DCTELEM *col) 
233 
{ 
234 
int i;

235 
uint64_t l, r; 
236  
237 
for (i = 0; i < 8; ++i) { 
238 
int a0 = col[i] + (1 << (COL_SHIFT  1)) / W4; 
239  
240 
a0 *= W4; 
241 
col[i] = a0 >> COL_SHIFT; 
242 
} 
243  
244 
l = ldq(col + 0 * 4); r = ldq(col + 1 * 4); 
245 
stq(l, col + 2 * 4); stq(r, col + 3 * 4); 
246 
stq(l, col + 4 * 4); stq(r, col + 5 * 4); 
247 
stq(l, col + 6 * 4); stq(r, col + 7 * 4); 
248 
stq(l, col + 8 * 4); stq(r, col + 9 * 4); 
249 
stq(l, col + 10 * 4); stq(r, col + 11 * 4); 
250 
stq(l, col + 12 * 4); stq(r, col + 13 * 4); 
251 
stq(l, col + 14 * 4); stq(r, col + 15 * 4); 
252 
} 
253  
254 
/*
 * In-place IDCT of an 8x8 block of coefficients stored row by row
 * (64 consecutive elements).
 *
 * The row pass is always run on all eight rows.  The sparseness codes
 * it returns select one of three column passes:
 *   - only row 0 had nonzero input: idct_col2() (one row computed, then copied);
 *   - every row came out constant: one idct_col() on column 0, whose
 *     results are then replicated across each row;
 *   - otherwise: idct_col() on each of the eight columns.
 */
void ff_simple_idct_axp(DCTELEM *block)
{

    int i;
    int rowsZero = 1;           /* all rows except row 0 zero */
    int rowsConstant = 1;       /* all rows consist of a constant value */

    for (i = 0; i < 8; i++) {
        int sparseness = idct_row(block + 8 * i);

        if (i > 0 && sparseness > 0)
            rowsZero = 0;
        if (sparseness == 2)
            rowsConstant = 0;
    }

    if (rowsZero) {
        idct_col2(block);
    } else if (rowsConstant) {
        /* All columns are identical, so transform column 0 only and
           broadcast each result across its row, two rows per iteration. */
        idct_col(block);
        for (i = 0; i < 8; i += 2) {
            uint64_t v = (uint16_t) block[0];
            uint64_t w = (uint16_t) block[8];

            /* Replicate each 16-bit value into all four 16-bit lanes. */
            v |= v << 16;
            w |= w << 16;
            v |= v << 32;
            w |= w << 32;
            stq(v, block + 0 * 4);
            stq(v, block + 1 * 4);
            stq(w, block + 2 * 4);
            stq(w, block + 3 * 4);
            block += 4 * 4;     /* advance by two rows */
        }
    } else {
        for (i = 0; i < 8; i++)
            idct_col(block + i);
    }
}
293  
294 
/*
 * Run ff_simple_idct_axp() on block in place, then hand the result to
 * put_pixels_clamped_axp_p() together with dest and line_size.
 * NOTE(review): put_pixels_clamped_axp_p is declared outside this file;
 * presumably it writes the clamped samples to dest - confirm there.
 */
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
299  
300 
/*
 * Run ff_simple_idct_axp() on block in place, then hand the result to
 * add_pixels_clamped_axp_p() together with dest and line_size.
 * NOTE(review): add_pixels_clamped_axp_p is declared outside this file;
 * presumably it adds the samples to dest with clamping - confirm there.
 */
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}