ffmpeg / libavcodec / sh4 / idct_sh4.c @ be449fca
History  View  Annotate  Download (9.76 KB)
1 
/*


2 
* idct for sh4

3 
*

4 
* Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>

5 
*

6 
* This file is part of FFmpeg.

7 
*

8 
* FFmpeg is free software; you can redistribute it and/or

9 
* modify it under the terms of the GNU Lesser General Public

10 
* License as published by the Free Software Foundation; either

11 
* version 2.1 of the License, or (at your option) any later version.

12 
*

13 
* FFmpeg is distributed in the hope that it will be useful,

14 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

15 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

16 
* Lesser General Public License for more details.

17 
*

18 
* You should have received a copy of the GNU Lesser General Public

19 
* License along with FFmpeg; if not, write to the Free Software

20 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

21 
*/

22  
23 
#include "libavcodec/dsputil.h" 
24 
/*
 * Basis coefficients of the 8-point IDCT: cN = sqrt(2) * cos(N * pi / 16).
 * The short names are only needed while the two tables below are built and
 * are #undef'd immediately afterwards.
 */
#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
#define c5 0.78569495838710234903 /* sqrt(2)*cos(5*pi/16) */
#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */
#define c7 0.27589937928294311353 /* sqrt(2)*cos(7*pi/16) */

/*
 * 4x4 matrix for the even-indexed inputs (x0, x2, x4, x6).
 *
 * Layout is column-major as consumed by ftrv(): the four entries on one
 * source line are the weights that a single input contributes to the four
 * outputs, i.e. out[i] = sum over j of table[4*j + i] * in[j].
 * The columns are mutually orthogonal with squared norm 4.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
     c4,  c4,  c4,  c4,
     c2,  c6, -c6, -c2,
     c4, -c4, -c4,  c4,
     c6, -c2,  c2, -c6
};

/*
 * 4x4 matrix for the odd-indexed inputs (x1, x3, x5, x7); same column-major
 * layout as even_table.
 */
static const float odd_table[] __attribute__ ((aligned(8))) = {
     c1,  c3,  c5,  c7,
     c3, -c7, -c1, -c5,
     c5, -c1,  c7,  c3,
     c7, -c5,  c3, -c1
};

#undef c1
#undef c2
#undef c3
#undef c4
#undef c5
#undef c6
#undef c7

53  
54 
/*
 * Matrix/vector backend used by idct_sh4().
 *
 * Both branches provide the same three names:
 *   DEFREG              - declares the working storage (and fr0..fr3)
 *   load_matrix(table)  - makes a 16-float table the current 4x4 matrix
 *   ftrv()              - replaces (fr0..fr3) by matrix * (fr0..fr3)
 */
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)

/*
 * Load 16 floats from `table` with eight post-incrementing fmov transfers
 * into xd0..xd14, bracketed by a pair of fschg instructions.
 * NOTE(review): the asm post-increments input operand %0 and names "0" in
 * the clobber list; confirm against the GCC extended-asm rules whether this
 * should be an in/out ("+r") operand instead.
 */
#define load_matrix(table) \
    __asm__ volatile( \
    "       fschg\n" \
    "       fmov    @%0+,xd0\n" \
    "       fmov    @%0+,xd2\n" \
    "       fmov    @%0+,xd4\n" \
    "       fmov    @%0+,xd6\n" \
    "       fmov    @%0+,xd8\n" \
    "       fmov    @%0+,xd10\n" \
    "       fmov    @%0+,xd12\n" \
    "       fmov    @%0+,xd14\n" \
    "       fschg\n" \
    :\
    : "r"(table)\
    : "0" \
    )

/* Transform fv0 (fr0..fr3) by xmtrx; all four registers are in/out. */
#define ftrv() \
    __asm__ volatile("ftrv xmtrx,fv0" \
    : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
    :  "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );

/* Pin the four vector components to the hardware registers fr0..fr3. */
#define DEFREG \
    register float fr0 __asm__("fr0"); \
    register float fr1 __asm__("fr1"); \
    register float fr2 __asm__("fr2"); \
    register float fr3 __asm__("fr3")

#else

/* generic C code, usable to check the algorithm on other hosts */

/*
 * In-place 4x4 matrix * 4-vector product.
 * xf is column-major: fv[i] = xf[i]*f0 + xf[i+4]*f1 + xf[i+8]*f2 + xf[i+12]*f3.
 * The inputs are copied first because fv is overwritten while it is read.
 */
static void ftrv_(const float xf[],float fv[])
{
    const float f0 = fv[0];
    const float f1 = fv[1];
    const float f2 = fv[2];
    const float f3 = fv[3];
    int i;

    for (i = 0; i < 4; i++)
        fv[i] = xf[i]*f0 + xf[i + 4]*f1 + xf[i + 8]*f2 + xf[i + 12]*f3;
}

/* Copy the 16 coefficients of `table` into the working matrix xf. */
static void load_matrix_(float xf[],const float table[])
{
    int i;

    for (i = 0; i < 16; i++)
        xf[i] = table[i];
}

/* Both macros rely on the locals `xf` and `fv` declared by DEFREG. */
#define ftrv()             ftrv_(xf,fv)
#define load_matrix(table) load_matrix_(xf,table)

/* Working vector and matrix kept in ordinary local arrays. */
#define DEFREG \
    float fv[4],xf[16]

/* The "registers" are simply the elements of the local vector. */
#define fr0 fv[0]
#define fr1 fv[1]
#define fr2 fv[2]
#define fr3 fv[3]

#endif

119  
120 
/*
 * DESCALE(x, n): scale x down by 2^n.
 * The active variant stays in floating point (multiply by 1/2^n); the
 * disabled variant converts to int and does a rounding right shift.
 * Both expansions are fully parenthesized so they can be used inside
 * larger expressions.
 */
#if 1
#define DESCALE(x,n) ((x)*(1.0f/(1<<(n))))
#else
#define DESCALE(x,n) (((int)(x)+(1<<((n)-1)))>>(n))
#endif

125  
126 
/* this code works worse with gcc cvs; gcc 3.2.3 works fine */

127  
128  
129 
#if 1 
130 
//optimized

131  
132 
/*
 * In-place 8x8 inverse DCT (pointer-walking variant).
 *
 * block: 64 coefficients, row-major; overwritten with the result.
 *
 * Each 1-D pass is split into an even half (inputs 0,2,4,6 times even_table)
 * and an odd half (inputs 1,3,5,7 times odd_table), combined as
 *   out[k]   = even[k] + odd[k]
 *   out[7-k] = even[k] - odd[k]        for k = 0..3.
 * The row pass writes floats into the scratch buffer tblock; the column pass
 * reads tblock and stores DESCALE(.., 3) back into block.
 */
void idct_sh4(DCTELEM *block)
{
    DEFREG;

    int i;
    float tblock[8*8], *fblock;
    int ofs1, ofs2, ofs3;

    /* NOTE(review): guard kept from the original; presumably the FPU
     * precision mode must be switched when built for plain __SH4__. */
#if defined(__SH4__)
#error "FIXME!! change to single float"
#endif

    /* row */

    /* even part: store e0..e3 into the first four slots of every row */
    load_matrix(even_table);

    fblock = tblock + 4;
    i = 8;
    do {
        fr0 = block[0];
        fr1 = block[2];
        fr2 = block[4];
        fr3 = block[6];
        block += 8;
        ftrv();
        /* written back to front, leaving fblock at the start of the row */
        *--fblock = fr3;
        *--fblock = fr2;
        *--fblock = fr1;
        *--fblock = fr0;
        fblock += 8 + 4;            /* slot 4 of the next row */
    } while (--i);
    block  -= 8*8;                  /* rewind both pointers */
    fblock -= 8*8 + 4;

    /* odd part: combine with the stored even results */
    load_matrix(odd_table);

    i = 8;
    do {
        float t0, t1, t2, t3;
        fr0 = block[1];
        fr1 = block[3];
        fr2 = block[5];
        fr3 = block[7];
        block += 8;
        ftrv();
        t0 = *fblock++;
        t1 = *fblock++;
        t2 = *fblock++;
        t3 = *fblock++;
        fblock += 4;                /* one past the end of this row */
        *--fblock = t0 - fr0;       /* row[7] */
        *--fblock = t1 - fr1;       /* row[6] */
        *--fblock = t2 - fr2;       /* row[5] */
        *--fblock = t3 - fr3;       /* row[4] */
        *--fblock = t3 + fr3;       /* row[3] */
        *--fblock = t2 + fr2;       /* row[2] */
        *--fblock = t1 + fr1;       /* row[1] */
        *--fblock = t0 + fr0;       /* row[0] */
        fblock += 8;
    } while (--i);
    block  -= 8*8;
    fblock -= 8*8;

    /* col */

    /* even part: rows 0,2,4,6 of each column are transformed in place */
    load_matrix(even_table);

    /* byte offsets of rows 2, 4 and 6 relative to fblock */
    ofs1 = sizeof(float)*2*8;
    ofs2 = sizeof(float)*4*8;
    ofs3 = sizeof(float)*6*8;

    i = 8;

/* float located `ofs` bytes after fblock */
#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)

    do {
        fr0 = OA(fblock,   0);
        fr1 = OA(fblock,ofs1);
        fr2 = OA(fblock,ofs2);
        fr3 = OA(fblock,ofs3);
        ftrv();
        OA(fblock,0   ) = fr0;
        OA(fblock,ofs1) = fr1;
        OA(fblock,ofs2) = fr2;
        OA(fblock,ofs3) = fr3;
        fblock++;
    } while (--i);
    fblock -= 8;

    /* odd part: combine, scale by 1/8 and write the result into block */
    load_matrix(odd_table);

    i = 8;
    do {
        float t0, t1, t2, t3;
        t0 = OA(fblock,   0);  /* [8*0] */
        t1 = OA(fblock,ofs1);  /* [8*2] */
        t2 = OA(fblock,ofs2);  /* [8*4] */
        t3 = OA(fblock,ofs3);  /* [8*6] */
        fblock += 8;
        fr0 = OA(fblock,   0); /* [8*1] */
        fr1 = OA(fblock,ofs1); /* [8*3] */
        fr2 = OA(fblock,ofs2); /* [8*5] */
        fr3 = OA(fblock,ofs3); /* [8*7] */
        fblock -= 7;            /* back up one row, advance one column */
        ftrv();
        block[8*0] = DESCALE(t0 + fr0,3);
        block[8*7] = DESCALE(t0 - fr0,3);
        block[8*1] = DESCALE(t1 + fr1,3);
        block[8*6] = DESCALE(t1 - fr1,3);
        block[8*2] = DESCALE(t2 + fr2,3);
        block[8*5] = DESCALE(t2 - fr2,3);
        block[8*3] = DESCALE(t3 + fr3,3);
        block[8*4] = DESCALE(t3 - fr3,3);
        block++;
    } while (--i);

#if defined(__SH4__)
#error "FIXME!! change to double"
#endif
}
259 
#else

260 
/*
 * In-place 8x8 inverse DCT (plain indexed variant, the alternate branch of
 * the surrounding #if).
 *
 * block: 64 coefficients, row-major; overwritten with the result.
 *
 * Each 1-D pass multiplies the even-indexed inputs by even_table and the
 * odd-indexed inputs by odd_table, then forms
 *   out[k]   = even[k] + odd[k]
 *   out[7-k] = even[k] - odd[k]        for k = 0..3.
 * The row pass fills the float scratch buffer tblock; the column pass stores
 * DESCALE(.., 3) back into block.
 */
void idct_sh4(DCTELEM *block)
{
    DEFREG;

    int i;
    float tblock[8*8], *fblock;

    /* row */

    /* even part: park e0..e3 in slots 0,2,4,6 of every row */
    load_matrix(even_table);

    fblock = tblock;
    i = 8;
    do {
        fr0 = block[0];
        fr1 = block[2];
        fr2 = block[4];
        fr3 = block[6];
        block += 8;
        ftrv();
        fblock[0] = fr0;
        fblock[2] = fr1;
        fblock[4] = fr2;
        fblock[6] = fr3;
        fblock += 8;
    } while (--i);
    block  -= 8*8;                  /* rewind both pointers */
    fblock -= 8*8;

    /* odd part: combine with the parked even results */
    load_matrix(odd_table);

    i = 8;
    do {
        float t0, t1, t2, t3;
        fr0 = block[1];
        fr1 = block[3];
        fr2 = block[5];
        fr3 = block[7];
        block += 8;
        ftrv();
        /* read all even values before any slot of the row is overwritten */
        t0 = fblock[0];
        t1 = fblock[2];
        t2 = fblock[4];
        t3 = fblock[6];
        fblock[0] = t0 + fr0;
        fblock[7] = t0 - fr0;
        fblock[1] = t1 + fr1;
        fblock[6] = t1 - fr1;
        fblock[2] = t2 + fr2;
        fblock[5] = t2 - fr2;
        fblock[3] = t3 + fr3;
        fblock[4] = t3 - fr3;
        fblock += 8;
    } while (--i);
    block  -= 8*8;
    fblock -= 8*8;

    /* col */

    /* even part: rows 0,2,4,6 of each column are transformed in place */
    load_matrix(even_table);

    i = 8;
    do {
        fr0 = fblock[8*0];
        fr1 = fblock[8*2];
        fr2 = fblock[8*4];
        fr3 = fblock[8*6];
        ftrv();
        fblock[8*0] = fr0;
        fblock[8*2] = fr1;
        fblock[8*4] = fr2;
        fblock[8*6] = fr3;
        fblock++;
    } while (--i);
    fblock -= 8;

    /* odd part: combine, scale by 1/8 and write the result into block */
    load_matrix(odd_table);

    i = 8;
    do {
        float t0, t1, t2, t3;
        fr0 = fblock[8*1];
        fr1 = fblock[8*3];
        fr2 = fblock[8*5];
        fr3 = fblock[8*7];
        ftrv();
        t0 = fblock[8*0];
        t1 = fblock[8*2];
        t2 = fblock[8*4];
        t3 = fblock[8*6];
        fblock++;
        block[8*0] = DESCALE(t0 + fr0,3);
        block[8*7] = DESCALE(t0 - fr0,3);
        block[8*1] = DESCALE(t1 + fr1,3);
        block[8*6] = DESCALE(t1 - fr1,3);
        block[8*2] = DESCALE(t2 + fr2,3);
        block[8*5] = DESCALE(t2 - fr2,3);
        block[8*3] = DESCALE(t3 + fr3,3);
        block[8*4] = DESCALE(t3 - fr3,3);
        block++;
    } while (--i);
}
366 
#endif
