Statistics
| Branch: | Revision:

ffmpeg / libavcodec / sh4 / idct_sh4.c @ 2912e87a

History | View | Annotate | Download (9.57 KB)

1
/*
2
 * idct for sh4
3
 *
4
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
5
 *
6
 * This file is part of Libav.
7
 *
8
 * Libav is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * Libav is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with Libav; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
#include "libavcodec/dsputil.h"
24
#include "dsputil_sh4.h"
25
#include "sh4.h"
26

    
27
/*
 * Scaled cosine constants: cN = sqrt(2) * cos(N * pi / 16).
 * They exist only to spell out the two coefficient tables below and are
 * #undef'd again immediately afterwards.
 */
#define c1      1.38703984532214752434  /* sqrt(2)*cos(1*pi/16) */
#define c2      1.30656296487637657577  /* sqrt(2)*cos(2*pi/16) */
#define c3      1.17587560241935884520  /* sqrt(2)*cos(3*pi/16) */
#define c4      1.00000000000000000000  /* sqrt(2)*cos(4*pi/16) */
#define c5      0.78569495838710234903  /* sqrt(2)*cos(5*pi/16) */
#define c6      0.54119610014619712324  /* sqrt(2)*cos(6*pi/16) */
#define c7      0.27589937928294311353  /* sqrt(2)*cos(7*pi/16) */

/*
 * 4x4 coefficient matrix applied to the even-indexed inputs (0, 2, 4, 6).
 *
 * Layout: entries [4*j .. 4*j+3] hold the weights of input j for outputs
 * 0..3, i.e. out[i] = sum over j of table[4*j + i] * in[j], which is how
 * the generic ftrv_() in this file consumes a loaded matrix.
 *
 * The 8-byte alignment is presumably required because load_matrix() reads
 * the table into the xd0..xd14 register pairs.
 */
static const float even_table[] __attribute__ ((aligned(8))) = {
        c4, c4, c4, c4,
        c2, c6,-c6,-c2,
        c4,-c4,-c4, c4,
        c6,-c2, c2,-c6
};

/*
 * 4x4 coefficient matrix applied to the odd-indexed inputs (1, 3, 5, 7).
 * Same storage layout and alignment as even_table.
 */
static const float odd_table[] __attribute__ ((aligned(8))) = {
        c1, c3, c5, c7,
        c3,-c7,-c1,-c5,
        c5,-c1, c7, c3,
        c7,-c5, c3,-c1
};

#undef  c1
#undef  c2
#undef  c3
#undef  c4
#undef  c5
#undef  c6
#undef  c7
56

    
57
#if 1
58

    
59
/*
 * load_matrix(table): load 16 consecutive floats starting at `table` into
 * the SH4 extended register bank xd0..xd14 using eight post-incrementing
 * fmov instructions.
 *
 * The loads are bracketed by two fschg instructions; on SH4 fschg toggles
 * the FPSCR transfer-size bit, so each fmov moves one register pair and the
 * original mode is restored afterwards.  The loaded registers form the
 * "xmtrx" operand later used by ftrv().
 *
 * The source pointer is copied into a local because the asm advances it
 * ("+r" constraint); the caller's expression is evaluated exactly once.
 */
#define         load_matrix(table) \
    do { \
        const float *lm_src_ = (table); \
        __asm__ volatile( \
        "       fschg\n" \
        "       fmov   @%0+,xd0\n" \
        "       fmov   @%0+,xd2\n" \
        "       fmov   @%0+,xd4\n" \
        "       fmov   @%0+,xd6\n" \
        "       fmov   @%0+,xd8\n" \
        "       fmov   @%0+,xd10\n" \
        "       fmov   @%0+,xd12\n" \
        "       fmov   @%0+,xd14\n" \
        "       fschg\n" \
        : "+r"(lm_src_) \
        ); \
    } while (0)
76

    
77
/*
 * ftrv(): issue the SH4 "ftrv xmtrx,fv0" instruction, which replaces the
 * vector held in fr0..fr3 by its product with the 4x4 matrix previously
 * loaded by load_matrix().
 *
 * Requires fr0..fr3 to be in scope (see DEFREG); all four are both inputs
 * and outputs of the asm.  The expansion deliberately carries no trailing
 * semicolon so that "ftrv();" is a single statement, matching the generic
 * C definition of ftrv().
 */
#define         ftrv() \
                __asm__ volatile("ftrv xmtrx,fv0" \
                : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3))
80

    
81
/*
 * DEFREG: declare the four working floats fr0..fr3 as explicit register
 * variables pinned to the SH4 registers of the same names, so that the
 * ftrv() asm operates directly on them.
 *
 * Use as a declaration at the top of a function: "DEFREG;".
 */
#define         DEFREG        \
        register float fr0 __asm__("fr0"); \
        register float fr1 __asm__("fr1"); \
        register float fr2 __asm__("fr2"); \
        register float fr3 __asm__("fr3")
86

    
87
#else
88

    
89
/* generic C code for check */
90

    
91
/*
 * Portable C stand-in for the matrix-vector multiply done by ftrv().
 *
 * xf: 16-entry matrix; entries [4*j .. 4*j+3] are the weights of input j
 *     for outputs 0..3.
 * fv: 4-entry vector, transformed in place:
 *     fv[i] = xf[i]*fv[0] + xf[4+i]*fv[1] + xf[8+i]*fv[2] + xf[12+i]*fv[3]
 */
static void ftrv_(const float xf[],float fv[])
{
        float f0,f1,f2,f3;

        /* Snapshot the inputs first: fv is overwritten below. */
        f0 = fv[0];
        f1 = fv[1];
        f2 = fv[2];
        f3 = fv[3];

        fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
        fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
        fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
        fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
}
103

    
104
/*
 * Portable C stand-in for load_matrix(): copy the 16 coefficients of
 * `table` into the working matrix `xf`.
 *
 * xf:    destination, at least 16 floats.
 * table: source, at least 16 floats; not modified.
 */
static void load_matrix_(float xf[],const float table[])
{
        int i;

        for (i = 0; i < 16; i++) {
                xf[i] = table[i];
        }
}
109

    
110
/*
 * Generic C fallbacks: give the portable helpers the same names and call
 * shape as the SH4 asm macros so that idct_sh4() compiles unchanged.
 * Both macros rely on the locals `xf` and `fv` declared by DEFREG.
 */
#define         ftrv()                  ftrv_(xf,fv)
#define         load_matrix(table)      load_matrix_(xf,table)

/* Working storage: fv is the 4-element vector, xf the 16-entry matrix. */
#define         DEFREG \
        float fv[4],xf[16]

/* fr0..fr3 alias the elements of the working vector. */
#define         fr0     fv[0]
#define         fr1     fv[1]
#define         fr2     fv[2]
#define         fr3     fv[3]
120

    
121
#endif
122

    
123
/*
 * DESCALE(x, n): scale x down by 2^n.
 *
 * Active variant: floating-point multiply by 1/2^n; no rounding is applied
 * by the macro itself.
 * Disabled variant: convert to int, add half (2^(n-1)) and shift right.
 *
 * Both expansions are fully parenthesized so the macro can be used as an
 * operand of any surrounding expression.
 */
#if 1
#define         DESCALE(x,n)    ((x)*(1.0f/(1<<(n))))
#else
#define         DESCALE(x,n)    (((int)(x)+(1<<((n)-1)))>>(n))
#endif
128

    
129
/* this code works worse with gcc from CVS; gcc 3.2.3 works fine */
130

    
131

    
132
#if 1
133
//optimized
134

    
135
/*
 * In-place 8x8 inverse DCT (optimized variant).
 *
 * block: 64 coefficients, 8 rows of 8; overwritten with the result.
 *
 * The transform is separable: a row pass writes float intermediates to a
 * local 8x8 buffer, then a column pass reads that buffer and stores the
 * final values, scaled by DESCALE(.., 3), back into `block`.
 *
 * Each 8-point 1-D transform is done as two 4x4 matrix-vector products via
 * ftrv(): one on the even-indexed inputs (even_table) and one on the
 * odd-indexed inputs (odd_table).  With e[k] / o[k] the even / odd products,
 * output k is e[k] + o[k] and output 7-k is e[k] - o[k], for k = 0..3.
 *
 * All four passes are plain do/while loops over 8 rows or columns, with the
 * pointers rewound by hand after each pass.  The statement order inside the
 * loops is kept as written; fr0..fr3 may be hardware registers (see DEFREG).
 *
 * The final float -> DCTELEM conversion is the implicit one performed by
 * assignment; no clamping is done here.
 */
void idct_sh4(DCTELEM *block)
{
        DEFREG;

        int i;
        float        tblock[8*8],*fblock;
        int ofs1,ofs2,ofs3;
        int fpscr;

        /*
         * fp_single_enter/fp_single_leave are defined outside this file;
         * presumably they save FPSCR into `fpscr`, select the FPU mode the
         * asm below needs, and restore it on exit -- TODO confirm in sh4.h.
         */
        fp_single_enter(fpscr);

        /* row */

        /* even part */
        load_matrix(even_table);

        /*
         * Results are stored with pre-decrement, highest index first, so
         * start 4 floats into the row: each iteration fills tblock[8*r+0..3].
         */
        fblock = tblock+4;
        i = 8;
        do {
                fr0 = block[0];
                fr1 = block[2];
                fr2 = block[4];
                fr3 = block[6];
                block+=8;
                ftrv();
                *--fblock = fr3;
                *--fblock = fr2;
                *--fblock = fr1;
                *--fblock = fr0;
                fblock+=8+4;    /* next row, again offset by 4 */
        } while(--i);
        /* rewind both pointers to the start of their buffers */
        block-=8*8;
        fblock-=8*8+4;

        /* odd part */
        load_matrix(odd_table);

        i = 8;

        do {
                float t0,t1,t2,t3;
                fr0 = block[1];
                fr1 = block[3];
                fr2 = block[5];
                fr3 = block[7];
                block+=8;
                ftrv();
                /* fetch the even-part results stored by the previous pass */
                t0 = *fblock++;
                t1 = *fblock++;
                t2 = *fblock++;
                t3 = *fblock++;
                fblock+=4;      /* one past the row; fill it back to front */
                *--fblock = t0 - fr0;   /* row[7] */
                *--fblock = t1 - fr1;   /* row[6] */
                *--fblock = t2 - fr2;   /* row[5] */
                *--fblock = t3 - fr3;   /* row[4] */
                *--fblock = t3 + fr3;   /* row[3] */
                *--fblock = t2 + fr2;   /* row[2] */
                *--fblock = t1 + fr1;   /* row[1] */
                *--fblock = t0 + fr0;   /* row[0] */
                fblock+=8;
        } while(--i);
        block-=8*8;
        fblock-=8*8;

        /* col */

        /* even part */
        load_matrix(even_table);

        /* byte offsets from a column's first element to rows 2, 4 and 6 */
        ofs1 = sizeof(float)*2*8;
        ofs2 = sizeof(float)*4*8;
        ofs3 = sizeof(float)*6*8;

        i = 8;

/* OA(p, ofs): the float located `ofs` bytes past pointer p (an lvalue). */
#define        OA(fblock,ofs)   (*(float*)((char*)(fblock) + (ofs)))

        do {
                fr0 = OA(fblock,   0);
                fr1 = OA(fblock,ofs1);
                fr2 = OA(fblock,ofs2);
                fr3 = OA(fblock,ofs3);
                ftrv();
                /* even-part results replace rows 0, 2, 4, 6 of this column */
                OA(fblock,0   ) = fr0;
                OA(fblock,ofs1) = fr1;
                OA(fblock,ofs2) = fr2;
                OA(fblock,ofs3) = fr3;
                fblock++;
        } while(--i);
        fblock-=8;

        /* odd part */
        load_matrix(odd_table);

        i=8;
        do {
                float t0,t1,t2,t3;
                /* even-part results of this column */
                t0 = OA(fblock,   0); /* [8*0] */
                t1 = OA(fblock,ofs1); /* [8*2] */
                t2 = OA(fblock,ofs2); /* [8*4] */
                t3 = OA(fblock,ofs3); /* [8*6] */
                fblock+=8;      /* step down one row to reach the odd rows */
                fr0 = OA(fblock,   0); /* [8*1] */
                fr1 = OA(fblock,ofs1); /* [8*3] */
                fr2 = OA(fblock,ofs2); /* [8*5] */
                fr3 = OA(fblock,ofs3); /* [8*7] */
                fblock+=-8+1;   /* back to row 0, next column */
                ftrv();
                block[8*0] = DESCALE(t0 + fr0,3);
                block[8*7] = DESCALE(t0 - fr0,3);
                block[8*1] = DESCALE(t1 + fr1,3);
                block[8*6] = DESCALE(t1 - fr1,3);
                block[8*2] = DESCALE(t2 + fr2,3);
                block[8*5] = DESCALE(t2 - fr2,3);
                block[8*3] = DESCALE(t3 + fr3,3);
                block[8*4] = DESCALE(t3 - fr3,3);
                block++;
        } while(--i);

#undef OA

        fp_single_leave(fpscr);
}
255
#else
256
/*
 * In-place 8x8 inverse DCT (straightforward variant, currently disabled).
 *
 * block: 64 coefficients, 8 rows of 8; overwritten with the result.
 *
 * Same algorithm as the optimized variant, written with plain array
 * indexing: a row pass into a local float buffer, then a column pass that
 * stores DESCALE(.., 3) of each value back into `block`.  Each 1-D transform
 * is two ftrv() products (even_table on inputs 0,2,4,6 and odd_table on
 * inputs 1,3,5,7); output k is even[k] + odd[k] and output 7-k is
 * even[k] - odd[k], for k = 0..3.
 *
 * NOTE(review): unlike the optimized variant, this one does not call
 * fp_single_enter()/fp_single_leave() -- confirm whether that is needed
 * before enabling it together with the SH4 asm macros.
 */
void idct_sh4(DCTELEM *block)
{
        DEFREG;

        int i;
        float   tblock[8*8],*fblock;

        /* row */

        /* even part */
        load_matrix(even_table);

        fblock = tblock;
        i = 8;
        do {
                fr0 = block[0];
                fr1 = block[2];
                fr2 = block[4];
                fr3 = block[6];
                block+=8;
                ftrv();
                /* park the even results in the even slots of the row */
                fblock[0] = fr0;
                fblock[2] = fr1;
                fblock[4] = fr2;
                fblock[6] = fr3;
                fblock+=8;
        } while(--i);
        /* rewind both pointers to the start of their buffers */
        block-=8*8;
        fblock-=8*8;

        /* odd part */
        load_matrix(odd_table);

        i = 8;

        do {
                float t0,t1,t2,t3;
                fr0 = block[1];
                fr1 = block[3];
                fr2 = block[5];
                fr3 = block[7];
                block+=8;
                ftrv();
                /* read all parked even results before overwriting the row */
                t0 = fblock[0];
                t1 = fblock[2];
                t2 = fblock[4];
                t3 = fblock[6];
                fblock[0] = t0 + fr0;
                fblock[7] = t0 - fr0;
                fblock[1] = t1 + fr1;
                fblock[6] = t1 - fr1;
                fblock[2] = t2 + fr2;
                fblock[5] = t2 - fr2;
                fblock[3] = t3 + fr3;
                fblock[4] = t3 - fr3;
                fblock+=8;
        } while(--i);
        block-=8*8;
        fblock-=8*8;

        /* col */

        /* even part */
        load_matrix(even_table);

        i = 8;

        do {
                fr0 = fblock[8*0];
                fr1 = fblock[8*2];
                fr2 = fblock[8*4];
                fr3 = fblock[8*6];
                ftrv();
                /* even-part results replace rows 0, 2, 4, 6 of this column */
                fblock[8*0] = fr0;
                fblock[8*2] = fr1;
                fblock[8*4] = fr2;
                fblock[8*6] = fr3;
                fblock++;
        } while(--i);
        fblock-=8;

        /* odd part */
        load_matrix(odd_table);

        i=8;
        do {
                float t0,t1,t2,t3;
                fr0 = fblock[8*1];
                fr1 = fblock[8*3];
                fr2 = fblock[8*5];
                fr3 = fblock[8*7];
                ftrv();
                /* even-part results of this column */
                t0 = fblock[8*0];
                t1 = fblock[8*2];
                t2 = fblock[8*4];
                t3 = fblock[8*6];
                fblock++;
                block[8*0] = DESCALE(t0 + fr0,3);
                block[8*7] = DESCALE(t0 - fr0,3);
                block[8*1] = DESCALE(t1 + fr1,3);
                block[8*6] = DESCALE(t1 - fr1,3);
                block[8*2] = DESCALE(t2 + fr2,3);
                block[8*5] = DESCALE(t2 - fr2,3);
                block[8*3] = DESCALE(t3 + fr3,3);
                block[8*4] = DESCALE(t3 - fr3,3);
                block++;
        } while(--i);
}
362
#endif