/* Source: ffmpeg / libavcodec / alpha / dsputil_alpha.c @ 1e98dffb */

/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19

    
20
#include "asm.h"
21
#include "../dsputil.h"
22

    
23
/* Prototype for a routine defined in another translation unit
   (presumably the Alpha IDCT, judging by the name).
   NOTE(review): nothing in the visible part of this file references it --
   confirm whether the prototype is still needed here.  */
void simple_idct_axp(DCTELEM *block);
24

    
25
/*
 * Store an 8-row block of coefficients as pixels, saturating each value
 * to the range 0..255.
 *
 * block     - source coefficients; each row is read with two ldq() loads
 *             (at block and block + 4) and the pointer advances by 8
 * pixels    - destination; each row is written with two stl() stores
 *             (at pixels and pixels + 4)
 * line_size - amount added to pixels after every row
 *
 * NOTE(review): ldq, stl, maxsw4, minsw4, pkwb and WORD_VEC are provided by
 * asm.h, which is not visible here.  The per-line comments assume they
 * behave like the Alpha MVI instructions they are named after -- confirm
 * against asm.h.
 */
static void put_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
                                   int line_size)
{
    int i = 8;
    do {
        UINT64 shorts;

        /* first four values of the row */
        shorts = ldq(block);
        shorts = maxsw4(shorts, 0);                /* raise negatives to 0 */
        shorts = minsw4(shorts, WORD_VEC(0x00ff)); /* clamp to 255 */
        stl(pkwb(shorts), pixels);                 /* pack words to bytes */

        /* next four values of the row */
        shorts = ldq(block + 4);
        shorts = maxsw4(shorts, 0);
        shorts = minsw4(shorts, WORD_VEC(0x00ff));
        stl(pkwb(shorts), pixels + 4);

        pixels += line_size;
        block += 8;
    } while (--i);
}
46

    
47
/*
 * Add an 8-row block of coefficients to the pixels already in the
 * destination and store the sums back, saturated to 0..255.
 *
 * block     - source coefficients; each row is read with two ldq() loads
 *             (at block and block + 4) and the pointer advances by 8
 * pixels    - destination, read with ldl() and rewritten with stl(),
 *             four pixels at a time
 * line_size - amount added to pixels after every row
 *
 * The four 16-bit lanes are added with one 64-bit addition.  Bit 15 of
 * every lane is cleared first so that no carry can spill into the next
 * lane, and cleared again afterwards.  A lane whose true sum is negative
 * is then left holding a large value, which the unsigned minimum pins to
 * 0x4000 and the following mask turns into 0.
 *
 * NOTE(review): a lane whose sum reaches 0x4000 or more would be zapped
 * to 0 by the same two steps, so this appears to rely on coefficients
 * staying well inside +/-0x4000 -- confirm the input range with callers.
 * NOTE(review): ldq, ldl, stl, unpkbw, pkwb, minuw4, minsw4 and WORD_VEC
 * come from asm.h, which is not visible here; confirm their semantics
 * there.
 */
static void add_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
                                   int line_size)
{
    int i = 8;
    do {
        UINT64 shorts;

        /* first 4 */
        shorts = ldq(block);
        shorts &= ~WORD_VEC(0x8000); /* clear highest bit to avoid overflow */
        shorts += unpkbw(ldl(pixels));
        shorts &= ~WORD_VEC(0x8000); /* hibit would be set for e. g. -2 + 3 */
        shorts = minuw4(shorts, WORD_VEC(0x4000)); /* set neg. to 0x4000 */
        shorts &= ~WORD_VEC(0x4000); /* ...and zap them */
        shorts = minsw4(shorts, WORD_VEC(0x00ff)); /* clamp to 255 */
        stl(pkwb(shorts), pixels);

        /* next 4 */
        shorts = ldq(block + 4);
        shorts &= ~WORD_VEC(0x8000);
        shorts += unpkbw(ldl(pixels + 4));
        shorts &= ~WORD_VEC(0x8000);
        shorts = minuw4(shorts, WORD_VEC(0x4000));
        shorts &= ~WORD_VEC(0x4000);
        shorts = minsw4(shorts, WORD_VEC(0x00ff));
        stl(pkwb(shorts), pixels + 4);

        pixels += line_size;
        block += 8;
    } while (--i);
}
77

    
78
/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
79
   Since the immediate result could be greater than 255, we do the
80
   shift first. The result is too low by one if the bytes were both
81
   odd, so we need to add (l1 & l2) & BYTE_VEC(0x01).  */
82
static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
83
{
84
    UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
85
    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
86
    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
87
    return l1 + l2 + correction;
88
}
89

    
90
/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
91
   The '1' only has an effect when one byte is even and the other odd,
92
   i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
93
   Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01).  */
94
static inline UINT64 avg2(UINT64 l1, UINT64 l2)
95
{
96
    UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
97
    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
98
    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
99
    return l1 + l2 + correction;
100
}
101

    
102
/* Average four sets of 8 unsigned bytes in parallel with rounding:
   (b1 + b2 + b3 + b4 + 2) >> 2
   r1 adds up the upper six bits of every byte, shifted down first so
   that no byte can overflow.
   r2 handles the low two bits: four of them plus the rounding constant
   total at most 14 per byte, so they can be summed in place; after the
   shift, the 0x03 mask drops whatever the 64-bit shift pulled in from
   the neighbouring byte.  */
static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
                  + (l2 & BYTE_VEC(0x03))
                  + (l3 & BYTE_VEC(0x03))
                  + (l4 & BYTE_VEC(0x03))
                  + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
115

    
116
/* Average four sets of 8 unsigned bytes in parallel, using the smaller
   rounding constant: (b1 + b2 + b3 + b4 + 1) >> 2
   Identical to avg4 except that BYTE_VEC(0x01) instead of BYTE_VEC(0x02)
   is added to the low-bit sum.
   r1 adds up the upper six bits of every byte, shifted down first so
   that no byte can overflow.
   r2 sums the low two bits plus the constant (at most 13 per byte), and
   the 0x03 mask after the shift drops whatever the 64-bit shift pulled
   in from the neighbouring byte.  */
static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
    UINT64 r2 = (( (l1 & BYTE_VEC(0x03))
                 + (l2 & BYTE_VEC(0x03))
                 + (l3 & BYTE_VEC(0x03))
                 + (l4 & BYTE_VEC(0x03))
                 + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
    return r1 + r2;
}
129

    
130
/* Instantiate the rounding "put" operations from the pixops.h template:
   names are formed by pasting "put" in front of the template's suffix,
   two- and four-way averages use avg2/avg4, and STORE writes the result
   with a plain stq().  The parameters are undefined again afterwards so
   the template can be included once more with a different set.
   NOTE(review): pixops.h is not visible here; it presumably produces
   put_pixels_axp, put_pixels_x2_axp, put_pixels_y2_axp and
   put_pixels_xy2_axp, which dsputil_init_alpha() installs.  */
#define PIXOPNAME(suffix) put ## suffix
#define BTYPE UINT8
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, b) stq(l, b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
141

    
142
/* Instantiate the "put_no_rnd" operations from the pixops.h template:
   same as the "put" set, but averaging goes through avg2_no_rnd and
   avg4_no_rnd.  STORE again writes the result with a plain stq().
   NOTE(review): pixops.h is not visible here; it presumably produces
   put_no_rnd_pixels_x2_axp, put_no_rnd_pixels_y2_axp and
   put_no_rnd_pixels_xy2_axp, which dsputil_init_alpha() installs.  */
#define PIXOPNAME(suffix) put_no_rnd ## suffix
#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define STORE(l, b) stq(l, b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
153

    
154
/* The following functions are untested.  */
155
#if 0
156

157
/* Template parameters for the rounding "avg" operations: STORE averages
   the new value with what ldq() reads from the destination and writes
   the result back with stq().
   STORE carries no trailing semicolon, matching the put variants, so the
   use site supplies it and no empty statement is produced.  */
#define PIXOPNAME(suffix) avg ## suffix
#define BTYPE UINT8
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
168

169
/* Template parameters for the "avg_no_rnd" operations: as the "avg" set,
   but averaging goes through avg2_no_rnd and avg4_no_rnd, including the
   average with the destination performed inside STORE.
   STORE carries no trailing semicolon, matching the put variants, so the
   use site supplies it and no empty statement is produced.  */
#define PIXOPNAME(suffix) avg_no_rnd ## suffix
#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
180

181
/* Template parameters for the "sub" operations: the destination is an
   array of DCTELEM rather than bytes, and STORE subtracts each of the
   eight bytes of the 64-bit value l from the corresponding element,
   lowest byte first.  The value is copied into a local once so that the
   argument is evaluated a single time.  */
#define PIXOPNAME(suffix) sub ## suffix
#define BTYPE DCTELEM
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, block) do {                \
    UINT64 xxx = (l);                       \
    (block)[0] -= (xxx >>  0) & 0xff;       \
    (block)[1] -= (xxx >>  8) & 0xff;       \
    (block)[2] -= (xxx >> 16) & 0xff;       \
    (block)[3] -= (xxx >> 24) & 0xff;       \
    (block)[4] -= (xxx >> 32) & 0xff;       \
    (block)[5] -= (xxx >> 40) & 0xff;       \
    (block)[6] -= (xxx >> 48) & 0xff;       \
    (block)[7] -= (xxx >> 56) & 0xff;       \
} while (0)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
202

203
#endif
204

    
205
void dsputil_init_alpha(void)
206
{
207
    put_pixels_tab[0] = put_pixels_axp;
208
    put_pixels_tab[1] = put_pixels_x2_axp;
209
    put_pixels_tab[2] = put_pixels_y2_axp;
210
    put_pixels_tab[3] = put_pixels_xy2_axp;
211

    
212
    put_no_rnd_pixels_tab[0] = put_pixels_axp;
213
    put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp;
214
    put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
215
    put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
216

    
217
    /* amask clears all bits that correspond to present features.  */
218
    if (amask(AMASK_MVI) == 0) {
219
        fprintf(stderr, "MVI extension detected\n");
220
        put_pixels_clamped = put_pixels_clamped_axp;
221
        add_pixels_clamped = add_pixels_clamped_axp;
222
    }
223
}