Revision f5abd9fd libavcodec/alpha/dsputil_alpha.c

View differences:

libavcodec/alpha/dsputil_alpha.c
133 133
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);    
134 134
}
135 135

  
136
#if 0
137
/* The XY2 routines basically utilize this scheme, but reuse parts in
138
   each iteration.  */
136 139
static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
137 140
{
138 141
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
......
146 149
		    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
147 150
    return r1 + r2;
148 151
}
149

  
150
/*
 * Combine four 64-bit words into one, working on the bits selected by
 * BYTE_VEC(0x03) separately from the remaining bits.
 * (BYTE_VEC is defined outside this excerpt; presumably it replicates its
 * argument into every byte of a 64-bit word -- confirm.)
 *
 *   r1: each input has the BYTE_VEC(0x03) bits cleared and is shifted right
 *       by 2; the four results are added.
 *   r2: the BYTE_VEC(0x03) bits of the four inputs are added together with a
 *       BYTE_VEC(0x01) bias, shifted right by 2 and masked with
 *       BYTE_VEC(0x03) again.
 *
 * Returns r1 + r2.  The visible tail of avg4() uses the same final
 * expression with a BYTE_VEC(0x02) bias instead of BYTE_VEC(0x01).
 *
 * NOTE(review): in this diff view these lines carry a single line number
 * (150-163) rather than a pair, which suggests the function exists in only
 * one of the two compared revisions -- confirm against the unified diff.
 */
static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2,
151
				   uint64_t l3, uint64_t l4)
152
{
153
    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
154
		+ ((l2 & ~BYTE_VEC(0x03)) >> 2)
155
		+ ((l3 & ~BYTE_VEC(0x03)) >> 2)
156
		+ ((l4 & ~BYTE_VEC(0x03)) >> 2);
157
    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
158
		    + (l2 & BYTE_VEC(0x03))
159
		    + (l3 & BYTE_VEC(0x03))
160
		    + (l4 & BYTE_VEC(0x03))
161
		    + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
162
    return r1 + r2;
163
}
152
#endif
164 153

  
165 154
#define OP(LOAD, STORE, INCR)			\
166 155
    do {					\
......
194 183
	} while (--h);				\
195 184
    } while (0)
196 185

  
197
/*
 * OP_XY2(LOAD, STORE, INCR) -- two revisions of this macro appear back to
 * back in this diff view.  Judging by the line-number columns, the
 * tab-indented form (197-214) is the earlier text and the space-indented
 * form (186-214) is its replacement; both end at the shared "} while (0)".
 *
 * Both forms expand in a scope that provides `pixels`, `block`, `line_size`
 * and `h`.  For every row they read one 64-bit word with LOAD(pixels) plus
 * the single byte pixels[8], so nine bytes of each row are accessed, and
 * one more row is loaded than is stored.  The loop is do { } while (--h),
 * so the body runs before h is tested (h is presumably >= 1 -- confirm
 * with callers).
 *
 * Earlier form: pix2 is pix1 shifted right by 8 with pixels[8] placed in
 * bits 56..63.  Each iteration loads the next row the same way, stores
 * AVG4(pix1, pix2, next_pix1, next_pix2) to block, advances block by INCR
 * and keeps the next_* pair for the following iteration.
 */
#define OP_XY2(LOAD, STORE, INCR)					\
198
    do {								\
199
	uint64_t pix1 = LOAD(pixels);					\
200
	uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);	\
201
									\
202
	do {								\
203
	    uint64_t next_pix1, next_pix2;				\
204
									\
205
	    pixels += line_size;					\
206
	    next_pix1 = LOAD(pixels);					\
207
	    next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56);	\
208
									\
209
	    STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block);	\
210
									\
211
	    block += INCR;						\
212
	    pix1 = next_pix1;						\
213
	    pix2 = next_pix2;						\
214
	} while (--h);							\
186
/*
 * Replacement form: instead of calling AVG4, each row pair is split once
 * into pix_l (sum of the bits selected by BYTE_VEC(0x03)) and pix_h (sum of
 * the remaining bits, each shifted right by 2).  Every iteration computes
 * the same split for the next row (npix_l / npix_h), forms
 *   avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03))
 *         + pix_h + npix_h
 * stores it, and then carries npix_l / npix_h forward, so no row is split
 * twice.  AVG4_ROUNDER must be defined wherever the macro is expanded.
 */
#define OP_XY2(LOAD, STORE, INCR)                                           \
187
    do {                                                                    \
188
        uint64_t pix1 = LOAD(pixels);                                       \
189
        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
190
        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
191
                       + (pix2 & BYTE_VEC(0x03));                           \
192
        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
193
                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
194
                                                                            \
195
        do {                                                                \
196
            uint64_t npix1, npix2;                                          \
197
            uint64_t npix_l, npix_h;                                        \
198
            uint64_t avg;                                                   \
199
                                                                            \
200
            pixels += line_size;                                            \
201
            npix1 = LOAD(pixels);                                           \
202
            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
203
            npix_l = (npix1 & BYTE_VEC(0x03))                               \
204
                   + (npix2 & BYTE_VEC(0x03));                              \
205
            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
206
                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
207
            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
208
                + pix_h + npix_h;                                           \
209
            STORE(avg, block);                                              \
210
                                                                            \
211
            block += INCR;                                                  \
212
            pix_l = npix_l;                                                 \
213
            pix_h = npix_h;                                                 \
214
        } while (--h);                                                      \
215 215
    } while (0)
216 216

  
217
/*
 * MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR) -- two revisions of
 * this macro appear back to back in this diff view (both numbered 217-226);
 * they share the closing "}" line.
 *
 * Each expansion defines a static function named
 *   OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h)
 * whose body expands OPKIND(<load>, STORE, INCR).  <load> is `uldq` when
 * any of the low three bits of the `pixels` address is set, and `ldq`
 * otherwise.  (uldq / ldq are defined outside this excerpt; presumably the
 * unaligned and aligned 64-bit loads -- confirm.)
 *
 * First form below: tab-indented, plain pointer parameters.
 */
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)		\
218
static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block,		\
219
					      const uint8_t *pixels,	\
220
					      int line_size, int h)	\
221
{									\
222
    if ((size_t) pixels & 0x7) {					\
223
	OPKIND(uldq, STORE, INCR);					\
224
    } else {								\
225
	OPKIND(ldq, STORE, INCR);					\
226
    }									\
217
/*
 * Second form: same body, re-indented with spaces; the only change to the
 * generated function is that `block` and `pixels` are declared `restrict`.
 */
#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR)       \
218
static void OPNAME ## _pixels ## SUFF ## _axp                   \
219
        (BTYPE *restrict block, const uint8_t *restrict pixels, \
220
         int line_size, int h)                                  \
221
{                                                               \
222
    if ((size_t) pixels & 0x7) {                                \
223
        OPKIND(uldq, STORE, INCR);                              \
224
    } else {                                                    \
225
        OPKIND(ldq, STORE, INCR);                               \
226
    }                                                           \
227 227
}
228 228

  
229 229
#define PIXOP(BTYPE, OPNAME, STORE, INCR)		\
......
235 235
/* Rounding primitives.  */
/* These defines parameterise the PIXOP(...) instantiation that follows.
   AVG4 is what the AVG4-calling form of OP_XY2 invokes; AVG4_ROUNDER is
   the bias the other OP_XY2 form adds to the summed low bits before its
   ">> 2".  BYTE_VEC(0x02) here matches the bias visible in the tail of
   avg4().  How AVG2 is consumed is not visible in this excerpt.  */
236 236
#define AVG2 avg2
237 237
#define AVG4 avg4
238
#define AVG4_ROUNDER BYTE_VEC(0x02)
238 239
#define STORE(l, b) stq(l, b)
239 240
PIXOP(uint8_t, put, STORE, line_size);
240 241

  
......
245 246
/* Not rounding primitives.  */
/* The previous AVG2 / AVG4 / AVG4_ROUNDER / STORE definitions are removed
   with #undef and then redefined for the put_no_rnd instantiation.  The
   only visible differences from the rounding set are the _no_rnd helper
   names and the BYTE_VEC(0x01) bias, which matches the bias used inside
   avg4_no_rnd().  */
246 247
#undef AVG2
247 248
#undef AVG4
249
#undef AVG4_ROUNDER
248 250
#undef STORE
249 251
#define AVG2 avg2_no_rnd
250 252
#define AVG4 avg4_no_rnd
253
#define AVG4_ROUNDER BYTE_VEC(0x01)
251 254
#define STORE(l, b) stq(l, b)
252 255
PIXOP(uint8_t, put_no_rnd, STORE, line_size);
253 256

  

Also available in: Unified diff