Revision 1e98dffb

View differences:

libavcodec/Makefile
37 37
CFLAGS += $(MLIB_INC)
38 38
endif
39 39

  
40
# alpha specific stuff
41
ifeq ($(TARGET_ARCH_ALPHA),yes)
42
OBJS += alpha/dsputil_alpha.o alpha/mpegvideo_alpha.o
43
CFLAGS += -Wa,-mpca56
44
endif
45

  
40 46
SRCS = $(OBJS:.o=.c) $(ASM_OBJS:.o=.s)
41 47

  
42 48
LIB= libavcodec.a
......
74 80
	rm -f *.o *~ $(LIB) $(SLIB) *.so i386/*.o i386/*~ \
75 81
	   armv4l/*.o armv4l/*~ \
76 82
	   mlib/*.o mlib/*~ \
83
	   alpha/*.o alpha/*~ \
77 84
           libac3/*.o libac3/*~ \
78 85
           apiexample $(TESTS)
79 86

  
libavcodec/alpha/asm.h
1
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19

  
20
#ifndef LIBAVCODEC_ALPHA_ASM_H
21
#define LIBAVCODEC_ALPHA_ASM_H
22

  
23
#include <stdint.h>
24

  
25
#define AMASK_BWX (1 << 0)
26
#define AMASK_FIX (1 << 1)
27
#define AMASK_MVI (1 << 8)
28

  
29
/* Broadcast x into all eight byte lanes of a 64-bit word.
   The copies are merged with OR, so x is expected to fit in one byte.  */
static inline uint64_t BYTE_VEC(uint64_t x)
{
    uint64_t pair = x | (x << 8);          /* lanes 0-1 */
    uint64_t quad = pair | (pair << 16);   /* lanes 0-3 */

    return quad | (quad << 32);            /* lanes 0-7 */
}
36
/* Broadcast x into all four 16-bit lanes of a 64-bit word.
   The copies are merged with OR, so x is expected to fit in 16 bits.  */
static inline uint64_t WORD_VEC(uint64_t x)
{
    uint64_t pair = x | (x << 16);   /* lanes 0-1 */

    return pair | (pair << 32);      /* lanes 0-3 */
}
42

  
43
/* Read a signed 32-bit value through p.  The access is a plain
   dereference, so p must be suitably aligned for int32_t.  */
static inline int32_t ldl(const void* p)
{
    const int32_t *src = p;

    return *src;
}
47
/* Read an unsigned 64-bit value through p.  The access is a plain
   dereference, so p must be suitably aligned for uint64_t.  */
static inline uint64_t ldq(const void* p)
{
    const uint64_t *src = p;

    return *src;
}
51
/* FIXME ccc doesn't seem to get it? Use inline asm?  */
52
/* Read the aligned 64-bit word that contains the byte at p: the low
   three address bits are cleared before the load.
   The mask is built in uintptr_t rather than unsigned long so that no
   upper address bits are lost where unsigned long is narrower than a
   pointer.  */
static inline uint64_t ldq_u(const void* p)
{
    const uintptr_t aligned = (uintptr_t) p & ~(uintptr_t) 7;

    return *(const uint64_t*) aligned;
}
56
/* Write the 32-bit value l through p.  The access is a plain
   dereference, so p must be suitably aligned for uint32_t.  */
static inline void stl(uint32_t l, void* p)
{
    uint32_t *dst = p;

    *dst = l;
}
60
/* Write the 64-bit value l through p.  The access is a plain
   dereference, so p must be suitably aligned for uint64_t.  */
static inline void stq(uint64_t l, void* p)
{
    uint64_t *dst = p;

    *dst = l;
}
64

  
65
#ifdef __GNUC__
66
/* Define a static inline function `name' that wraps the one-operand
   instruction of the same mnemonic: the operand is passed in a register
   and the result is returned from a register.
   __asm__ is used instead of asm so the header also compiles when GCC
   runs in a strict ISO mode, where plain `asm' is not a keyword.  */
#define OPCODE1(name)                                           \
static inline uint64_t name(uint64_t l)                         \
{                                                               \
    uint64_t r;                                                 \
    __asm__ (#name " %1, %0" : "=r" (r) : "r" (l));             \
    return r;                                                   \
}
73

  
74
/* Define a static inline function `name' that wraps the two-operand
   instruction of the same mnemonic.  The first operand is always in a
   register; the "rI" constraint lets the second be a register or an
   immediate.
   __asm__ is used instead of asm so the header also compiles when GCC
   runs in a strict ISO mode, where plain `asm' is not a keyword.  */
#define OPCODE2(name)                                           \
static inline uint64_t name(uint64_t l1, uint64_t l2)           \
{                                                               \
    uint64_t r;                                                 \
    __asm__ (#name " %1, %2, %0" : "=r" (r) : "r" (l1), "rI" (l2)); \
    return r;                                                   \
}
81

  
82
/* We don't want gcc to move this around or combine it with another
83
   rpcc, so mark it volatile.  */
84
/* Execute the rpcc instruction and return the value it leaves in the
   destination register.  The statement is volatile so the compiler
   neither moves it nor merges it with another rpcc.
   The __asm__/__volatile__ spellings keep this usable when GCC runs in
   a strict ISO mode, where plain `asm' is not a keyword.  */
static inline uint64_t rpcc(void)
{
    uint64_t r;

    __asm__ __volatile__ ("rpcc %0" : "=r" (r));
    return r;
}
90

  
91
/* Load a 64-bit value from an address with no alignment guarantee.
   Reading through a packed struct makes the compiler generate an
   unaligned-safe access.  The member is uint64_t (not unsigned long) so
   the width always matches the declared return type.  */
static inline uint64_t uldq(const void* v)
{
    struct unaligned_u64 {
        uint64_t q;
    } __attribute__((packed));

    return ((const struct unaligned_u64*) v)->q;
}
99

  
100
#elif defined(__DECC)		/* Compaq "ccc" compiler */
101

  
102
#include <c_asm.h>
103
/* DEC C variant: define a static inline function `name' that issues the
   one-operand instruction of the same mnemonic through the asm()
   facility from <c_asm.h>.
   NOTE(review): %a0 / %v0 presumably name the first argument register
   and the return-value register -- confirm against the c_asm.h docs.  */
#define OPCODE1(name)							\
104
static inline uint64_t name(uint64_t l)					\
105
{									\
106
    return asm (#name " %a0, %v0", l);					\
107
}
108

  
109
/* DEC C variant: define a static inline function `name' that issues the
   two-operand instruction of the same mnemonic through the asm()
   facility from <c_asm.h>.
   NOTE(review): %a0 / %a1 / %v0 presumably name the first two argument
   registers and the return-value register -- confirm against the
   c_asm.h docs.  */
#define OPCODE2(name)							\
110
static inline uint64_t name(uint64_t l1, uint64_t l2)			\
111
{									\
112
    return asm (#name " %a0, %a1, %v0", l1, l2);			\
113
}
114

  
115
/* DEC C variant: issue the rpcc instruction and return its result.
   NOTE(review): unlike the GCC version there is no explicit volatile
   marker here -- confirm the compiler does not merge or move calls.  */
static inline uint64_t rpcc(void)
116
{
117
    return asm  ("rpcc %v0");
118
}
119

  
120
/* DEC C variant: load a 64-bit value from an address with no alignment
   guarantee, using the compiler's __unaligned pointer qualifier.  */
static inline uint64_t uldq(const void* v)
121
{
122
    return *(const __unaligned uint64_t *) v;
123
}
124

  
125
#endif
126

  
127
OPCODE1(amask);
128
OPCODE1(unpkbw);
129
OPCODE1(pkwb);
130
OPCODE2(extql);
131
OPCODE2(extqh);
132
OPCODE2(zap);
133
OPCODE2(cmpbge);
134
OPCODE2(minsw4);
135
OPCODE2(minuw4);
136
OPCODE2(minub8);
137
OPCODE2(maxsw4);
138
OPCODE2(maxuw4);
139
OPCODE2(perr);
140

  
141
#endif /* LIBAVCODEC_ALPHA_ASM_H */
libavcodec/alpha/dsputil_alpha.c
1
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19

  
20
#include "asm.h"
21
#include "../dsputil.h"
22

  
23
void simple_idct_axp(DCTELEM *block);
24

  
25
/* Store an 8x8 block of 16-bit coefficients as 8-bit pixels.
   Each row is handled as two groups of four words: every word is
   clamped to [0, 255] with maxsw4/minsw4, packed to bytes with pkwb and
   written as one 32-bit store.  `pixels' advances by line_size per row,
   `block' by 8 coefficients.  */
static void put_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
				   int line_size)
{
    int row, half;

    for (row = 8; row > 0; row--) {
	for (half = 0; half < 8; half += 4) {
	    UINT64 words = ldq(block + half);

	    words = maxsw4(words, 0);			/* lower bound 0 */
	    words = minsw4(words, WORD_VEC(0x00ff));	/* upper bound 255 */
	    stl(pkwb(words), pixels + half);
	}

	block  += 8;
	pixels += line_size;
    }
}
46

  
47
/* Add an 8x8 block of 16-bit coefficients to the 8-bit pixels already
   in place and store the sums clamped to [0, 255].
   Four words are processed at a time:
     - clear each word's top bit so the packed add cannot carry into the
       neighbouring word;
     - add the pixels (expanded to words by unpkbw) and clear the top
       bit again (it is set for sums such as -2 + 3);
     - negative sums are now large values: minuw4 caps them at 0x4000
       and masking 0x4000 off turns them into 0;
     - minsw4 clamps to 255, pkwb packs and stl writes four pixels.  */
static void add_pixels_clamped_axp(const DCTELEM *block, UINT8 *pixels,
				   int line_size)
{
    int row, half;

    for (row = 8; row > 0; row--) {
	for (half = 0; half < 8; half += 4) {
	    UINT64 sum = ldq(block + half);

	    sum &= ~WORD_VEC(0x8000);
	    sum += unpkbw(ldl(pixels + half));
	    sum &= ~WORD_VEC(0x8000);
	    sum  = minuw4(sum, WORD_VEC(0x4000));
	    sum &= ~WORD_VEC(0x4000);
	    sum  = minsw4(sum, WORD_VEC(0x00ff));
	    stl(pkwb(sum), pixels + half);
	}

	block  += 8;
	pixels += line_size;
    }
}
77

  
78
/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
79
   Since the immediate result could be greater than 255, we do the
80
   shift first. The result is too low by one if the bytes were both
81
   odd, so we need to add (l1 & l2) & BYTE_VEC(0x01).  */
82
/* Truncating average of eight byte pairs: (b1 + b2) >> 1 per lane.
   Each input is halved first so the lane sum stays within a byte; the
   unit lost when both bytes are odd is added back.  */
static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
{
    const UINT64 lsb   = BYTE_VEC(0x01);
    const UINT64 half1 = (l1 & ~lsb) >> 1;
    const UINT64 half2 = (l2 & ~lsb) >> 1;

    return half1 + half2 + (l1 & l2 & lsb);
}
89

  
90
/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
91
   The '1' only has an effect when one byte is even and the other odd,
92
   i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
93
   Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01).  */
94
/* Rounding average of eight byte pairs: (b1 + b2 + 1) >> 1 per lane.
   Each input is halved first so the lane sum stays within a byte; one
   is added back whenever at least one of the two bytes is odd.  */
static inline UINT64 avg2(UINT64 l1, UINT64 l2)
{
    const UINT64 lsb   = BYTE_VEC(0x01);
    const UINT64 half1 = (l1 & ~lsb) >> 1;
    const UINT64 half2 = (l2 & ~lsb) >> 1;

    return half1 + half2 + ((l1 | l2) & lsb);
}
101

  
102
/* Rounding average of four byte vectors: (a + b + c + d + 2) >> 2 per
   lane.  The upper six bits of every byte are quartered and summed on
   their own; the low two bits are summed separately with the rounding
   constant, divided by four and masked back to two bits per lane.  */
static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    const UINT64 low2 = BYTE_VEC(0x03);
    UINT64 high, low;

    high  = (l1 & ~low2) >> 2;
    high += (l2 & ~low2) >> 2;
    high += (l3 & ~low2) >> 2;
    high += (l4 & ~low2) >> 2;

    low  = l1 & low2;
    low += l2 & low2;
    low += l3 & low2;
    low += l4 & low2;
    low += BYTE_VEC(0x02);
    low  = (low >> 2) & low2;

    return high + low;
}
115

  
116
/* Average of four byte vectors with the smaller rounding constant:
   (a + b + c + d + 1) >> 2 per lane.  The upper six bits of every byte
   are quartered and summed on their own; the low two bits are summed
   separately with the constant, divided by four and masked back to two
   bits per lane.  */
static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
{
    const UINT64 low2 = BYTE_VEC(0x03);
    UINT64 high, low;

    high  = (l1 & ~low2) >> 2;
    high += (l2 & ~low2) >> 2;
    high += (l3 & ~low2) >> 2;
    high += (l4 & ~low2) >> 2;

    low  = l1 & low2;
    low += l2 & low2;
    low += l3 & low2;
    low += l4 & low2;
    low += BYTE_VEC(0x01);
    low  = (low >> 2) & low2;

    return high + low;
}
129

  
130
/* Instantiate the pixops.h templates as put_pixels_axp,
   put_pixels_x2_axp, put_pixels_y2_axp and put_pixels_xy2_axp:
   destination bytes are UINT8, averages use the rounding avg2/avg4 and
   each result is written with a single stq.  The macros are undefined
   again afterwards so the header can be included with other settings.  */
#define PIXOPNAME(suffix) put ## suffix
131
#define BTYPE UINT8
132
#define AVG2 avg2
133
#define AVG4 avg4
134
#define STORE(l, b) stq(l, b)
135
#include "pixops.h"
136
#undef PIXOPNAME
137
#undef BTYPE
138
#undef AVG2
139
#undef AVG4
140
#undef STORE
141

  
142
/* Instantiate the pixops.h templates as put_no_rnd_pixels_axp,
   put_no_rnd_pixels_x2_axp, put_no_rnd_pixels_y2_axp and
   put_no_rnd_pixels_xy2_axp: same as the put_ set but averaging with
   avg2_no_rnd/avg4_no_rnd.  The macros are undefined again afterwards
   so the header can be included with other settings.  */
#define PIXOPNAME(suffix) put_no_rnd ## suffix
143
#define BTYPE UINT8
144
#define AVG2 avg2_no_rnd
145
#define AVG4 avg4_no_rnd
146
#define STORE(l, b) stq(l, b)
147
#include "pixops.h"
148
#undef PIXOPNAME
149
#undef BTYPE
150
#undef AVG2
151
#undef AVG4
152
#undef STORE
153

  
154
/* The following functions are untested.  */
155
#if 0
156

  
157
/* Instantiate the pixops.h templates as avg_pixels*_axp: the computed
   value is averaged (AVG2) with the eight bytes already at the
   destination before being written back.
   STORE deliberately has no trailing semicolon, so `STORE(...);' at the
   use site expands to exactly one statement.  */
#define PIXOPNAME(suffix) avg ## suffix
#define BTYPE UINT8
#define AVG2 avg2
#define AVG4 avg4
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE

/* Same as above, instantiated as avg_no_rnd_pixels*_axp with the
   avg2_no_rnd/avg4_no_rnd averaging helpers.  */
#define PIXOPNAME(suffix) avg_no_rnd ## suffix
#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
#define STORE(l, b) stq(AVG2(l, ldq(b)), b)
#include "pixops.h"
#undef PIXOPNAME
#undef BTYPE
#undef AVG2
#undef AVG4
#undef STORE
180

  
181
/* Instantiate the pixops.h templates as sub_pixels*_axp with a DCTELEM
   destination: STORE splits the 64-bit value l into its eight bytes
   (lowest byte first) and subtracts each from the corresponding block
   element.
   NOTE(review): pixops.h advances `block' by line_size elements per
   row -- confirm that stride is what is wanted for a DCTELEM block
   before enabling this code.  */
#define PIXOPNAME(suffix) sub ## suffix
182
#define BTYPE DCTELEM
183
#define AVG2 avg2
184
#define AVG4 avg4
185
#define STORE(l, block) do {		\
186
    UINT64 xxx = l;			\
187
    (block)[0] -= (xxx >>  0) & 0xff;	\
188
    (block)[1] -= (xxx >>  8) & 0xff;	\
189
    (block)[2] -= (xxx >> 16) & 0xff;	\
190
    (block)[3] -= (xxx >> 24) & 0xff;	\
191
    (block)[4] -= (xxx >> 32) & 0xff;	\
192
    (block)[5] -= (xxx >> 40) & 0xff;	\
193
    (block)[6] -= (xxx >> 48) & 0xff;	\
194
    (block)[7] -= (xxx >> 56) & 0xff;	\
195
} while (0)
196
#include "pixops.h"
197
#undef PIXOPNAME
198
#undef BTYPE
199
#undef AVG2
200
#undef AVG4
201
#undef STORE
202

  
203
#endif
204

  
205
void dsputil_init_alpha(void)
206
{
207
    put_pixels_tab[0] = put_pixels_axp;
208
    put_pixels_tab[1] = put_pixels_x2_axp;
209
    put_pixels_tab[2] = put_pixels_y2_axp;
210
    put_pixels_tab[3] = put_pixels_xy2_axp;
211

  
212
    put_no_rnd_pixels_tab[0] = put_pixels_axp;
213
    put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp;
214
    put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
215
    put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
216

  
217
    /* amask clears all bits that correspond to present features.  */
218
    if (amask(AMASK_MVI) == 0) {
219
	fprintf(stderr, "MVI extension detected\n");
220
	put_pixels_clamped = put_pixels_clamped_axp;
221
	add_pixels_clamped = add_pixels_clamped_axp;
222
    }
223
}
libavcodec/alpha/mpegvideo_alpha.c
1
/*
2
 * Alpha optimized MPEG video routines
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19

  
20
#include "asm.h"
21
#include "../dsputil.h"
22
#include "../mpegvideo.h"
23

  
24
extern UINT8 zigzag_end[64];
25

  
26
static void dct_unquantize_h263_axp(MpegEncContext *s, 
27
				    DCTELEM *block, int n, int qscale)
28
{
29
    int i, level;
30
    UINT64 qmul, qadd;
31
    if (s->mb_intra) {
32
        if (n < 4) 
33
            block[0] = block[0] * s->y_dc_scale;
34
        else
35
            block[0] = block[0] * s->c_dc_scale;
36
	/* Catch up to aligned point.  */
37
	qmul = s->qscale << 1;
38
	qadd = (s->qscale - 1) | 1;
39
	for (i = 1; i < 4; ++i) {
40
	    level = block[i];
41
	    if (level) {
42
		if (level < 0) {
43
		    level = level * qmul - qadd;
44
		} else {
45
		    level = level * qmul + qadd;
46
		}
47
		block[i] = level;
48
	    }
49
	}
50
	block += 4;
51
	i = 60 / 4;
52
    } else {
53
        i = zigzag_end[s->block_last_index[n]] / 4;
54
    }
55
    qmul = s->qscale << 1;
56
    qadd = WORD_VEC((qscale - 1) | 1);
57
    do {
58
	UINT64 levels, negmask, zeromask, corr;
59
	levels = ldq(block);
60
	if (levels == 0)
61
	    continue;
62
	zeromask = cmpbge(0, levels);
63
	zeromask &= zeromask >> 1;
64
	/* Negate all negative words.  */
65
	negmask = maxsw4(levels, WORD_VEC(0xffff)); /* negative -> ffff (-1) */
66
	negmask = minsw4(negmask, 0);		    /* positive -> 0000 (0) */
67
	corr    = negmask & WORD_VEC(0x0001); /* twos-complement correction */
68
	levels ^= negmask;
69
	levels += corr;
70

  
71
	levels = levels * qmul;
72
	levels += zap(qadd, zeromask);
73

  
74
	/* Re-negate negative words.  */
75
	levels -= corr;
76
	levels ^= negmask;
77

  
78
	stq(levels, block);
79
    } while (block += 4, --i);
80
}
81

  
82
/* Install the Alpha H.263 dequantiser into the context.  It is used
   only when amask reports the MVI extension as present (the bit reads
   back as zero) and the output format is H.263.  */
void MPV_common_init_axp(MpegEncContext *s)
{
    if (amask(AMASK_MVI) == 0 && s->out_format == FMT_H263)
	s->dct_unquantize = dct_unquantize_h263_axp;
}
libavcodec/alpha/pixops.h
1
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19

  
20
/* This file is intended to be #included with proper definitions of
21
 * PIXOPNAME, BTYPE, AVG2, AVG4 and STORE.  */
22

  
23
/* Copy template: for each of h rows, load eight source bytes and hand
   them to STORE.  Both pointers advance by line_size per row.  The
   alignment of `pixels' is tested once up front so the common aligned
   case can use the plain ldq load.  */
static void PIXOPNAME(_pixels_axp)(BTYPE *block, const UINT8 *pixels,
				   int line_size, int h)
{
    const int misaligned = ((size_t) pixels & 0x7) != 0;

    if (!misaligned) {
	do {
	    STORE(ldq(pixels), block);
	    block  += line_size;
	    pixels += line_size;
	} while (--h != 0);
	return;
    }

    do {
	STORE(uldq(pixels), block);
	block  += line_size;
	pixels += line_size;
    } while (--h != 0);
}
40

  
41
/* Horizontal half-pel template: each output byte is AVG2 of a source
   byte and its right-hand neighbour.  The neighbour vector is built by
   shifting the loaded quadword down one byte and inserting the ninth
   source byte, pixels[8], at the top.  */
static void PIXOPNAME(_pixels_x2_axp)(BTYPE *block, const UINT8 *pixels,
				      int line_size, int h)
{
    const int misaligned = ((size_t) pixels & 0x7) != 0;
    UINT64 here, right;

    if (!misaligned) {
	do {
	    here  = ldq(pixels);
	    right = here >> 8 | ((UINT64) pixels[8] << 56);
	    STORE(AVG2(here, right), block);
	    block  += line_size;
	    pixels += line_size;
	} while (--h != 0);
	return;
    }

    do {
	here  = uldq(pixels);
	right = here >> 8 | ((UINT64) pixels[8] << 56);
	STORE(AVG2(here, right), block);
	block  += line_size;
	pixels += line_size;
    } while (--h != 0);
}
66

  
67
/* Vertical half-pel template: each output row is AVG2 of a source row
   and the row below it.  The lower row is carried over to the next
   iteration, so every source row is loaded only once; h + 1 source
   rows are read in total.  */
static void PIXOPNAME(_pixels_y2_axp)(BTYPE *block, const UINT8 *pixels,
				      int line_size, int h)
{
    const int misaligned = ((size_t) pixels & 0x7) != 0;
    UINT64 upper, lower;

    if (!misaligned) {
	upper = ldq(pixels);
	do {
	    pixels += line_size;
	    lower = ldq(pixels);
	    STORE(AVG2(upper, lower), block);
	    block += line_size;
	    upper = lower;
	} while (--h != 0);
	return;
    }

    upper = uldq(pixels);
    do {
	pixels += line_size;
	lower = uldq(pixels);
	STORE(AVG2(upper, lower), block);
	block += line_size;
	upper = lower;
    } while (--h != 0);
}
94

  
95
/* This could be further sped up by recycling AVG4 intermediate
96
  results from the previous loop pass.  */
97
/* Diagonal half-pel template: each output byte is AVG4 of a 2x2 source
   neighbourhood.  For every row the loaded quadword and its
   one-byte-shifted companion (completed with pixels[8]) are kept and
   reused as the upper pair of the next iteration; h + 1 source rows are
   read in total.  */
static void PIXOPNAME(_pixels_xy2_axp)(BTYPE *block, const UINT8 *pixels,
				       int line_size, int h)
{
    const int misaligned = ((size_t) pixels & 0x7) != 0;
    UINT64 top, top_right, bottom, bottom_right;

    if (!misaligned) {
	top       = ldq(pixels);
	top_right = top >> 8 | ((UINT64) pixels[8] << 56);

	do {
	    pixels += line_size;
	    bottom       = ldq(pixels);
	    bottom_right = bottom >> 8 | ((UINT64) pixels[8] << 56);

	    STORE(AVG4(top, top_right, bottom, bottom_right), block);

	    block += line_size;
	    top       = bottom;
	    top_right = bottom_right;
	} while (--h != 0);
	return;
    }

    top       = uldq(pixels);
    top_right = top >> 8 | ((UINT64) pixels[8] << 56);

    do {
	pixels += line_size;
	bottom       = uldq(pixels);
	bottom_right = bottom >> 8 | ((UINT64) pixels[8] << 56);

	STORE(AVG4(top, top_right, bottom, bottom_right), block);

	block += line_size;
	top       = bottom;
	top_right = bottom_right;
    } while (--h != 0);
}
libavcodec/dsputil.c
497 497
    dsputil_init_mlib();
498 498
    use_permuted_idct = 0;
499 499
#endif
500
#ifdef ARCH_ALPHA
501
    dsputil_init_alpha();
502
    use_permuted_idct = 0;
503
#endif
500 504

  
501 505
#ifdef SIMPLE_IDCT
502 506
    if(ff_idct == simple_idct) use_permuted_idct=0;
libavcodec/dsputil.h
123 123

  
124 124
void dsputil_init_mlib(void);   
125 125

  
126
#elif defined(ARCH_ALPHA)
127

  
128
#define emms_c()
129
#define __align8 __attribute__ ((aligned (8)))
130

  
131
void dsputil_init_alpha(void);
132

  
126 133
#else
127 134

  
128 135
#define emms_c()
libavcodec/msmpeg4.c
460 460
	: "r" (scale)
461 461
	: "%eax", "%edx"
462 462
    );
463
#else    
463
#elif defined (ARCH_ALPHA)
464
    /* Divisions are extremely costly on Alpha; optimize the most
465
       common case.  */
466
    if (scale == 8) {
467
	a = (a + (8 >> 1)) / 8;
468
	b = (b + (8 >> 1)) / 8;
469
	c = (c + (8 >> 1)) / 8;
470
    } else {
471
	a = (a + (scale >> 1)) / scale;
472
	b = (b + (scale >> 1)) / scale;
473
	c = (c + (scale >> 1)) / scale;
474
    }
475
#else
464 476
    a = (a + (scale >> 1)) / scale;
465 477
    b = (b + (scale >> 1)) / scale;
466 478
    c = (c + (scale >> 1)) / scale;
libavcodec/simple_idct.c
23 23
#include <inttypes.h>
24 24

  
25 25
#include "simple_idct.h"
26
#include "../config.h"
26 27

  
27 28
#if 0
28 29
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
......
102 103
	return 1;
103 104
}
104 105

  
106
#ifdef ARCH_ALPHA
107
static int inline idctRowCondDC(int16_t *row)
108
{
109
	int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3;
110
	uint64_t *lrow = (uint64_t *) row;
111

  
112
	if (lrow[1] == 0) {
113
		if (lrow[0] == 0)
114
			return 0;
115
		if ((lrow[0] & ~0xffffULL) == 0) {
116
			uint64_t v;
117

  
118
			a0 = W4 * row[0];
119
			a0 += 1 << (ROW_SHIFT - 1);
120
			a0 >>= ROW_SHIFT;
121
			v = (uint16_t) a0;
122
			v += v << 16;
123
			v += v << 32;
124
			lrow[0] = v;
125
			lrow[1] = v;
126

  
127
			return 1;
128
		}
129
	}
130

  
131
	a0 = W4 * row[0];
132
	a1 = W4 * row[0];
133
	a2 = W4 * row[0];
134
	a3 = W4 * row[0];
135

  
136
	if (row[2]) {
137
		a0 += W2 * row[2];
138
		a1 += W6 * row[2];
139
		a2 -= W6 * row[2];
140
		a3 -= W2 * row[2];
141
	}
142

  
143
	if (row[4]) {
144
		a0 += W4 * row[4];
145
		a1 -= W4 * row[4];
146
		a2 -= W4 * row[4];
147
		a3 += W4 * row[4];
148
	}
149

  
150
	if (row[6]) {
151
		a0 += W6 * row[6];
152
		a1 -= W2 * row[6];
153
		a2 += W2 * row[6];
154
		a3 -= W6 * row[6];
155
	}
156

  
157
	a0 += 1 << (ROW_SHIFT - 1);
158
	a1 += 1 << (ROW_SHIFT - 1);
159
	a2 += 1 << (ROW_SHIFT - 1);
160
	a3 += 1 << (ROW_SHIFT - 1);
161

  
162
	if (row[1]) {
163
		b0 = W1 * row[1];
164
		b1 = W3 * row[1];
165
		b2 = W5 * row[1];
166
		b3 = W7 * row[1];
167
	} else {
168
		b0 = 0;
169
		b1 = 0;
170
		b2 = 0;
171
		b3 = 0;
172
	}
173

  
174
	if (row[3]) {
175
		b0 += W3 * row[3];
176
		b1 -= W7 * row[3];
177
		b2 -= W1 * row[3];
178
		b3 -= W5 * row[3];
179
	}
180

  
181
	if (row[5]) {
182
		b0 += W5 * row[5];
183
		b1 -= W1 * row[5];
184
		b2 += W7 * row[5];
185
		b3 += W3 * row[5];
186
	}
187

  
188
	if (row[7]) {
189
		b0 += W7 * row[7];
190
		b1 -= W5 * row[7];
191
		b2 += W3 * row[7];
192
		b3 -= W1 * row[7];
193
	}
194

  
195
	row[0] = (a0 + b0) >> ROW_SHIFT;
196
	row[1] = (a1 + b1) >> ROW_SHIFT;
197
	row[2] = (a2 + b2) >> ROW_SHIFT;
198
	row[3] = (a3 + b3) >> ROW_SHIFT;
199
	row[4] = (a3 - b3) >> ROW_SHIFT;
200
	row[5] = (a2 - b2) >> ROW_SHIFT;
201
	row[6] = (a1 - b1) >> ROW_SHIFT;
202
	row[7] = (a0 - b0) >> ROW_SHIFT;
203

  
204
	return 1;
205
}
206
#else  /* not ARCH_ALPHA */
105 207
static int inline idctRowCondDC (int16_t * row)
106 208
{
107 209
	int a0, a1, a2, a3, b0, b1, b2, b3;
......
147 249
	
148 250
	return 1;
149 251
}
252
#endif /* not ARCH_ALPHA */
150 253

  
151 254
static void inline idctCol (int16_t * col)
152 255
{
......
243 346
		b3 += - W1*col[8*7];
244 347
	}
245 348

  
349
#ifndef ARCH_ALPHA
246 350
	if(!(b0|b1|b2|b3)){
247 351
		col[8*0] = (a0) >> COL_SHIFT;
248 352
		col[8*7] = (a0) >> COL_SHIFT;
......
253 357
		col[8*3] = (a3) >> COL_SHIFT;
254 358
		col[8*4] = (a3) >> COL_SHIFT;
255 359
	}else{
360
#endif
256 361
		col[8*0] = (a0 + b0) >> COL_SHIFT;
257 362
		col[8*7] = (a0 - b0) >> COL_SHIFT;
258 363
		col[8*1] = (a1 + b1) >> COL_SHIFT;
......
261 366
		col[8*5] = (a2 - b2) >> COL_SHIFT;
262 367
		col[8*3] = (a3 + b3) >> COL_SHIFT;
263 368
		col[8*4] = (a3 - b3) >> COL_SHIFT;
369
#ifndef ARCH_ALPHA
264 370
	}
371
#endif
265 372
}
266 373

  
267 374
static void inline idctSparse2Col (int16_t * col)
......
337 444
	col[8*4] = (a3 - b3) >> COL_SHIFT;
338 445
}
339 446

  
447
#ifdef ARCH_ALPHA
448
/* If all rows but the first one are zero after row transformation,
449
   all rows will be identical after column transformation.  */
450
static inline void idctCol2(int16_t *col)
451
{
452
	int i;
453
	uint64_t l, r;
454
	uint64_t *lcol = (uint64_t *) col;
455

  
456
	for (i = 0; i < 8; ++i) {
457
		int a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
458

  
459
		a0 *= W4;
460
		col[0] = a0 >> COL_SHIFT;
461
		++col;
462
	}
463

  
464
	l = lcol[0];
465
	r = lcol[1];
466
	lcol[ 2] = l; lcol[ 3] = r;
467
	lcol[ 4] = l; lcol[ 5] = r;
468
	lcol[ 6] = l; lcol[ 7] = r;
469
	lcol[ 8] = l; lcol[ 9] = r;
470
	lcol[10] = l; lcol[11] = r;
471
	lcol[12] = l; lcol[13] = r;
472
	lcol[14] = l; lcol[15] = r;
473
}
474
#endif
340 475

  
341 476
void simple_idct (short *block)
342 477
{
......
411 546
		for(i=0; i<8; i++)
412 547
			idctSparse2Col(block + i);
413 548
	}
414
#else		
549
#elif defined(ARCH_ALPHA)
550
	int shortcut = 1;
551

  
552
	for (i = 0; i < 8; i++) {
553
		int anynonzero = idctRowCondDC(block + 8 * i);
554
		if (i > 0 && anynonzero)
555
			shortcut = 0;
556
	}
557

  
558
	if (shortcut) {
559
		idctCol2(block);
560
	} else {
561
		for (i = 0; i < 8; i++)
562
			idctSparseCol(block + i);
563
	}
564
#else
415 565
	for(i=0; i<8; i++)
416 566
		idctRowCondDC(block + i*8);
417 567
	

Also available in: Unified diff