1 
/* 

2 
* Alpha optimized DSP utils 
3 
* Copyright (c) 2002 Falk Hueffner <falk@debian.org> 
4 
* 
5 
* This program is free software; you can redistribute it and/or modify 
6 
* it under the terms of the GNU General Public License as published by 
7 
* the Free Software Foundation; either version 2 of the License, or 
8 
* (at your option) any later version. 
9 
* 
10 
* This program is distributed in the hope that it will be useful, 
11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of 
12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
13 
* GNU General Public License for more details. 
14 
* 
15 
* You should have received a copy of the GNU General Public License 
16 
* along with this program; if not, write to the Free Software 
17 
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
18 
*/ 
19  
20 
#include "regdef.h" 
21  
22 
/* Some nicer register names: four more scratch registers on top of t0-t9.
   td is AT, which is used explicitly here (see .set noat below). */
23
#define ta t10
24
#define tb t11
25
#define tc t12
26
#define td AT
27
/* Danger: these overlap with the argument list and the return value.
   te (a5) and tf (a4) are used as extra scratch registers by the aligned
   loop of pix_abs16x16_mvi_asm.  tg (a3) and th (v0) name the registers
   that routine uses as its row counter and its result; the tg/th aliases
   themselves are not referenced in the code visible here. */
28
#define te a5
29
#define tf a4
30
#define tg a3
31
#define th v0
32

33
/* AT is named explicitly (td, and the optional gprof hook), so the
   assembler must not treat it as its own temporary. */
.set noat
34
/* Keep the instruction order exactly as written; it is hand-scheduled. */
.set noreorder
35
/* Target CPU, matching the scheduling note on pix_abs16x16_mvi_asm.
   NOTE(review): presumably also what makes the MVI perr instruction
   available to the assembler - confirm. */
.arch pca56
36
.text
37  
38 
/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * Accumulates the perr results ("error") between the 16 bytes per row at
 * pix1 and the 16 bytes per row at pix2 over 16 rows, and returns the
 * total in v0.
 *
 * In:     a0 = pix1, always read with plain ldq
 *         a1 = pix2, may be unaligned; its low three address bits are
 *              tested once on entry to pick the loop
 *         a2 = line_size, added to both pointers after every row
 * Out:    v0 = accumulated error
 * Writes: v0, a0, a1, a3 (row counter), t0-t9, ta-td (t10, t11, t12, AT),
 *         and in the aligned loop also te/tf (a5, a4).
 *
 * NOTE(review): the alignment of pix2 is only examined before the first
 * row, and the unaligned loop takes its byte shift from a1 after a1 has
 * been advanced.  Both presumably rely on line_size being a multiple of
 * 8 - confirm against the callers.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
49
.align 4
50
.globl pix_abs16x16_mvi_asm
51
.ent pix_abs16x16_mvi_asm
52
pix_abs16x16_mvi_asm:
53
.frame sp, 0, ra, 0
54
.prologue 0
55
56
/* Optional profiling hook: call _mcount through AT when HAVE_GPROF is set. */
#ifdef HAVE_GPROF
57
lda AT, _mcount
58
jsr AT, (AT), _mcount
59
#endif
60
61
/* t0 = low three address bits of pix2; zero selects the aligned loop. */
and a1, 7, t0
62
clr v0 # running error total
63
lda a3, 16 # rows left to process
64
beq t0, $aligned
65
.align 4
66
/* Unaligned pix2: two rows per iteration.  Each row of pix2 is fetched as
   three ldq_u quadwords and reassembled into two with extql/extqh/or. */
$unaligned:
67
/* Registers:
   line 0:
   t0: left_u -> left lo -> left
   t1: mid
   t2: right_u -> right hi -> right
   t3: ref left
   t4: ref right
   line 1:
   t5: left_u -> left lo -> left
   t6: mid
   t7: right_u -> right hi -> right
   t8: ref left
   t9: ref right
   temp:
   ta: left hi
   tb: right lo
   tc: error left
   td: error right */
85
86
/* load line 0 */
87
ldq_u t0, 0(a1) # left_u
88
ldq_u t1, 8(a1) # mid
89
ldq_u t2, 16(a1) # right_u
90
ldq t3, 0(a0) # ref left (pix1)
91
ldq t4, 8(a0) # ref right (pix1)
92
addq a0, a2, a0 # pix1 += line_size
93
addq a1, a2, a1 # pix2 += line_size
94
/* load line 1 */
95
ldq_u t5, 0(a1) # left_u
96
ldq_u t6, 8(a1) # mid
97
ldq_u t7, 16(a1) # right_u
98
ldq t8, 0(a0) # ref left (pix1)
99
ldq t9, 8(a0) # ref right (pix1)
100
addq a0, a2, a0 # pix1 += line_size
101
addq a1, a2, a1 # pix2 += line_size
102
/* calc line 0 */
/* a1 already points two rows further on; only its low address bits are
   used by extql/extqh as the byte shift (see the NOTE(review) in the
   function header). */
103
extql t0, a1, t0 # left lo
104
extqh t1, a1, ta # left hi
105
extql t1, a1, tb # right lo
106
or t0, ta, t0 # left
107
extqh t2, a1, t2 # right hi
108
perr t3, t0, tc # error left
109
or t2, tb, t2 # right
110
perr t4, t2, td # error right
111
addq v0, tc, v0 # add error left
112
addq v0, td, v0 # add error right
113
/* calc line 1 */
114
extql t5, a1, t5 # left lo
115
extqh t6, a1, ta # left hi
116
extql t6, a1, tb # right lo
117
or t5, ta, t5 # left
118
extqh t7, a1, t7 # right hi
119
perr t8, t5, tc # error left
120
or t7, tb, t7 # right
121
perr t9, t7, td # error right
122
addq v0, tc, v0 # add error left
123
addq v0, td, v0 # add error right
124
/* loop */
125
subq a3, 2, a3 # h -= 2
126
bne a3, $unaligned
127
ret
128
129
.align 4
/* Aligned pix2: four rows per iteration.  All sixteen quadwords are loaded
   before the first perr, so every load is issued well before its use. */
$aligned:
130
/* load line 0 */
131
ldq t0, 0(a1) # left (pix2)
132
ldq t1, 8(a1) # right (pix2)
133
addq a1, a2, a1 # pix2 += line_size
134
ldq t2, 0(a0) # ref left (pix1)
135
ldq t3, 8(a0) # ref right (pix1)
136
addq a0, a2, a0 # pix1 += line_size
137
/* load line 1 */
138
ldq t4, 0(a1) # left (pix2)
139
ldq t5, 8(a1) # right (pix2)
140
addq a1, a2, a1 # pix2 += line_size
141
ldq t6, 0(a0) # ref left (pix1)
142
ldq t7, 8(a0) # ref right (pix1)
143
addq a0, a2, a0 # pix1 += line_size
144
/* load line 2 */
145
ldq t8, 0(a1) # left (pix2)
146
ldq t9, 8(a1) # right (pix2)
147
addq a1, a2, a1 # pix2 += line_size
148
ldq ta, 0(a0) # ref left (pix1)
149
ldq tb, 8(a0) # ref right (pix1)
150
addq a0, a2, a0 # pix1 += line_size
151
/* load line 3 */
152
ldq tc, 0(a1) # left (pix2)
153
ldq td, 8(a1) # right (pix2)
154
addq a1, a2, a1 # pix2 += line_size
155
ldq te, 0(a0) # ref left (pix1), te is a5
156
ldq tf, 8(a0) # ref right (pix1), tf is a4
157
/* calc line 0 */
/* t0 and t1 are reused as the left/right error temporaries from here on. */
158
perr t0, t2, t0 # error left
159
addq a0, a2, a0 # pix1 += line_size (line 3 step, placed between the perr ops)
160
perr t1, t3, t1 # error right
161
addq v0, t0, v0 # add error left
162
/* calc line 1 */
163
perr t4, t6, t0 # error left
164
addq v0, t1, v0 # add error right (line 0)
165
perr t5, t7, t1 # error right
166
addq v0, t0, v0 # add error left
167
/* calc line 2 */
168
perr t8, ta, t0 # error left
169
addq v0, t1, v0 # add error right (line 1)
170
perr t9, tb, t1 # error right
171
addq v0, t0, v0 # add error left
172
/* calc line 3 */
173
perr tc, te, t0 # error left
174
addq v0, t1, v0 # add error right (line 2)
175
perr td, tf, t1 # error right
176
addq v0, t0, v0 # add error left
177
addq v0, t1, v0 # add error right
178
/* loop */
179
subq a3, 4, a3 # h -= 4
180
bne a3, $aligned
181
ret
182
.end pix_abs16x16_mvi_asm