/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
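
/* For reference, a minimal C sketch of what this routine computes (the
 * function name pix_abs16x16_c below is illustrative, not part of this
 * file): a 16x16 sum of absolute differences. Each MVI perr instruction
 * sums the absolute differences of the eight byte pairs in two quadwords,
 * so one 16-pixel row costs two perrs.
 *
 *     int pix_abs16x16_c(uint8_t *pix1, uint8_t *pix2, int line_size)
 *     {
 *         int sum = 0, x, y;
 *         for (y = 0; y < 16; y++) {
 *             for (x = 0; x < 16; x++)
 *                 sum += abs(pix1[x] - pix2[x]);
 *             pix1 += line_size;
 *             pix2 += line_size;
 *         }
 *         return sum;
 *     }
 */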
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

#ifdef HAVE_GPROF
        lda     AT, _mcount
        jsr     AT, (AT), _mcount
#endif

        and     a1, 7, t0       # pix2 misaligned?
        clr     v0              # sum = 0
        lda     a3, 16          # h = 16
        beq     t0, $aligned
        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */
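
        /* The ldq_u/extql/extqh/or sequences below are the usual Alpha
           idiom for an unaligned 16-byte load. With sh = 8 * (a1 & 7),
           the merge is roughly, in C terms (a hypothetical sketch):
               left  = (left_u >> sh) | (mid     << (64 - sh));
               right = (mid    >> sh) | (right_u << (64 - sh));
           extql/extqh take the shift from the low bits of a1 directly.
           Note the extracts use a1 after it has already been advanced,
           which works only if line_size keeps a1's alignment constant,
           i.e. line_size is assumed to be a multiple of 8.  */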

        /* load line 0 */
        ldq_u   t0, 0(a1)       # left_u
        ldq_u   t1, 8(a1)       # mid
        ldq_u   t2, 16(a1)      # right_u
        ldq     t3, 0(a0)       # ref left
        ldq     t4, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a1)       # left_u
        ldq_u   t6, 8(a1)       # mid
        ldq_u   t7, 16(a1)      # right_u
        ldq     t8, 0(a0)       # ref left
        ldq     t9, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        addq    a1, a2, a1      # pix2
        /* calc line 0 */
        extql   t0, a1, t0      # left lo
        extqh   t1, a1, ta      # left hi
        extql   t1, a1, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a1, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a1, t5      # left lo
        extqh   t6, a1, ta      # left hi
        extql   t6, a1, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a1, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a3,  2, a3      # h -= 2
        bne     a3, $unaligned
        ret

        .align 4
$aligned:
        /* load line 0 */
        ldq     t0, 0(a1)       # left
        ldq     t1, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t2, 0(a0)       # ref left
        ldq     t3, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 1 */
        ldq     t4, 0(a1)       # left
        ldq     t5, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     t6, 0(a0)       # ref left
        ldq     t7, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 2 */
        ldq     t8, 0(a1)       # left
        ldq     t9, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     ta, 0(a0)       # ref left
        ldq     tb, 8(a0)       # ref right
        addq    a0, a2, a0      # pix1
        /* load line 3 */
        ldq     tc, 0(a1)       # left
        ldq     td, 8(a1)       # right
        addq    a1, a2, a1      # pix2
        ldq     te, 0(a0)       # ref left
        ldq     tf, 8(a0)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a0, a2, a0      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, tf, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a3,  4, a3      # h -= 4
        bne     a3, $aligned
        ret
        .end pix_abs16x16_mvi_asm