ffmpeg / libavcodec / alpha / motion_est_mvi_asm.S @ 2912e87a
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "regdef.h"

/* Some nicer register names.  */
#define ta t10
#define tb t11
#define tc t12
#define td AT
/* Danger: these overlap with the argument list and the return value */
#define te a5
#define tf a4
#define tg a3
#define th v0

        .set noat
        .set noreorder
        .arch pca56
        .text

/*****************************************************************************
 * int pix_abs16x16_mvi_asm(void *v, uint8_t *pix1, uint8_t *pix2,
 *                          int line_size, int h)
 *
 * Returns the sum of absolute differences between two 16-pixel-wide
 * blocks, h rows high.  The first argument (the context pointer) is not
 * used and a0 is clobbered as scratch; pix1 arrives in a1, pix2 in a2,
 * line_size in a3 and h in a4.
 *
 * This code is written with a pca56 in mind. For ev6, one should
 * really take the increased latency of 3 cycles for MVI instructions
 * into account.
 *
 * It is important to keep the loading and the first use of a register as
 * far apart as possible, because if a register is accessed before it
 * has been fetched from memory, the CPU will stall.
 */
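
/*
 * For reference, a rough C sketch of the computation (not part of the
 * build; the function name and the use of abs() from <stdlib.h> are only
 * illustrative).  Each perr instruction adds up the absolute differences
 * of eight byte pairs, so two perrs cover one 16-pixel row:
 *
 *     static int pix_abs16x16_ref(const uint8_t *pix1, const uint8_t *pix2,
 *                                 int line_size, int h)
 *     {
 *         int sum = 0;
 *         for (int y = 0; y < h; y++) {
 *             for (int x = 0; x < 16; x++)
 *                 sum += abs(pix1[x] - pix2[x]);   // per-byte difference
 *             pix1 += line_size;                   // advance to next row
 *             pix2 += line_size;
 *         }
 *         return sum;
 *     }
 */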
        .align 4
        .globl pix_abs16x16_mvi_asm
        .ent pix_abs16x16_mvi_asm
pix_abs16x16_mvi_asm:
        .frame sp, 0, ra, 0
        .prologue 0

        and     a2, 7, t0       # pix2 misalignment within a quadword
        clr     v0              # clear the SAD accumulator
        beq     t0, $aligned    # pix2 is 8-byte aligned: take the fast path
        .align 4
$unaligned:
        /* Registers:
           line 0:
           t0:  left_u -> left lo -> left
           t1:  mid
           t2:  right_u -> right hi -> right
           t3:  ref left
           t4:  ref right
           line 1:
           t5:  left_u -> left lo -> left
           t6:  mid
           t7:  right_u -> right hi -> right
           t8:  ref left
           t9:  ref right
           temp:
           ta:  left hi
           tb:  right lo
           tc:  error left
           td:  error right  */
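
        /* A sketch (for reference only) of what the unaligned merge below
           computes, assuming line_size is a multiple of 8 so that the low
           three bits of a2 stay constant across the loop (which is why the
           extracts can use the already advanced a2).  ldq_u fetches the
           aligned quadword containing the addressed byte, and extql/extqh
           shift two adjacent quadwords so that OR-ing them reassembles the
           unaligned data; in C, with s = a2 & 7 (nonzero on this path):

               left  = (left_u >> (8 * s)) | (mid     << (8 * (8 - s)));
               right = (mid    >> (8 * s)) | (right_u << (8 * (8 - s)));

           perr then sums the absolute differences of the eight byte pairs
           in (ref left, left) and (ref right, right).  */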

        /* load line 0 */
        ldq_u   t0, 0(a2)       # left_u
        ldq_u   t1, 8(a2)       # mid
        ldq_u   t2, 16(a2)      # right_u
        ldq     t3, 0(a1)       # ref left
        ldq     t4, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* load line 1 */
        ldq_u   t5, 0(a2)       # left_u
        ldq_u   t6, 8(a2)       # mid
        ldq_u   t7, 16(a2)      # right_u
        ldq     t8, 0(a1)       # ref left
        ldq     t9, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        addq    a2, a3, a2      # pix2
        /* calc line 0 */
        extql   t0, a2, t0      # left lo
        extqh   t1, a2, ta      # left hi
        extql   t1, a2, tb      # right lo
        or      t0, ta, t0      # left
        extqh   t2, a2, t2      # right hi
        perr    t3, t0, tc      # error left
        or      t2, tb, t2      # right
        perr    t4, t2, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* calc line 1 */
        extql   t5, a2, t5      # left lo
        extqh   t6, a2, ta      # left hi
        extql   t6, a2, tb      # right lo
        or      t5, ta, t5      # left
        extqh   t7, a2, t7      # right hi
        perr    t8, t5, tc      # error left
        or      t7, tb, t7      # right
        perr    t9, t7, td      # error right
        addq    v0, tc, v0      # add error left
        addq    v0, td, v0      # add error right
        /* loop */
        subq    a4,  2, a4      # h -= 2
        bne     a4, $unaligned
        ret
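
/* The aligned path: pix2 is known to be 8-byte aligned here, so plain ldq
   loads can be used and four rows are handled per iteration.  h is assumed
   to be a multiple of 4 in this loop (and of 2 in the unaligned loop above);
   pix1 is expected to be 8-byte aligned in both paths, since it is always
   loaded with ldq.  */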

        .align 4
$aligned:
        /* load line 0 */
        ldq     t0, 0(a2)       # left
        ldq     t1, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t2, 0(a1)       # ref left
        ldq     t3, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 1 */
        ldq     t4, 0(a2)       # left
        ldq     t5, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     t6, 0(a1)       # ref left
        ldq     t7, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 2 */
        ldq     t8, 0(a2)       # left
        ldq     t9, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     ta, 0(a1)       # ref left
        ldq     tb, 8(a1)       # ref right
        addq    a1, a3, a1      # pix1
        /* load line 3 */
        ldq     tc, 0(a2)       # left
        ldq     td, 8(a2)       # right
        addq    a2, a3, a2      # pix2
        ldq     te, 0(a1)       # ref left
        ldq     a0, 8(a1)       # ref right
        /* calc line 0 */
        perr    t0, t2, t0      # error left
        addq    a1, a3, a1      # pix1
        perr    t1, t3, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 1 */
        perr    t4, t6, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t5, t7, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 2 */
        perr    t8, ta, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    t9, tb, t1      # error right
        addq    v0, t0, v0      # add error left
        /* calc line 3 */
        perr    tc, te, t0      # error left
        addq    v0, t1, v0      # add error right
        perr    td, a0, t1      # error right
        addq    v0, t0, v0      # add error left
        addq    v0, t1, v0      # add error right
        /* loop */
        subq    a4,  4, a4      # h -= 4
        bne     a4, $aligned
        ret
        .end pix_abs16x16_mvi_asm