Revision f9bb4bdf

View differences:

libavcodec/Makefile
68 68
# alpha specific stuff
69 69
ifeq ($(TARGET_ARCH_ALPHA),yes)
70 70
OBJS += alpha/dsputil_alpha.o alpha/mpegvideo_alpha.o alpha/motion_est_alpha.o
71
ASM_OBJS += alpha/dsputil_alpha_asm.o
71
ASM_OBJS += alpha/dsputil_alpha_asm.o alpha/motion_est_mvi_asm.o
72 72
CFLAGS += -fforce-addr -freduce-all-givs
73 73
endif
74 74

  
libavcodec/alpha/dsputil_alpha.c
34 34
void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2,
35 35
                     int stride);
36 36
int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
37
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
37
int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size);
38 38
int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
39 39
int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
40 40
int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size);
......
335 335
        get_pixels       = get_pixels_mvi;
336 336
        diff_pixels      = diff_pixels_mvi;
337 337
        pix_abs8x8       = pix_abs8x8_mvi;
338
        pix_abs16x16     = pix_abs16x16_mvi;
338
        pix_abs16x16     = pix_abs16x16_mvi_asm;
339 339
        pix_abs16x16_x2  = pix_abs16x16_x2_mvi;
340 340
        pix_abs16x16_y2  = pix_abs16x16_y2_mvi;
341 341
        pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi;
libavcodec/alpha/motion_est_alpha.c
117 117
    return result;
118 118
}
119 119

  
120
#if 0				/* now done in assembly */
120 121
int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
121 122
{
122 123
    int result = 0;
......
157 158

  
158 159
    return result;
159 160
}
161
#endif
160 162

  
161 163
int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
162 164
{
libavcodec/alpha/motion_est_mvi_asm.S
1
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19

  
20
#include "regdef.h"
21
#ifdef HAVE_AV_CONFIG_H	
22
#include "config.h"
23
#endif
24

  
25
/* Some nicer register names.  */
26
#define ta t10
27
#define tb t11
28
#define tc t12
29
#define td AT
30
/* Danger: these overlap with the argument list and the return value */
31
#define te a5
32
#define tf a4
33
#define tg a3
34
#define th v0
35
        
36
        .set noat
37
        .set noreorder
38
        .arch pca56
39
        .text
40

  
41
/*****************************************************************************
42
 * int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size)
43
 *
44
 * Sum of absolute differences over a 16x16 block:
 *   in:  a0 = pix1 (loaded with ldq, so assumed 8-byte aligned)
 *        a1 = pix2 (may be unaligned; tested on entry)
 *        a2 = line_size
 *   out: v0 = accumulated SAD (perr sums per-byte absolute differences
 *        of one quadword pair)
 *
 * This code is written with a pca56 in mind. For ev6, one should
45
 * really take the increased latency of 3 cycles for MVI instructions
46
 * into account.
47
 *
48
 * It is important to keep the loading and first use of a register as
49
 * far apart as possible, because if a register is accessed before it
50
 * has been fetched from memory, the CPU will stall.
 *
 * NOTE(review): the $unaligned path takes its extql/extqh shift amount
 * from a1 AFTER a1 has already been advanced by line_size twice, so the
 * extraction is only correct when line_size keeps a1's low three bits
 * invariant (i.e. line_size is a multiple of 8) — confirm that every
 * caller guarantees this.
51
 */
52
        .align 4
53
        .globl pix_abs16x16_mvi_asm
54
        .ent pix_abs16x16_mvi_asm
55
pix_abs16x16_mvi_asm:
56
        .frame sp, 0, ra, 0
57
        .prologue 0
58

  
59
#ifdef HAVE_GPROF
60
        lda     AT, _mcount
61
        jsr     AT, (AT), _mcount
62
#endif
63

  
64
        and     a1, 7, t0       # is pix2 8-byte aligned?
65
        clr     v0              # SAD accumulator
66
        lda     a3, 16          # row counter: 16 lines
67
        beq     t0, $aligned
68
        .align 4
69
$unaligned:
70
        /* Registers:
71
           line 0:
72
           t0:  left_u -> left lo -> left
73
           t1:  mid
74
           t2:  right_u -> right hi -> right
75
           t3:  ref left
76
           t4:  ref right
77
           line 1:
78
           t5:  left_u -> left lo -> left
79
           t6:  mid
80
           t7:  right_u -> right hi -> right
81
           t8:  ref left
82
           t9:  ref right
83
           temp:
84
           ta:  left hi
85
           tb:  right lo
86
           tc:  error left
87
           td:  error right  */
88

  
89
        /* load line 0 */
90
        ldq_u   t0, 0(a1)       # left_u
91
        ldq_u   t1, 8(a1)       # mid
92
        ldq_u   t2, 16(a1)      # right_u
93
        ldq     t3, 0(a0)       # ref left
94
        ldq     t4, 8(a0)       # ref right
95
        addq    a0, a2, a0      # pix1 += line_size
96
        addq    a1, a2, a1      # pix2 += line_size
97
        /* load line 1 */        
98
        ldq_u   t5, 0(a1)       # left_u
99
        ldq_u   t6, 8(a1)       # mid
100
        ldq_u   t7, 16(a1)      # right_u
101
        ldq     t8, 0(a0)       # ref left
102
        ldq     t9, 8(a0)       # ref right
103
        addq    a0, a2, a0      # pix1 += line_size
104
        addq    a1, a2, a1      # pix2 += line_size
105
        /* calc line 0 */
106
        extql   t0, a1, t0      # left lo
107
        extqh   t1, a1, ta      # left hi
108
        extql   t1, a1, tb      # right lo
109
        or      t0, ta, t0      # left
110
        extqh   t2, a1, t2      # right hi
111
        perr    t3, t0, tc      # error left
112
        or      t2, tb, t2      # right
113
        perr    t4, t2, td      # error right
114
        addq    v0, tc, v0      # add error left
115
        addq    v0, td, v0      # add error right
116
        /* calc line 1 */
117
        extql   t5, a1, t5      # left lo
118
        extqh   t6, a1, ta      # left hi
119
        extql   t6, a1, tb      # right lo
120
        or      t5, ta, t5      # left
121
        extqh   t7, a1, t7      # right hi
122
        perr    t8, t5, tc      # error left
123
        or      t7, tb, t7      # right
124
        perr    t9, t7, td      # error right
125
        addq    v0, tc, v0      # add error left
126
        addq    v0, td, v0      # add error right
127
        /* loop */
128
        subq    a3,  2, a3      # h -= 2
129
        bne     a3, $unaligned
130
        ret
131

  
132
        .align 4
133
$aligned:
134
        /* load line 0 */
135
        ldq     t0, 0(a1)       # left
136
        ldq     t1, 8(a1)       # right
137
        addq    a1, a2, a1      # pix2 += line_size
138
        ldq     t2, 0(a0)       # ref left
139
        ldq     t3, 8(a0)       # ref right
140
        addq    a0, a2, a0      # pix1 += line_size
141
        /* load line 1 */
142
        ldq     t4, 0(a1)       # left
143
        ldq     t5, 8(a1)       # right
144
        addq    a1, a2, a1      # pix2 += line_size
145
        ldq     t6, 0(a0)       # ref left
146
        ldq     t7, 8(a0)       # ref right
147
        addq    a0, a2, a0      # pix1 += line_size
148
        /* load line 2 */
149
        ldq     t8, 0(a1)       # left
150
        ldq     t9, 8(a1)       # right
151
        addq    a1, a2, a1      # pix2 += line_size
152
        ldq     ta, 0(a0)       # ref left
153
        ldq     tb, 8(a0)       # ref right
154
        addq    a0, a2, a0      # pix1 += line_size
155
        /* load line 3 */
156
        ldq     tc, 0(a1)       # left
157
        ldq     td, 8(a1)       # right
158
        addq    a1, a2, a1      # pix2 += line_size
159
        ldq     te, 0(a0)       # ref left
160
        ldq     tf, 8(a0)       # ref right
161
        /* calc line 0 */
162
        perr    t0, t2, t0      # error left
163
        addq    a0, a2, a0      # pix1 += line_size
164
        perr    t1, t3, t1      # error right
165
        addq    v0, t0, v0      # add error left
166
        /* calc line 1 */
167
        perr    t4, t6, t0      # error left
168
        addq    v0, t1, v0      # add error right
169
        perr    t5, t7, t1      # error right
170
        addq    v0, t0, v0      # add error left
171
        /* calc line 2 */
172
        perr    t8, ta, t0      # error left
173
        addq    v0, t1, v0      # add error right
174
        perr    t9, tb, t1      # error right
175
        addq    v0, t0, v0      # add error left
176
        /* calc line 3 */
177
        perr    tc, te, t0      # error left
178
        addq    v0, t1, v0      # add error right
179
        perr    td, tf, t1      # error right
180
        addq    v0, t0, v0      # add error left
181
        addq    v0, t1, v0      # add error right
182
        /* loop */
183
        subq    a3,  4, a3      # h -= 4
184
        bne     a3, $aligned
185
        ret
186
        .end pix_abs16x16_mvi_asm

Also available in: Unified diff