Statistics
| Branch: | Revision:

ffmpeg / libavcodec / alpha / dsputil_alpha_asm.S @ d13c54cd

History | View | Annotate | Download (5.26 KB)

1 bb7d4939 Falk Hüffner
/*
2
 * Alpha optimized DSP utils
3
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
4
 *
5
 * This program is free software; you can redistribute it and/or modify
6
 * it under the terms of the GNU General Public License as published by
7
 * the Free Software Foundation; either version 2 of the License, or
8
 * (at your option) any later version.
9
 *
10
 * This program is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program; if not, write to the Free Software
17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
 */
19
20
/*
21
 * These functions are scheduled for pca56. They should work
22
 * reasonably on ev6, though.
23
 */
24
25
#include "regdef.h"
26 d13c54cd Falk Hüffner
#ifdef HAVE_AV_CONFIG_H	
27
#include "config.h"
28
#endif
29 bb7d4939 Falk Hüffner
30
/* Some nicer register names.  */
/* ta..td extend the t0..t9 temporaries used by the code below. */
31
#define ta t10
32
#define tb t11
33
#define tc t12
34
#define td AT
35
/* Danger: these overlap with the argument list and the return value */
/* te, tf and tg alias the argument registers a5, a4 and a3; th aliases
 * the return value register v0.  Both functions in this file take three
 * arguments (a0..a2) and return void, so they use these as scratch. */
36
#define te a5
37
#define tf a4
38
#define tg a3
39
#define th v0
40
                
41
/* noat: AT is referenced explicitly in this file (as td above and in the
 * HAVE_GPROF hooks), so tell the assembler that this is intended. */
        .set noat
42
/* The code below is hand scheduled (see the pca56 note above); ask the
 * assembler to keep the instruction order as written. */
        .set noreorder
43
/* The functions below use the MVI instructions maxsw4, minsw4, pkwb and
 * unpkbw, so a CPU type that provides them is selected here. */
        .arch pca56
44
        .text
45
46
/************************************************************************
47
 * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
48
 *                                 int line_size)
49
 *
 * Reads 64 signed 16-bit values from block, clamps each one to 0..255
 * and stores them as bytes: 8 rows of 8 bytes, rows line_size bytes
 * apart, starting at pixels.
 *
 * In:    a0 = block, a1 = pixels, a2 = line_size
 * Uses:  t0-t3, t8, t9, ta (plus AT when HAVE_GPROF is defined);
 *        a0 and a1 are advanced while looping.
 * Note:  block is read with ldq and pixels are written with stl, i.e.
 *        with aligned 8-byte loads and aligned 4-byte stores.
 */
50
        .align 6
51
        .globl put_pixels_clamped_mvi_asm
52
        .ent put_pixels_clamped_mvi_asm
53
put_pixels_clamped_mvi_asm:
54
        .frame sp, 0, ra        # frame size 0: no stack space is used
55
        .prologue 0
56
57 d13c54cd Falk Hüffner
#ifdef HAVE_GPROF
58
        lda     AT, _mcount     # profiling hook: call _mcount through AT
59
        jsr     AT, (AT), _mcount
60
#endif
61
62 bb7d4939 Falk Hüffner
        lda     t8, -1          # t8 = all ones
63
        lda     t9, 8           # loop counter
64
        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff
65
66
        /* Each iteration handles two rows: 16 values (32 bytes) of block
         * become 2 x 8 output bytes.  t9 counts the rows that are left. */
        .align 4
67
1:      ldq     t0,  0(a0)      # first row, values 0-3
68
        ldq     t1,  8(a0)      # first row, values 4-7
69
        ldq     t2, 16(a0)      # second row, values 0-3
70
        ldq     t3, 24(a0)      # second row, values 4-7
71
72
        maxsw4  t0, zero, t0    # lower clamp: max(value, 0) in each word
73
        subq    t9, 2, t9       # two rows consumed per iteration
74
        maxsw4  t1, zero, t1
75
        lda     a0, 32(a0)      # block += 16
76
77
        maxsw4  t2, zero, t2
78
        addq    a1, a2, ta      # ta = address of the second output row
79
        maxsw4  t3, zero, t3
80
        minsw4  t0, t8, t0      # upper clamp: min(value, 255) in each word
81
        
82
        minsw4  t1, t8, t1
83
        minsw4  t2, t8, t2
84
        minsw4  t3, t8, t3
85
        pkwb    t0, t0          # pack the four words into four bytes
86
        
87
        pkwb    t1, t1
88
        pkwb    t2, t2
89
        pkwb    t3, t3
90
        stl     t0, 0(a1)       # first row, bytes 0-3
91
        
92
        stl     t1, 4(a1)       # first row, bytes 4-7
93
        addq    ta, a2, a1      # pixels += 2 * line_size
94
        stl     t2, 0(ta)       # second row, bytes 0-3
95
        stl     t3, 4(ta)       # second row, bytes 4-7
96
97
        bne     t9, 1b          # loop until all 8 rows are written
98
        ret
99
        .end put_pixels_clamped_mvi_asm
100
101
/************************************************************************
102
 * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
103
 *                                 int line_size)
104
 *
 * Adds 64 signed 16-bit values from block to the 8x8 bytes found at
 * pixels (rows line_size bytes apart), clamps each sum to 0..255 and
 * stores the result back to the same pixel locations.
 *
 * In:    a0 = block, a1 = pixels, a2 = line_size
 * Uses:  t0-t9, ta, tb, te (a5), tf (a4), tg (a3), th (v0), plus AT
 *        when HAVE_GPROF is defined; a0 and a1 are advanced while
 *        looping.
 *
 * The four 16-bit lanes of a quadword are added with a single addq.  To
 * keep a carry from crossing into the next lane, the sign bit of every
 * block value is saved (and), cleared (bic) before the add, and put back
 * afterwards (xor).  The unpacked pixel is at most 255, so the sum of
 * the low 15 bits and the pixel always fits in 16 bits.
 *
 * The trailing "q n" comments give the quarter (which of the four
 * quadwords of the iteration) and the step number within that quarter.
 */
105
        .align 6
106
        .globl add_pixels_clamped_mvi_asm
107
        .ent add_pixels_clamped_mvi_asm
108
add_pixels_clamped_mvi_asm:
109
        .frame sp, 0, ra        # frame size 0: no stack space is used
110
        .prologue 0
111
112 d13c54cd Falk Hüffner
#ifdef HAVE_GPROF
113
        lda     AT, _mcount     # profiling hook: call _mcount through AT
114
        jsr     AT, (AT), _mcount
115
#endif
116
117 bb7d4939 Falk Hüffner
        lda     t1, -1          # t1 = all ones
118
        lda     th, 8           # th = rows left to process
119
        zap     t1, 0x33, tg    # 0xffff0000ffff0000
120
        nop                     # presumably scheduling padding -- TODO confirm
121
122
        srl     tg, 1, t0       # 0x7fff80007fff8000
123
        xor     tg, t0, tg      # 0x8000800080008000
124
        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff
125
126
        /* Each iteration handles two rows: quarters 0 and 1 are the row
         * at a1, quarters 2 and 3 the row at te = a1 + line_size. */
        .align 4
127
1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
128
        ldl     t4, 4(a1)       # pix1
129
        addq    a1, a2, te      # pixels += line_size
130
        ldq     t0, 0(a0)       # shorts0
131
132
        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
133
        ldl     ta, 4(te)       # pix3
134
        ldq     t3, 8(a0)       # shorts1
135
        ldq     t6, 16(a0)      # shorts2
136
137
        ldq     t9, 24(a0)      # shorts3
138
        unpkbw  t1, t1          # 0 0 (quarter/op no.)
139
        and     t0, tg, t2      # 0 1
140
        unpkbw  t4, t4          # 1 0
141
142
        bic     t0, tg, t0      # 0 2
143
        unpkbw  t7, t7          # 2 0
144
        and     t3, tg, t5      # 1 1
145
        addq    t0, t1, t0      # 0 3 
146
147
        xor     t0, t2, t0      # 0 4
148
        unpkbw  ta, ta          # 3 0
149
        and     t6, tg, t8      # 2 1
150
        maxsw4  t0, zero, t0    # 0 5
151
        
152
        bic     t3, tg, t3      # 1 2
153
        bic     t6, tg, t6      # 2 2
154
        minsw4  t0, tf, t0      # 0 6
155
        addq    t3, t4, t3      # 1 3
156
        
157
        pkwb    t0, t0          # 0 7
158
        xor     t3, t5, t3      # 1 4
159
        maxsw4  t3, zero, t3    # 1 5
160
        addq    t6, t7, t6      # 2 3
161
162
        xor     t6, t8, t6      # 2 4
163
        and     t9, tg, tb      # 3 1
164
        minsw4  t3, tf, t3      # 1 6
165
        bic     t9, tg, t9      # 3 2
166
167
        maxsw4  t6, zero, t6    # 2 5
168
        addq    t9, ta, t9      # 3 3
169
        stl     t0, 0(a1)       # 0 8   
170
        minsw4  t6, tf, t6      # 2 6
171
172
        xor     t9, tb, t9      # 3 4
173
        maxsw4  t9, zero, t9    # 3 5
174
        lda     a0, 32(a0)      # block += 16;
175
        pkwb    t3, t3          # 1 7
176
        
177
        minsw4  t9, tf, t9      # 3 6
178
        subq    th, 2, th       # two rows consumed per iteration
179
        pkwb    t6, t6          # 2 7
180
        pkwb    t9, t9          # 3 7
181
182
        stl     t3, 4(a1)       # 1 8
183
        addq    te, a2, a1      # pixels += line_size
184
        stl     t6, 0(te)       # 2 8
185
        stl     t9, 4(te)       # 3 8
186
187
        bne     th, 1b          # loop until all 8 rows are done
188
        ret     
189
        .end add_pixels_clamped_mvi_asm