Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_arm.S @ 0115b3ea

History | View | Annotate | Download (24.5 KB)

1 6ad1fa5a Bernhard Rosenkränzer
@
2 a2fc0f6a Måns Rullgård
@ ARMv4 optimized DSP utils
3 6ad1fa5a Bernhard Rosenkränzer
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5 b78e7197 Diego Biurrun
@ This file is part of FFmpeg.
6
@
7
@ FFmpeg is free software; you can redistribute it and/or
8 6ad1fa5a Bernhard Rosenkränzer
@ modify it under the terms of the GNU Lesser General Public
9
@ License as published by the Free Software Foundation; either
10 b78e7197 Diego Biurrun
@ version 2.1 of the License, or (at your option) any later version.
11 6ad1fa5a Bernhard Rosenkränzer
@
12 b78e7197 Diego Biurrun
@ FFmpeg is distributed in the hope that it will be useful,
13 6ad1fa5a Bernhard Rosenkränzer
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
@ Lesser General Public License for more details.
16
@
17
@ You should have received a copy of the GNU Lesser General Public
18 b78e7197 Diego Biurrun
@ License along with FFmpeg; if not, write to the Free Software
19 5509bffa Diego Biurrun
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 6ad1fa5a Bernhard Rosenkränzer
@
21
22 b4e806b2 Matthieu Castet
#include "config.h"
23 c130bedc Måns Rullgård
#include "asm.S"
24 b4e806b2 Matthieu Castet
25 a648516b Måns Rullgård
        preserve8
26
27 b250f9c6 Aurelien Jacobs
@ When HAVE_PLD is zero, define pld as a macro with an empty body so
@ that every "pld [reg]" hint used below expands to nothing.
@ NOTE(review): HAVE_PLD is presumably provided by config.h -- confirm.
#if !HAVE_PLD
28 b4e806b2 Matthieu Castet
.macro pld reg
29
.endm
30
#endif
31
32 b250f9c6 Aurelien Jacobs
@ ff_prefetch_arm: issue one pld hint per step while walking memory.
@ As used below: r0 = current address, r1 = byte step added after each
@ hint, r2 = number of hints to issue.
@ NOTE(review): the C prototype is not visible here; presumably
@ (mem, stride, h) -- confirm against the C declaration.
@ The counter is tested after the first hint, so r2 is assumed nonzero.
#if HAVE_ARMV5TE
33 a648516b Måns Rullgård
function ff_prefetch_arm, export=1
34 abff992d Måns Rullgård
        @ decrement first; pld and add leave the flags alone, so the
        @ bne below still tests this result
        subs            r2,  r2,  #1
35
        pld             [r0]
36
        add             r0,  r0,  r1
37
        @ loop by branching back to the function entry
        bne             ff_prefetch_arm
38
        bx              lr
39 a648516b Måns Rullgård
        .endfunc
40
#endif
41
42 1febba1e Måns Rullgård
@ ALIGN_QWORD_D shift, Rd0..Rd3, Rn0..Rn4
@ Funnel-shift five source words into four destination words:
@   Rd[i] = (Rn[i] >> (shift * 8)) | (Rn[i+1] << (32 - shift * 8))
@ Only the Rd registers are written, so the Rn registers keep their
@ values as long as no Rd aliases an Rn.
@ Invoked in this file with shift = 1, 2 or 3.
@ NOTE(review): this selects the 16 bytes starting at byte offset
@ "shift" only if the words were loaded little-endian -- confirm.
.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
43 abff992d Måns Rullgård
        @ low part of each result: drop the leading "shift" bytes
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
44
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
45
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
46
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
47
        @ high part of each result: bring in bytes from the next word
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
48
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
49
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
50
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
51 6ad1fa5a Bernhard Rosenkränzer
.endm
52 1febba1e Måns Rullgård
@ ALIGN_DWORD shift, R0, R1, R2
@ In-place funnel shift of three words into two:
@   R0 = (R0 >> (shift * 8)) | (R1 << (32 - shift * 8))
@   R1 = (R1 >> (shift * 8)) | (R2 << (32 - shift * 8))
@ R0 and R1 are overwritten; R2 is only read.
@ Invoked in this file with shift = 1, 2 or 3.
.macro  ALIGN_DWORD shift, R0, R1, R2
53 abff992d Måns Rullgård
        mov             \R0, \R0, lsr #(\shift * 8)
54
        @ R1 must be consumed here, before it is shifted below
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
55
        mov             \R1, \R1, lsr #(\shift * 8)
56
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
57 6ad1fa5a Bernhard Rosenkränzer
.endm
58 1febba1e Måns Rullgård
@ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Funnel shift of three source words into two separate destinations:
@   Rdst0 = (Rsrc0 >> (shift * 8)) | (Rsrc1 << (32 - shift * 8))
@   Rdst1 = (Rsrc1 >> (shift * 8)) | (Rsrc2 << (32 - shift * 8))
@ Both lsr moves are issued before either orr, so Rdst1 may alias
@ Rsrc0 (RND_XY2_IT relies on this), but no destination may alias
@ Rsrc1 or Rsrc2.
.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
59 abff992d Måns Rullgård
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
60
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
61
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
62
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
63 6ad1fa5a Bernhard Rosenkränzer
.endm
64
65
@ RND_AVG32: byte-wise average, rounded up, of two pairs of words.
@ For every byte lane: Rd = (Rn + Rm + 1) >> 1, computed without
@ carries crossing lanes via the identity shown below.
@ Rd0/Rd1 receive the result, Rn0/Rn1 are overwritten with (Rn | Rm),
@ Rm0/Rm1 and Rmask are only read.
@ Rd is written before Rn and Rm are last read, so Rd must not alias
@ Rn or Rm.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
66
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
67
        @ Rmask = 0xFEFEFEFE (clears bit 0 of each byte so that the
        @ shift right cannot leak into the lane below)
68
        @ Rn is destroyed
69 abff992d Måns Rullgård
        eor             \Rd0, \Rn0, \Rm0
70
        eor             \Rd1, \Rn1, \Rm1
71
        orr             \Rn0, \Rn0, \Rm0
72
        orr             \Rn1, \Rn1, \Rm1
73
        and             \Rd0, \Rd0, \Rmask
74
        and             \Rd1, \Rd1, \Rmask
75
        sub             \Rd0, \Rn0, \Rd0, lsr #1
76
        sub             \Rd1, \Rn1, \Rd1, lsr #1
77 6ad1fa5a Bernhard Rosenkränzer
.endm
78
79
@ NO_RND_AVG32: byte-wise average, rounded down, of two pairs of words.
@ For every byte lane: Rd = (Rn + Rm) >> 1, computed without carries
@ crossing lanes via the identity shown below.
@ Rd0/Rd1 receive the result, Rn0/Rn1 are overwritten with (Rn & Rm),
@ Rm0/Rm1 and Rmask are only read.
@ Rd is written before Rn and Rm are last read, so Rd must not alias
@ Rn or Rm.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
80
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
81
        @ Rmask = 0xFEFEFEFE (clears bit 0 of each byte so that the
        @ shift right cannot leak into the lane below)
82
        @ Rn is destroyed
83 abff992d Måns Rullgård
        eor             \Rd0, \Rn0, \Rm0
84
        eor             \Rd1, \Rn1, \Rm1
85
        and             \Rn0, \Rn0, \Rm0
86
        and             \Rn1, \Rn1, \Rm1
87
        and             \Rd0, \Rd0, \Rmask
88
        and             \Rd1, \Rd1, \Rmask
89
        add             \Rd0, \Rn0, \Rd0, lsr #1
90
        add             \Rd1, \Rn1, \Rd1, lsr #1
91 6ad1fa5a Bernhard Rosenkränzer
.endm
92
93 ca6532f6 Måns Rullgård
@ JMP_ALIGN tmp, reg
@ Dispatch on the low two bits of reg and round reg down to a word
@ boundary.  Control continues at the next local label:
@   1: when (reg & 3) == 0      2: when (reg & 3) == 1
@   3: when (reg & 3) == 2      4: when (reg & 3) == 3
@ The invoking code must define labels 1 to 4 after the macro.
@ tmp and the flags are clobbered.
.macro  JMP_ALIGN tmp, reg
94 abff992d Måns Rullgård
        ands            \tmp, \reg, #3
95
        @ bic does not touch the flags, so beq still tests the ands
        bic             \reg, \reg, #3
96
        beq             1f
97
        subs            \tmp, \tmp, #1
98
        beq             2f
99
        subs            \tmp, \tmp, #1
100
        beq             3f
101 00c5727d Måns Rullgård
        b    4f
102
.endm
103
104 6ad1fa5a Bernhard Rosenkränzer
@ ----------------------------------------------------------------
105 fc252eba Måns Rullgård
        .align 5
106 55c0e1e6 Måns Rullgård
@ ff_put_pixels16_arm: copy h rows of 16 bytes from pixels to block.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = row counter.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ loops by the original misalignment (label 1: 0 bytes ... label 4:
@ 3 bytes).  The misaligned loops load five words per row and
@ realign them with ALIGN_QWORD_D, so they read up to 20 bytes from
@ the rounded-down address.
@ Each loop tests the counter after storing a row, so h is assumed to
@ be nonzero.
function ff_put_pixels16_arm, export=1
107 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108
        @ block = word aligned, pixels = unaligned
109 abff992d Måns Rullgård
        pld             [r1]
110 2e823300 Måns Rullgård
        push            {r4-r11, lr}
111 abff992d Måns Rullgård
        JMP_ALIGN       r5,  r1
112 6ad1fa5a Bernhard Rosenkränzer
        @ source is word aligned: straight four-word copy
1:
113 2e823300 Måns Rullgård
        ldm             r1,  {r4-r7}
114 abff992d Måns Rullgård
        add             r1,  r1,  r2
115 2e823300 Måns Rullgård
        stm             r0,  {r4-r7}
116 abff992d Måns Rullgård
        pld             [r1]
117
        subs            r3,  r3,  #1
118
        add             r0,  r0,  r2
119
        bne             1b
120 2e823300 Måns Rullgård
        pop             {r4-r11, pc}
121 fc252eba Måns Rullgård
        .align 5
122 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 1 byte
2:
123 2e823300 Måns Rullgård
        ldm             r1,  {r4-r8}
124 abff992d Måns Rullgård
        add             r1,  r1,  r2
125 1febba1e Måns Rullgård
        ALIGN_QWORD_D   1,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
126 abff992d Måns Rullgård
        pld             [r1]
127
        subs            r3,  r3,  #1
128 2e823300 Måns Rullgård
        stm             r0,  {r9-r12}
129 abff992d Måns Rullgård
        add             r0,  r0,  r2
130
        bne             2b
131 2e823300 Måns Rullgård
        pop             {r4-r11, pc}
132 fc252eba Måns Rullgård
        .align 5
133 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 2 bytes
3:
134 2e823300 Måns Rullgård
        ldm             r1,  {r4-r8}
135 abff992d Måns Rullgård
        add             r1,  r1,  r2
136 1febba1e Måns Rullgård
        ALIGN_QWORD_D   2,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
137 abff992d Måns Rullgård
        pld             [r1]
138
        subs            r3,  r3,  #1
139 2e823300 Måns Rullgård
        stm             r0,  {r9-r12}
140 abff992d Måns Rullgård
        add             r0,  r0,  r2
141
        bne             3b
142 2e823300 Måns Rullgård
        pop             {r4-r11, pc}
143 fc252eba Måns Rullgård
        .align 5
144 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 3 bytes
4:
145 2e823300 Måns Rullgård
        ldm             r1,  {r4-r8}
146 abff992d Måns Rullgård
        add             r1,  r1,  r2
147 1febba1e Måns Rullgård
        ALIGN_QWORD_D   3,   r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
148 abff992d Måns Rullgård
        pld             [r1]
149
        subs            r3,  r3,  #1
150 2e823300 Måns Rullgård
        stm             r0,  {r9-r12}
151 abff992d Måns Rullgård
        add             r0,  r0,  r2
152
        bne             4b
153 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
154 c130bedc Måns Rullgård
        .endfunc
155 6ad1fa5a Bernhard Rosenkränzer
156
@ ----------------------------------------------------------------
157 fc252eba Måns Rullgård
        .align 5
158 55c0e1e6 Måns Rullgård
@ ff_put_pixels8_arm: copy h rows of 8 bytes from pixels to block.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = row counter.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ loops by the original misalignment (label 1: 0 bytes ... label 4:
@ 3 bytes).  The misaligned loops load three words per row and
@ realign them in place with ALIGN_DWORD, so they read up to 12 bytes
@ from the rounded-down address.
@ Each loop tests the counter after the first row, so h is assumed to
@ be nonzero.
function ff_put_pixels8_arm, export=1
159 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160
        @ block = word aligned, pixels = unaligned
161 abff992d Måns Rullgård
        pld             [r1]
162 2e823300 Måns Rullgård
        push            {r4-r5,lr}
163 abff992d Måns Rullgård
        JMP_ALIGN       r5,  r1
164 6ad1fa5a Bernhard Rosenkränzer
        @ source is word aligned: straight two-word copy
1:
165 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5}
166 abff992d Måns Rullgård
        add             r1,  r1,  r2
167
        subs            r3,  r3,  #1
168
        pld             [r1]
169 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
170 abff992d Måns Rullgård
        add             r0,  r0,  r2
171
        bne             1b
172 2e823300 Måns Rullgård
        pop             {r4-r5,pc}
173 fc252eba Måns Rullgård
        .align 5
174 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 1 byte
2:
175 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r12}
176 abff992d Måns Rullgård
        add             r1,  r1,  r2
177 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r4,  r5,  r12
178 abff992d Måns Rullgård
        pld             [r1]
179
        subs            r3,  r3,  #1
180 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
181 abff992d Måns Rullgård
        add             r0,  r0,  r2
182
        bne             2b
183 2e823300 Måns Rullgård
        pop             {r4-r5,pc}
184 fc252eba Måns Rullgård
        .align 5
185 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 2 bytes
3:
186 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r12}
187 abff992d Måns Rullgård
        add             r1,  r1,  r2
188 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r4,  r5,  r12
189 abff992d Måns Rullgård
        pld             [r1]
190
        subs            r3,  r3,  #1
191 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
192 abff992d Måns Rullgård
        add             r0,  r0,  r2
193
        bne             3b
194 2e823300 Måns Rullgård
        pop             {r4-r5,pc}
195 fc252eba Måns Rullgård
        .align 5
196 6ad1fa5a Bernhard Rosenkränzer
        @ source was misaligned by 3 bytes
4:
197 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r12}
198 abff992d Måns Rullgård
        add             r1,  r1,  r2
199 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r4,  r5,  r12
200 abff992d Måns Rullgård
        pld             [r1]
201
        subs            r3,  r3,  #1
202 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
203 abff992d Måns Rullgård
        add             r0,  r0,  r2
204
        bne             4b
205 2e823300 Måns Rullgård
        pop             {r4-r5,pc}
206 c130bedc Måns Rullgård
        .endfunc
207 6ad1fa5a Bernhard Rosenkränzer
208
@ ----------------------------------------------------------------
209 fc252eba Måns Rullgård
        .align 5
210 55c0e1e6 Måns Rullgård
@ ff_put_pixels8_x2_arm: horizontal half-pel interpolation, 8 bytes
@ per row, rounding up.  Each output byte is the RND_AVG32 average of
@ the source row and the same row shifted by one byte.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = row counter, r12 = 0xfefefefe
@ lane mask for RND_AVG32.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ loops by the original misalignment.  Every loop loads three words
@ (12 bytes) per row and builds the two 8-byte operands from them.
@ Each loop tests the counter after the first row, so h is assumed to
@ be nonzero.
function ff_put_pixels8_x2_arm, export=1
211 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212
        @ block = word aligned, pixels = unaligned
213 abff992d Måns Rullgård
        pld             [r1]
214 2e823300 Måns Rullgård
        push            {r4-r10,lr}
215 abff992d Måns Rullgård
        ldr             r12, =0xfefefefe
216
        JMP_ALIGN       r5,  r1
217 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 0: operands are r4/r5 as loaded and the same
        @ data shifted by one byte in r6/r7
1:
218 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
219 abff992d Måns Rullgård
        add             r1,  r1,  r2
220 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
221 abff992d Måns Rullgård
        pld             [r1]
222
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
223
        subs            r3,  r3,  #1
224 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
225 abff992d Måns Rullgård
        add             r0,  r0,  r2
226
        bne             1b
227 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
228 fc252eba Måns Rullgård
        .align 5
229 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 1: operands are the data shifted by 1 and by 2
2:
230 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
231 abff992d Måns Rullgård
        add             r1,  r1,  r2
232 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
233
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
234 abff992d Måns Rullgård
        pld             [r1]
235
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
236
        subs            r3,  r3,  #1
237 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
238 abff992d Måns Rullgård
        add             r0,  r0,  r2
239
        bne             2b
240 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
241 fc252eba Måns Rullgård
        .align 5
242 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 2: operands are the data shifted by 2 and by 3
3:
243 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
244 abff992d Måns Rullgård
        add             r1,  r1,  r2
245 1febba1e Måns Rullgård
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
246
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
247 abff992d Måns Rullgård
        pld             [r1]
248
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
249
        subs            r3,  r3,  #1
250 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
251 abff992d Måns Rullgård
        add             r0,  r0,  r2
252
        bne             3b
253 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
254 fc252eba Måns Rullgård
        .align 5
255 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 3: first operand is the data shifted by 3; the
        @ second operand (shift by 4) is simply words r5/r10 as loaded
4:
256 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
257 abff992d Måns Rullgård
        add             r1,  r1,  r2
258 1febba1e Måns Rullgård
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
259 abff992d Måns Rullgård
        pld             [r1]
260
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
261
        subs            r3,  r3,  #1
262 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
263 abff992d Måns Rullgård
        add             r0,  r0,  r2
264
        bne             4b
265 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
266 c130bedc Måns Rullgård
        .endfunc
267 6ad1fa5a Bernhard Rosenkränzer
268 fc252eba Måns Rullgård
        .align 5
269 55c0e1e6 Måns Rullgård
@ ff_put_no_rnd_pixels8_x2_arm: horizontal half-pel interpolation,
@ 8 bytes per row, rounding down.  Identical in structure to
@ ff_put_pixels8_x2_arm but averages with NO_RND_AVG32.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = row counter, r12 = 0xfefefefe
@ lane mask for NO_RND_AVG32.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ loops by the original misalignment.  Every loop loads three words
@ (12 bytes) per row and builds the two 8-byte operands from them.
@ Each loop tests the counter after the first row, so h is assumed to
@ be nonzero.
function ff_put_no_rnd_pixels8_x2_arm, export=1
270 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271
        @ block = word aligned, pixels = unaligned
272 abff992d Måns Rullgård
        pld             [r1]
273 2e823300 Måns Rullgård
        push            {r4-r10,lr}
274 abff992d Måns Rullgård
        ldr             r12, =0xfefefefe
275
        JMP_ALIGN       r5,  r1
276 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 0: operands are r4/r5 as loaded and the same
        @ data shifted by one byte in r6/r7
1:
277 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
278 abff992d Måns Rullgård
        add             r1,  r1,  r2
279 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
280 abff992d Måns Rullgård
        pld             [r1]
281
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
282
        subs            r3,  r3,  #1
283 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
284 abff992d Måns Rullgård
        add             r0,  r0,  r2
285
        bne             1b
286 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
287 fc252eba Måns Rullgård
        .align 5
288 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 1: operands are the data shifted by 1 and by 2
2:
289 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
290 abff992d Måns Rullgård
        add             r1,  r1,  r2
291 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
292
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
293 abff992d Måns Rullgård
        pld             [r1]
294
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
295
        subs            r3,  r3,  #1
296 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
297 abff992d Måns Rullgård
        add             r0,  r0,  r2
298
        bne             2b
299 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
300 fc252eba Måns Rullgård
        .align 5
301 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 2: operands are the data shifted by 2 and by 3
3:
302 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
303 abff992d Måns Rullgård
        add             r1,  r1,  r2
304 1febba1e Måns Rullgård
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
305
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
306 abff992d Måns Rullgård
        pld             [r1]
307
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
308
        subs            r3,  r3,  #1
309 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
310 abff992d Måns Rullgård
        add             r0,  r0,  r2
311
        bne             3b
312 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
313 fc252eba Måns Rullgård
        .align 5
314 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 3: first operand is the data shifted by 3; the
        @ second operand (shift by 4) is simply words r5/r10 as loaded
4:
315 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5, r10}
316 abff992d Måns Rullgård
        add             r1,  r1,  r2
317 1febba1e Måns Rullgård
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
318 abff992d Måns Rullgård
        pld             [r1]
319
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
320
        subs            r3,  r3,  #1
321 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
322 abff992d Måns Rullgård
        add             r0,  r0,  r2
323
        bne             4b
324 2e823300 Måns Rullgård
        pop             {r4-r10,pc}
325 c130bedc Måns Rullgård
        .endfunc
326 6ad1fa5a Bernhard Rosenkränzer
327
328
@ ----------------------------------------------------------------
329 fc252eba Måns Rullgård
        .align 5
330 55c0e1e6 Måns Rullgård
@ ff_put_pixels8_y2_arm: vertical half-pel interpolation, 8 bytes per
@ row, rounding up.  Each output row is the RND_AVG32 average of a
@ source row and the source row below it.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = loop counter, r12 = 0xfefefefe
@ lane mask for RND_AVG32.
@ r3 is halved on entry and every pass through the inner loop (label
@ 6) writes two rows, so h is assumed to be even and at least 2.
@ The first source row is loaded before the loop and each pass loads
@ two more, so h + 1 source rows are read in total.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ variants by the original misalignment; the misaligned variants load
@ three words per row and realign them in place with ALIGN_DWORD.
function ff_put_pixels8_y2_arm, export=1
331 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332
        @ block = word aligned, pixels = unaligned
333 abff992d Måns Rullgård
        pld             [r1]
334 2e823300 Måns Rullgård
        push            {r4-r11,lr}
335 abff992d Måns Rullgård
        @ two output rows per loop pass
        mov             r3,  r3,  lsr #1
336
        ldr             r12, =0xfefefefe
337
        JMP_ALIGN       r5,  r1
338 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 0: rows alternate between r4/r5 and r6/r7
1:
339 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5}
340 abff992d Måns Rullgård
        add             r1,  r1,  r2
341 2e823300 Måns Rullgård
6:      ldm             r1,  {r6-r7}
342 abff992d Måns Rullgård
        add             r1,  r1,  r2
343
        pld             [r1]
344
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
345 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5}
346 abff992d Måns Rullgård
        add             r1,  r1,  r2
347 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
348 abff992d Måns Rullgård
        add             r0,  r0,  r2
349
        pld             [r1]
350
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
351
        subs            r3,  r3,  #1
352 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
353 abff992d Måns Rullgård
        add             r0,  r0,  r2
354
        bne             6b
355 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
356 fc252eba Måns Rullgård
        .align 5
357 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 1: rows alternate between r4/r5 and r7/r8
2:
358 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
359 abff992d Måns Rullgård
        add             r1,  r1,  r2
360
        pld             [r1]
361 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r4,  r5,  r6
362 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
363 abff992d Måns Rullgård
        add             r1,  r1,  r2
364
        pld             [r1]
365 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r7,  r8,  r9
366 abff992d Måns Rullgård
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
367 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
368 abff992d Måns Rullgård
        add             r0,  r0,  r2
369 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
370 abff992d Måns Rullgård
        add             r1,  r1,  r2
371
        pld             [r1]
372 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r4,  r5,  r6
373 abff992d Måns Rullgård
        @ RND_AVG32 sets no flags, so bne 6b below tests this subs
        subs            r3,  r3,  #1
374
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
375 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
376 abff992d Måns Rullgård
        add             r0,  r0,  r2
377
        bne             6b
378 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
379 fc252eba Måns Rullgård
        .align 5
380 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 2: same scheme with a two-byte realignment
3:
381 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
382 abff992d Måns Rullgård
        add             r1,  r1,  r2
383
        pld             [r1]
384 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r4,  r5,  r6
385 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
386 abff992d Måns Rullgård
        add             r1,  r1,  r2
387
        pld             [r1]
388 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r7,  r8,  r9
389 abff992d Måns Rullgård
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
390 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
391 abff992d Måns Rullgård
        add             r0,  r0,  r2
392 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
393 abff992d Måns Rullgård
        add             r1,  r1,  r2
394
        pld             [r1]
395 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r4,  r5,  r6
396 abff992d Måns Rullgård
        subs            r3,  r3,  #1
397
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
398 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
399 abff992d Måns Rullgård
        add             r0,  r0,  r2
400
        bne             6b
401 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
402 fc252eba Måns Rullgård
        .align 5
403 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 3: same scheme with a three-byte realignment
4:
404 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
405 abff992d Måns Rullgård
        add             r1,  r1,  r2
406
        pld             [r1]
407 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r4,  r5,  r6
408 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
409 abff992d Måns Rullgård
        add             r1,  r1,  r2
410
        pld             [r1]
411 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r7,  r8,  r9
412 abff992d Måns Rullgård
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
413 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
414 abff992d Måns Rullgård
        add             r0,  r0,  r2
415 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
416 abff992d Måns Rullgård
        add             r1,  r1,  r2
417
        pld             [r1]
418 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r4,  r5,  r6
419 abff992d Måns Rullgård
        subs            r3,  r3,  #1
420
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
421 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
422 abff992d Måns Rullgård
        add             r0,  r0,  r2
423
        bne             6b
424 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
425 c130bedc Måns Rullgård
        .endfunc
426 6ad1fa5a Bernhard Rosenkränzer
427 fc252eba Måns Rullgård
        .align 5
428 55c0e1e6 Måns Rullgård
@ ff_put_no_rnd_pixels8_y2_arm: vertical half-pel interpolation,
@ 8 bytes per row, rounding down.  Identical in structure to
@ ff_put_pixels8_y2_arm but averages with NO_RND_AVG32.
@ As used below: r0 = destination, r1 = source, r2 = stride added to
@ both pointers after each row, r3 = loop counter, r12 = 0xfefefefe
@ lane mask for NO_RND_AVG32.
@ r3 is halved on entry and every pass through the inner loop (label
@ 6) writes two rows, so h is assumed to be even and at least 2.
@ The first source row is loaded before the loop and each pass loads
@ two more, so h + 1 source rows are read in total.
@ JMP_ALIGN rounds r1 down to a word boundary and selects one of four
@ variants by the original misalignment; the misaligned variants load
@ three words per row and realign them in place with ALIGN_DWORD.
function ff_put_no_rnd_pixels8_y2_arm, export=1
429 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430
        @ block = word aligned, pixels = unaligned
431 abff992d Måns Rullgård
        pld             [r1]
432 2e823300 Måns Rullgård
        push            {r4-r11,lr}
433 abff992d Måns Rullgård
        @ two output rows per loop pass
        mov             r3,  r3,  lsr #1
434
        ldr             r12, =0xfefefefe
435
        JMP_ALIGN       r5,  r1
436 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 0: rows alternate between r4/r5 and r6/r7
1:
437 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5}
438 abff992d Måns Rullgård
        add             r1,  r1,  r2
439 2e823300 Måns Rullgård
6:      ldm             r1,  {r6-r7}
440 abff992d Måns Rullgård
        add             r1,  r1,  r2
441
        pld             [r1]
442
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
443 2e823300 Måns Rullgård
        ldm             r1,  {r4-r5}
444 abff992d Måns Rullgård
        add             r1,  r1,  r2
445 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
446 abff992d Måns Rullgård
        add             r0,  r0,  r2
447
        pld             [r1]
448
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
449
        subs            r3,  r3,  #1
450 2e823300 Måns Rullgård
        stm             r0,  {r8-r9}
451 abff992d Måns Rullgård
        add             r0,  r0,  r2
452
        bne             6b
453 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
454 fc252eba Måns Rullgård
        .align 5
455 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 1: rows alternate between r4/r5 and r7/r8
2:
456 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
457 abff992d Måns Rullgård
        add             r1,  r1,  r2
458
        pld             [r1]
459 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r4,  r5,  r6
460 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
461 abff992d Måns Rullgård
        add             r1,  r1,  r2
462
        pld             [r1]
463 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r7,  r8,  r9
464 abff992d Måns Rullgård
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
465 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
466 abff992d Måns Rullgård
        add             r0,  r0,  r2
467 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
468 abff992d Måns Rullgård
        add             r1,  r1,  r2
469
        pld             [r1]
470 1febba1e Måns Rullgård
        ALIGN_DWORD     1,   r4,  r5,  r6
471 abff992d Måns Rullgård
        @ NO_RND_AVG32 sets no flags, so bne 6b below tests this subs
        subs            r3,  r3,  #1
472
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
473 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
474 abff992d Måns Rullgård
        add             r0,  r0,  r2
475
        bne             6b
476 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
477 fc252eba Måns Rullgård
        .align 5
478 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 2: same scheme with a two-byte realignment
3:
479 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
480 abff992d Måns Rullgård
        add             r1,  r1,  r2
481
        pld             [r1]
482 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r4,  r5,  r6
483 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
484 abff992d Måns Rullgård
        add             r1,  r1,  r2
485
        pld             [r1]
486 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r7,  r8,  r9
487 abff992d Måns Rullgård
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
488 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
489 abff992d Måns Rullgård
        add             r0,  r0,  r2
490 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
491 abff992d Måns Rullgård
        add             r1,  r1,  r2
492
        pld             [r1]
493 1febba1e Måns Rullgård
        ALIGN_DWORD     2,   r4,  r5,  r6
494 abff992d Måns Rullgård
        subs            r3,  r3,  #1
495
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
496 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
497 abff992d Måns Rullgård
        add             r0,  r0,  r2
498
        bne             6b
499 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
500 fc252eba Måns Rullgård
        .align 5
501 6ad1fa5a Bernhard Rosenkränzer
        @ misalignment 3: same scheme with a three-byte realignment
4:
502 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
503 abff992d Måns Rullgård
        add             r1,  r1,  r2
504
        pld             [r1]
505 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r4,  r5,  r6
506 2e823300 Måns Rullgård
6:      ldm             r1,  {r7-r9}
507 abff992d Måns Rullgård
        add             r1,  r1,  r2
508
        pld             [r1]
509 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r7,  r8,  r9
510 abff992d Måns Rullgård
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
511 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
512 abff992d Måns Rullgård
        add             r0,  r0,  r2
513 2e823300 Måns Rullgård
        ldm             r1,  {r4-r6}
514 abff992d Måns Rullgård
        add             r1,  r1,  r2
515
        pld             [r1]
516 1febba1e Måns Rullgård
        ALIGN_DWORD     3,   r4,  r5,  r6
517 abff992d Måns Rullgård
        subs            r3,  r3,  #1
518
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
519 2e823300 Måns Rullgård
        stm             r0,  {r10-r11}
520 abff992d Måns Rullgård
        add             r0,  r0,  r2
521
        bne             6b
522 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
523 c130bedc Måns Rullgård
        .endfunc
524 6ad1fa5a Bernhard Rosenkränzer
525 d9e68f5c Måns Rullgård
        .ltorg
526
527 6ad1fa5a Bernhard Rosenkränzer
@ ----------------------------------------------------------------
528 d9e68f5c Måns Rullgård
@ RND_XY2_IT align, rnd
@ Process one source row for the xy2 (2x2 average) functions.
@ align is the original byte misalignment of r1 (0 to 3); rnd is a
@ shift mnemonic (lsl or lsr) that selects the rounding constant.
@ In:  r1 = word-aligned source row, r2 = stride, r3 = row counter.
@ Out: r8/r9   = per-byte sum of the low two bits of the row and of
@                the row shifted by one byte, plus the rounding
@                constant when bit 0 of r3 was clear on entry
@      r10/r11 = per-byte sum of (byte >> 2) for the same two operands
@      r1 advanced by r2, r3 decremented, flags set by that final
@      subs (no later instruction in the macro touches them).
@ Rounding constant: 0x03030303 AND (0x03030303 shifted one bit by
@ rnd), that is 0x02020202 for lsl and 0x01010101 for lsr.
@ Clobbers r4-r7, r12 and r14.
.macro  RND_XY2_IT align, rnd
529 6ad1fa5a Bernhard Rosenkränzer
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
530
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531
        @ load three words into registers chosen so that the realign
        @ step below can leave both operands in r4/r5 and r6/r7
.if \align == 0
532 2e823300 Måns Rullgård
        ldm             r1,  {r6-r8}
533 6ad1fa5a Bernhard Rosenkränzer
.elseif \align == 3
534 2e823300 Måns Rullgård
        ldm             r1,  {r5-r7}
535 6ad1fa5a Bernhard Rosenkränzer
.else
536 2e823300 Måns Rullgård
        ldm             r1,  {r8-r10}
537 6ad1fa5a Bernhard Rosenkränzer
.endif
538 abff992d Måns Rullgård
        add             r1,  r1,  r2
539
        pld             [r1]
540 6ad1fa5a Bernhard Rosenkränzer
        @ build the two 8-byte operands, one byte apart
.if \align == 0
541 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
542 6ad1fa5a Bernhard Rosenkränzer
.elseif \align == 1
543 1febba1e Måns Rullgård
        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
544
        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
545 6ad1fa5a Bernhard Rosenkränzer
.elseif \align == 2
546 1febba1e Måns Rullgård
        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
547
        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
548 6ad1fa5a Bernhard Rosenkränzer
.elseif \align == 3
549 1febba1e Måns Rullgård
        @ r5 is both a source and a destination here; ALIGN_DWORD_D
        @ reads it before overwriting it.  r6/r7 are used as loaded.
        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
550 6ad1fa5a Bernhard Rosenkränzer
.endif
551 abff992d Måns Rullgård
        ldr             r14, =0x03030303
552
        @ Z set when r3 is even; the eq-conditional instructions
        @ below add the rounding constant only in that case
        tst             r3,  #1
553
        and             r8,  r4,  r14
554
        and             r9,  r5,  r14
555
        and             r10, r6,  r14
556
        and             r11, r7,  r14
557
        andeq           r14, r14, r14, \rnd #1
558
        add             r8,  r8,  r10
559
        add             r9,  r9,  r11
560
        @ 0xfcfcfcfc >> 2 is 0x3f3f3f3f: keeps six bits per byte
        @ after the lsr by 2 below
        ldr             r12, =0xfcfcfcfc >> 2
561
        addeq           r8,  r8,  r14
562
        addeq           r9,  r9,  r14
563
        and             r4,  r12, r4,  lsr #2
564
        and             r5,  r12, r5,  lsr #2
565
        and             r6,  r12, r6,  lsr #2
566
        and             r7,  r12, r7,  lsr #2
567
        add             r10, r4,  r6
568
        add             r11, r5,  r7
569
        @ leaves the flags that RND_XY2_EXPAND tests with bge
        subs            r3,  r3,  #1
570 6ad1fa5a Bernhard Rosenkränzer
.endm
571
572 d9e68f5c Måns Rullgård
@ RND_XY2_EXPAND align, rnd
@ Complete row loop, including the function return, for one alignment
@ variant of the xy2 functions.
@ RND_XY2_IT is run once for the first source row; then each pass of
@ the loop at label 6 saves the sums of the previous row on the stack,
@ runs RND_XY2_IT for the next row, combines both rows and stores
@ 8 output bytes:
@   out = (((low_prev + low_cur) >> 2) & 0x0f0f0f0f)
@         + high_prev + high_cur
@ Because RND_XY2_IT adds its rounding constant on every other row,
@ each pair of adjacent rows carries it exactly once.
@ The loop is controlled by the flags left by the subs at the end of
@ RND_XY2_IT; none of the instructions after it alter the flags.
@ The final pop restores r4-r11 and returns, so the invoking function
@ must have pushed {r4-r11, lr} and nothing else.
.macro RND_XY2_EXPAND align, rnd
573 abff992d Måns Rullgård
        RND_XY2_IT      \align, \rnd
574 2e823300 Måns Rullgård
        @ keep the sums of the previous row while the next is computed
6:      push            {r8-r11}
575 abff992d Måns Rullgård
        RND_XY2_IT      \align, \rnd
576 2e823300 Måns Rullgård
        @ previous row: low sums into r4/r5, high sums into r6/r7
        pop             {r4-r7}
577 abff992d Måns Rullgård
        add             r4,  r4,  r8
578
        add             r5,  r5,  r9
579
        ldr             r14, =0x0f0f0f0f
580
        add             r6,  r6,  r10
581
        add             r7,  r7,  r11
582
        and             r4,  r14, r4,  lsr #2
583
        and             r5,  r14, r5,  lsr #2
584
        add             r4,  r4,  r6
585
        add             r5,  r5,  r7
586 2e823300 Måns Rullgård
        stm             r0,  {r4-r5}
587 abff992d Måns Rullgård
        add             r0,  r0,  r2
588
        @ r3 was already decremented once before the loop, so the
        @ loop runs until the counter goes negative
        bge             6b
589 2e823300 Måns Rullgård
        pop             {r4-r11,pc}
590 6ad1fa5a Bernhard Rosenkränzer
.endm
591
592 fc252eba Måns Rullgård
        .align 5
593 55c0e1e6 Måns Rullgård
@ ff_put_pixels8_xy2_arm
@
@ In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@      (order of the prototype below; r0, r2 and r3 are consumed that way
@      by RND_XY2_EXPAND)
@ Out: none; each row stores 8 bytes at block, which then advances by
@      line_size
@
@ The body is RND_XY2_EXPAND instantiated four times with rnd = lsl, once per
@ align value 0..3.  With lsl, the visible tail of RND_XY2_IT derives the
@ rounding term 0x03030303 & (0x03030303 << 1) = 0x02020202.
@ NOTE(review): the two words being summed are presumably each pixel and its
@ right-hand neighbour; the loads live in the part of RND_XY2_IT that is not
@ shown here - confirm.
function ff_put_pixels8_xy2_arm, export=1
594 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595
        @ block = word aligned, pixels = unaligned
596 abff992d Måns Rullgård
        pld             [r1]
597 2e823300 Måns Rullgård
        @ saved frame is undone by the pop {r4-r11,pc} that ends every
        @ RND_XY2_EXPAND copy, so each copy returns on its own
        push            {r4-r11,lr} @ R14 is also called LR
598 abff992d Måns Rullgård
        @ NOTE(review): JMP_ALIGN is defined elsewhere; presumably it branches
        @ to label 1, 2, 3 or 4 according to the low two bits of r1, using r5
        @ as scratch - confirm against its definition
        JMP_ALIGN       r5,  r1
599 c8315e91 Måns Rullgård
1:      RND_XY2_EXPAND  0, lsl
600 fc252eba Måns Rullgård
        .align 5
601 c8315e91 Måns Rullgård
2:      RND_XY2_EXPAND  1, lsl
602 fc252eba Måns Rullgård
        .align 5
603 c8315e91 Måns Rullgård
3:      RND_XY2_EXPAND  2, lsl
604 fc252eba Måns Rullgård
        .align 5
605 c8315e91 Måns Rullgård
4:      RND_XY2_EXPAND  3, lsl
606 c130bedc Måns Rullgård
        .endfunc
607 6ad1fa5a Bernhard Rosenkränzer
608 fc252eba Måns Rullgård
        .align 5
609 55c0e1e6 Måns Rullgård
@ ff_put_no_rnd_pixels8_xy2_arm
@
@ In:  r0 = block, r1 = pixels, r2 = line_size, r3 = h
@      (order of the prototype below; r0, r2 and r3 are consumed that way
@      by RND_XY2_EXPAND)
@ Out: none; each row stores 8 bytes at block, which then advances by
@      line_size
@
@ Same structure as ff_put_pixels8_xy2_arm, but RND_XY2_EXPAND is
@ instantiated with rnd = lsr.  With lsr, the visible tail of RND_XY2_IT
@ derives the smaller rounding term
@ 0x03030303 & (0x03030303 >> 1) = 0x01010101.
function ff_put_no_rnd_pixels8_xy2_arm, export=1
610 6ad1fa5a Bernhard Rosenkränzer
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
611
        @ block = word aligned, pixels = unaligned
612 abff992d Måns Rullgård
        pld             [r1]
613 2e823300 Måns Rullgård
        @ saved frame is undone by the pop {r4-r11,pc} that ends every
        @ RND_XY2_EXPAND copy, so each copy returns on its own
        push            {r4-r11,lr}
614 abff992d Måns Rullgård
        @ NOTE(review): JMP_ALIGN is defined elsewhere; presumably it branches
        @ to label 1, 2, 3 or 4 according to the low two bits of r1, using r5
        @ as scratch - confirm against its definition
        JMP_ALIGN       r5,  r1
615 c8315e91 Måns Rullgård
1:      RND_XY2_EXPAND  0, lsr
616 fc252eba Måns Rullgård
        .align 5
617 c8315e91 Måns Rullgård
2:      RND_XY2_EXPAND  1, lsr
618 fc252eba Måns Rullgård
        .align 5
619 c8315e91 Måns Rullgård
3:      RND_XY2_EXPAND  2, lsr
620 fc252eba Måns Rullgård
        .align 5
621 c8315e91 Måns Rullgård
4:      RND_XY2_EXPAND  3, lsr
622 c130bedc Måns Rullgård
        .endfunc
623 d2d39859 Måns Rullgård
624 fc252eba Måns Rullgård
        .align 5
625 2ad4c241 Måns Rullgård
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds 16-bit coefficients from block to the bytes at dest and writes the
@ saturated bytes back to dest.
@
@ In:  r0 = block  (read with ldrsh, advanced by 16 bytes per row)
@      r1 = dest   (read and written as two 32-bit words per row,
@                   advanced by stride per row)
@      r2 = stride
@ Out: none
@ Uses r4-r10 (saved and restored here); r10 counts the 8 rows.
@
@ Per pixel: sum = pixel + coeff.  If bit 8 of sum is set the sum is
@ replaced by (~coeff) >> 24 (logical shift), which is 0x00 for a negative
@ coeff and 0xFF for a non-negative one.
@ NOTE(review): only bit 8 is tested, so this presumably relies on the
@ coefficients being small enough that every sum stays within -256..511;
@ confirm against the callers.
@
@ The bracketed markers [A]..[F] show where an instruction originally stood
@ before it was moved earlier in the stream.
626
function ff_add_pixels_clamped_arm, export=1
627 d2d39859 Måns Rullgård
        push            {r4-r10}
628
        mov             r10, #8                 /* row counter */
629
1:
630
        ldr             r4,  [r1]               /* load dest */
631
        /* block[0] and block[1] */
632
        ldrsh           r5,  [r0]
633
        ldrsh           r7,  [r0, #2]
634
        and             r6,  r4,  #0xFF
635
        and             r8,  r4,  #0xFF00
636
        /* r6, r8 = pixel + coefficient for bytes 0 and 1 */
        add             r6,  r5,  r6
637
        add             r8,  r7,  r8,  lsr #8
638
        /* ~coeff: top byte is 0x00 if coeff < 0, 0xFF otherwise */
        mvn             r5,  r5
639
        mvn             r7,  r7
640
        /* bit 8 set means the sum left 0..255: substitute 0 or 255 */
        tst             r6,  #0x100
641
        movne           r6,  r5,  lsr #24
642
        tst             r8,  #0x100
643
        movne           r8,  r7,  lsr #24
644
        /* r9 collects the four result bytes of this word */
        mov             r9,  r6
645
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
646 abff992d Måns Rullgård
        orr             r9,  r9,  r8,  lsl #8
647 d2d39859 Måns Rullgård
        /* block[2] and block[3] */
648
        /* [A] */
649
        ldrsh           r7,  [r0, #6]
650
        and             r6,  r4,  #0xFF0000
651
        and             r8,  r4,  #0xFF000000
652 abff992d Måns Rullgård
        add             r6,  r5,  r6,  lsr #16
653
        add             r8,  r7,  r8,  lsr #24
654 d2d39859 Måns Rullgård
        mvn             r5,  r5
655
        mvn             r7,  r7
656
        tst             r6,  #0x100
657
        movne           r6,  r5,  lsr #24
658
        tst             r8,  #0x100
659
        movne           r8,  r7,  lsr #24
660 abff992d Måns Rullgård
        orr             r9,  r9,  r6,  lsl #16
661 d2d39859 Måns Rullgård
        ldr             r4,  [r1, #4]           /* moved from [B] */
662 abff992d Måns Rullgård
        orr             r9,  r9,  r8,  lsl #24
663 d2d39859 Måns Rullgård
        /* store dest */
664
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
665
        str             r9,  [r1]
666
667
        /* load dest */
668
        /* [B] */
669
        /* block[4] and block[5] */
670
        /* [C] */
671
        ldrsh           r7,  [r0, #10]
672
        and             r6,  r4,  #0xFF
673
        and             r8,  r4,  #0xFF00
674
        add             r6,  r5,  r6
675 abff992d Måns Rullgård
        add             r8,  r7,  r8,  lsr #8
676 d2d39859 Måns Rullgård
        mvn             r5,  r5
677
        mvn             r7,  r7
678
        tst             r6,  #0x100
679
        movne           r6,  r5,  lsr #24
680
        tst             r8,  #0x100
681
        movne           r8,  r7,  lsr #24
682
        mov             r9,  r6
683
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
684 abff992d Måns Rullgård
        orr             r9,  r9,  r8,  lsl #8
685 d2d39859 Måns Rullgård
        /* block[6] and block[7] */
686
        /* [D] */
687
        ldrsh           r7,  [r0, #14]
688
        and             r6,  r4,  #0xFF0000
689
        and             r8,  r4,  #0xFF000000
690 abff992d Måns Rullgård
        add             r6,  r5,  r6,  lsr #16
691
        add             r8,  r7,  r8,  lsr #24
692 d2d39859 Måns Rullgård
        mvn             r5,  r5
693
        mvn             r7,  r7
694
        tst             r6,  #0x100
695
        movne           r6,  r5,  lsr #24
696
        tst             r8,  #0x100
697
        movne           r8,  r7,  lsr #24
698 abff992d Måns Rullgård
        orr             r9,  r9,  r6,  lsl #16
699 d2d39859 Måns Rullgård
        add             r0,  r0,  #16           /* moved from [E] */
700 abff992d Måns Rullgård
        orr             r9,  r9,  r8,  lsl #24
701 d2d39859 Måns Rullgård
        /* flags set here are consumed by the bne below; the str and add
           in between do not alter them */
        subs            r10, r10, #1            /* moved from [F] */
702
        /* store dest */
703
        str             r9,  [r1, #4]
704
705
        /* [E] */
706
        /* [F] */
707
        /* next row of dest */
        add             r1,  r1,  r2
708
        bne             1b
709
710
        pop             {r4-r10}
711
        bx              lr
712
        .endfunc