Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_arm.S @ 2912e87a

History | View | Annotate | Download (24.4 KB)

1
@
2
@ ARMv4 optimized DSP utils
3
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5
@ This file is part of Libav.
6
@
7
@ Libav is free software; you can redistribute it and/or
8
@ modify it under the terms of the GNU Lesser General Public
9
@ License as published by the Free Software Foundation; either
10
@ version 2.1 of the License, or (at your option) any later version.
11
@
12
@ Libav is distributed in the hope that it will be useful,
13
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
@ Lesser General Public License for more details.
16
@
17
@ You should have received a copy of the GNU Lesser General Public
18
@ License along with Libav; if not, write to the Free Software
19
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
@
21

    
22
#include "config.h"
23
#include "asm.S"
24

    
25
        @ preserve8 is not defined in this file (presumably a macro from asm.S
        @ that marks the object as keeping 8-byte stack alignment -- confirm).
        preserve8
26

    
27
#if !HAVE_PLD
@ HAVE_PLD is zero: define pld as an empty one-argument macro so that every
@ prefetch hint written as pld [reg] in this file assembles to nothing.
.macro pld reg
.endm
#endif
31

    
32
#if HAVE_ARMV5TE
@ ff_prefetch_arm: issue one pld per iteration while stepping through memory.
@   r0 = first address to prefetch
@   r1 = byte step added to r0 after each prefetch
@   r2 = iteration count; the loop ends when the decremented count reaches
@        zero, so a count of 0 is not handled (the counter wraps around)
@ No other registers are touched.
@ The loop uses a local numeric label instead of branching back to the
@ exported symbol, so the back edge never depends on how a reference to a
@ global symbol gets resolved.
function ff_prefetch_arm, export=1
1:
        subs            r2,  r2,  #1            @ one fewer line to prefetch
        pld             [r0]
        add             r0,  r0,  r1            @ advance to the next line
        bne             1b                      @ flags still from the subs
        bx              lr
endfunc
#endif
41

    
42
@ ALIGN_QWORD_D shift, Rd0..Rd3, Rn0..Rn4
@ Builds four result words from five consecutive source words:
@     Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i+1] << (32 - 8 * shift))
@ In this file it is only used with shift = 1, 2 or 3.
@ All four right shifts are issued before the four merges, so the sources
@ are only read as long as no destination aliases one of them.
.macro  ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ low part of every result word
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
        @ high part, taken from the following source word
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
52
@ ALIGN_DWORD shift, R0, R1, R2
@ In-place variant working on three consecutive words:
@     R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@     R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@ R0 and R1 are overwritten with the results, R2 is only read.
@ In this file it is only used with shift = 1, 2 or 3.
.macro  ALIGN_DWORD shift, R0, R1, R2
        @ first result word; R1 is still unmodified here
        mov             \R0, \R0, lsr #(\shift * 8)
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
        @ second result word
        mov             \R1, \R1, lsr #(\shift * 8)
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
58
@ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Variant of ALIGN_DWORD that writes to separate destination registers:
@     Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@     Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@ Both right shifts come first and Rsrc0 is read only by the very first
@ instruction, which is why Rdst1 may be the same register as Rsrc0
@ (RND_XY2_IT relies on this for its align == 3 case).
.macro  ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64

    
65
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Averages two pairs of words, each holding four packed bytes, with the
@ halves rounded up:
@     Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask must hold 0xFEFEFEFE; it clears the low bit of every byte so the
@ shift cannot leak a bit into the neighbouring byte.
@ Rn0 and Rn1 are destroyed (they receive Rn | Rm); Rm0 and Rm1 are kept.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0        @ bits that differ
        eor             \Rd1, \Rn1, \Rm1
        orr             \Rn0, \Rn0, \Rm0        @ Rn = Rn | Rm
        orr             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask      @ drop bit 0 of every byte
        and             \Rd1, \Rd1, \Rmask
        sub             \Rd0, \Rn0, \Rd0, lsr #1
        sub             \Rd1, \Rn1, \Rd1, lsr #1
.endm
78

    
79
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Averages two pairs of words, each holding four packed bytes, with the
@ halves rounded down:
@     Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ (the halved difference is ADDED to the common bits; the rounding variant
@ RND_AVG32 subtracts it from Rn | Rm instead)
@ Rmask must hold 0xFEFEFEFE; it clears the low bit of every byte so the
@ shift cannot leak a bit into the neighbouring byte.
@ Rn0 and Rn1 are destroyed (they receive Rn & Rm); Rm0 and Rm1 are kept.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor             \Rd0, \Rn0, \Rm0        @ bits that differ
        eor             \Rd1, \Rn1, \Rm1
        and             \Rn0, \Rn0, \Rm0        @ Rn = Rn & Rm
        and             \Rn1, \Rn1, \Rm1
        and             \Rd0, \Rd0, \Rmask      @ drop bit 0 of every byte
        and             \Rd1, \Rd1, \Rmask
        add             \Rd0, \Rn0, \Rd0, lsr #1
        add             \Rd1, \Rn1, \Rd1, lsr #1
.endm
92

    
93
@ JMP_ALIGN tmp, reg
@ Dispatches on the byte offset of reg inside its 32-bit word and rounds reg
@ down to a multiple of 4.  The code following the macro must provide the
@ local labels 1, 2, 3 and 4:
@     offset 0 -> 1f      offset 1 -> 2f
@     offset 2 -> 3f      offset 3 -> 4f
@ tmp and the condition flags are clobbered.
.macro  JMP_ALIGN tmp, reg
        ands            \tmp, \reg, #3          @ tmp = offset, Z set if 0
        bic             \reg, \reg, #3          @ bic leaves the flags alone
        beq             1f
        subs            \tmp, \tmp, #1
        beq             2f
        subs            \tmp, \tmp, #1
        beq             3f
        b               4f
.endm
103

    
104
@ ----------------------------------------------------------------
@ Row loop shared by the three unaligned paths of ff_put_pixels16_arm.
@ Five aligned words are loaded and recombined into the four words that
@ start shift bytes into the first one.  Ends by returning to the caller.
.macro  PUT_PIXELS16_SHIFTED shift
5:
        ldm             r1,  {r4-r8}
        add             r1,  r1,  r2
        ALIGN_QWORD_D   \shift, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r9-r12}
        add             r0,  r0,  r2
        bne             5b
        pop             {r4-r11, pc}
.endm

        .align 5
function ff_put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 16 bytes from pixels to block.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r3 = h (loop counter, must not be 0)
        @ One copy loop exists per source byte offset (labels 1 to 4).
        pld             [r1]
        push            {r4-r11, lr}
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: the source is word aligned, plain block copy
        ldm             r1,  {r4-r7}
        add             r1,  r1,  r2
        stm             r0,  {r4-r7}
        pld             [r1]
        subs            r3,  r3,  #1
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r11, pc}
        .align 5
2:      PUT_PIXELS16_SHIFTED 1                  @ source offset 1
        .align 5
3:      PUT_PIXELS16_SHIFTED 2                  @ source offset 2
        .align 5
4:      PUT_PIXELS16_SHIFTED 3                  @ source offset 3
endfunc
155

    
156
@ ----------------------------------------------------------------
@ Row loop shared by the three unaligned paths of ff_put_pixels8_arm.
@ Three aligned words are loaded (r12 takes the third) and recombined in
@ place into the two words that start shift bytes into the first one.
@ Ends by returning to the caller.
.macro  PUT_PIXELS8_SHIFTED shift
5:
        ldm             r1,  {r4-r5, r12}
        add             r1,  r1,  r2
        ALIGN_DWORD     \shift, r4,  r5,  r12
        pld             [r1]
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             5b
        pop             {r4-r5,pc}
.endm

        .align 5
function ff_put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 8 bytes from pixels to block.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r3 = h (loop counter, must not be 0)
        @ One copy loop exists per source byte offset (labels 1 to 4).
        pld             [r1]
        push            {r4-r5,lr}
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: the source is word aligned, plain copy
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        subs            r3,  r3,  #1
        pld             [r1]
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r5,pc}
        .align 5
2:      PUT_PIXELS8_SHIFTED 1                   @ source offset 1
        .align 5
3:      PUT_PIXELS8_SHIFTED 2                   @ source offset 2
        .align 5
4:      PUT_PIXELS8_SHIFTED 3                   @ source offset 3
endfunc
207

    
208
@ ----------------------------------------------------------------
        .align 5
function ff_put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Every output row is RND_AVG32 of the 8 source bytes and the 8
        @ source bytes one position further on, for h rows.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r3 = h (loop counter, must not be 0)
        @   r12 = 0xfefefefe, the mask RND_AVG32 expects
        @ Every path loads three aligned words into r4, r5 and r10.
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: r4/r5 are the row itself, r6/r7 the row shifted by one
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:
        @ offset 1: average the data at shifts 1 and 2
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:
        @ offset 2: average the data at shifts 2 and 3
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:
        @ offset 3: shift 3 gives the row; the row moved on by one byte is
        @ exactly the second and third loaded words (r5, r10)
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10,pc}
endfunc
267

    
268
        .align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Every output row is NO_RND_AVG32 of the 8 source bytes and the 8
        @ source bytes one position further on, for h rows.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r3 = h (loop counter, must not be 0)
        @   r12 = 0xfefefefe, the mask NO_RND_AVG32 expects
        @ Every path loads three aligned words into r4, r5 and r10.
        pld             [r1]
        push            {r4-r10,lr}
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: r4/r5 are the row itself, r6/r7 the row shifted by one
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             1b
        pop             {r4-r10,pc}
        .align 5
2:
        @ offset 1: average the data at shifts 1 and 2
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   1,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   2,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             2b
        pop             {r4-r10,pc}
        .align 5
3:
        @ offset 2: average the data at shifts 2 and 3
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   2,   r6,  r7,  r4,  r5,  r10
        ALIGN_DWORD_D   3,   r8,  r9,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bne             3b
        pop             {r4-r10,pc}
        .align 5
4:
        @ offset 3: shift 3 gives the row; the row moved on by one byte is
        @ exactly the second and third loaded words (r5, r10)
        ldm             r1,  {r4-r5, r10}
        add             r1,  r1,  r2
        ALIGN_DWORD_D   3,   r6,  r7,  r4,  r5,  r10
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             4b
        pop             {r4-r10,pc}
endfunc
326

    
327

    
328
@ ----------------------------------------------------------------
@ Body shared by the three unaligned paths of ff_put_pixels8_y2_arm.
@ The first source row is realigned into r4/r5 before the loop; each pass
@ of the loop at label 6 then realigns two more rows (alternating between
@ r7/r8 and r4/r5) and stores two averaged output rows.
@ Ends by returning to the caller.
.macro  PUT_PIXELS8_Y2_SHIFTED shift
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r7,  r8,  r9
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
        subs            r3,  r3,  #1
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
.endm

        .align 5
function ff_put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Every output row is RND_AVG32 of a source row and the row below it.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r12 = 0xfefefefe, the mask RND_AVG32 expects
        @   r3 = h / 2: each loop pass writes two rows, so an odd h loses
        @        its last row and h below 2 is not handled
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3,  r3,  lsr #1
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: rows are used as loaded, alternating r4/r5 and r6/r7
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:      PUT_PIXELS8_Y2_SHIFTED 1                @ source offset 1
        .align 5
3:      PUT_PIXELS8_Y2_SHIFTED 2                @ source offset 2
        .align 5
4:      PUT_PIXELS8_Y2_SHIFTED 3                @ source offset 3
endfunc
426

    
427
        @ Body shared by the three unaligned paths of
        @ ff_put_no_rnd_pixels8_y2_arm.  The first source row is realigned
        @ into r4/r5 before the loop; each pass of the loop at label 6 then
        @ realigns two more rows (alternating between r7/r8 and r4/r5) and
        @ stores two averaged output rows.  Ends by returning to the caller.
.macro  PUT_NO_RND_PIXELS8_Y2_SHIFTED shift
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
6:      ldm             r1,  {r7-r9}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r7,  r8,  r9
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        ldm             r1,  {r4-r6}
        add             r1,  r1,  r2
        pld             [r1]
        ALIGN_DWORD     \shift, r4,  r5,  r6
        subs            r3,  r3,  #1
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
        stm             r0,  {r10-r11}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
.endm

        .align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Every output row is NO_RND_AVG32 of a source row and the row
        @ below it.
        @   r0 = block       r1 = pixels (rounded down to a word by JMP_ALIGN)
        @   r2 = line_size   r12 = 0xfefefefe, the mask NO_RND_AVG32 expects
        @   r3 = h / 2: each loop pass writes two rows, so an odd h loses
        @        its last row and h below 2 is not handled
        pld             [r1]
        push            {r4-r11,lr}
        mov             r3,  r3,  lsr #1
        ldr             r12, =0xfefefefe
        JMP_ALIGN       r5,  r1
1:
        @ offset 0: rows are used as loaded, alternating r4/r5 and r6/r7
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
6:      ldm             r1,  {r6-r7}
        add             r1,  r1,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
        ldm             r1,  {r4-r5}
        add             r1,  r1,  r2
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        pld             [r1]
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
        subs            r3,  r3,  #1
        stm             r0,  {r8-r9}
        add             r0,  r0,  r2
        bne             6b
        pop             {r4-r11,pc}
        .align 5
2:      PUT_NO_RND_PIXELS8_Y2_SHIFTED 1         @ source offset 1
        .align 5
3:      PUT_NO_RND_PIXELS8_Y2_SHIFTED 2         @ source offset 2
        .align 5
4:      PUT_NO_RND_PIXELS8_Y2_SHIFTED 3         @ source offset 3
endfunc
524

    
525
        @ Dump the pending literal pool here: the constants requested above
        @ with ldr rX, =value (0xfefefefe) are placed at this point.
        .ltorg
526

    
527
@ ----------------------------------------------------------------
528
@ RND_XY2_IT align, rnd
@ One source-row step of the xy2 (2x2 average) functions.
@   align = byte offset of the source inside its word (0..3); r1 must already
@           be rounded down to a word boundary
@   rnd   = lsl or lsr; selects the bias that is added to the low-bit sums:
@           0x03030303 & (0x03030303 lsl 1) = 0x02020202
@           0x03030303 & (0x03030303 lsr 1) = 0x01010101
@ With a = the 8 row bytes and b = the 8 row bytes one position further on:
@   r8,  r9  = (a & 0x03030303) + (b & 0x03030303), plus the bias when r3 is
@              even on entry
@   r10, r11 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@ Side effects: r1 advances by r2, r3 is decremented and the flags of that
@ subs are left for the caller; r4-r7, r12 and r14 are clobbered.
.macro  RND_XY2_IT align, rnd
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)

        @ load three aligned words; the registers differ per alignment so
        @ that a ends up in r4/r5 and b in r6/r7 with the fewest moves
.if \align == 0
        ldm             r1,  {r6-r8}
.elseif \align == 3
        ldm             r1,  {r5-r7}
.else
        ldm             r1,  {r8-r10}
.endif
        add             r1,  r1,  r2
        pld             [r1]
.if \align == 0
        ALIGN_DWORD_D   1,   r4,  r5,  r6,  r7,  r8
.elseif \align == 1
        ALIGN_DWORD_D   1,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   2,   r6,  r7,  r8,  r9,  r10
.elseif \align == 2
        ALIGN_DWORD_D   2,   r4,  r5,  r8,  r9,  r10
        ALIGN_DWORD_D   3,   r6,  r7,  r8,  r9,  r10
.elseif \align == 3
        ALIGN_DWORD_D   3,   r4,  r5,  r5,  r6,  r7
.endif

        @ low two bits of every byte, biased on even r3 only
        ldr             r14, =0x03030303
        tst             r3,  #1
        and             r8,  r4,  r14
        and             r9,  r5,  r14
        and             r10, r6,  r14
        and             r11, r7,  r14
        andeq           r14, r14, r14, \rnd #1
        add             r8,  r8,  r10
        add             r9,  r9,  r11
        ldr             r12, =0xfcfcfcfc >> 2
        addeq           r8,  r8,  r14
        addeq           r9,  r9,  r14

        @ upper six bits of every byte, already shifted down by two
        and             r4,  r12, r4,  lsr #2
        and             r5,  r12, r5,  lsr #2
        and             r6,  r12, r6,  lsr #2
        and             r7,  r12, r7,  lsr #2
        add             r10, r4,  r6
        add             r11, r5,  r7
        subs            r3,  r3,  #1
.endm
571

    
572
@ RND_XY2_EXPAND align, rnd
@ Complete row loop of one alignment path of the xy2 functions, including
@ the return to the caller (pops r4-r11 and pc, matching the push done by
@ the function prologue).
@ RND_XY2_IT runs once for the first source row; every pass of the loop at
@ label 6 then saves that partial result on the stack, runs RND_XY2_IT for
@ the next row and combines both rows:
@     low  = ((l_prev + l_cur) >> 2) & 0x0f0f0f0f
@     out  = h_prev + h_cur + low
@ The loop repeats while the subs inside RND_XY2_IT leaves r3 >= 0; no
@ instruction between that subs and the bge changes the flags.
.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT      \align, \rnd
6:      push            {r8-r11}                @ keep the previous row sums
        RND_XY2_IT      \align, \rnd
        pop             {r4-r7}                 @ previous l in r4/r5, h in r6/r7
        add             r4,  r4,  r8
        add             r5,  r5,  r9
        ldr             r14, =0x0f0f0f0f
        add             r6,  r6,  r10
        add             r7,  r7,  r11
        and             r4,  r14, r4,  lsr #2
        and             r5,  r14, r5,  lsr #2
        add             r4,  r4,  r6
        add             r5,  r5,  r7
        stm             r0,  {r4-r5}
        add             r0,  r0,  r2
        bge             6b
        pop             {r4-r11,pc}
.endm
591

    
592
        .align 5
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ 2x2 average of the source, built from RND_XY2_EXPAND with the lsl
        @ bias (0x02020202 per output row).  Each alignment path returns on
        @ its own, so nothing falls through from one label into the next.
        pld             [r1]
        push            {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsl                  @ source offset 0
        .align 5
2:      RND_XY2_EXPAND  1, lsl                  @ source offset 1
        .align 5
3:      RND_XY2_EXPAND  2, lsl                  @ source offset 2
        .align 5
4:      RND_XY2_EXPAND  3, lsl                  @ source offset 3
endfunc
607

    
608
        .align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ 2x2 average of the source, built from RND_XY2_EXPAND with the lsr
        @ bias (0x01010101 per output row).  Each alignment path returns on
        @ its own, so nothing falls through from one label into the next.
        pld             [r1]
        push            {r4-r11,lr}             @ r14 (lr) is used as scratch
        JMP_ALIGN       r5,  r1
1:      RND_XY2_EXPAND  0, lsr                  @ source offset 0
        .align 5
2:      RND_XY2_EXPAND  1, lsr                  @ source offset 1
        .align 5
3:      RND_XY2_EXPAND  2, lsr                  @ source offset 2
        .align 5
4:      RND_XY2_EXPAND  3, lsr                  @ source offset 3
endfunc
623

    
624
        .align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
@
@ Adds 8 rows of 8 signed 16-bit coefficients to 8 rows of 8 bytes at dest
@ and writes the sums back saturated to 0..255.
@   r0  = block, advanced by 16 bytes (8 coefficients) per row
@   r1  = dest, read and written as two 32-bit words per row, then advanced
@         by r2
@   r2  = stride in bytes
@   r10 = row counter (8)
@ r4-r10 are saved on entry and restored before returning.
@
@ Saturation: sum = coefficient + pixel with pixel in 0..255, so for any
@ int16 coefficient the sum lies in -32768..33022.  Inside that range the
@ sum is outside 0..255 exactly when one of its bits 8..15 is set, which is
@ what tst with 0xFF00 checks.  The replacement value is taken from the
@ complemented coefficient shifted right by 24: a positive coefficient
@ gives 255, a negative one gives 0.
@ The bracketed letters mark loads and updates that were hoisted ahead of
@ their logical position to hide latency.
function ff_add_pixels_clamped_arm, export=1
        push            {r4-r10}
        mov             r10, #8
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1]*/
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0xFF00            /* outside 0..255? */
        movne           r6,  r5,  lsr #24
        tst             r8,  #0xFF00
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0xFF00
        movne           r6,  r5,  lsr #24
        tst             r8,  #0xFF00
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8,  lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0xFF00
        movne           r6,  r5,  lsr #24
        tst             r8,  #0xFF00
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8,  lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0xFF00
        movne           r6,  r5,  lsr #24
        tst             r8,  #0xFF00
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8,  lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2
        bne             1b                      /* flags from the subs above */

        pop             {r4-r10}
        bx              lr
endfunc