Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_arm_s.S @ d9e68f5c

History | View | Annotate | Download (21.5 KB)

1
@
2
@ ARMv4 optimized DSP utils
3
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5
@ This file is part of FFmpeg.
6
@
7
@ FFmpeg is free software; you can redistribute it and/or
8
@ modify it under the terms of the GNU Lesser General Public
9
@ License as published by the Free Software Foundation; either
10
@ version 2.1 of the License, or (at your option) any later version.
11
@
12
@ FFmpeg is distributed in the hope that it will be useful,
13
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
@ Lesser General Public License for more details.
16
@
17
@ You should have received a copy of the GNU Lesser General Public
18
@ License along with FFmpeg; if not, write to the Free Software
19
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
@
21

    
22
#include "config.h"
23
#include "asm.S"
24

    
25
        preserve8
26

    
27
#if !HAVE_PLD
        @ When config.h sets HAVE_PLD to 0, define a macro named pld that takes
        @ one operand and expands to nothing, so every pld line in this file
        @ assembles to no code.
.macro  pld     reg
.endm
#endif
31

    
32
#if HAVE_ARMV5TE
@ ff_prefetch_arm
@   r0 = address to prefetch
@   r1 = byte step added to r0 after each prefetch
@   r2 = number of prefetches to issue (the loop ends when r2 reaches zero,
@        so the count is tested only after the first decrement)
function ff_prefetch_arm, export=1
        subs    r2, r2, #1              @ count down, flags consumed by bne
        pld     [r0]
        add     r0, r0, r1              @ step to the next address
        bne     ff_prefetch_arm         @ loop by re-entering at the top
        bx      lr
        .endfunc
#endif
41

    
42
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ Funnel-shift five source words Rn0..Rn4 into four result words:
        @   Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i+1] << (32 - 8 * shift))
        @ shift is an assembly-time constant; the callers in this file pass
        @ 1, 2 or 3.  For words loaded from little-endian memory this selects
        @ the 16 bytes that start shift bytes into Rn0.
        @ All four shifts right are issued before the merges, so the callers
        @ pass destination registers that are distinct from the sources.
        mov     \Rd0, \Rn0, lsr #(\shift * 8)
        mov     \Rd1, \Rn1, lsr #(\shift * 8)
        mov     \Rd2, \Rn2, lsr #(\shift * 8)
        mov     \Rd3, \Rn3, lsr #(\shift * 8)
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
52
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        @ In-place funnel shift of three words down to two:
        @   R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
        @   R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
        @ R0 and R1 are overwritten with the result; R2 is only read.
        @ R0 must be finished before R1 is modified, hence the order below.
        mov     \R0, \R0, lsr #(\shift * 8)
        orr     \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov     \R1, \R1, lsr #(\shift * 8)
        orr     \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
58
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        @ Funnel shift of three source words into two separate destinations:
        @   Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
        @   Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
        @ Rsrc0 is read only by the first instruction, so Rdst1 may be the
        @ same register as Rsrc0 (RND_XY2_IT relies on this for align 3).
        @ Rsrc1 and Rsrc2 must not be used as destinations.
        mov     \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov     \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64

    
65
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Per-byte average of two word pairs, rounding halves up:
        @   Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask must hold 0xFEFEFEFE; masking before the shift keeps the low
        @ bit of each byte from sliding into the byte below it.
        @ Rn0 and Rn1 are overwritten with (Rn | Rm); Rm0, Rm1 and Rmask are
        @ only read.  No flags are changed.
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
78

    
79
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Per-byte average of two word pairs, rounding halves down:
        @   Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask must hold 0xFEFEFEFE; masking before the shift keeps the low
        @ bit of each byte from sliding into the byte below it.
        @ Rn0 and Rn1 are overwritten with (Rn & Rm); Rm0, Rm1 and Rmask are
        @ only read.  No flags are changed.
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
92

    
93
@ ----------------------------------------------------------------
94
        .align 5
95
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Copies 16 bytes per row for h rows, advancing both pointers
        @ by line_size each row.  The source pointer is rounded down to a word
        @ boundary and one of four loops is selected by its low two bits;
        @ loops 2 to 4 load five aligned words and funnel-shift them into the
        @ four words that are stored.  The row counter is tested only after
        @ it has been decremented, so h must be at least 1.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        stmia   r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
2:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
3:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r11, pc}
        .align 5
4:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r11,pc}
        .endfunc
153

    
154
@ ----------------------------------------------------------------
155
        .align 5
156
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Copies 8 bytes per row for h rows, advancing both pointers
        @ by line_size each row.  The source pointer is rounded down to a word
        @ boundary and one of four loops is selected by its low two bits;
        @ loops 2 to 4 load three aligned words and funnel-shift them into the
        @ two words that are stored.  r12 is used as scratch.  The row counter
        @ is tested only after it has been decremented, so h must be at
        @ least 1.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r5,lr}         @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
2:
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
3:
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r5,pc}
        .align 5
4:
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r5,pc}
        .endfunc
214

    
215
@ ----------------------------------------------------------------
216
        .align 5
217
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  For each of h rows, stores 8 bytes where each byte is the
        @ RND_AVG32 average of the source byte and its right-hand neighbour.
        @ The source pointer is rounded down to a word boundary; three aligned
        @ words are loaded per row and funnel-shifted to form the row at the
        @ original offset and the row one byte further on.  r12 holds the
        @ 0xFEFEFEFE mask required by RND_AVG32.  The row counter is tested
        @ only after it has been decremented, so h must be at least 1.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r10,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        ldr     r12, =0xfefefefe        @ loads do not alter the flags
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
2:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
3:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
4:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        @ with misalignment 3 the neighbour row is simply words r5 and r10
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10,pc}        @@ update PC with LR content.
        .endfunc
283

    
284
        .align 5
285
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  For each of h rows, stores 8 bytes where each byte is the
        @ NO_RND_AVG32 average of the source byte and its right-hand
        @ neighbour.  The source pointer is rounded down to a word boundary;
        @ three aligned words are loaded per row and funnel-shifted to form
        @ the row at the original offset and the row one byte further on.
        @ r12 holds the 0xFEFEFEFE mask required by NO_RND_AVG32.  The row
        @ counter is tested only after it has been decremented, so h must be
        @ at least 1.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r10,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        ldr     r12, =0xfefefefe        @ loads do not alter the flags
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
2:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
3:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10,pc}
        .align 5
4:
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        @ with misalignment 3 the neighbour row is simply words r5 and r10
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10,pc}        @@ update PC with LR content.
        .endfunc
351

    
352

    
353
@ ----------------------------------------------------------------
354
        .align 5
355
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Each stored row of 8 bytes is the RND_AVG32 average of a
        @ source row and the source row below it.  Every loop iteration
        @ produces two output rows, so r3 is halved on entry and the loop runs
        @ h / 2 times; the counter is tested only after it has been
        @ decremented, so h must be at least 2.  One source row beyond the
        @ last output row is read.  r12 holds the 0xFEFEFEFE mask required by
        @ RND_AVG32.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        mov     r3, r3, lsr #1          @ two rows per iteration
        ldr     r12, =0xfefefefe
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
2:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1              @ RND_AVG32 leaves the flags alone
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
3:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1              @ RND_AVG32 leaves the flags alone
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
4:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1              @ RND_AVG32 leaves the flags alone
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .endfunc
461

    
462
        .align 5
463
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Each stored row of 8 bytes is the NO_RND_AVG32 average of a
        @ source row and the source row below it.  Every loop iteration
        @ produces two output rows, so r3 is halved on entry and the loop runs
        @ h / 2 times; the counter is tested only after it has been
        @ decremented, so h must be at least 2.  One source row beyond the
        @ last output row is read.  r12 holds the 0xFEFEFEFE mask required by
        @ NO_RND_AVG32.
        @
        @ The loop is selected with PC-relative branches rather than a table
        @ of absolute addresses, so the code needs no text relocations when
        @ built position independent.
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        mov     r3, r3, lsr #1          @ two rows per iteration
        ldr     r12, =0xfefefefe
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
2:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1              @ NO_RND_AVG32 leaves the flags alone
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
3:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1              @ NO_RND_AVG32 leaves the flags alone
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .align 5
4:
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1              @ NO_RND_AVG32 leaves the flags alone
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11,pc}
        .endfunc
568

    
569
        .ltorg
570

    
571
@ ----------------------------------------------------------------
572
.macro  RND_XY2_IT align, rnd
        @ One source row of the 2x2 average.
        @   a = 8 bytes at the row start, b = the 8 bytes one position on.
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @
        @ align = low two bits of the original source pointer (0..3); r1 must
        @         already be word aligned.
        @ rnd   = shift mnemonic, lsl or lsr, used to derive the rounding
        @         term from 0x03030303: lsl gives 0x02020202 and lsr gives
        @         0x01010101.  The term is added only when r3 is even on
        @         entry, that is on every other row.
        @ In:   r1 = source row, r2 = line_size, r3 = row counter.
        @ Out:  r8, r9 = l1; r10, r11 = h1; r1 advanced by line_size;
        @       r3 decremented with the flags set by that subtraction.
        @ Clobbers r4-r7 and r14.
.if \align == 0
        ldmia   r1, {r6-r8}
.elseif \align == 3
        ldmia   r1, {r5-r7}
.else
        ldmia   r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
        @ leave the two shifted rows in r4, r5 and r6, r7
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr     r14, =0x03030303
        tst     r3, #1                  @ eq when the row counter is even
        and     r8, r4, r14
        and     r9, r5, r14
        and     r10, r6, r14
        and     r11, r7, r14
        andeq   r14, r14, r14, \rnd #1  @ rounding term, even rows only
        add     r8, r8, r10
        add     r9, r9, r11
        addeq   r8, r8, r14
        addeq   r9, r9, r14
        ldr     r14, =0xfcfcfcfc >> 2
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        and     r6, r14, r6, lsr #2
        and     r7, r14, r7, lsr #2
        add     r10, r4, r6
        add     r11, r5, r7
        subs    r3, r3, #1              @ flags are consumed by the caller
.endm
615

    
616
.macro RND_XY2_EXPAND align, rnd
        @ Complete row loop and function epilogue for one source alignment
        @ of the xy2 functions.  The first RND_XY2_IT primes r8-r11 with the
        @ partial sums of the top row.  Each pass of the loop then saves
        @ those sums on the stack, computes the sums of the next row, adds
        @ the two sets together and stores
        @   ((l1_sum >> 2) & 0x0f0f0f0f) + h1_sum
        @ as one 8-byte output row.  The sums of the newer row stay in r8-r11
        @ for the following pass.
        @ The bge uses the flags left by the subs at the end of RND_XY2_IT;
        @ nothing between that subs and the branch alters them.
        @ Ends by restoring r4-r11 and returning through the saved lr.
        RND_XY2_IT \align, \rnd
6:      stmfd   sp!, {r8-r11}
        RND_XY2_IT \align, \rnd
        ldmfd   sp!, {r4-r7}
        add     r4, r4, r8
        add     r5, r5, r9
        add     r6, r6, r10
        add     r7, r7, r11
        ldr     r14, =0x0f0f0f0f
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bge     6b
        ldmfd   sp!, {r4-r11,pc}
.endm
635

    
636
        .align 5
637
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Stores h rows of 8 bytes, each byte built by
        @ RND_XY2_EXPAND from a 2x2 group of source bytes, using the lsl
        @ rounding variant of that macro.  The source pointer is rounded down
        @ to a word boundary and the macro is expanded once per possible
        @ misalignment; every expansion contains its own loop and return.
        @
        @ The expansion is selected with PC-relative branches rather than a
        @ table of absolute addresses, so the code needs no text relocations
        @ when built position independent and no adrl pseudo-instruction.
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        RND_XY2_EXPAND 0, lsl

        .align 5
2:
        RND_XY2_EXPAND 1, lsl

        .align 5
3:
        RND_XY2_EXPAND 2, lsl

        .align 5
4:
        RND_XY2_EXPAND 3, lsl
        .endfunc
667

    
668
        .align 5
669
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ r0 = block (destination), r1 = pixels (source), r2 = line_size,
        @ r3 = h.  Stores h rows of 8 bytes, each byte built by
        @ RND_XY2_EXPAND from a 2x2 group of source bytes, using the lsr
        @ rounding variant of that macro.  The source pointer is rounded down
        @ to a word boundary and the macro is expanded once per possible
        @ misalignment; every expansion contains its own loop and return.
        @
        @ The expansion is selected with PC-relative branches rather than a
        @ table of absolute addresses, so the code needs no text relocations
        @ when built position independent and no adrl pseudo-instruction.
        pld     [r1]
        stmfd   sp!, {r4-r11,lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = source misalignment, 0..3
        bic     r1, r1, #3              @ word-align source, flags unchanged
        beq     1f                      @ misalignment 0
        cmp     r4, #2
        blt     2f                      @ misalignment 1
        beq     3f                      @ misalignment 2
        b       4f                      @ misalignment 3
1:
        RND_XY2_EXPAND 0, lsr

        .align 5
2:
        RND_XY2_EXPAND 1, lsr

        .align 5
3:
        RND_XY2_EXPAND 2, lsr

        .align 5
4:
        RND_XY2_EXPAND 3, lsr
        .endfunc
699

    
700
        .align 5
701
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
@
@ r0 = block, r1 = dest, r2 = stride.
@ For each of 8 rows: adds eight signed 16-bit values from block to the eight
@ bytes at dest, saturates every sum to 0..255 and writes the row back with
@ two word stores.  block advances by 16 bytes and dest by stride per row.
@ dest is accessed with ldr and str, so it is assumed to be word aligned.
@
@ Saturation: a sum outside 0..255 is unsigned-higher than 255, because a
@ negative sum wraps to a large unsigned value.  The pixel is never negative,
@ so an overflow implies a positive block value and an underflow a negative
@ one; (NOT block value) >> 24 is therefore 0xFF or 0x00 respectively.
@ Comparing the whole sum, rather than testing bit 8 alone, also catches sums
@ such as 600 or -300 whose bit 8 is clear.
function ff_add_pixels_clamped_ARM, export=1
        push            {r4-r10}
        mov             r10, #8                 /* row counter */
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1]*/
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF              /* outside 0..255 ? */
        movhi           r6,  r5,  lsr #24       /* saturate to 0 or 255 */
        cmp             r8,  #0xFF
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8, lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8, lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8, lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2
        bne             1b                      /* flags from the subs above */

        pop             {r4-r10}
        bx              lr
        .endfunc