Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_arm_s.S @ a2fc0f6a

History | View | Annotate | Download (21.8 KB)

@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

    
22
#include "config.h"
23
#include "asm.S"
24

    
@ preserve8 is a macro supplied by asm.S.
@ NOTE(review): presumably it marks the object as keeping 8-byte stack
@ alignment - confirm against asm.S.
        preserve8

@ Fallback for builds without HAVE_PLD: define an empty macro named pld so
@ that every pld use in this file assembles to nothing.
#ifndef HAVE_PLD
.macro pld reg
.endm
#endif
31

    
#ifdef HAVE_ARMV5TE
@ ff_prefetch_arm
@ In:  r0 = address to prefetch
@      r1 = byte step added to r0 after each prefetch
@      r2 = number of prefetches to issue
@ Issues one pld per iteration and loops by branching back to the entry
@ label. r2 is decremented before it is tested, so a count of zero is not
@ treated as an empty request.
@ Clobbers: r0, r2, flags.
function ff_prefetch_arm, export=1
        subs    r2, r2, #1              @ count down; Z drives the bne below
        pld     [r0]
        add     r0, r0, r1              @ plain add leaves the flags intact
        bne     ff_prefetch_arm
        bx      lr
        .endfunc
#endif
41

    
@ ADJ_ALIGN_QUADWORD_D shift, Rd0-Rd3, Rn0-Rn4
@ Funnel-shifts five source words into four destination words:
@     Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i + 1] << (32 - 8 * shift))
@ With little-endian word loads this is the 16 bytes that start shift bytes
@ into Rn0. Every use in this file passes shift = 1, 2 or 3.
@ The sources are read only; the destinations must be distinct registers
@ because all four lsr results are produced before the orr steps.
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
@ In-place funnel shift of three words down to two:
@     R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@     R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@ R0 is completed before R1 is modified, so the original R1 is still
@ available for the first orr. R2 is read only.
@ Every use in this file passes shift = 1, 2 or 3.
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
@ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@ Funnel shift of three source words into two separate destinations:
@     Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@     Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@ Both lsr results are written before the orr steps read Rsrc1 and Rsrc2.
@ Rdst1 may therefore alias Rsrc0 (as RND_XY2_IT does for alignment 3), but
@ Rdst0 must not alias Rsrc1 and Rdst1 must not alias Rsrc2.
@ Every use in this file passes shift = 1, 2 or 3.
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
64

    
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Byte-wise average of two pairs of words, rounding halves up, computed
@ without letting carries cross byte lanes:
@     Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask must hold 0xFEFEFEFE (which is ~0x01010101).
@ Rn0/Rn1 are destroyed (overwritten with Rn | Rm); Rm0/Rm1 are preserved.
@ Flags are not modified.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask          @ drop each lane LSB before the shift
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm
78

    
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@ Byte-wise average of two pairs of words, rounding halves down, computed
@ without letting carries cross byte lanes:
@     Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
@ Rmask must hold 0xFEFEFEFE (which is ~0x01010101).
@ Rn0/Rn1 are destroyed (overwritten with Rn & Rm); Rm0/Rm1 are preserved.
@ Flags are not modified.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask          @ drop each lane LSB before the shift
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm
92

    
@ ----------------------------------------------------------------
        .align 8
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 16 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The source pointer is rounded down to a word boundary and one of
        @ four loops is selected by its low two bits (table at label 5).
        @ The misaligned loops load five words and realign them with
        @ ADJ_ALIGN_QUADWORD_D. h is decremented before it is tested, so
        @ h = 0 is not treated as an empty request.
        pld [r1]
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        bic r1, r1, #3
        add r5, r5, r4, lsl #2
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ source word aligned: straight four-word copy
        ldmia r1, {r4-r7}
        add r1, r1, r2
        stmia r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r11, pc}
        .align 8
2:
        @ source misaligned by 1 byte
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r11, pc}
        .align 8
3:
        @ source misaligned by 2 bytes
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r11, pc}
        .align 8
4:
        @ source misaligned by 3 bytes
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r11,pc}
        .align 8
5:
        @ dispatch table indexed by (pixels & 3)
        .word 1b
        .word 2b
        .word 3b
        .word 4b
        .endfunc
154

    
@ ----------------------------------------------------------------
        .align 8
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Copies h rows of 8 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ The source pointer is rounded down to a word boundary and one of
        @ four loops is selected by its low two bits (table at label 5).
        @ The misaligned loops load three words (r12 is the extra scratch
        @ word) and realign them in place with ADJ_ALIGN_DOUBLEWORD.
        @ h is decremented before it is tested, so h = 0 is not treated as
        @ an empty request.
        pld [r1]
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        bic r1, r1, #3
        add r5, r5, r4, lsl #2
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ source word aligned: straight two-word copy
        ldmia r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r5,pc}
        .align 8
2:
        @ source misaligned by 1 byte
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r5,pc}
        .align 8
3:
        @ source misaligned by 2 bytes
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r5,pc}
        .align 8
4:
        @ source misaligned by 3 bytes
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r5,pc}
        .align 8
5:
        @ dispatch table indexed by (pixels & 3)
        .word 1b
        .word 2b
        .word 3b
        .word 4b
        .endfunc
216

    
@ ----------------------------------------------------------------
        .align 8
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ For each of h rows, stores the rounded byte-wise average
        @ (RND_AVG32) of the 8 source bytes at pixels and the 8 source
        @ bytes at pixels + 1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 holds the 0xFEFEFEFE mask, read from slot 0 of the table at
        @ label 5. Slot 0 can carry the mask because the aligned case falls
        @ through to label 1 instead of using the table.
        @ Only ands sets flags before the dispatch, so ldrne still sees the
        @ result of the alignment test.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        ldr r12, [r5]           @ r12 = 0xFEFEFEFE
        add r5, r5, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ aligned: average bytes 0..7 (r4, r5) with bytes 1..8 (r6, r7)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 8
2:
        @ misaligned by 1: average offset 1 (r6, r7) with offset 2 (r8, r9)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 8
3:
        @ misaligned by 2: average offset 2 (r6, r7) with offset 3 (r8, r9)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 8
4:
        @ misaligned by 3: offset 3 is realigned into r6, r7; offset 4 is
        @ already word aligned in r5, r10
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .align 8
5:
        @ slot 0: averaging mask; slots 1-3: loops for misalignment 1-3
        .word 0xFEFEFEFE
        .word 2b
        .word 3b
        .word 4b
        .endfunc
286

    
@ put_no_rnd_pixels8_x2_arm: same structure as put_pixels8_x2_arm, but the
@ two byte rows are combined with NO_RND_AVG32 (halves round down).
        .align 8
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ For each of h rows, stores the byte-wise average of the 8 source
        @ bytes at pixels and the 8 source bytes at pixels + 1.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 holds the 0xFEFEFEFE mask, read from slot 0 of the table at
        @ label 5. Slot 0 can carry the mask because the aligned case falls
        @ through to label 1 instead of using the table.
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        ldr r12, [r5]           @ r12 = 0xFEFEFEFE
        add r5, r5, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ aligned: average bytes 0..7 (r4, r5) with bytes 1..8 (r6, r7)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 8
2:
        @ misaligned by 1: average offset 1 (r6, r7) with offset 2 (r8, r9)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 8
3:
        @ misaligned by 2: average offset 2 (r6, r7) with offset 3 (r8, r9)
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 8
4:
        @ misaligned by 3: offset 3 is realigned into r6, r7; offset 4 is
        @ already word aligned in r5, r10
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .align 8
5:
        @ slot 0: averaging mask; slots 1-3: loops for misalignment 1-3
        .word 0xFEFEFEFE
        .word 2b
        .word 3b
        .word 4b
        .endfunc
355

    
356

    
@ ----------------------------------------------------------------
        .align 8
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Stores, for each output row, the rounded byte-wise average
        @ (RND_AVG32) of two vertically adjacent 8-byte source rows.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ h is halved on entry because every pass of the inner loop
        @ (label 6) loads two new source rows and stores two output rows;
        @ an odd h therefore loses its last row.
        @ r12 holds the 0xFEFEFEFE mask read from slot 0 of the table at
        @ label 5. Only ands sets flags before the dispatch, so ldrne still
        @ sees the result of the alignment test.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        mov r3, r3, lsr #1      @ loop count = h / 2
        ldr r12, [r5]           @ r12 = 0xFEFEFEFE
        add r5, r5, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ aligned source: prime r4, r5 with the first row
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}       @ next row; r4, r5 were consumed by the macro
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
2:
        @ source misaligned by 1 byte: each row is three words, realigned
        @ in place
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1         @ flags survive: the macro and stores below
                                @ do not set them
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
3:
        @ source misaligned by 2 bytes
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
4:
        @ source misaligned by 3 bytes
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}

        .align 8
5:
        @ slot 0: averaging mask; slots 1-3: loops for misalignment 1-3
        .word 0xFEFEFEFE
        .word 2b
        .word 3b
        .word 4b
        .endfunc
466

    
@ put_no_rnd_pixels8_y2_arm: same structure as put_pixels8_y2_arm, but the
@ two source rows are combined with NO_RND_AVG32 (halves round down).
        .align 8
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ Stores, for each output row, the byte-wise average of two
        @ vertically adjacent 8-byte source rows.
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ h is halved on entry because every pass of the inner loop
        @ (label 6) loads two new source rows and stores two output rows;
        @ an odd h therefore loses its last row.
        @ r12 holds the 0xFEFEFEFE mask read from slot 0 of the table at
        @ label 5.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        adr r5, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        mov r3, r3, lsr #1      @ loop count = h / 2
        ldr r12, [r5]           @ r12 = 0xFEFEFEFE
        add r5, r5, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        @ aligned source: prime r4, r5 with the first row
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}       @ next row; r4, r5 were consumed by the macro
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
2:
        @ source misaligned by 1 byte: each row is three words, realigned
        @ in place
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1         @ flags survive: the macro and stores below
                                @ do not set them
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
3:
        @ source misaligned by 2 bytes
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
4:
        @ source misaligned by 3 bytes
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 8
5:
        @ slot 0: averaging mask; slots 1-3: loops for misalignment 1-3
        .word 0xFEFEFEFE
        .word 2b
        .word 3b
        .word 4b
        .endfunc
574

    
@ ----------------------------------------------------------------
@ RND_XY2_IT align
@ One source-row step of the 2x2 (xy2) average. align is the byte
@ misalignment (0-3) of the original source pointer; r1 has already been
@ rounded down to a word boundary by the caller.
@ In:   r1  = word-aligned source row, r2 = line_size, r3 = row counter,
@       r12 = base of the constant table of the calling function:
@             [r12, #0]  0x03030303
@             [r12, #16] rounding constant (0x02020202 or 0x01010101)
@             [r12, #20] 0xFCFCFCFC >> 2
@ Out:  r8, r9   = per-byte sums of the low 2 bits of the row and of the
@                  row shifted by one byte, plus the rounding constant when
@                  r3 was even on entry
@       r10, r11 = per-byte sums of the upper 6 bits (already >> 2) of the
@                  same two byte rows
@       r1 advanced by r2; r3 decremented, flags set by that subs
@ Clobbers: r4-r7, r14.
.macro  RND_XY2_IT align
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia r1, {r6-r8}
.elseif \align == 3
        ldmia r1, {r5-r7}
.else
        ldmia r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
        @ After this step r4, r5 and r6, r7 hold the two 8-byte rows that
        @ are one byte apart horizontally.
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, [r12, #0]      @ 0x03030303
        tst r3, #1              @ Z set when r3 is even; the and/add below
                                @ leave the flags alone
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        ldreq r14, [r12, #16]   @ 0x02020202/0x01010101
        add r8, r8, r10
        add r9, r9, r11
        addeq r8, r8, r14
        addeq r9, r9, r14
        ldr r14, [r12, #20]     @ 0xFCFCFCFC >> 2
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        and r6, r14, r6, lsr #2
        and r7, r14, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1         @ flags are consumed by RND_XY2_EXPAND
.endm
619

    
@ RND_XY2_EXPAND align
@ Complete xy2 row loop for one source alignment, including the function
@ return. It expects the caller to have pushed {r4-r11, lr} and to have set
@ r0 = destination, r1 = word-aligned source, r2 = line_size, r3 = h and
@ r12 = constant table base (see RND_XY2_IT; [r12, #24] must be 0x0F0F0F0F).
@ The first RND_XY2_IT primes the partial sums of the first source row.
@ Each pass of the loop then parks the previous sums on the stack, computes
@ the sums of the next row, and stores
@     (((l_prev + l_cur) >> 2) & 0x0F0F0F0F) + (h_prev + h_cur)
@ as one 8-byte output row. The bge uses the flags left by the subs at the
@ end of RND_XY2_IT; none of the instructions in between set flags.
.macro RND_XY2_EXPAND align
        RND_XY2_IT \align
6:      stmfd sp!, {r8-r11}     @ save sums of the previous row
        RND_XY2_IT \align
        ldmfd sp!, {r4-r7}      @ r4, r5 = previous low sums; r6, r7 = high
        add r4, r4, r8
        add r5, r5, r9
        add r6, r6, r10
        add r7, r7, r11
        ldr r14, [r12, #24]     @ 0x0F0F0F0F
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stmia r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        ldmfd sp!, {r4-r11,pc}
.endm
639

    
@ put_pixels8_xy2_arm: 8-byte-wide 2x2 average of the source, using the
@ 0x02020202 rounding constant.
        .align 8
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 points at the table at label 5, which serves both as the
        @ dispatch table for misalignment 1-3 and as the constant pool read
        @ by RND_XY2_IT / RND_XY2_EXPAND. Slot 0 holds a constant because
        @ the aligned case falls through to label 1.
        @ Each RND_XY2_EXPAND ends with the function return.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        adrl r12, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        add r5, r12, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        RND_XY2_EXPAND 0

        .align 8
2:
        RND_XY2_EXPAND 1

        .align 8
3:
        RND_XY2_EXPAND 2

        .align 8
4:
        RND_XY2_EXPAND 3

5:
        .word 0x03030303        @ [r12, #0]  low 2-bit mask
        .word 2b                @ misalignment 1
        .word 3b                @ misalignment 2
        .word 4b                @ misalignment 3
        .word 0x02020202        @ [r12, #16] rounding constant
        .word 0xFCFCFCFC >> 2   @ [r12, #20] mask for the shifted upper bits
        .word 0x0F0F0F0F        @ [r12, #24] mask after the final >> 2
        .endfunc
675

    
@ put_no_rnd_pixels8_xy2_arm: same as put_pixels8_xy2_arm except that the
@ rounding constant in the table is 0x01010101 instead of 0x02020202.
        .align 8
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        @
        @ In: r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ r12 points at the table at label 5, which serves both as the
        @ dispatch table for misalignment 1-3 and as the constant pool read
        @ by RND_XY2_IT / RND_XY2_EXPAND. Slot 0 holds a constant because
        @ the aligned case falls through to label 1.
        @ Each RND_XY2_EXPAND ends with the function return.
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        adrl r12, 5f
        ands r4, r1, #3         @ r4 = misalignment, Z set when aligned
        add r5, r12, r4, lsl #2
        bic r1, r1, #3
        ldrne pc, [r5]          @ misaligned: jump via table, else fall into 1
1:
        RND_XY2_EXPAND 0

        .align 8
2:
        RND_XY2_EXPAND 1

        .align 8
3:
        RND_XY2_EXPAND 2

        .align 8
4:
        RND_XY2_EXPAND 3

5:
        .word 0x03030303        @ [r12, #0]  low 2-bit mask
        .word 2b                @ misalignment 1
        .word 3b                @ misalignment 2
        .word 4b                @ misalignment 3
        .word 0x01010101        @ [r12, #16] rounding constant
        .word 0xFCFCFCFC >> 2   @ [r12, #20] mask for the shifted upper bits
        .word 0x0F0F0F0F        @ [r12, #24] mask after the final >> 2
        .endfunc
711

    
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
@
@ Adds an 8x8 block of signed 16-bit values to an 8x8 area of bytes at dest
@ and writes the saturated result back to dest.
@ In:  r0 = block (advanced by 16 bytes per row),
@      r1 = dest  (advanced by stride per row; accessed with word loads and
@                  stores), r2 = stride
@ r10 counts the 8 rows. Each row is handled as two words of four bytes.
@ Saturation: if bit 8 of (coefficient + dest byte) is set, the byte is
@ replaced by (~coefficient) >> 24, which is 0xFF for a non-negative
@ coefficient and 0x00 for a negative one.
@ NOTE(review): only bit 8 of the sum is tested, so this appears to assume
@ that every sum lies in [-256, 511] - confirm the coefficient range with
@ the callers.
@ The [A]..[F] markers show where instructions that were hoisted for
@ scheduling originally stood.
function ff_add_pixels_clamped_ARM, export=1
        push            {r4-r10}
        mov             r10, #8
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1]*/
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8, lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8, lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8, lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2            /* plain add keeps the flags of the subs above */
        bne             1b

        pop             {r4-r10}
        bx              lr
        .endfunc