Statistics
| Branch: | Revision:

ffmpeg / libavcodec / armv4l / dsputil_arm_s.S @ b4e806b2

History | View | Annotate | Download (18.3 KB)

1
@
2
@ ARMv4L optimized DSP utils
3
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5
@ This file is part of FFmpeg.
6
@
7
@ FFmpeg is free software; you can redistribute it and/or
8
@ modify it under the terms of the GNU Lesser General Public
9
@ License as published by the Free Software Foundation; either
10
@ version 2.1 of the License, or (at your option) any later version.
11
@
12
@ FFmpeg is distributed in the hope that it will be useful,
13
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
@ Lesser General Public License for more details.
16
@
17
@ You should have received a copy of the GNU Lesser General Public
18
@ License along with FFmpeg; if not, write to the Free Software
19
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
@
21

    
22
#include "config.h"
23

    
24
#if !defined(HAVE_PLD)
@ The build configuration did not announce a usable pld instruction:
@ provide a macro of the same name that swallows its operand and emits
@ nothing, so the routines below can issue pld unconditionally.
.macro pld reg
.endm
#endif
28

    
29
@ ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
@   Extract four words that start shift bytes into the five consecutive
@   source words Rn0..Rn4:
@       Rd[i] = (Rn[i] >> (8 * shift)) | (Rn[i + 1] << (32 - 8 * shift))
@   shift is an assemble-time constant; this file only passes 1, 2 or 3.
@   The Rn registers are only read. All four low halves are written
@   before any high half is merged in, so a destination must not name a
@   source that is still to be read.
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ low part of each result: upper bytes of word i
        mov     \Rd0, \Rn0, lsr #(8 * \shift)
        mov     \Rd1, \Rn1, lsr #(8 * \shift)
        mov     \Rd2, \Rn2, lsr #(8 * \shift)
        mov     \Rd3, \Rn3, lsr #(8 * \shift)
        @ high part of each result: lower bytes of word i + 1
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - (8 * \shift))
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - (8 * \shift))
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - (8 * \shift))
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - (8 * \shift))
.endm
39
@ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
@   In-place variant: R0:R1:R2 hold three consecutive words; on exit
@   R0 and R1 hold the two words that start shift bytes into them.
@       R0 = (R0 >> (8 * shift)) | (R1 << (32 - 8 * shift))
@       R1 = (R1 >> (8 * shift)) | (R2 << (32 - 8 * shift))
@   R2 is only read. shift is an assemble-time constant (1..3 in this file).
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov     \R0, \R0, lsr #(8 * \shift)
        orr     \R0, \R0, \R1, lsl #(32 - (8 * \shift))  @ needs the old R1
        mov     \R1, \R1, lsr #(8 * \shift)
        orr     \R1, \R1, \R2, lsl #(32 - (8 * \shift))
.endm
45
@ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
@   Two-word extraction into separate destination registers:
@       Rdst0 = (Rsrc0 >> (8 * shift)) | (Rsrc1 << (32 - 8 * shift))
@       Rdst1 = (Rsrc1 >> (8 * shift)) | (Rsrc2 << (32 - 8 * shift))
@   Both right shifts are issued before the merges, so Rdst1 may reuse
@   Rsrc0 (one call site in this file does so) but not Rsrc1 or Rsrc2.
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov     \Rdst0, \Rsrc0, lsr #(8 * \shift)
        mov     \Rdst1, \Rsrc1, lsr #(8 * \shift)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (8 * \shift))
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (8 * \shift))
.endm
51

    
52
@ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@   Byte-wise average, rounded up, of two register pairs (four packed
@   bytes per register):
@       Rd = (Rn | Rm) - (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@   Rmask must hold 0xFEFEFEFE; clearing bit 0 of every byte before the
@   shift keeps bits from leaking into the neighbouring byte lane.
@   Rn0 and Rn1 are overwritten (they end up holding Rn | Rm); Rm0, Rm1
@   and Rmask are only read.
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0        @ bits in which the inputs differ
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0        @ Rn is destroyed from here on
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
65

    
66
@ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
@   Byte-wise average, rounded down, of two register pairs (four packed
@   bytes per register):
@       Rd = (Rn & Rm) + (((Rn ^ Rm) & 0xFEFEFEFE) >> 1)
@   Note the plus sign: the common bits are taken once and half of the
@   differing bits is added on top (the code below uses add, not sub).
@   Rmask must hold 0xFEFEFEFE so that the shift cannot move a bit into
@   the neighbouring byte lane.
@   Rn0 and Rn1 are overwritten (they end up holding Rn & Rm); Rm0, Rm1
@   and Rmask are only read.
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        eor     \Rd0, \Rn0, \Rm0        @ bits in which the inputs differ
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0        @ Rn is destroyed from here on
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
79

    
80
@ ----------------------------------------------------------------
81
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_pixels16_arm
put_pixels16_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Copy h rows of 16 bytes from pixels to block.
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          loops are bottom-tested, so h must be at least 1
        @ r4-r11 and lr are saved on entry; r12 is used as scratch.
        @ The source is rounded down to a word boundary and one of four
        @ loops is selected by (pixels & 3). The loops for offsets 1..3
        @ load five words per row and realign them in registers.
        pld     [r1]
        stmdb   sp!, {r4-r11, lr}
        adr     r5, .Lput_pixels16_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        bic     r1, r1, #3
        add     r5, r5, r4, lsl #2      @ r5 = address of table[offset]
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_pixels16_off0:
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        stmia   r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     .Lput_pixels16_off0
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels16_off1:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     .Lput_pixels16_off1
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels16_off2:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     .Lput_pixels16_off2
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels16_off3:
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     .Lput_pixels16_off3
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
        @ Loop entry points indexed by (pixels & 3). Slot 0 is never
        @ loaded because the aligned case falls through.
.Lput_pixels16_table:
        .word   .Lput_pixels16_off0
        .word   .Lput_pixels16_off1
        .word   .Lput_pixels16_off2
        .word   .Lput_pixels16_off3
141

    
142
@ ----------------------------------------------------------------
143
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_pixels8_arm
put_pixels8_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Copy h rows of 8 bytes from pixels to block.
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          loops are bottom-tested, so h must be at least 1
        @ r4, r5 and lr are saved on entry; r12 is used as scratch.
        @ The loops for offsets 1..3 load three words per row and
        @ realign them in place.
        pld     [r1]
        stmdb   sp!, {r4, r5, lr}
        adr     r5, .Lput_pixels8_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        bic     r1, r1, #3
        add     r5, r5, r4, lsl #2      @ r5 = address of table[offset]
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_pixels8_off0:
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_off0
        ldmia   sp!, {r4, r5, pc}
        .p2align 8
.Lput_pixels8_off1:
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_off1
        ldmia   sp!, {r4, r5, pc}
        .p2align 8
.Lput_pixels8_off2:
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_off2
        ldmia   sp!, {r4, r5, pc}
        .p2align 8
.Lput_pixels8_off3:
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_off3
        ldmia   sp!, {r4, r5, pc}
        .p2align 8
        @ Loop entry points indexed by (pixels & 3). Slot 0 is never
        @ loaded because the aligned case falls through.
.Lput_pixels8_table:
        .word   .Lput_pixels8_off0
        .word   .Lput_pixels8_off1
        .word   .Lput_pixels8_off2
        .word   .Lput_pixels8_off3
203

    
204
@ ----------------------------------------------------------------
205
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_pixels8_x2_arm
put_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Horizontal half-pel, rounding up: for each of h rows store the
        @ 8 byte-wise averages of pixels[x] and pixels[x + 1].
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          loops are bottom-tested, so h must be at least 1
        @ r4-r10 and lr are saved on entry; r12 holds the 0xFEFEFEFE mask.
        @ Every loop loads three words from the rounded-down source and
        @ builds the two 8-byte operands with the realignment macros.
        pld     [r1]
        stmdb   sp!, {r4-r10, lr}
        adr     r5, .Lput_pixels8_x2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        ldr     r12, [r5]               @ table slot 0 doubles as the mask
        add     r5, r5, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_pixels8_x2_off0:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_pixels8_x2_off0
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_pixels8_x2_off1:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_x2_off1
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_pixels8_x2_off2:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_pixels8_x2_off2
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_pixels8_x2_off3:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        @ the x + 1 operand is word aligned here: it is simply r5:r10
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_pixels8_x2_off3
        ldmia   sp!, {r4-r10, pc}       @ return: the saved lr is popped into pc
        .p2align 8
        @ Slot 0 is the averaging mask, slots 1..3 are the loop entry
        @ points for source offsets 1..3.
.Lput_pixels8_x2_table:
        .word   0xFEFEFEFE
        .word   .Lput_pixels8_x2_off1
        .word   .Lput_pixels8_x2_off2
        .word   .Lput_pixels8_x2_off3
273

    
274
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_no_rnd_pixels8_x2_arm
put_no_rnd_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Horizontal half-pel, rounding down: for each of h rows store the
        @ 8 byte-wise averages of pixels[x] and pixels[x + 1].
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          loops are bottom-tested, so h must be at least 1
        @ r4-r10 and lr are saved on entry; r12 holds the 0xFEFEFEFE mask.
        @ Every loop loads three words from the rounded-down source and
        @ builds the two 8-byte operands with the realignment macros.
        pld     [r1]
        stmdb   sp!, {r4-r10, lr}
        adr     r5, .Lput_no_rnd_pixels8_x2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        ldr     r12, [r5]               @ table slot 0 doubles as the mask
        add     r5, r5, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_no_rnd_pixels8_x2_off0:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_x2_off0
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_no_rnd_pixels8_x2_off1:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_x2_off1
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_no_rnd_pixels8_x2_off2:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_x2_off2
        ldmia   sp!, {r4-r10, pc}
        .p2align 8
.Lput_no_rnd_pixels8_x2_off3:
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        @ the x + 1 operand is word aligned here: it is simply r5:r10
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_x2_off3
        ldmia   sp!, {r4-r10, pc}       @ return: the saved lr is popped into pc
        .p2align 8
        @ Slot 0 is the averaging mask, slots 1..3 are the loop entry
        @ points for source offsets 1..3.
.Lput_no_rnd_pixels8_x2_table:
        .word   0xFEFEFEFE
        .word   .Lput_no_rnd_pixels8_x2_off1
        .word   .Lput_no_rnd_pixels8_x2_off2
        .word   .Lput_no_rnd_pixels8_x2_off3
342

    
343

    
344
@ ----------------------------------------------------------------
345
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_pixels8_y2_arm
put_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Vertical half-pel, rounding up: every output row is the byte-wise
        @ average of a source row and the row below it (8 bytes wide).
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          halved on entry; each loop pass emits two rows
        @                   and is bottom-tested, so h is expected to be
        @                   even and at least 2
        @ r4-r11 and lr are saved on entry; r12 holds the 0xFEFEFEFE mask.
        @ The two row buffers swap roles inside a pass so that every source
        @ row is loaded only once.
        pld     [r1]
        stmdb   sp!, {r4-r11, lr}
        adr     r5, .Lput_pixels8_y2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        mov     r3, r3, lsr #1          @ pass count = h / 2 (flags untouched)
        ldr     r12, [r5]               @ table slot 0 doubles as the mask
        add     r5, r5, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_pixels8_y2_off0:
        ldmia   r1, {r4, r5}            @ prime with the first source row
        add     r1, r1, r2
.Lput_pixels8_y2_off0_loop:
        ldmia   r1, {r6, r7}
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_pixels8_y2_off0_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels8_y2_off1:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
.Lput_pixels8_y2_off1_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_pixels8_y2_off1_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels8_y2_off2:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
.Lput_pixels8_y2_off2_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_pixels8_y2_off2_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_pixels8_y2_off3:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
.Lput_pixels8_y2_off3_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_pixels8_y2_off3_loop
        ldmia   sp!, {r4-r11, pc}

        .p2align 8
        @ Slot 0 is the averaging mask, slots 1..3 are the entry points
        @ for source offsets 1..3.
.Lput_pixels8_y2_table:
        .word   0xFEFEFEFE
        .word   .Lput_pixels8_y2_off1
        .word   .Lput_pixels8_y2_off2
        .word   .Lput_pixels8_y2_off3
453

    
454
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_no_rnd_pixels8_y2_arm
put_no_rnd_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ Vertical half-pel, rounding down: every output row is the
        @ byte-wise average of a source row and the row below it (8 bytes
        @ wide).
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size  added to both pointers after every row
        @   r3 = h          halved on entry; each loop pass emits two rows
        @                   and is bottom-tested, so h is expected to be
        @                   even and at least 2
        @ r4-r11 and lr are saved on entry; r12 holds the 0xFEFEFEFE mask.
        @ The two row buffers swap roles inside a pass so that every source
        @ row is loaded only once.
        pld     [r1]
        stmdb   sp!, {r4-r11, lr}
        adr     r5, .Lput_no_rnd_pixels8_y2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        mov     r3, r3, lsr #1          @ pass count = h / 2 (flags untouched)
        ldr     r12, [r5]               @ table slot 0 doubles as the mask
        add     r5, r5, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_no_rnd_pixels8_y2_off0:
        ldmia   r1, {r4, r5}            @ prime with the first source row
        add     r1, r1, r2
.Lput_no_rnd_pixels8_y2_off0_loop:
        ldmia   r1, {r6, r7}
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_y2_off0_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_no_rnd_pixels8_y2_off1:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
.Lput_no_rnd_pixels8_y2_off1_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_y2_off1_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_no_rnd_pixels8_y2_off2:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
.Lput_no_rnd_pixels8_y2_off2_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_y2_off2_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
.Lput_no_rnd_pixels8_y2_off3:
        ldmia   r1, {r4-r6}             @ prime with the first source row
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
.Lput_no_rnd_pixels8_y2_off3_loop:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     .Lput_no_rnd_pixels8_y2_off3_loop
        ldmia   sp!, {r4-r11, pc}
        .p2align 8
        @ Slot 0 is the averaging mask, slots 1..3 are the entry points
        @ for source offsets 1..3.
.Lput_no_rnd_pixels8_y2_table:
        .word   0xFEFEFEFE
        .word   .Lput_no_rnd_pixels8_y2_off1
        .word   .Lput_no_rnd_pixels8_y2_off2
        .word   .Lput_no_rnd_pixels8_y2_off3
561

    
562
@ ----------------------------------------------------------------
563
@ RND_XY2_IT align
@   One source row of the 2-D half-pel filter. With a = pixels[x] and
@   b = pixels[x + 1] (8 bytes each, two words):
@       l1 =  (a & 0x03030303) + (b & 0x03030303)  (+ rounding constant)
@       h1 = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
@   In:   r1  = word-aligned source row, r2 = line_size,
@         r3  = row counter,
@         r12 = constant table: +0 mask 0x03030303, +16 rounding
@               constant, +20 mask 0xFCFCFCFC >> 2
@         align = byte offset of the wanted pixels inside the first word
@   Out:  r8, r9   = l1 for the two words; the rounding constant is added
@                    only when r3 is even on entry
@         r10, r11 = h1 for the two words
@         r1 advanced by r2; r3 decremented, with the flags of that
@         decrement left live for the caller
@   Clobbers r4-r7 and r14.
.macro  RND_XY2_IT align
        @ Load three words, step to the next row, then build
        @ a in r4:r5 and b in r6:r7 for the given byte offset.
.if \align == 0
        ldmia   r1, {r6-r8}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ldmia   r1, {r8-r10}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ldmia   r1, {r8-r10}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ldmia   r1, {r5-r7}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.else
        @ no other offset is used in this file; only the load is emitted
        ldmia   r1, {r8-r10}
        add     r1, r1, r2
        pld     [r1]
.endif
        ldr     lr, [r12]               @ 0x03030303
        tst     r3, #1                  @ Z set on even rows
        and     r8, r4, lr
        and     r9, r5, lr
        and     r10, r6, lr
        and     r11, r7, lr
        ldreq   lr, [r12, #16]          @ 0x02020202/0x01010101
        add     r8, r8, r10
        add     r9, r9, r11
        addeq   r8, r8, lr
        addeq   r9, r9, lr
        ldr     lr, [r12, #20]          @ 0xFCFCFCFC >> 2
        and     r4, lr, r4, lsr #2
        and     r5, lr, r5, lsr #2
        and     r6, lr, r6, lsr #2
        and     r7, lr, r7, lsr #2
        add     r10, r4, r6
        add     r11, r5, r7
        subs    r3, r3, #1              @ flags are consumed by the caller
.endm
606

    
607
@ RND_XY2_EXPAND align
@   Complete row loop and function epilogue of the 2-D half-pel filter
@   for one source byte offset. The partial sums of the previous row are
@   parked on the stack while RND_XY2_IT processes the next row; the two
@   are then combined per byte as
@       out = ((l_prev + l_cur) >> 2) + (h_prev + h_cur)
@   and 8 bytes are stored to r0, which then advances by r2.
@   Needs r12 + 24 to hold 0x0F0F0F0F in addition to the constants read
@   by RND_XY2_IT. The loop branch tests the flags left by the subs at
@   the end of RND_XY2_IT; nothing in between changes them.
@   Ends by popping r4-r11 and the return address.
.macro RND_XY2_EXPAND align
        RND_XY2_IT \align
6:      stmdb   sp!, {r8-r11}           @ park the sums of the previous row
        RND_XY2_IT \align
        ldmia   sp!, {r4-r7}            @ previous row: l in r4, r5 and h in r6, r7
        add     r4, r4, r8
        add     r5, r5, r9
        add     r6, r6, r10
        add     r7, r7, r11
        ldr     lr, [r12, #24]          @ 0x0F0F0F0F
        and     r4, lr, r4, lsr #2
        and     r5, lr, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bge     6b                      @ until the row counter goes negative
        ldmia   sp!, {r4-r11, pc}
.endm
626

    
627
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_pixels8_xy2_arm
put_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ 2-D half-pel with rounding: every output byte is
        @ (p[x] + p[x + 1] + q[x] + q[x + 1] + 2) >> 2, where p and q are
        @ two vertically adjacent source rows (8 bytes wide, h rows out).
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size
        @   r3 = h
        @ r4-r11 and lr are saved here and restored by the epilogue inside
        @ RND_XY2_EXPAND; r12 points at the constant table for the whole
        @ function.
        pld     [r1]
        stmdb   sp!, {r4-r11, lr}
        adrl    r12, .Lput_pixels8_xy2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        add     r5, r12, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_pixels8_xy2_off0:
        RND_XY2_EXPAND 0

        .p2align 8
.Lput_pixels8_xy2_off1:
        RND_XY2_EXPAND 1

        .p2align 8
.Lput_pixels8_xy2_off2:
        RND_XY2_EXPAND 2

        .p2align 8
.Lput_pixels8_xy2_off3:
        RND_XY2_EXPAND 3

        @ Constants and entry points in one table; the byte offsets are
        @ hard-wired in RND_XY2_IT and RND_XY2_EXPAND.
.Lput_pixels8_xy2_table:
        .word   0x03030303              @ +0   low two bits of every byte
        .word   .Lput_pixels8_xy2_off1  @ +4   entry for source offset 1
        .word   .Lput_pixels8_xy2_off2  @ +8   entry for source offset 2
        .word   .Lput_pixels8_xy2_off3  @ +12  entry for source offset 3
        .word   0x02020202              @ +16  rounding constant
        .word   0xFCFCFCFC >> 2         @ +20  mask applied after lsr 2
        .word   0x0F0F0F0F              @ +24  mask for the summed low bits
662

    
663
        .p2align 8                      @ 2^8 = 256-byte boundary
        .globl  put_no_rnd_pixels8_xy2_arm
put_no_rnd_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @
        @ 2-D half-pel with the smaller rounding constant: every output
        @ byte is (p[x] + p[x + 1] + q[x] + q[x + 1] + 1) >> 2, where p and
        @ q are two vertically adjacent source rows (8 bytes wide, h rows
        @ out).
        @   r0 = block      word aligned
        @   r1 = pixels     any byte alignment
        @   r2 = line_size
        @   r3 = h
        @ r4-r11 and lr are saved here and restored by the epilogue inside
        @ RND_XY2_EXPAND; r12 points at the constant table for the whole
        @ function.
        pld     [r1]
        stmdb   sp!, {r4-r11, lr}
        adrl    r12, .Lput_no_rnd_pixels8_xy2_table
        ands    r4, r1, #3              @ r4 = byte offset; Z set when it is 0
        add     r5, r12, r4, lsl #2
        bic     r1, r1, #3
        ldrne   pc, [r5]                @ offset 1..3: jump through the table
        @ offset 0 falls through
.Lput_no_rnd_pixels8_xy2_off0:
        RND_XY2_EXPAND 0

        .p2align 8
.Lput_no_rnd_pixels8_xy2_off1:
        RND_XY2_EXPAND 1

        .p2align 8
.Lput_no_rnd_pixels8_xy2_off2:
        RND_XY2_EXPAND 2

        .p2align 8
.Lput_no_rnd_pixels8_xy2_off3:
        RND_XY2_EXPAND 3

        @ Constants and entry points in one table; the byte offsets are
        @ hard-wired in RND_XY2_IT and RND_XY2_EXPAND.
.Lput_no_rnd_pixels8_xy2_table:
        .word   0x03030303                      @ +0   low two bits of every byte
        .word   .Lput_no_rnd_pixels8_xy2_off1   @ +4   entry for source offset 1
        .word   .Lput_no_rnd_pixels8_xy2_off2   @ +8   entry for source offset 2
        .word   .Lput_no_rnd_pixels8_xy2_off3   @ +12  entry for source offset 3
        .word   0x01010101                      @ +16  rounding constant
        .word   0xFCFCFCFC >> 2                 @ +20  mask applied after lsr 2
        .word   0x0F0F0F0F                      @ +24  mask for the summed low bits