Statistics
| Branch: | Revision:

ffmpeg / libavcodec / armv4l / dsputil_arm_s.S @ 6ad1fa5a

History | View | Annotate | Download (18.4 KB)

1
@
2
@ ARMv4L optimized DSP utils
3
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5
@ This library is free software; you can redistribute it and/or
6
@ modify it under the terms of the GNU Lesser General Public
7
@ License as published by the Free Software Foundation; either
8
@ version 2 of the License, or (at your option) any later version.
9
@
10
@ This library is distributed in the hope that it will be useful,
11
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
@ Lesser General Public License for more details.
14
@
15
@ You should have received a copy of the GNU Lesser General Public
16
@ License along with this library; if not, write to the Free Software
17
@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
@
19

    
20
.macro  ADJ_ALIGN_QUADWORD_D shift, dst0, dst1, dst2, dst3, src0, src1, src2, src3, src4
        @ Funnel-shift five consecutive source words into four destination words:
        @   dstN = (srcN >> (8 * shift)) | (srcN+1 << (32 - 8 * shift))
        @ shift is an assemble-time byte count; the callers in this file pass 1..3.
        @ Source registers are only read. All four "mov"s are issued before the
        @ first "orr", so the instruction order below is part of the contract for
        @ any caller that lets destination and source registers overlap.
        mov     \dst0, \src0, lsr #(\shift * 8)
        mov     \dst1, \src1, lsr #(\shift * 8)
        mov     \dst2, \src2, lsr #(\shift * 8)
        mov     \dst3, \src3, lsr #(\shift * 8)
        orr     \dst0, \dst0, \src1, lsl #(32 - \shift * 8)
        orr     \dst1, \dst1, \src2, lsl #(32 - \shift * 8)
        orr     \dst2, \dst2, \src3, lsl #(32 - \shift * 8)
        orr     \dst3, \dst3, \src4, lsl #(32 - \shift * 8)
.endm
30
.macro  ADJ_ALIGN_DOUBLEWORD shift, w0, w1, w2
        @ In-place funnel shift of three consecutive words into two:
        @   w0 = (w0 >> (8 * shift)) | (w1 << (32 - 8 * shift))
        @   w1 = (w1 >> (8 * shift)) | (w2 << (32 - 8 * shift))
        @ w0 and w1 are overwritten, w2 is only read.
        @ w0 is completed before w1 is modified because w0 needs the old w1.
        mov     \w0, \w0, lsr #(\shift * 8)
        orr     \w0, \w0, \w1, lsl #(32 - \shift * 8)
        mov     \w1, \w1, lsr #(\shift * 8)
        orr     \w1, \w1, \w2, lsl #(32 - \shift * 8)
.endm
36
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, out0, out1, in0, in1, in2
        @ Funnel shift of three source words into two separate destination words:
        @   out0 = (in0 >> (8 * shift)) | (in1 << (32 - 8 * shift))
        @   out1 = (in1 >> (8 * shift)) | (in2 << (32 - 8 * shift))
        @ in0 is read by the first instruction only, so out1 may name the same
        @ register as in0 (RND_XY2_IT relies on this for its align == 3 case).
        mov     \out0, \in0, lsr #(\shift * 8)
        mov     \out1, \in1, lsr #(\shift * 8)
        orr     \out0, \out0, \in1, lsl #(32 - (\shift * 8))
        orr     \out1, \out1, \in2, lsl #(32 - (\shift * 8))
.endm
42

    
43
.macro  RND_AVG32 out0, out1, a0, a1, b0, b1, mask
        @ Two-word "rounding average" computed on whole 32-bit registers:
        @   out = (a | b) - (((a ^ b) & mask) >> 1)
        @ mask must hold 0xFEFEFEFE (i.e. ~0x01010101) so that the shifted
        @ difference bits cannot cross into the neighbouring byte.
        @ a0/a1 are destroyed (they receive a | b); b0/b1 and mask are preserved.
        eor     \out0, \a0, \b0
        eor     \out1, \a1, \b1
        orr     \a0, \a0, \b0
        orr     \a1, \a1, \b1
        and     \out0, \out0, \mask
        and     \out1, \out1, \mask
        sub     \out0, \a0, \out0, lsr #1
        sub     \out1, \a1, \out1, lsr #1
.endm
56

    
57
.macro  NO_RND_AVG32 out0, out1, a0, a1, b0, b1, mask
        @ Two-word "non-rounding average" computed on whole 32-bit registers:
        @   out = (a & b) + (((a ^ b) & mask) >> 1)
        @ (note the "+": the final instructions below are "add", not "sub").
        @ mask must hold 0xFEFEFEFE (i.e. ~0x01010101) so that the shifted
        @ difference bits cannot cross into the neighbouring byte.
        @ a0/a1 are destroyed (they receive a & b); b0/b1 and mask are preserved.
        eor     \out0, \a0, \b0
        eor     \out1, \a1, \b1
        and     \a0, \a0, \b0
        and     \a1, \a1, \b1
        and     \out0, \out0, \mask
        and     \out1, \out1, \mask
        add     \out0, \a0, \out0, lsr #1
        add     \out1, \a1, \out1, lsr #1
.endm
70

    
71
@ ----------------------------------------------------------------
72
        .align 8
73
        .global put_pixels16_arm
put_pixels16_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Copies h rows of 16 bytes from pixels to block.
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r11, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 as scratch in the misaligned variants
        @
        @ The row counter is decremented after the first row has been copied,
        @ so h == 0 is not handled.
        @ The misaligned variants load five words (20 bytes) per row starting
        @ at pixels rounded down to a word boundary.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = jump table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: plain word copy
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        stmia   r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ offset 1
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ offset 2
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ offset 3
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
5:
        @ Jump table indexed by (pixels & 3). Entry 0 is never loaded because
        @ the dispatching "ldrne" is skipped when the offset is zero.
        .word   1b
        .word   2b
        .word   3b
        .word   4b
132

    
133
@ ----------------------------------------------------------------
134
        .align 8
135
        .global put_pixels8_arm
put_pixels8_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Copies h rows of 8 bytes from pixels to block.
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4, r5, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 as the third source word in the misaligned variants
        @
        @ The row counter is decremented after the first row has been copied,
        @ so h == 0 is not handled.
        @ The misaligned variants load three words (12 bytes) per row starting
        @ at pixels rounded down to a word boundary.
        pld     [r1]
        stmfd   sp!, {r4, r5, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = jump table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: plain word copy
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4, r5, pc}
        .align 8
2:
        @ offset 1
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4, r5, pc}
        .align 8
3:
        @ offset 2
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4, r5, pc}
        .align 8
4:
        @ offset 3
        ldmia   r1, {r4, r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4, r5, pc}
        .align 8
5:
        @ Jump table indexed by (pixels & 3). Entry 0 is never loaded because
        @ the dispatching "ldrne" is skipped when the offset is zero.
        .word   1b
        .word   2b
        .word   3b
        .word   4b
194

    
195
@ ----------------------------------------------------------------
196
        .align 8
197
        .global put_pixels8_x2_arm
put_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ For each of h rows, stores RND_AVG32 of the 8 bytes at pixels and
        @ the 8 bytes at pixels + 1 (horizontal half-sample, rounding variant).
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r10, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 = 0xFEFEFEFE, the mask expected by RND_AVG32
        @
        @ The row counter is decremented after the first row has been written,
        @ so h == 0 is not handled. Every variant loads three words per row
        @ starting at pixels rounded down to a word boundary.
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        ldr     r12, [r5]               @ table[0] holds the averaging mask
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: average the loaded words with the same data shifted by 1 byte
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
2:
        @ offset 1: average the data shifted by 1 byte with the data shifted by 2
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
3:
        @ offset 2: average the data shifted by 2 bytes with the data shifted by 3
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
4:
        @ offset 3: average the data shifted by 3 bytes with the second and
        @ third loaded words (r5, r10), which need no shifting
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}       @ update PC with LR content.
        .align 8
5:
        @ Word 0 is the averaging mask (the offset-0 case falls through and
        @ needs no table entry); words 1..3 are the misaligned entry points.
        .word   0xFEFEFEFE
        .word   2b
        .word   3b
        .word   4b
264

    
265
        .align 8
266
        .global put_no_rnd_pixels8_x2_arm
put_no_rnd_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ For each of h rows, stores NO_RND_AVG32 of the 8 bytes at pixels and
        @ the 8 bytes at pixels + 1 (horizontal half-sample, non-rounding variant).
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r10, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 = 0xFEFEFEFE, the mask expected by NO_RND_AVG32
        @
        @ The row counter is decremented after the first row has been written,
        @ so h == 0 is not handled. Every variant loads three words per row
        @ starting at pixels rounded down to a word boundary.
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        ldr     r12, [r5]               @ table[0] holds the averaging mask
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: average the loaded words with the same data shifted by 1 byte
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
2:
        @ offset 1: average the data shifted by 1 byte with the data shifted by 2
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
3:
        @ offset 2: average the data shifted by 2 bytes with the data shifted by 3
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
4:
        @ offset 3: average the data shifted by 3 bytes with the second and
        @ third loaded words (r5, r10), which need no shifting
        ldmia   r1, {r4, r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}       @ update PC with LR content.
        .align 8
5:
        @ Word 0 is the averaging mask (the offset-0 case falls through and
        @ needs no table entry); words 1..3 are the misaligned entry points.
        .word   0xFEFEFEFE
        .word   2b
        .word   3b
        .word   4b
333

    
334

    
335
@ ----------------------------------------------------------------
336
        .align 8
337
        .global put_pixels8_y2_arm
put_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Stores, for each output row, RND_AVG32 of two vertically adjacent
        @ 8-byte source rows (vertical half-sample, rounding variant).
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r11, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 = 0xFEFEFEFE, the mask expected by RND_AVG32
        @
        @ r3 is halved on entry and each loop pass writes two rows, so the
        @ code writes 2 * (h >> 1) rows and loads one source row more than
        @ that. The counter is decremented before it is tested, so h < 2 is
        @ not handled.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        mov     r3, r3, lsr #1          @ loop counter = h / 2 (two rows per pass)
        ldr     r12, [r5]               @ table[0] holds the averaging mask
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: rows alternate between r4/r5 and r6/r7
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
6:
        ldmia   r1, {r6, r7}
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ offset 1: rows alternate between r4/r5 and r7/r8 after realignment
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ offset 2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ offset 3
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}

        .align 8
5:
        @ Word 0 is the averaging mask (the offset-0 case falls through and
        @ needs no table entry); words 1..3 are the misaligned entry points.
        .word   0xFEFEFEFE
        .word   2b
        .word   3b
        .word   4b
444

    
445
        .align 8
446
        .global put_no_rnd_pixels8_y2_arm
put_no_rnd_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Stores, for each output row, NO_RND_AVG32 of two vertically adjacent
        @ 8-byte source rows (vertical half-sample, non-rounding variant).
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r11, lr (returns by popping the saved lr into pc)
        @ Uses:  r12 = 0xFEFEFEFE, the mask expected by NO_RND_AVG32
        @
        @ r3 is halved on entry and each loop pass writes two rows, so the
        @ code writes 2 * (h >> 1) rows and loads one source row more than
        @ that. The counter is decremented before it is tested, so h < 2 is
        @ not handled.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adr     r5, 5f                  @ r5 = table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        mov     r3, r3, lsr #1          @ loop counter = h / 2 (two rows per pass)
        ldr     r12, [r5]               @ table[0] holds the averaging mask
        add     r5, r5, r4, lsl #2      @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        @ offset 0: rows alternate between r4/r5 and r6/r7
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
6:
        ldmia   r1, {r6, r7}
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4, r5}
        add     r1, r1, r2
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8, r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ offset 1: rows alternate between r4/r5 and r7/r8 after realignment
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ offset 2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ offset 3
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:
        ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10, r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
5:
        @ Word 0 is the averaging mask (the offset-0 case falls through and
        @ needs no table entry); words 1..3 are the misaligned entry points.
        .word   0xFEFEFEFE
        .word   2b
        .word   3b
        .word   4b
552

    
553
@ ----------------------------------------------------------------
554
.macro  RND_XY2_IT align, rnd
        @ Processes one source row for the xy2 (2x2 average) functions.
        @
        @ With a = the 8 bytes at the current position and b = the 8 bytes one
        @ byte further on, this leaves:
        @   r8,  r9  = l1 = (a & 0x03030303) + (b & 0x03030303) ?(+ bias)
        @   r10, r11 = h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ The bias is added only when bit 0 of r3 is clear; it is 0x02020202
        @ for rnd == 1 and 0x01010101 otherwise.
        @
        @ In:     r1 = word-aligned source row, r2 = line_size, r3 = row counter,
        @         r12 = base of the constant table that follows the function
        @ Out:    r8-r11 as above, r1 advanced by r2
        @ Clobb:  r4-r7, r14, flags (r14 must already be saved by the caller)
        @ align:  byte offset (0..3) of the real source pointer inside its word
.if \align == 0
        ldmia   r1, {r6-r8}
.elseif \align == 3
        ldmia   r1, {r5-r7}
.else
        ldmia   r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
        @ After this group r4/r5 and r6/r7 hold the two 8-byte operands.
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr     r14, [r12, #0]          @ 0x03030303
        tst     r3, #1                  @ Z set when the row counter is even
        and     r8, r4, r14
        and     r9, r5, r14
        and     r10, r6, r14
        and     r11, r7, r14
.if \rnd == 1
        ldreq   r14, [r12, #16]         @ 0x02020202
.else
        ldreq   r14, [r12, #28]         @ 0x01010101
.endif
        add     r8, r8, r10
        add     r9, r9, r11
        addeq   r8, r8, r14             @ bias only on even counter values
        addeq   r9, r9, r14
        ldr     r14, [r12, #20]         @ 0xFCFCFCFC >> 2
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        and     r6, r14, r6, lsr #2
        and     r7, r14, r7, lsr #2
        add     r10, r4, r6
        add     r11, r5, r7
.endm
600

    
601
.macro RND_XY2_EXPAND align, rnd
        @ Complete row loop and function epilogue for one alignment variant of
        @ the xy2 functions. The enclosing function must already have pushed
        @ {r4-r11, lr} and set r0 = block, r1 = word-aligned source,
        @ r2 = line_size, r3 = h, r12 = constant table base.
        @
        @ Each pass combines the previous row's partial sums (kept on the stack
        @ across the RND_XY2_IT expansion) with the new row's partial sums:
        @   out = (((l_prev + l_cur) >> 2) & 0x0F0F0F0F) + h_prev + h_cur
        @ r3 is decremented before it is tested, so h == 0 is not handled.
        @
        @ NOTE(review): the priming RND_XY2_IT and the first in-loop one both
        @ test the same r3, so the first output row appears to receive the
        @ bias twice when h is even and not at all when h is odd -- confirm
        @ against the C reference implementation.
        RND_XY2_IT \align, \rnd         @ prime with the first source row
6:
        stmfd   sp!, {r8-r11}           @ park previous row's l (r8, r9) and h (r10, r11)
        RND_XY2_IT \align, \rnd
        ldmfd   sp!, {r4-r7}            @ r4, r5 = previous l; r6, r7 = previous h
        add     r4, r4, r8
        add     r5, r5, r9
        add     r6, r6, r10
        add     r7, r7, r11
        ldr     r14, [r12, #24]         @ 0x0F0F0F0F
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        subs    r3, r3, #1
        stmia   r0, {r4, r5}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}       @ return for the enclosing function
.endm
621

    
622
        .align 8
623
        .global put_pixels8_xy2_arm
put_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ 8-byte-wide 2x2 average (rounding variant, rnd = 1); the per-row
        @ work and the return are generated by RND_XY2_EXPAND.
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r11, lr (restored by the epilogue inside RND_XY2_EXPAND)
        @ Uses:  r12 = base of the table at label 5, r14 as scratch in the macros
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adrl    r12, 5f                 @ r12 = constant / jump table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        add     r5, r12, r4, lsl #2     @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        RND_XY2_EXPAND 0, 1

        .align 8
2:
        RND_XY2_EXPAND 1, 1

        .align 8
3:
        RND_XY2_EXPAND 2, 1

        .align 8
4:
        RND_XY2_EXPAND 3, 1

5:
        @ Layout is fixed: the macros read these words at offsets
        @ #0, #16, #20, #24 and #28 from r12; words 1..3 are the entry points
        @ for source offsets 1..3.
        .word   0x03030303
        .word   2b
        .word   3b
        .word   4b
        .word   0x02020202
        .word   0xFCFCFCFC >> 2
        .word   0x0F0F0F0F
        .word   0x01010101
658

    
659
        .align 8
660
        .global put_no_rnd_pixels8_xy2_arm
put_no_rnd_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ 8-byte-wide 2x2 average (non-rounding variant, rnd = 0); the per-row
        @ work and the return are generated by RND_XY2_EXPAND.
        @ block = word aligned, pixels = unaligned
        @
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves: r4-r11, lr (restored by the epilogue inside RND_XY2_EXPAND)
        @ Uses:  r12 = base of the table at label 5, r14 as scratch in the macros
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adrl    r12, 5f                 @ r12 = constant / jump table base
        ands    r4, r1, #3              @ r4 = byte offset inside the word, Z set if aligned
        add     r5, r12, r4, lsl #2     @ r5 = &table[offset]
        bic     r1, r1, #3              @ round source pointer down to a word boundary
        ldrne   pc, [r5]                @ misaligned: jump to variant 2, 3 or 4
1:
        RND_XY2_EXPAND 0, 0

        .align 8
2:
        RND_XY2_EXPAND 1, 0

        .align 8
3:
        RND_XY2_EXPAND 2, 0

        .align 8
4:
        RND_XY2_EXPAND 3, 0

5:
        @ Layout is fixed: the macros read these words at offsets
        @ #0, #16, #20, #24 and #28 from r12; words 1..3 are the entry points
        @ for source offsets 1..3.
        .word   0x03030303
        .word   2b
        .word   3b
        .word   4b
        .word   0x02020202
        .word   0xFCFCFCFC >> 2
        .word   0x0F0F0F0F
        .word   0x01010101