Statistics
| Branch: | Revision:

ffmpeg / libavcodec / armv4l / dsputil_arm_s.S @ d761f089

History | View | Annotate | Download (18.2 KB)

1
@
2
@ ARMv4L optimized DSP utils
3
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
4
@
5
@ This file is part of FFmpeg.
6
@
7
@ FFmpeg is free software; you can redistribute it and/or
8
@ modify it under the terms of the GNU Lesser General Public
9
@ License as published by the Free Software Foundation; either
10
@ version 2.1 of the License, or (at your option) any later version.
11
@
12
@ FFmpeg is distributed in the hope that it will be useful,
13
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
@ Lesser General Public License for more details.
16
@
17
@ You should have received a copy of the GNU Lesser General Public
18
@ License along with FFmpeg; if not, write to the Free Software
19
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
@
21

    
22
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        @ Funnel-shift five source words into four destination words:
        @   Rd[i] = (Rn[i] >> 8*shift) | (Rn[i+1] << (32 - 8*shift)),  i = 0..3
        @ shift is a byte count; the callers visible in this file pass 1, 2 or 3.
        @ All four right-shifted halves are written before any left-shifted
        @ half is merged in, so the Rd registers must not overlap Rn1..Rn4.
        @ The Rn registers are left unmodified.
        mov     \Rd0, \Rn0, lsr #(8 * \shift)
        mov     \Rd1, \Rn1, lsr #(8 * \shift)
        mov     \Rd2, \Rn2, lsr #(8 * \shift)
        mov     \Rd3, \Rn3, lsr #(8 * \shift)
        orr     \Rd0, \Rd0, \Rn1, lsl #(32 - 8 * \shift)
        orr     \Rd1, \Rd1, \Rn2, lsl #(32 - 8 * \shift)
        orr     \Rd2, \Rd2, \Rn3, lsl #(32 - 8 * \shift)
        orr     \Rd3, \Rd3, \Rn4, lsl #(32 - 8 * \shift)
.endm
32
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        @ In-place funnel shift of three words down to two:
        @   R0 = (R0 >> 8*shift) | (R1 << (32 - 8*shift))
        @   R1 = (R1 >> 8*shift) | (R2 << (32 - 8*shift))
        @ R0 and R1 are overwritten with the result; R2 is only read.
        @ R0 must be finished before R1 is shifted, because R0 still needs
        @ the original value of R1.
        mov     \R0, \R0, lsr #(8 * \shift)
        orr     \R0, \R0, \R1, lsl #(32 - 8 * \shift)
        mov     \R1, \R1, lsr #(8 * \shift)
        orr     \R1, \R1, \R2, lsl #(32 - 8 * \shift)
.endm
38
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        @ Funnel shift of three source words into two separate destinations:
        @   Rdst0 = (Rsrc0 >> 8*shift) | (Rsrc1 << (32 - 8*shift))
        @   Rdst1 = (Rsrc1 >> 8*shift) | (Rsrc2 << (32 - 8*shift))
        @ Rsrc0 is read only by the first instruction, so Rdst1 may be the
        @ same register as Rsrc0 (RND_XY2_IT relies on this); the destinations
        @ must not overlap Rsrc1 or Rsrc2.
        mov     \Rdst0, \Rsrc0, lsr #(8 * \shift)
        mov     \Rdst1, \Rsrc1, lsr #(8 * \shift)
        orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - 8 * \shift)
        orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - 8 * \shift)
.endm
44

    
45
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rounded-up average of packed bytes, on two words at once:
        @   Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ which for each byte equals (n + m + 1) >> 1.
        @ Rmask must hold 0xFEFEFEFE; it clears the bit that the shift would
        @ otherwise carry into the neighbouring byte.
        @ Rn0/Rn1 are destroyed (left holding Rn | Rm); Rm0/Rm1 are preserved.
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        orr     \Rn0, \Rn0, \Rm0
        orr     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        sub     \Rd0, \Rn0, \Rd0, lsr #1
        sub     \Rd1, \Rn1, \Rd1, lsr #1
.endm
58

    
59
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Truncating average of packed bytes, on two words at once:
        @   Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ which for each byte equals (n + m) >> 1.
        @ Rmask = 0xFEFEFEFE (keeps the shift from leaking across bytes)
        @ Rn = destroyed (left holding Rn & Rm); Rm is preserved
        eor     \Rd0, \Rn0, \Rm0
        eor     \Rd1, \Rn1, \Rm1
        and     \Rn0, \Rn0, \Rm0
        and     \Rn1, \Rn1, \Rm1
        and     \Rd0, \Rd0, \Rmask
        and     \Rd1, \Rd1, \Rmask
        add     \Rd0, \Rn0, \Rd0, lsr #1
        add     \Rd1, \Rn1, \Rd1, lsr #1
.endm
72

    
73
@ ----------------------------------------------------------------
74
        .align 8
        .global put_pixels16_arm
put_pixels16_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Copies h rows of 16 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r11; r12 is used as scratch and not preserved.
        @ h must be non-zero: every loop tests the counter only after a row
        @ has been written.
        @ NOTE(review): on ARM, gas treats ".align 8" as 2^8 = 256-byte
        @ alignment, which looks larger than needed; left as is.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0: already word aligned
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Aligned source: straight four-word copy.
        ldmia   r1, {r4-r7}
        add     r1, r1, r2
        stmia   r0, {r4-r7}
        pld     [r1]
        subs    r3, r3, #1
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ Source is 1 byte past a word boundary: load five words, realign.
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ Source is 2 bytes past a word boundary.
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ Source is 3 bytes past a word boundary.
        ldmia   r1, {r4-r8}
        add     r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r9-r12}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r11, pc}
134

    
135
@ ----------------------------------------------------------------
136
        .align 8
        .global put_pixels8_arm
put_pixels8_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Copies h rows of 8 bytes from pixels to block; both pointers
        @ advance by line_size after every row.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r5; r12 is used as scratch and not preserved.
        @ h must be non-zero: every loop tests the counter only after a row
        @ has been written.
        pld     [r1]
        stmfd   sp!, {r4-r5, lr}        @ R14 is also called LR
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0: already word aligned
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Aligned source: straight two-word copy.
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        subs    r3, r3, #1
        pld     [r1]
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r5, pc}
        .align 8
2:
        @ Source is 1 byte past a word boundary: load three words, realign.
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r5, pc}
        .align 8
3:
        @ Source is 2 bytes past a word boundary.
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r5, pc}
        .align 8
4:
        @ Source is 3 bytes past a word boundary.
        ldmia   r1, {r4-r5, r12}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld     [r1]
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r5, pc}
196

    
197
@ ----------------------------------------------------------------
198
        .align 8
        .global put_pixels8_x2_arm
put_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Horizontal half-pel, rounding up: for each of h rows writes 8 bytes
        @ where out[i] = (pixels[i] + pixels[i+1] + 1) >> 1.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r10; r12 holds the 0xFEFEFEFE mask for RND_AVG32.
        @ h must be non-zero (counter is tested after the first row).
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}       @ R14 is also called LR
        adr     r5, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        ldr     r12, [r5]               @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Offset 0: r4:r5 is already pixels[0..7]; build pixels[1..8] in r6:r7.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
2:
        @ Offset 1: extract the groups at byte offsets 1 and 2.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
3:
        @ Offset 2: extract the groups at byte offsets 2 and 3.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
4:
        @ Offset 3: the group at offset 4 is simply words r5:r10, so only
        @ the offset-3 group needs realigning.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}       @@ update PC with LR content.
        .align 8
5:
        .word   0xFEFEFEFE              @ byte mask loaded into r12 on entry
266

    
267
        .align 8
        .global put_no_rnd_pixels8_x2_arm
put_no_rnd_pixels8_x2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Horizontal half-pel, truncating: for each of h rows writes 8 bytes
        @ where out[i] = (pixels[i] + pixels[i+1]) >> 1.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r10; r12 holds the 0xFEFEFEFE mask for NO_RND_AVG32.
        @ h must be non-zero (counter is tested after the first row).
        pld     [r1]
        stmfd   sp!, {r4-r10, lr}       @ R14 is also called LR
        adr     r5, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        ldr     r12, [r5]               @ r12 = 0xFEFEFEFE (flags untouched)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Offset 0: r4:r5 is already pixels[0..7]; build pixels[1..8] in r6:r7.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     1b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
2:
        @ Offset 1: extract the groups at byte offsets 1 and 2.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     2b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
3:
        @ Offset 2: extract the groups at byte offsets 2 and 3.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs    r3, r3, #1
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        bne     3b
        ldmfd   sp!, {r4-r10, pc}
        .align 8
4:
        @ Offset 3: the group at offset 4 is simply words r5:r10, so only
        @ the offset-3 group needs realigning.
        ldmia   r1, {r4-r5, r10}
        add     r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     4b
        ldmfd   sp!, {r4-r10, pc}       @@ update PC with LR content.
        .align 8
5:
        .word   0xFEFEFEFE              @ byte mask loaded into r12 on entry
335

    
336

    
337
@ ----------------------------------------------------------------
338
        .align 8
        .global put_pixels8_y2_arm
put_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Vertical half-pel, rounding up: each output row is the bytewise
        @ (a + b + 1) >> 1 of two consecutive 8-byte source rows.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r11; r12 holds the 0xFEFEFEFE mask for RND_AVG32.
        @ The loop counter is h >> 1 and every iteration emits two rows, so
        @ h is expected to be even and at least 2.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adr     r5, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        mov     r3, r3, lsr #1          @ r3 = number of row pairs
        ldr     r12, [r5]               @ r12 = 0xFEFEFEFE
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        @ (mov/ldr/bic above leave the flags from ands intact.)
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Aligned source.  r4:r5 and r6:r7 alternate as "previous row".
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}
        add     r1, r1, r2
        pld     [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ Source 1 byte past a word boundary: each row is loaded as three
        @ words and realigned in place before averaging.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1              @ flags survive RND_AVG32/stmia/add
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ Source 2 bytes past a word boundary.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ Source 3 bytes past a word boundary.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}

        .align 8
5:
        .word   0xFEFEFEFE              @ byte mask loaded into r12 on entry
446

    
447
        .align 8
        .global put_no_rnd_pixels8_y2_arm
put_no_rnd_pixels8_y2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ Vertical half-pel, truncating: each output row is the bytewise
        @ (a + b) >> 1 of two consecutive 8-byte source rows.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r11; r12 holds the 0xFEFEFEFE mask for NO_RND_AVG32.
        @ The loop counter is h >> 1 and every iteration emits two rows, so
        @ h is expected to be even and at least 2.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adr     r5, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        mov     r3, r3, lsr #1          @ r3 = number of row pairs
        ldr     r12, [r5]               @ r12 = 0xFEFEFEFE
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        @ (mov/ldr/bic above leave the flags from ands intact.)
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        @ Aligned source.  r4:r5 and r6:r7 alternate as "previous row".
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
6:      ldmia   r1, {r6-r7}
        add     r1, r1, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia   r1, {r4-r5}
        add     r1, r1, r2
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        pld     [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs    r3, r3, #1
        stmia   r0, {r8-r9}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
2:
        @ Source 1 byte past a word boundary: each row is loaded as three
        @ words and realigned in place before averaging.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs    r3, r3, #1              @ flags survive NO_RND_AVG32/stmia/add
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
3:
        @ Source 2 bytes past a word boundary.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
4:
        @ Source 3 bytes past a word boundary.
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia   r1, {r7-r9}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        ldmia   r1, {r4-r6}
        add     r1, r1, r2
        pld     [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs    r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia   r0, {r10-r11}
        add     r0, r0, r2
        bne     6b
        ldmfd   sp!, {r4-r11, pc}
        .align 8
5:
        .word   0xFEFEFEFE              @ byte mask loaded into r12 on entry
554

    
555
@ ----------------------------------------------------------------
556
.macro  RND_XY2_IT align
        @ One source row of the 2x2 (xy2) average, split into low and high
        @ bit fields so four bytes can be summed without overflowing a byte:
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
        @ where a and b are the two horizontally adjacent 8-byte groups.
        @
        @ In:   r1  = word-aligned source pointer, r2 = line_size,
        @       r3  = row counter, r12 = constant table
        @             (+0: 0x03030303, +16: rounding constant, +20: 0xFCFCFCFC >> 2)
        @       align = byte offset (0..3) of the wanted pixels inside [r1]
        @ Out:  r8:r9 = l1, r10:r11 = h1, r1 advanced by r2,
        @       r3 decremented with the flags set by that final subs
        @ Uses: r4-r7 and r14 as scratch
.if \align == 0
        ldmia   r1, {r6-r8}
.elseif \align == 3
        ldmia   r1, {r5-r7}
.else
        ldmia   r1, {r8-r10}
.endif
        add     r1, r1, r2
        pld     [r1]
        @ After this step r4:r5 and r6:r7 hold the two adjacent groups
        @ (which of them is the left one depends on \align; the sum is symmetric).
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr     r14, [r12, #0]          @ 0x03030303
        tst     r3, #1                  @ Z set when the row counter is even
        and     r8, r4, r14
        and     r9, r5, r14
        and     r10, r6, r14
        and     r11, r7, r14
        @ The rounding constant is folded in only on even counter values, so
        @ of any two consecutive rows exactly one carries it.
        ldreq   r14, [r12, #16]         @ 0x02020202/0x01010101
        add     r8, r8, r10
        add     r9, r9, r11
        addeq   r8, r8, r14
        addeq   r9, r9, r14
        ldr     r14, [r12, #20]         @ 0xFCFCFCFC >> 2
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        and     r6, r14, r6, lsr #2
        and     r7, r14, r7, lsr #2
        add     r10, r4, r6
        add     r11, r5, r7
        subs    r3, r3, #1              @ flags consumed by the caller's loop branch
.endm
599

    
600
.macro RND_XY2_EXPAND align
        @ Complete xy2 loop body plus function epilogue for one source
        @ alignment.  Expects the register set-up described for RND_XY2_IT,
        @ r0 = destination, and r4-r11 + lr already pushed by the caller.
        @ The row sums of the previous source row are parked on the stack
        @ while the next row is processed, then both are combined:
        @   out = ((l_prev + l_cur) >> 2 & 0x0F0F0F0F) + (h_prev + h_cur)
        RND_XY2_IT \align               @ prime with the first source row
6:      stmfd   sp!, {r8-r11}           @ park previous row's l1/h1
        RND_XY2_IT \align
        ldmfd   sp!, {r4-r7}            @ r4:r5 = previous l1, r6:r7 = previous h1
        add     r4, r4, r8
        add     r5, r5, r9
        add     r6, r6, r10
        add     r7, r7, r11
        ldr     r14, [r12, #24]         @ 0x0F0F0F0F
        and     r4, r14, r4, lsr #2
        and     r5, r14, r5, lsr #2
        add     r4, r4, r6
        add     r5, r5, r7
        stmia   r0, {r4-r5}
        add     r0, r0, r2
        @ Nothing since the subs at the end of RND_XY2_IT has touched the
        @ flags, so this tests the decremented row counter in r3.
        bge     6b
        ldmfd   sp!, {r4-r11, pc}
.endm
619

    
620
        .align 8
        .global put_pixels8_xy2_arm
put_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ 2x2 half-pel average with rounding: each output byte is
        @ (a + b + c + d + 2) >> 2 over the 2x2 neighbourhood, 8 bytes per row.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r11; r12 points at the constant table at 5:.
        @ The loop and the return live inside RND_XY2_EXPAND.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adrl    r12, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        RND_XY2_EXPAND 0

        .align 8
2:
        RND_XY2_EXPAND 1

        .align 8
3:
        RND_XY2_EXPAND 2

        .align 8
4:
        RND_XY2_EXPAND 3

5:
        @ Constants read by RND_XY2_IT / RND_XY2_EXPAND at fixed offsets from
        @ r12 (#0, #16, #20, #24).  Words 1-3 are padding that keeps those
        @ offsets stable; they used to hold absolute branch targets.
        .word   0x03030303
        .word   0
        .word   0
        .word   0
        .word   0x02020202              @ rounding term
        .word   0xFCFCFCFC >> 2
        .word   0x0F0F0F0F
655

    
656
        .align 8
        .global put_no_rnd_pixels8_xy2_arm
put_no_rnd_pixels8_xy2_arm:
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ 2x2 half-pel average, "no rounding" variant: each output byte is
        @ (a + b + c + d + 1) >> 2 over the 2x2 neighbourhood, 8 bytes per row.
        @ block = word aligned, pixels = unaligned
        @ In:    r0 = block, r1 = pixels, r2 = line_size, r3 = h
        @ Saves/restores r4-r11; r12 points at the constant table at 5:.
        @ The loop and the return live inside RND_XY2_EXPAND.
        pld     [r1]
        stmfd   sp!, {r4-r11, lr}       @ R14 is also called LR
        adrl    r12, 5f
        ands    r4, r1, #3              @ r4 = byte misalignment of pixels (0..3)
        bic     r1, r1, #3              @ r1 = pixels rounded down to a word
        @ Dispatch on the misalignment with PC-relative branches, so no table
        @ of absolute code addresses (and no text relocation) is required.
        beq     1f                      @ 0
        cmp     r4, #2
        blt     2f                      @ 1
        beq     3f                      @ 2
        b       4f                      @ 3
1:
        RND_XY2_EXPAND 0

        .align 8
2:
        RND_XY2_EXPAND 1

        .align 8
3:
        RND_XY2_EXPAND 2

        .align 8
4:
        RND_XY2_EXPAND 3

5:
        @ Constants read by RND_XY2_IT / RND_XY2_EXPAND at fixed offsets from
        @ r12 (#0, #16, #20, #24).  Words 1-3 are padding that keeps those
        @ offsets stable; they used to hold absolute branch targets.
        .word   0x03030303
        .word   0
        .word   0
        .word   0
        .word   0x01010101              @ rounding term (smaller than the rnd variant's)
        .word   0xFCFCFCFC >> 2
        .word   0x0F0F0F0F