@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "asm.S"

        preserve8

#if !HAVE_PLD
.macro pld reg
.endm
#endif

#if HAVE_ARMV5TE
function ff_prefetch_arm, export=1
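        @ Prefetch r2 cache lines starting at r0, advancing by the
        @ stride in r1 each iteration (register roles inferred from
        @ the loop below).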
        subs    r2, r2, #1
        pld     [r0]
        add     r0, r0, r1
        bne     ff_prefetch_arm
        bx      lr
        .endfunc
#endif

.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
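
@ The ADJ_ALIGN_* macros realign data loaded with word-aligned ldm:
@ each output word takes its low bytes from one input word and its
@ high bytes from the next, yielding the byte stream that starts
@ \shift bytes into the first word.  A rough C sketch of one step,
@ assuming little-endian words and 1 <= shift <= 3 (the function
@ name is illustrative):
@
@     uint32_t realign(uint32_t w0, uint32_t w1, unsigned shift)
@     {
@         return (w0 >> (shift * 8)) | (w1 << (32 - shift * 8));
@     }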

.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn0, Rn1 are clobbered
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm
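
@ RND_AVG32 is a per-byte rounded average, ((a + b + 1) >> 1) in each
@ byte lane, computed without unpacking the bytes.  A C sketch of the
@ identity it relies on (for illustration only):
@
@     uint32_t rnd_avg32(uint32_t a, uint32_t b)
@     {
@         return (a | b) - (((a ^ b) & 0xfefefefe) >> 1);
@     }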

.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn0, Rn1 are clobbered
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm
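
@ NO_RND_AVG32 is the truncating counterpart, ((a + b) >> 1) per byte:
@
@     uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
@     {
@         return (a & b) + (((a ^ b) & 0xfefefefe) >> 1);
@     }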

.macro  JMP_ALIGN tmp, reg
        ands \tmp, \reg, #3
        bic  \reg, \reg, #3
        beq  1f
        subs \tmp, \tmp, #1
        beq  2f
        subs \tmp, \tmp, #1
        beq  3f
        b    4f
.endm
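
@ JMP_ALIGN rounds \reg down to a word boundary and dispatches on its
@ original low two bits: alignment 0 falls through to local label 1,
@ alignments 1-3 branch to labels 2-4.  Every caller below defines
@ those labels as its four alignment-specific copies of the loop.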

@ ----------------------------------------------------------------
        .align 5
function put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r7}
        add r1, r1, r2
        stmia r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r11, pc}
        .align 5
2:
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r11, pc}
        .align 5
3:
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r11, pc}
        .align 5
4:
        ldmia r1, {r4-r8}
        add r1, r1, r2
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

@ ----------------------------------------------------------------
        .align 5
function put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r5,pc}
        .align 5
2:
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r5,pc}
        .align 5
3:
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r5,pc}
        .align 5
4:
        ldmia r1, {r4-r5, r12}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r5,pc}
        .endfunc

@ ----------------------------------------------------------------
        .align 5
function put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc

        .align 5
function put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        ldmfd sp!, {r4-r10,pc}
        .align 5
2:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        ldmfd sp!, {r4-r10,pc}
        .align 5
3:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stmia r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        ldmfd sp!, {r4-r10,pc}
        .align 5
4:
        ldmia r1, {r4-r5, r10}
        add r1, r1, r2
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
        .endfunc

@ ----------------------------------------------------------------
        .align 5
function put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

        .align 5
function put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldmia r1, {r4-r5}
        add r1, r1, r2
6:      ldmia r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldmia r1, {r4-r5}
        add r1, r1, r2
        stmia r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stmia r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
2:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
3:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .align 5
4:
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
6:      ldmia r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        ldmia r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stmia r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        ldmfd sp!, {r4-r11,pc}
        .endfunc

        .ltorg

@ ----------------------------------------------------------------
.macro  RND_XY2_IT align, rnd
        @ l1 =  (a & 0x03030303) + (b & 0x03030303) (+ 0x02020202 when rounding)
        @ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldmia r1, {r6-r8}
.elseif \align == 3
        ldmia r1, {r5-r7}
.else
        ldmia r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
.if \align == 0
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, =0x03030303
        tst r3, #1
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        andeq r14, r14, r14, \rnd #1
        add r8, r8, r10
        add r9, r9, r11
        ldr r12, =0xfcfcfcfc >> 2
        addeq r8, r8, r14
        addeq r9, r9, r14
        and r4, r12, r4, lsr #2
        and r5, r12, r5, lsr #2
        and r6, r12, r6, lsr #2
        and r7, r12, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1
.endm

.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd
6:      stmfd sp!, {r8-r11}
        RND_XY2_IT \align, \rnd
        ldmfd sp!, {r4-r7}
        add r4, r4, r8
        add r5, r5, r9
        ldr r14, =0x0f0f0f0f
        add r6, r6, r10
        add r7, r7, r11
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stmia r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        ldmfd sp!, {r4-r11,pc}
.endm
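
@ Together these macros average four neighbouring pixels per output
@ byte.  RND_XY2_IT splits each source word into its low two bits and
@ high six bits so four values can be summed per byte lane without
@ overflow; RND_XY2_EXPAND then combines two such row sums.  The bias
@ (0x02020202 when \rnd is lsl, 0x01010101 when it is lsr) is added on
@ alternate iterations so each pair of summed rows receives it once.
@ A C sketch of the per-word idea for the rounding case, with a..d the
@ four neighbouring words (names are illustrative):
@
@     uint32_t lo = (a & 0x03030303) + (b & 0x03030303)
@                 + (c & 0x03030303) + (d & 0x03030303) + 0x02020202;
@     uint32_t hi = ((a >> 2) & 0x3f3f3f3f) + ((b >> 2) & 0x3f3f3f3f)
@                 + ((c >> 2) & 0x3f3f3f3f) + ((d >> 2) & 0x3f3f3f3f);
@     uint32_t avg = hi + ((lo >> 2) & 0x0f0f0f0f);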

        .align 5
function put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsl

        .align 5
2:
        RND_XY2_EXPAND 1, lsl

        .align 5
3:
        RND_XY2_EXPAND 2, lsl

        .align 5
4:
        RND_XY2_EXPAND 3, lsl
        .endfunc

        .align 5
function put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:
        RND_XY2_EXPAND 0, lsr

        .align 5
2:
        RND_XY2_EXPAND 1, lsr

        .align 5
3:
        RND_XY2_EXPAND 2, lsr

        .align 5
4:
        RND_XY2_EXPAND 3, lsr
        .endfunc

        .align 5
@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
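@ Each 16-bit coefficient is added to the corresponding dest byte and
@ the sum is clamped to [0, 255]: when bit 0x100 of the sum is set,
@ the result is replaced by ~coeff >> 24, which is 0x00 for a negative
@ coefficient (underflow) and 0xFF for a positive one (overflow).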
function ff_add_pixels_clamped_ARM, export=1
        push            {r4-r10}
        mov             r10, #8
1:
        ldr             r4,  [r1]               /* load dest */
        /* block[0] and block[1] */
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
        orr             r9,  r9,  r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        ldr             r4,  [r1, #4]           /* moved from [B] */
        orr             r9,  r9,  r8, lsl #24
        /* store dest */
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
        str             r9,  [r1]

        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r5,  r6
        add             r8,  r7,  r8, lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
        orr             r9,  r9,  r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6, lsr #16
        add             r8,  r7,  r8, lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        tst             r6,  #0x100
        movne           r6,  r5,  lsr #24
        tst             r8,  #0x100
        movne           r8,  r7,  lsr #24
        orr             r9,  r9,  r6, lsl #16
        add             r0,  r0,  #16           /* moved from [E] */
        orr             r9,  r9,  r8, lsl #24
        subs            r10, r10, #1            /* moved from [F] */
        /* store dest */
        str             r9,  [r1, #4]

        /* [E] */
        /* [F] */
        add             r1,  r1,  r2
        bne             1b

        pop             {r4-r10}
        bx              lr
        .endfunc