Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_armv6.S @ 2912e87a

History | View | Annotate | Download (20.5 KB)

/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20

    
21
#include "asm.S"
22

    
23
        @ preserve8 is not defined in this file; presumably a macro from asm.S
        @ that records the 8-byte stack alignment build attribute (confirm there).
        preserve8

        @ Everything below is emitted into the code section.
        .text
26

    
27
@ call_2x_pixels type, subp
@
@ Emits an exported function ff_<type>_pixels16<subp>_armv6 built from two
@ runs of ff_<type>_pixels8<subp>_armv6:
@   1. a normal call with the incoming r0-r3, which are saved around it;
@   2. a tail call with r0 and r1 each advanced by 8 and r2, r3 as on entry.
@ The second run returns straight to the original caller through lr.
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}             @ keep arguments and return address
        bl              ff_\type\()_pixels8\subp\()_armv6
        pop             {r0-r3, lr}             @ original arguments back
        add             r0,  r0,  #8            @ step both pointers 8 bytes along
        add             r1,  r1,  #8
        b               ff_\type\()_pixels8\subp\()_armv6
endfunc
.endm

@ Instantiate the 16-wide wrappers for each 8-wide routine used this way.
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
43

    
44
@ ff_put_pixels16_armv6
@ Copies 16 bytes per row from [r1] to [r0], two rows per loop iteration.
@   r0  store pointer (written with strd)
@   r1  load pointer
@   r2  byte offset added to both pointers after every row
@   r3  row counter, decremented by 2 per iteration; the loop ends when it
@       reaches exactly zero, so it must be a non-zero multiple of 2
@ Presumably called as (dst, src, stride, h) - confirm against the C prototype.
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        @ first row: load words 1..3, then word 0 with post-increment
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr             r4,  [r1], r2           @ r1 now points at the next row
        strd            r6,  r7,  [r0, #8]      @ upper 8 bytes
        ldr             r9,  [r1, #4]           @ second row loads start here
        strd            r4,  r5,  [r0],  r2     @ lower 8 bytes, advance r0
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr             r8,  [r1], r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2            @ two rows done
        strd            r8,  r9,  [r0],  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
65

    
66
@ ff_put_pixels8_armv6
@ Copies 8 bytes per row from [r1] to [r0], two rows per loop iteration.
@   r0  store pointer (written with strd)
@   r1  load pointer
@   r2  byte offset added to both pointers after every row
@   r3  row counter, decremented by 2 per iteration; loop ends at zero
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]           @ row A, bytes 4..7
        ldr             r4,  [r1], r2           @ row A, bytes 0..3, then next row
        ldr             r7,  [r1, #4]           @ row B, bytes 4..7
        strd            r4,  r5,  [r0],  r2     @ write row A
        ldr             r6,  [r1], r2           @ row B, bytes 0..3, then next row
        subs            r3,  r3,  #2
        strd            r6,  r7,  [r0],  r2     @ write row B
        bne             1b

        pop             {r4-r7}
        bx              lr
endfunc
81

    
82
@ ff_put_pixels8_x2_armv6
@ For each row writes 8 bytes to [r0], where output byte i is the rounded-up
@ average of source bytes i and i+1 read from [r1] (9 source bytes per row).
@   r0  store pointer, r1 load pointer, r2 byte offset between rows
@   r3  row counter, decremented by 2 per iteration; loop ends at zero
@ Rounding: uhadd8 yields floor((a+b)/2) per byte; adding (a^b)&1 turns that
@ into (a+b+1)>>1.  r12 holds 0x01010101 to isolate that low bit per byte.
@ The shift/orr pairs that build bytes 1..4 assume little-endian word loads.
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
1:
        ldr             r4,  [r1]               @ row A bytes 0..3
        subs            r3,  r3,  #2            @ flags consumed by the final bne
        ldr             r5,  [r1, #4]           @ row A bytes 4..7
        ldr             r7,  [r1, #5]           @ row A bytes 5..8 (unaligned load)
        lsr             r6,  r4,  #8
        ldr             r8,  [r1, r2]!          @ row B bytes 0..3, r1 moves to row B
        orr             r6,  r6,  r5,  lsl #24  @ r6 = row A bytes 1..4
        ldr             r9,  [r1, #4]           @ row B bytes 4..7
        ldr             r11, [r1, #5]           @ row B bytes 5..8
        lsr             r10, r8,  #8
        add             r1,  r1,  r2            @ r1 -> row after B
        orr             r10, r10, r9,  lsl #24  @ r10 = row B bytes 1..4
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12           @ rounding bits, row A low word
        and             r6,  r6,  r12           @ rounding bits, row A high word
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12           @ rounding bits, row B low word
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12           @ rounding bits, row B high word
        uadd8           r8,  r8,  r14
        strd            r4,  r5,  [r0],  r2     @ write row A result
        uadd8           r9,  r9,  r6
        strd            r8,  r9,  [r0],  r2     @ write row B result
        bne             1b

        pop             {r4-r11, pc}
endfunc
122

    
123
@ ff_put_pixels8_y2_armv6
@ For each output row writes 8 bytes to [r0], where every byte is the
@ rounded-up average of the source byte in row n and the byte directly below
@ it in row n+1 (source read from [r1]).  Producing h rows needs h+1 source
@ rows.
@   r0  store pointer, r1 load pointer, r2 byte offset between rows
@   r3  row counter, decremented by 2 per iteration; loop ends at zero
@ Rounding: uhadd8 yields floor((a+b)/2) per byte; adding (a^b)&1 turns that
@ into (a+b+1)>>1.  r12 holds 0x01010101 to isolate that low bit per byte.
@
@ The loop is software pipelined: each iteration preloads the row that the
@ next iteration starts with.  That preload is conditional (ldrne) so the
@ final iteration does not read a source row that would never be used.
@ This relies on the Z flag from the subs at the top of the loop surviving
@ to the end: the parallel add instructions only update the GE flags and no
@ other instruction in between sets flags, which the closing bne already
@ depends on.
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101
        ldr             r4,  [r1]               @ r4:r5 = source row 0
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!          @ r6:r7 = source row 1
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ Z set on the last iteration
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr             r4,  [r1, r2]!          @ r4:r5 = next source row
        uadd8           r8,  r8,  r10           @ r8:r9 = first output row
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6            @ r10:r11 = second output row
        and             r7,  r7,  r12
        ldrne           r6,  [r1, r2]!          @ preload only if another pass follows
        uadd8           r11, r11, r7
        strd            r8,  r9,  [r0],  r2
        ldrne           r7,  [r1, #4]
        strd            r10, r11, [r0],  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
endfunc
161

    
162
@ ff_put_pixels8_x2_no_rnd_armv6
@ For each row writes 8 bytes to [r0], where output byte i is the truncated
@ average floor((a+b)/2) of source bytes i and i+1 read from [r1].
@   r0  store pointer, r1 load pointer, r2 byte offset between rows
@   r3  row counter, decremented by 2 per iteration; loop ends at zero
@ The shift/orr pairs that build bytes 1..4 assume little-endian word loads.
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2            @ flags consumed by the final bne
        ldr             r4,  [r1]               @ row A bytes 0..3
        ldr             r5,  [r1, #4]           @ row A bytes 4..7
        ldr             r7,  [r1, #5]           @ row A bytes 5..8 (unaligned load)
        ldr             r8,  [r1, r2]!          @ row B bytes 0..3, r1 moves to row B
        ldr             r9,  [r1, #4]           @ row B bytes 4..7
        ldr             r14, [r1, #5]           @ row B bytes 5..8
        add             r1,  r1,  r2            @ r1 -> row after B
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24  @ r6 = row A bytes 1..4
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24  @ r12 = row B bytes 1..4
        uhadd8          r4,  r4,  r6            @ halving add, no rounding term
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}            @ write row A result
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}            @ write row B result
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
189

    
190
@ ff_put_pixels8_y2_no_rnd_armv6
@ For each output row writes 8 bytes to [r0], where every byte is the
@ truncated average floor((a+b)/2) of the source byte in row n and the byte
@ directly below it in row n+1 (source read from [r1]).  Producing h rows
@ needs h+1 source rows.
@   r0  store pointer, r1 load pointer, r2 byte offset between rows
@   r3  row counter, decremented by 2 per iteration; loop ends at zero
@
@ The loop is software pipelined: each iteration preloads the row that the
@ next iteration starts with.  That preload is conditional (ldrne) so the
@ final iteration does not read a source row that would never be used.
@ This relies on the Z flag from the subs at the top of the loop surviving
@ to the end; uhadd8, ldr, stm and add (without the s suffix) leave the
@ condition flags alone, which the closing bne already depends on.
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]               @ r4:r5 = source row 0
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!          @ r6:r7 = source row 1
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ Z set on the last iteration
        uhadd8          r8,  r4,  r6            @ r8:r9 = first output row
        ldr             r4,  [r1, r2]!          @ r4:r5 = next source row
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6            @ r12:r14 = second output row
        ldrne           r6,  [r1, r2]!          @ preload only if another pass follows
        uhadd8          r14, r5,  r7
        ldrne           r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
endfunc
214

    
215
@ ff_avg_pixels8_armv6
@ For each row replaces the 8 bytes at [r0] with the rounded-up per-byte
@ average of those bytes and the 8 bytes at [r1].
@   r0  read/write pointer, r1 load pointer, r2 byte offset between rows
@   r3  row counter, consumed two rows at a time; the exit test is for
@       exactly zero
@ Rounding: uhadd8 yields floor((a+b)/2) per byte; adding (a^b)&1 turns that
@ into (a+b+1)>>1.  lr holds 0x01010101 to isolate that low bit per byte.
@ The loop is software pipelined: loads for the following row are issued
@ while the current row is still being combined; label 2 finishes the last
@ row without starting further loads.
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldrd            r4,  r5,  [r0]          @ r4:r5  = [r0] row 0
        ldr             r10, [r1, #4]           @ r9:r10 = [r1] row 0
        ldr             r9,  [r1], r2
        subs            r3,  r3,  #2            @ flags tested by beq below
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd            r6,  r7,  [r0, r2]      @ r6:r7 = [r0] second row of the pair
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]           @ r9:r10 = [r1] second row of the pair
        and             r12, r12, lr
        uadd8           r4,  r4,  r8
        ldr             r9,  [r1], r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd            r4,  r5,  [r0], r2      @ write first row of the pair
        uhadd8          r7,  r7,  r10
        beq             2f                      @ counter hit zero: drain and leave
        and             r8,  r8,  lr
        ldrd            r4,  r5,  [r0, r2]      @ preload [r0] row for the next pass
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]           @ preload [r1] row for the next pass
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr             r9,  [r1], r2
        strd            r6,  r7,  [r0], r2      @ write second row of the pair
        b               1b
2:
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd            r6,  r7,  [r0], r2      @ write the final row

        pop             {r4-r10, pc}
endfunc
264

    
265
@ ff_add_pixels_clamped_armv6
@ Runs 8 iterations.  Each one reads eight consecutive 16-bit values from
@ [r0] (r0 advances by 16 bytes), adds them to the eight bytes at [r1],
@ saturates every sum to 0..255 and writes the eight bytes back to [r1];
@ r1 then advances by r2.
@   r0  pointer to the 16-bit values
@   r1  read/write byte pointer
@   r2  byte offset between rows of [r1]
@ r3 is overwritten and used as the iteration counter.
function ff_add_pixels_clamped_armv6, export=1
        push            {r4-r8,lr}
        mov             r3,  #8                 @ iterations
1:
        ldm             r0!, {r4,r5,r12,lr}     @ eight halfwords
        ldrd            r6,  r7,  [r1]          @ eight bytes
        @ regroup the halfwords into even/odd pairs to match uxtab16
        pkhbt           r8,  r4,  r5,  lsl #16  @ halfwords 0 and 2
        pkhtb           r5,  r5,  r4,  asr #16  @ halfwords 1 and 3
        pkhbt           r4,  r12, lr,  lsl #16  @ halfwords 4 and 6
        pkhtb           lr,  lr,  r12, asr #16  @ halfwords 5 and 7
        pld             [r1, r2]
        uxtab16         r8,  r8,  r6            @ + bytes 0 and 2
        uxtab16         r5,  r5,  r6,  ror #8   @ + bytes 1 and 3
        uxtab16         r4,  r4,  r7            @ + bytes 4 and 6
        uxtab16         lr,  lr,  r7,  ror #8   @ + bytes 5 and 7
        usat16          r8,  #8,  r8            @ clamp each halfword to 0..255
        usat16          r5,  #8,  r5
        usat16          r4,  #8,  r4
        usat16          lr,  #8,  lr
        orr             r6,  r8,  r5,  lsl #8   @ interleave back into bytes 0..3
        orr             r7,  r4,  lr,  lsl #8   @ interleave back into bytes 4..7
        subs            r3,  r3,  #1
        strd            r6,  r7,  [r1],  r2
        bgt             1b
        pop             {r4-r8,pc}
endfunc
291

    
292
@ ff_get_pixels_armv6
@ Runs 8 iterations.  Each one reads 8 bytes from [r1] (r1 advances by r2),
@ zero-extends every byte to 16 bits and stores the eight halfwords, in
@ source byte order, to [r0] (r0 advances by 16 bytes).
@   r0  output pointer for the halfwords
@   r1  byte source pointer
@   r2  byte offset between source rows
function ff_get_pixels_armv6, export=1
        pld             [r1, r2]
        push            {r4-r8, lr}
        mov             lr,  #8                 @ iterations
1:
        ldrd            r4,  r5,  [r1],  r2
        subs            lr,  lr,  #1            @ flags consumed by the final bgt
        uxtb16          r6,  r4                 @ bytes 0 and 2
        uxtb16          r4,  r4,  ror #8        @ bytes 1 and 3
        uxtb16          r12, r5                 @ bytes 4 and 6
        uxtb16          r8,  r5,  ror #8        @ bytes 5 and 7
        pld             [r1, r2]
        pkhbt           r5,  r6,  r4,  lsl #16  @ halfwords for bytes 0, 1
        pkhtb           r6,  r4,  r6,  asr #16  @ halfwords for bytes 2, 3
        pkhbt           r7,  r12, r8,  lsl #16  @ halfwords for bytes 4, 5
        pkhtb           r12, r8,  r12, asr #16  @ halfwords for bytes 6, 7
        stm             r0!, {r5,r6,r7,r12}
        bgt             1b

        pop             {r4-r8, pc}
endfunc
313

    
314
@ ff_diff_pixels_armv6
@ Runs 8 iterations.  Each one reads 8 bytes from [r1] and 8 bytes from [r2]
@ (both advance by r3), computes the per-byte difference [r1] - [r2] as a
@ 16-bit value and stores the eight halfwords, in byte order, to [r0]
@ (r0 advances by 16 bytes).
@   r0  output pointer for the halfwords
@   r1  minuend byte pointer, r2 subtrahend byte pointer
@   r3  byte offset between rows of both sources
function ff_diff_pixels_armv6, export=1
        pld             [r1, r3]
        pld             [r2, r3]
        push            {r4-r9, lr}
        mov             lr,  #8                 @ iterations
1:
        ldrd            r4,  r5,  [r1],  r3
        ldrd            r6,  r7,  [r2],  r3
        uxtb16          r8,  r4                 @ [r1] bytes 0 and 2
        uxtb16          r4,  r4,  ror #8        @ [r1] bytes 1 and 3
        uxtb16          r9,  r6                 @ [r2] bytes 0 and 2
        uxtb16          r6,  r6,  ror #8        @ [r2] bytes 1 and 3
        pld             [r1, r3]
        ssub16          r9,  r8,  r9            @ differences 0 and 2
        ssub16          r6,  r4,  r6            @ differences 1 and 3
        uxtb16          r8,  r5                 @ [r1] bytes 4 and 6
        uxtb16          r5,  r5,  ror #8        @ [r1] bytes 5 and 7
        pld             [r2, r3]
        pkhbt           r4,  r9,  r6,  lsl #16  @ differences 0, 1
        pkhtb           r6,  r6,  r9,  asr #16  @ differences 2, 3
        uxtb16          r9,  r7                 @ [r2] bytes 4 and 6
        uxtb16          r7,  r7,  ror #8        @ [r2] bytes 5 and 7
        ssub16          r9,  r8,  r9            @ differences 4 and 6
        ssub16          r5,  r5,  r7            @ differences 5 and 7
        subs            lr,  lr,  #1
        pkhbt           r8,  r9,  r5,  lsl #16  @ differences 4, 5
        pkhtb           r9,  r5,  r9,  asr #16  @ differences 6, 7
        stm             r0!, {r4,r6,r8,r9}
        bgt             1b

        pop             {r4-r9, pc}
endfunc
346

    
347
@ ff_pix_abs16_armv6
@ Returns in r0 the sum of absolute differences between 16 bytes per row at
@ [r1] and 16 bytes per row at [r2], accumulated over a number of rows.
@   r0      incoming value is ignored and overwritten
@   r1, r2  the two byte pointers being compared
@   r3      byte offset between rows of both inputs
@   [sp]    row count, read from the first stack slot before the push
@ Two accumulators (r12 and lr) are used alternately and added at the end.
@ The loop exits when the counter reaches exactly zero, so the row count
@ must be at least 1.
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]               @ r0 = row count
        push            {r4-r9, lr}
        mov             r12, #0                 @ accumulator A
        mov             lr,  #0                 @ accumulator B
        ldm             r1,  {r4-r7}            @ first 16 bytes of [r1]
        ldr             r8,  [r2]
1:
        ldr             r9,  [r2, #4]
        pld             [r1, r3]
        usada8          r12, r4,  r8,  r12      @ bytes 0..3
        ldr             r8,  [r2, #8]
        pld             [r2, r3]
        usada8          lr,  r5,  r9,  lr       @ bytes 4..7
        ldr             r9,  [r2, #12]
        usada8          r12, r6,  r8,  r12      @ bytes 8..11
        subs            r0,  r0,  #1
        usada8          lr,  r7,  r9,  lr       @ bytes 12..15
        beq             2f                      @ last row done, skip further loads
        add             r1,  r1,  r3
        ldm             r1,  {r4-r7}            @ next row of [r1]
        add             r2,  r2,  r3
        ldr             r8,  [r2]               @ first word of next row of [r2]
        b               1b
2:
        add             r0,  r12, lr            @ combine both accumulators
        pop             {r4-r9, pc}
endfunc
375

    
376
@ ff_pix_abs16_x2_armv6
@ Returns in r0 the sum of absolute differences between 16 bytes per row at
@ [r1] and a horizontally averaged version of the row at [r2]: reference
@ byte i is the rounded-up average of [r2] bytes i and i+1, so 17 bytes of
@ each [r2] row are read.
@   r0      incoming value is ignored; used as the accumulator
@   r1, r2  byte pointers, r3 byte offset between rows of both
@   [sp]    row count, read from the first stack slot before the push
@ Rounding: uhadd8 yields floor((a+b)/2) per byte; adding (a^b)&1 turns that
@ into (a+b+1)>>1.  lr holds 0x01010101 to isolate that low bit per byte.
@ The shift/orr pairs that build the shifted words assume little-endian loads.
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]               @ r12 = row count
        push            {r4-r11, lr}
        mov             r0,  #0                 @ accumulator
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
1:
        ldr             r8,  [r2]               @ [r2] bytes 0..3
        ldr             r9,  [r2, #4]           @ [r2] bytes 4..7
        lsr             r10, r8,  #8
        ldr             r4,  [r1]               @ [r1] bytes 0..3
        lsr             r6,  r9,  #8
        orr             r10, r10, r9,  lsl #24  @ [r2] bytes 1..4
        ldr             r5,  [r2, #8]           @ [r2] bytes 8..11
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10
        orr             r6,  r6,  r5,  lsl #24  @ [r2] bytes 5..8
        and             r11, r11, lr
        uadd8           r7,  r7,  r11           @ averaged bytes 0..3
        ldr             r8,  [r1, #4]           @ [r1] bytes 4..7
        usada8          r0,  r4,  r7,  r0
        eor             r7,  r9,  r6
        lsr             r10, r5,  #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]          @ [r2] bytes 12..15
        uadd8           r4,  r4,  r7            @ averaged bytes 4..7
        pld             [r1, r3]
        orr             r10, r10, r6,  lsl #24  @ [r2] bytes 9..12
        usada8          r0,  r8,  r4,  r0
        ldr             r4,  [r1, #8]           @ [r1] bytes 8..11
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]          @ [r2] byte 16, the 17th byte
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]          @ [r1] bytes 12..15
        uadd8           r8,  r8,  r11           @ averaged bytes 8..11
        pld             [r2, r3]
        lsr             r10, r6,  #8
        usada8          r0,  r4,  r8,  r0
        orr             r10, r10, r7,  lsl #24  @ [r2] bytes 13..16
        subs            r12, r12, #1            @ flags consumed by the final bgt
        eor             r11, r6,  r10
        add             r1,  r1,  r3
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11           @ averaged bytes 12..15
        add             r2,  r2,  r3
        usada8          r0,  r5,  r9,  r0
        bgt             1b

        pop             {r4-r11, pc}
endfunc
430

    
431
@ usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
@
@ Processes one 16-byte row.  On entry p0..p3 hold the four words of the
@ previous [r2] row.  The macro loads the current [r2] row, forms the
@ rounded-up per-byte average of the previous and current rows, and adds
@ the absolute differences against the 16 bytes at [r1] to r0.  Both r1
@ and r2 are then advanced by r3.
@ On exit n0..n3 hold the four words of the current [r2] row, so the next
@ expansion can swap the p and n groups; p0..p3 are clobbered.
@ Expects lr = 0x01010101 (mask for the rounding bit) and r0 = accumulator.
.macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
        @ word 0
        ldr             \n0, [r2]
        eor             \n1, \p0, \n0
        uhadd8          \p0, \p0, \n0
        and             \n1, \n1, lr
        ldr             \n2, [r1]
        uadd8           \p0, \p0, \n1
        ldr             \n1, [r2, #4]
        usada8          r0,  \p0, \n2, r0
        pld             [r1, r3]
        @ word 1
        eor             \n3, \p1, \n1
        uhadd8          \p1, \p1, \n1
        and             \n3, \n3, lr
        ldr             \p0, [r1, #4]
        uadd8           \p1, \p1, \n3
        ldr             \n2, [r2, #8]
        usada8          r0,  \p1, \p0, r0
        pld             [r2, r3]
        @ word 2
        eor             \p0, \p2, \n2
        uhadd8          \p2, \p2, \n2
        and             \p0, \p0, lr
        ldr             \p1, [r1, #8]
        uadd8           \p2, \p2, \p0
        ldr             \n3, [r2, #12]
        usada8          r0,  \p2, \p1, r0
        @ word 3
        eor             \p1, \p3, \n3
        uhadd8          \p3, \p3, \n3
        and             \p1, \p1, lr
        ldr             \p0, [r1, #12]
        uadd8           \p3, \p3, \p1
        add             r1,  r1,  r3
        usada8          r0,  \p3, \p0, r0
        add             r2,  r2,  r3
.endm
465

    
466
@ ff_pix_abs16_y2_armv6
@ Returns in r0 the sum of absolute differences between 16 bytes per row at
@ [r1] and a vertically averaged version of [r2]: each reference row is the
@ rounded-up per-byte average of a [r2] row and the row r3 bytes after it.
@   r0      incoming value is ignored; used as the accumulator
@   r1, r2  byte pointers, r3 byte offset between rows of both
@   [sp]    row count, read from the first stack slot before the push
@ Two rows are handled per loop pass via two usad_y2 expansions that swap
@ the roles of r4-r7 and r8-r11, so no register moves are needed.
function ff_pix_abs16_y2_armv6, export=1
        pld             [r1]
        pld             [r2]
        ldr             r12, [sp]               @ r12 = row count
        push            {r4-r11, lr}
        mov             r0,  #0                 @ accumulator
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101
        ldr             r4,  [r2]               @ r4-r7 = first [r2] row
        ldr             r5,  [r2, #4]
        ldr             r6,  [r2, #8]
        ldr             r7,  [r2, #12]
        add             r2,  r2,  r3            @ r2 -> second [r2] row
1:
        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
        subs            r12, r12, #2            @ flags survive the second expansion
        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
        bgt             1b

        pop             {r4-r11, pc}
endfunc
488

    
489
@ ff_pix_abs8_armv6
@ Returns in r0 the sum of absolute differences between 8 bytes per row at
@ [r1] and 8 bytes per row at [r2], accumulated over a number of rows.
@   r0      incoming value is ignored; used as one accumulator
@   r1, r2  byte pointers, r3 byte offset between rows of both
@   [sp]    row count, read from the first stack slot before the push;
@           consumed two rows at a time with an exit test for exactly zero
@ Two accumulators (r0 and lr) are used and added at the end.  The loop is
@ software pipelined; label 2 finishes the last row without further loads.
function ff_pix_abs8_armv6, export=1
        pld             [r2, r3]
        ldr             r12, [sp]               @ r12 = row count
        push            {r4-r9, lr}
        mov             r0,  #0                 @ accumulator A
        mov             lr,  #0                 @ accumulator B
        ldrd            r4,  r5,  [r1], r3      @ first [r1] row
1:
        subs            r12, r12, #2            @ flags tested by beq below
        ldr             r7,  [r2, #4]
        ldr             r6,  [r2], r3           @ r6:r7 = [r2] first row of the pair
        ldrd            r8,  r9,  [r1], r3      @ r8:r9 = [r1] second row of the pair
        usada8          r0,  r4,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r5,  r7,  lr
        ldr             r7,  [r2, #4]
        ldr             r6,  [r2], r3           @ r6:r7 = [r2] second row of the pair
        beq             2f                      @ counter hit zero: drain and leave
        ldrd            r4,  r5,  [r1], r3      @ preload [r1] row for the next pass
        usada8          r0,  r8,  r6,  r0
        pld             [r2, r3]
        usada8          lr,  r9,  r7,  lr
        b               1b
2:
        usada8          r0,  r8,  r6,  r0
        usada8          lr,  r9,  r7,  lr
        add             r0,  r0,  lr            @ combine both accumulators
        pop             {r4-r9, pc}
endfunc
518

    
519
@ ff_sse16_armv6
@ Returns in r0 the sum of squared differences between 16 bytes per row at
@ [r1] and 16 bytes per row at [r2], accumulated over a number of rows.
@   r0      incoming value is ignored; used as the accumulator
@   r1, r2  byte pointers, r3 byte offset between rows of both
@   [sp]    row count, read from the first stack slot before the push
@ Bytes are widened to 16-bit lanes with uxtb16, subtracted lane-wise, and
@ each difference pair is squared and accumulated by smlad (x*x + y*y + acc).
function ff_sse16_armv6, export=1
        ldr             r12, [sp]               @ r12 = row count
        push            {r4-r9, lr}
        mov             r0,  #0                 @ accumulator
1:
        ldrd            r4,  r5,  [r1]          @ [r1] bytes 0..7
        ldr             r8,  [r2]               @ [r2] bytes 0..3
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        ldr             r7,  [r2, #4]           @ [r2] bytes 4..7
        usub16          lr,  lr,  r9            @ differences 0 and 2
        usub16          r4,  r4,  r8            @ differences 1 and 3
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          lr,  r5,  ror #8
        uxtb16          r8,  r7
        uxtb16          r9,  r7,  ror #8
        smlad           r0,  r4,  r4,  r0
        ldrd            r4,  r5,  [r1, #8]      @ [r1] bytes 8..15
        usub16          r6,  r6,  r8            @ differences 4 and 6
        usub16          r8,  lr,  r9            @ differences 5 and 7
        ldr             r7,  [r2, #8]           @ [r2] bytes 8..11
        smlad           r0,  r6,  r6,  r0
        uxtb16          lr,  r4
        uxtb16          r4,  r4,  ror #8
        uxtb16          r9,  r7
        uxtb16          r7,  r7,  ror #8
        smlad           r0,  r8,  r8,  r0
        ldr             r8,  [r2, #12]          @ [r2] bytes 12..15
        usub16          lr,  lr,  r9            @ differences 8 and 10
        usub16          r4,  r4,  r7            @ differences 9 and 11
        smlad           r0,  lr,  lr,  r0
        uxtb16          r6,  r5
        uxtb16          r5,  r5,  ror #8
        uxtb16          r9,  r8
        uxtb16          r8,  r8,  ror #8
        smlad           r0,  r4,  r4,  r0
        usub16          r6,  r6,  r9            @ differences 12 and 14
        usub16          r5,  r5,  r8            @ differences 13 and 15
        smlad           r0,  r6,  r6,  r0
        add             r1,  r1,  r3
        add             r2,  r2,  r3
        subs            r12, r12, #1
        smlad           r0,  r5,  r5,  r0
        bgt             1b

        pop             {r4-r9, pc}
endfunc
569

    
570
@ ff_pix_norm1_armv6
@ Returns in r0 the sum of the squares of 16 bytes per row over 16 rows.
@   r0  byte pointer (advanced by r1 after every row)
@   r1  byte offset between rows
@ r2-r5 hold the 16 bytes of the current row; each word is split into even
@ and odd bytes with uxtb16 and squared/accumulated with smlad into lr.
function ff_pix_norm1_armv6, export=1
        push            {r4-r6, lr}
        mov             r12, #16                @ rows
        mov             lr,  #0                 @ accumulator
1:
        ldm             r0,  {r2-r5}            @ one 16-byte row
        uxtb16          r6,  r2
        uxtb16          r2,  r2,  ror #8
        smlad           lr,  r6,  r6,  lr       @ bytes 0 and 2
        uxtb16          r6,  r3
        smlad           lr,  r2,  r2,  lr       @ bytes 1 and 3
        uxtb16          r3,  r3,  ror #8
        smlad           lr,  r6,  r6,  lr       @ bytes 4 and 6
        uxtb16          r6,  r4
        smlad           lr,  r3,  r3,  lr       @ bytes 5 and 7
        uxtb16          r4,  r4,  ror #8
        smlad           lr,  r6,  r6,  lr       @ bytes 8 and 10
        uxtb16          r6,  r5
        smlad           lr,  r4,  r4,  lr       @ bytes 9 and 11
        uxtb16          r5,  r5,  ror #8
        smlad           lr,  r6,  r6,  lr       @ bytes 12 and 14
        subs            r12, r12, #1
        add             r0,  r0,  r1
        smlad           lr,  r5,  r5,  lr       @ bytes 13 and 15
        bgt             1b

        mov             r0,  lr                 @ result
        pop             {r4-r6, pc}
endfunc
599

    
600
@ ff_pix_sum_armv6
@ Returns in r0 the sum of 16 bytes per row over 16 rows.
@   r0  byte pointer (advanced by r1 between rows)
@   r1  byte offset between rows
@ usada8 against a zero register (lr) adds up the four bytes of a word.
@ Two accumulators (r2 and r3) are used alternately and added at the end.
@ The first word of the following row is loaded early; label 2 finishes the
@ last row without that extra load.
function ff_pix_sum_armv6, export=1
        push            {r4-r7, lr}
        mov             r12, #16                @ rows
        mov             r2,  #0                 @ accumulator A
        mov             r3,  #0                 @ accumulator B
        mov             lr,  #0                 @ constant zero operand
        ldr             r4,  [r0]               @ first word of the first row
1:
        subs            r12, r12, #1            @ flags tested by beq and bgt
        ldr             r5,  [r0, #4]
        usada8          r2,  r4,  lr,  r2       @ bytes 0..3
        ldr             r6,  [r0, #8]
        usada8          r3,  r5,  lr,  r3       @ bytes 4..7
        ldr             r7,  [r0, #12]
        usada8          r2,  r6,  lr,  r2       @ bytes 8..11
        beq             2f                      @ last row: no further loads
        ldr             r4,  [r0, r1]!          @ first word of the next row
        usada8          r3,  r7,  lr,  r3       @ bytes 12..15
        bgt             1b
2:
        usada8          r3,  r7,  lr,  r3       @ bytes 12..15 of the last row
        add             r0,  r2,  r3            @ combine both accumulators
        pop             {r4-r7, pc}
endfunc