Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_armv6.S @ 31326143

History | View | Annotate | Download (17.1 KB)

/*
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
 * The "function" wrapper used by every routine below is not defined in
 * this file; presumably it comes from asm.S -- verify against that header.
 */
#include "asm.S"

        .text
/*
 * call_2x_pixels type, subp
 *
 * Emits the exported function ff_<type>_pixels16<subp>_armv6.  It handles
 * a 16-byte-wide block as two 8-byte-wide halves by running
 * ff_<type>_pixels8<subp>_armv6 twice:
 *   - first call:  r0-r3 exactly as received;
 *   - second call: the same r2 and r3, with r0 and r1 each advanced by 8.
 *
 * subp may be left empty, in which case the plain pixels8 routine is used.
 */
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        push            {r0-r3, lr}             /* keep the arguments and return address across the call */
        bl              ff_\type\()_pixels8\subp\()_armv6
        pop             {r0-r3, lr}
        add             r0,  r0,  #8            /* move both pointers to the right-hand half */
        add             r1,  r1,  #8
        b               ff_\type\()_pixels8\subp\()_armv6       /* tail call: the callee returns to our caller */
.endfunc
.endm
/*
 * 16-byte-wide entry points generated by call_2x_pixels.  Each line defines
 * ff_<type>_pixels16<subp>_armv6 in terms of the 8-byte-wide routine
 * ff_<type>_pixels8<subp>_armv6.
 */
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
/*
 * ff_put_pixels16_armv6
 *
 * Copies a block 16 bytes wide from r1 to r0, two rows per loop pass.
 *
 *   r0  destination; written with strd (doubleword stores)
 *   r1  source; read with word loads
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; reduced by 2 per pass and tested with bne, so it must
 *       be a non-zero multiple of 2
 *
 * r4-r11 are saved and restored.  r0, r1, r3 and the flags are modified.
 * NOTE(review): strd presumably needs r0 and r2 suitably aligned -- confirm
 * against the callers.
 */
function ff_put_pixels16_armv6, export=1
        push            {r4-r11}
1:
        /* first row of the pair: bytes 4..15 first, then bytes 0..3 with post-increment */
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr             r4,  [r1], r2
        strd            r6,  r7,  [r0, #8]
        ldr             r9,  [r1, #4]           /* second row loads are interleaved with first row stores */
        strd            r4,  r5,  [r0],  r2
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr             r8,  [r1], r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2            /* strd does not touch the flags, so bne still sees this result */
        strd            r8,  r9,  [r0],  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
.endfunc
/*
 * ff_put_pixels8_armv6
 *
 * Copies a block 8 bytes wide from r1 to r0, two rows per loop pass.
 *
 *   r0  destination; written with strd
 *   r1  source; read with word loads
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; reduced by 2 per pass and tested with bne, so it must
 *       be a non-zero multiple of 2
 *
 * r4-r7 are saved and restored.  r0, r1, r3 and the flags are modified.
 */
function ff_put_pixels8_armv6, export=1
        push            {r4-r7}
1:
        ldr             r5,  [r1, #4]           /* upper word first; the lower word load advances r1 */
        ldr             r4,  [r1], r2
        ldr             r7,  [r1, #4]
        strd            r4,  r5,  [r0],  r2
        ldr             r6,  [r1], r2
        subs            r3,  r3,  #2
        strd            r6,  r7,  [r0],  r2
        bne             1b

        pop             {r4-r7}
        bx              lr
.endfunc
/*
 * ff_put_pixels8_x2_armv6
 *
 * Writes an 8-byte-wide block to r0 in which each byte is the rounded-up
 * average of two horizontally adjacent source bytes:
 *     dst[i] = (src[i] + src[i + 1] + 1) >> 1,   i = 0..7
 * Two rows are produced per loop pass.
 *
 *   r0  destination; written with strd
 *   r1  source; bytes 0..8 of each row are read
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; non-zero multiple of 2 (subs #2 / bne)
 *
 * Rounding: uhadd8 gives (a + b) >> 1 per byte and so drops the low bit of
 * the sum.  (a ^ b) & 1 is exactly that dropped bit, and uadd8 adds it back.
 *
 * The words at offsets 4 and 5 cannot both be word aligned, so the routine
 * relies on unaligned word loads.  Building bytes 1..4 with lsr/orr assumes
 * little-endian byte order -- TODO confirm this code is never used on a
 * big-endian build.
 *
 * r4-r11 and lr are saved; r12 is used as the 0x01010101 mask.
 */
function ff_put_pixels8_x2_armv6, export=1
        push            {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16          /* r12 = 0x01010101: bit 0 of every byte */
1:
        ldr             r4,  [r1]                       /* row A, bytes 0..3 */
        subs            r3,  r3,  #2                    /* nothing below sets the flags before bne */
        ldr             r5,  [r1, #4]                   /* row A, bytes 4..7 */
        ldr             r7,  [r1, #5]                   /* row A, bytes 5..8 */
        lsr             r6,  r4,  #8
        ldr             r8,  [r1, r2]!                  /* row B, bytes 0..3; r1 now points at row B */
        orr             r6,  r6,  r5,  lsl #24          /* r6 = row A, bytes 1..4 */
        ldr             r9,  [r1, #4]                   /* row B, bytes 4..7 */
        ldr             r11, [r1, #5]                   /* row B, bytes 5..8 */
        lsr             r10, r8,  #8
        add             r1,  r1,  r2                    /* r1 -> first row of the next pair */
        orr             r10, r10, r9,  lsl #24          /* r10 = row B, bytes 1..4 */
        eor             r14, r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             r14, r14, r12                   /* rounding bits for row A, low word */
        and             r6,  r6,  r12                   /* rounding bits for row A, high word */
        uadd8           r4,  r4,  r14
        eor             r14, r8,  r10
        uadd8           r5,  r5,  r6
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             r14, r14, r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  r14
        strd            r4,  r5,  [r0],  r2
        uadd8           r9,  r9,  r6
        strd            r8,  r9,  [r0],  r2
        bne             1b

        pop             {r4-r11, pc}
.endfunc
/*
 * ff_put_pixels8_y2_armv6
 *
 * Writes an 8-byte-wide block to r0 in which each byte is the rounded-up
 * average of two vertically adjacent source bytes:
 *     dst[y][i] = (src[y][i] + src[y + 1][i] + 1) >> 1,   i = 0..7
 * Two rows are produced per loop pass.
 *
 *   r0  destination; written with strd
 *   r1  source; read with word loads
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; non-zero multiple of 2 (subs #2 / bne)
 *
 * Rounding: uhadd8 gives (a + b) >> 1 per byte; (a ^ b) & 1 is the bit it
 * dropped, and uadd8 adds it back.
 *
 * Rows are kept in a rolling pair: r4:r5 and r6:r7 alternate between
 * "older" and "newer" row.  Each pass loads two further source rows, so the
 * last pass loads one row beyond the r3 + 1 rows the output depends on; that
 * value is never used.
 *
 * r4-r11 are saved and restored; r12 is used as the 0x01010101 mask.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16          /* r12 = 0x01010101: bit 0 of every byte */
        ldr             r4,  [r1]                       /* row 0 */
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!                  /* row 1; r1 advances with every row load */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2                    /* nothing below sets the flags before bne */
        uhadd8          r8,  r4,  r6                    /* first output row: rows in r4:r5 and r6:r7 */
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr             r4,  [r1, r2]!                  /* next source row replaces the older one */
        uadd8           r8,  r8,  r10
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6                    /* second output row: new r4:r5 with r6:r7 */
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6
        and             r7,  r7,  r12
        ldr             r6,  [r1, r2]!                  /* row for the next pass (unused after the last pass) */
        uadd8           r11, r11, r7
        strd            r8,  r9,  [r0],  r2
        ldr             r7,  [r1, #4]
        strd            r10, r11, [r0],  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
.endfunc
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * Writes an 8-byte-wide block to r0 in which each byte is the truncated
 * average of two horizontally adjacent source bytes:
 *     dst[i] = (src[i] + src[i + 1]) >> 1,   i = 0..7
 * Only uhadd8 is used, so no rounding bit is added.  Two rows are produced
 * per loop pass.
 *
 *   r0  destination; written with stm, then advanced by r2
 *   r1  source; bytes 0..8 of each row are read
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; non-zero multiple of 2 (subs #2 / bne)
 *
 * The words at offsets 4 and 5 cannot both be word aligned, so the routine
 * relies on unaligned word loads.  Building bytes 1..4 with lsr/orr assumes
 * little-endian byte order -- TODO confirm.
 *
 * r4-r9 and lr are saved; r12 and r14 are used as scratch.
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
1:
        subs            r3,  r3,  #2                    /* nothing below sets the flags before bne */
        ldr             r4,  [r1]                       /* row A, bytes 0..3 */
        ldr             r5,  [r1, #4]                   /* row A, bytes 4..7 */
        ldr             r7,  [r1, #5]                   /* row A, bytes 5..8 */
        ldr             r8,  [r1, r2]!                  /* row B, bytes 0..3; r1 now points at row B */
        ldr             r9,  [r1, #4]                   /* row B, bytes 4..7 */
        ldr             r14, [r1, #5]                   /* row B, bytes 5..8 */
        add             r1,  r1,  r2                    /* r1 -> first row of the next pair */
        lsr             r6,  r4,  #8
        orr             r6,  r6,  r5,  lsl #24          /* r6 = row A, bytes 1..4 */
        lsr             r12, r8,  #8
        orr             r12, r12, r9,  lsl #24          /* r12 = row B, bytes 1..4 */
        uhadd8          r4,  r4,  r6
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  r14
        stm             r0,  {r4,r5}
        add             r0,  r0,  r2
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
.endfunc
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * Writes an 8-byte-wide block to r0 in which each byte is the truncated
 * average of two vertically adjacent source bytes:
 *     dst[y][i] = (src[y][i] + src[y + 1][i]) >> 1,   i = 0..7
 * Only uhadd8 is used, so no rounding bit is added.  Two rows are produced
 * per loop pass.
 *
 *   r0  destination; written with stm, then advanced by r2
 *   r1  source; read with word loads
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; non-zero multiple of 2 (subs #2 / bne)
 *
 * Each pass loads two further source rows, so the last pass loads one row
 * beyond the r3 + 1 rows the output depends on; that value is never used.
 *
 * r4-r9 and lr are saved; r12 and r14 hold the second output row.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]                       /* row 0 */
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!                  /* row 1; r1 advances with every row load */
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2                    /* nothing below sets the flags before bne */
        uhadd8          r8,  r4,  r6                    /* first output row, low word */
        ldr             r4,  [r1, r2]!                  /* next source row replaces the older one */
        uhadd8          r9,  r5,  r7                    /* first output row, high word */
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6                    /* second output row, low word */
        ldr             r6,  [r1, r2]!                  /* row for the next pass (unused after the last pass) */
        uhadd8          r14, r5,  r7                    /* second output row, high word */
        ldr             r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
.endfunc
/*
 * ff_avg_pixels8_armv6
 *
 * Replaces each byte of an 8-byte-wide block at r0 with the rounded-up
 * average of itself and the corresponding source byte:
 *     dst[i] = (dst[i] + src[i] + 1) >> 1,   i = 0..7
 *
 *   r0  destination; read with ldrd and written with strd
 *   r1  source; read with word loads
 *   r2  byte offset from one row to the next, applied to both pointers
 *   r3  row count; the loop exits only when a subs #2 reaches exactly zero,
 *       so it must be a non-zero multiple of 2
 *
 * Rounding: uhadd8 gives (a + b) >> 1 per byte; (a ^ b) & 1 is the bit it
 * dropped, and uadd8 adds it back.
 *
 * The loop is software pipelined.  Each pass finishes one row held in
 * r4:r5 / r9:r10 and starts the next in r6:r7.  Loads for the row after
 * that are issued only when more rows remain (the path that skips "beq 2f"),
 * so nothing past the last row is read apart from the pld hints.
 *
 * r4-r10 and lr are saved; lr is used as the 0x01010101 mask and r12 as
 * scratch.
 */
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]
        push            {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101: bit 0 of every byte */
        ldrd            r4,  r5,  [r0]                  /* destination row 0 */
        ldr             r10, [r1, #4]                   /* source row 0 */
        ldr             r9,  [r1], r2
        subs            r3,  r3,  #2                    /* tested by "beq 2f" on the first pass */
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd            r6,  r7,  [r0, r2]              /* destination, second row of the pair */
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]                   /* source, second row of the pair */
        and             r12, r12, lr
        uadd8           r4,  r4,  r8
        ldr             r9,  [r1], r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd            r4,  r5,  [r0], r2              /* first row of the pair done */
        uhadd8          r7,  r7,  r10
        beq             2f                              /* flags from the latest subs: no rows left to preload */
        and             r8,  r8,  lr
        ldrd            r4,  r5,  [r0, r2]              /* destination, first row of the next pair */
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]                   /* source, first row of the next pair */
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr             r9,  [r1], r2
        strd            r6,  r7,  [r0], r2              /* second row of the pair done */
        b               1b
2:
        /* last row: same rounding and store as above, without the preloads */
        and             r8,  r8,  lr
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd            r6,  r7,  [r0], r2

        pop             {r4-r10, pc}
.endfunc
/*
 * ff_add_pixels_clamped_armv6
 *
 * For 8 rows of 8 pixels, adds a 16-bit value to each 8-bit pixel, clamps
 * the sum to 0..255 and writes the pixel back in place.
 *
 *   r0  16-bit input values; 8 per row (four words), read with ldm and
 *       advanced by 16 bytes per row
 *   r1  pixels; 8 bytes per row, read with ldrd and rewritten with strd
 *   r2  byte offset from one pixel row to the next
 *
 * The row count is fixed at 8 (r3 is overwritten on entry).
 *
 * Method: pkhbt/pkhtb regroup the 16-bit values into even-numbered and
 * odd-numbered lanes so that uxtab16 can add the matching pixel bytes
 * (bytes 0 and 2 directly, bytes 1 and 3 via ror #8).  usat16 #8 clamps
 * each halfword to 0..255, and the final orr pairs interleave the even and
 * odd results back into bytes.
 * NOTE(review): the lane pairing assumes little-endian byte order -- confirm.
 *
 * r4-r8 and lr are saved; r12 and lr are used as scratch.
 */
function ff_add_pixels_clamped_armv6, export=1
        push            {r4-r8,lr}
        mov             r3,  #8                         /* 8 rows */
1:
        ldm             r0!, {r4,r5,r12,lr}             /* eight 16-bit values for this row */
        ldrd            r6,  r7,  [r1]                  /* eight pixels for this row */
        pkhbt           r8,  r4,  r5,  lsl #16          /* r8 = values 0 and 2 */
        pkhtb           r5,  r5,  r4,  asr #16          /* r5 = values 1 and 3 */
        pkhbt           r4,  r12, lr,  lsl #16          /* r4 = values 4 and 6 */
        pkhtb           lr,  lr,  r12, asr #16          /* lr = values 5 and 7 */
        pld             [r1, r2]
        uxtab16         r8,  r8,  r6                    /* + pixels 0 and 2 */
        uxtab16         r5,  r5,  r6,  ror #8           /* + pixels 1 and 3 */
        uxtab16         r4,  r4,  r7                    /* + pixels 4 and 6 */
        uxtab16         lr,  lr,  r7,  ror #8           /* + pixels 5 and 7 */
        usat16          r8,  #8,  r8                    /* clamp every halfword to 0..255 */
        usat16          r5,  #8,  r5
        usat16          r4,  #8,  r4
        usat16          lr,  #8,  lr
        orr             r6,  r8,  r5,  lsl #8           /* pixels 0..3 */
        orr             r7,  r4,  lr,  lsl #8           /* pixels 4..7 */
        subs            r3,  r3,  #1
        strd            r6,  r7,  [r1],  r2
        bgt             1b
        pop             {r4-r8,pc}
.endfunc
/*
 * ff_get_pixels_armv6
 *
 * Expands 8 rows of 8 unsigned bytes into 16-bit values, stored
 * contiguously (16 bytes per row).
 *
 *   r0  output; four words per row written with stm, advanced by 16 bytes
 *   r1  pixels; 8 bytes per row read with ldrd
 *   r2  byte offset from one pixel row to the next
 *
 * The row count is fixed at 8 and kept in lr.
 *
 * Method: uxtb16 zero-extends bytes 0 and 2 (and, with ror #8, bytes 1 and
 * 3) of each word into halfwords; pkhbt/pkhtb then put them back in pixel
 * order.
 * NOTE(review): the lane pairing assumes little-endian byte order -- confirm.
 *
 * r4-r8 and lr are saved; r12 is used as scratch.
 */
function ff_get_pixels_armv6, export=1
        pld             [r1, r2]
        push            {r4-r8, lr}
        mov             lr,  #8                         /* 8 rows */
1:
        ldrd            r4,  r5,  [r1],  r2
        subs            lr,  lr,  #1                    /* nothing below sets the flags before bgt */
        uxtb16          r6,  r4                         /* pixels 0 and 2 */
        uxtb16          r4,  r4,  ror #8                /* pixels 1 and 3 */
        uxtb16          r12, r5                         /* pixels 4 and 6 */
        uxtb16          r8,  r5,  ror #8                /* pixels 5 and 7 */
        pld             [r1, r2]
        pkhbt           r5,  r6,  r4,  lsl #16          /* pixels 0, 1 */
        pkhtb           r6,  r4,  r6,  asr #16          /* pixels 2, 3 */
        pkhbt           r7,  r12, r8,  lsl #16          /* pixels 4, 5 */
        pkhtb           r12, r8,  r12, asr #16          /* pixels 6, 7 */
        stm             r0!, {r5,r6,r7,r12}
        bgt             1b

        pop             {r4-r8, pc}
.endfunc
/*
 * ff_diff_pixels_armv6
 *
 * For 8 rows of 8 pixels, stores the per-pixel difference of two byte
 * blocks as 16-bit values:
 *     out[i] = a[i] - b[i]
 * where a is read through r1 and b through r2.
 *
 *   r0  output; four words per row written with stm, advanced by 16 bytes
 *   r1  first block (minuend); 8 bytes per row read with ldrd
 *   r2  second block (subtrahend); 8 bytes per row read with ldrd
 *   r3  byte offset from one row to the next, applied to r1 and r2
 *
 * The row count is fixed at 8 and kept in lr.
 *
 * Method: uxtb16 zero-extends alternate bytes to halfwords, ssub16
 * subtracts them pairwise, and pkhbt/pkhtb restore pixel order.
 * NOTE(review): the lane pairing assumes little-endian byte order -- confirm.
 *
 * r4-r9 and lr are saved and restored.
 */
function ff_diff_pixels_armv6, export=1
        pld             [r1, r3]
        pld             [r2, r3]
        push            {r4-r9, lr}
        mov             lr,  #8                         /* 8 rows */
1:
        ldrd            r4,  r5,  [r1],  r3
        ldrd            r6,  r7,  [r2],  r3
        uxtb16          r8,  r4                         /* a: pixels 0 and 2 */
        uxtb16          r4,  r4,  ror #8                /* a: pixels 1 and 3 */
        uxtb16          r9,  r6                         /* b: pixels 0 and 2 */
        uxtb16          r6,  r6,  ror #8                /* b: pixels 1 and 3 */
        pld             [r1, r3]
        ssub16          r9,  r8,  r9                    /* differences 0 and 2 */
        ssub16          r6,  r4,  r6                    /* differences 1 and 3 */
        uxtb16          r8,  r5                         /* a: pixels 4 and 6 */
        uxtb16          r5,  r5,  ror #8                /* a: pixels 5 and 7 */
        pld             [r2, r3]
        pkhbt           r4,  r9,  r6,  lsl #16          /* differences 0, 1 */
        pkhtb           r6,  r6,  r9,  asr #16          /* differences 2, 3 */
        uxtb16          r9,  r7                         /* b: pixels 4 and 6 */
        uxtb16          r7,  r7,  ror #8                /* b: pixels 5 and 7 */
        ssub16          r9,  r8,  r9                    /* differences 4 and 6 */
        ssub16          r5,  r5,  r7                    /* differences 5 and 7 */
        subs            lr,  lr,  #1
        pkhbt           r8,  r9,  r5,  lsl #16          /* differences 4, 5 */
        pkhtb           r9,  r5,  r9,  asr #16          /* differences 6, 7 */
        stm             r0!, {r4,r6,r8,r9}
        bgt             1b

        pop             {r4-r9, pc}
.endfunc
/*
 * ff_pix_abs16_armv6
 *
 * Returns the sum of absolute differences between two blocks 16 bytes wide.
 *
 *   r0    incoming value is ignored; it is overwritten with the row count
 *   r1    first block; each row is read with ldm (four words)
 *   r2    second block; each row is read with four ldr
 *   r3    byte offset from one row to the next, applied to r1 and r2
 *   [sp]  row count, read from the first stack word before the push; the
 *         loop exits only when the count reaches exactly zero, so it must
 *         be at least 1
 *
 * Result in r0.  Two accumulators (r12 and lr) are kept so that consecutive
 * usada8 instructions do not depend on each other; they are summed at the
 * end.
 *
 * r4-r9 and lr are saved and restored.
 */
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]                       /* row count */
        push            {r4-r9, lr}
        mov             r12, #0                         /* accumulator A */
        mov             lr,  #0                         /* accumulator B */
        ldm             r1,  {r4-r7}                    /* first row of block 1 */
        ldr             r8,  [r2]
1:
        ldr             r9,  [r2, #4]
        pld             [r1, r3]
        usada8          r12, r4,  r8,  r12
        ldr             r8,  [r2, #8]
        pld             [r2, r3]
        usada8          lr,  r5,  r9,  lr
        ldr             r9,  [r2, #12]
        usada8          r12, r6,  r8,  r12
        subs            r0,  r0,  #1                    /* usada8 does not touch the flags tested by beq */
        usada8          lr,  r7,  r9,  lr
        beq             2f
        add             r1,  r1,  r3                    /* preload the next row only when one remains */
        ldm             r1,  {r4-r7}
        add             r2,  r2,  r3
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr
        pop             {r4-r9, pc}
.endfunc
/*
 * ff_pix_abs16_x2_armv6
 *
 * Returns the sum of absolute differences between a block 16 bytes wide
 * read through r1 and a horizontally interpolated version of the block
 * read through r2, where each interpolated byte is
 *     (b[i] + b[i + 1] + 1) >> 1,   i = 0..15
 *
 *   r0    incoming value is ignored; used as the accumulator and result
 *   r1    first block; four word loads per row
 *   r2    second block; bytes 0..16 of each row are read (four words plus
 *         one ldrb at offset 16)
 *   r3    byte offset from one row to the next, applied to r1 and r2
 *   [sp]  row count, read from the first stack word before the push; one
 *         row per pass, loop continues while the count stays above zero
 *
 * Rounding: uhadd8 gives (a + b) >> 1 per byte; (a ^ b) & 1 is the bit it
 * dropped, and uadd8 adds it back.
 * NOTE(review): forming the "shifted by one byte" words with lsr/orr
 * assumes little-endian byte order -- confirm.
 *
 * r4-r11 and lr are saved; lr is used as the 0x01010101 mask and r12 as the
 * row counter.
 */
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]                       /* row count */
        push            {r4-r11, lr}
        mov             r0,  #0                         /* SAD accumulator */
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101: bit 0 of every byte */
1:
        ldr             r8,  [r2]                       /* b bytes 0..3 */
        ldr             r9,  [r2, #4]                   /* b bytes 4..7 */
        lsr             r10, r8,  #8
        ldr             r4,  [r1]                       /* a bytes 0..3 */
        lsr             r6,  r9,  #8
        orr             r10, r10, r9,  lsl #24          /* b bytes 1..4 */
        ldr             r5,  [r2, #8]                   /* b bytes 8..11 */
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10
        orr             r6,  r6,  r5,  lsl #24          /* b bytes 5..8 */
        and             r11, r11, lr
        uadd8           r7,  r7,  r11                   /* interpolated bytes 0..3 */
        ldr             r8,  [r1, #4]                   /* a bytes 4..7 */
        usada8          r0,  r4,  r7,  r0
        eor             r7,  r9,  r6
        lsr             r10, r5,  #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]                  /* b bytes 12..15 */
        uadd8           r4,  r4,  r7                    /* interpolated bytes 4..7 */
        pld             [r1, r3]
        orr             r10, r10, r6,  lsl #24          /* b bytes 9..12 */
        usada8          r0,  r8,  r4,  r0
        ldr             r4,  [r1, #8]                   /* a bytes 8..11 */
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]                  /* b byte 16 */
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]                  /* a bytes 12..15 */
        uadd8           r8,  r8,  r11                   /* interpolated bytes 8..11 */
        pld             [r2, r3]
        lsr             r10, r6,  #8
        usada8          r0,  r4,  r8,  r0
        orr             r10, r10, r7,  lsl #24          /* b bytes 13..16 */
        subs            r12,  r12,  #1                  /* nothing below sets the flags before bgt */
        eor             r11, r6,  r10
        add             r1,  r1,  r3
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11                   /* interpolated bytes 12..15 */
        add             r2,  r2,  r3
        usada8          r0,  r5,  r9,  r0
        bgt             1b

        pop             {r4-r11, pc}
.endfunc
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Processes one 16-byte row for a vertically interpolated sum of absolute
 * differences.
 *
 * On entry:
 *   p0-p3  the four words of the previous row read through r2
 *   r1     current row of the first block
 *   r2     current row of the second block
 *   r3     byte offset from one row to the next
 *   lr     0x01010101 (bit 0 of every byte), used as the rounding mask
 *   r0     running sum
 *
 * Work done: each word of the current r2 row is averaged with the matching
 * word of the previous row, rounding up (uhadd8, then uadd8 of the dropped
 * bit (a ^ b) & 1).  usada8 then accumulates into r0 the absolute byte
 * differences against the matching word read through r1.
 *
 * On exit:
 *   n0-n3  the four words of the r2 row just read, ready to be passed as
 *          p0-p3 of the next expansion
 *   p0-p3  clobbered (reused as scratch)
 *   r1,r2  each advanced by r3
 *   r0     updated sum
 *
 * No instruction in the expansion modifies the N, Z, C or V flags.
 */
.macro  usad_y2         p0,  p1,  p2,  p3,  n0,  n1,  n2,  n3
        ldr             \n0, [r2]
        eor             \n1, \p0, \n0
        uhadd8          \p0, \p0, \n0
        and             \n1, \n1, lr
        ldr             \n2, [r1]
        uadd8           \p0, \p0, \n1
        ldr             \n1, [r2, #4]
        usada8          r0,  \p0, \n2, r0
        pld             [r1,  r3]
        eor             \n3, \p1, \n1
        uhadd8          \p1, \p1, \n1
        and             \n3, \n3, lr
        ldr             \p0, [r1, #4]
        uadd8           \p1, \p1, \n3
        ldr             \n2, [r2, #8]
        usada8          r0,  \p1, \p0, r0
        pld             [r2,  r3]
        eor             \p0, \p2, \n2
        uhadd8          \p2, \p2, \n2
        and             \p0, \p0, lr
        ldr             \p1, [r1, #8]
        uadd8           \p2, \p2, \p0
        ldr             \n3, [r2, #12]
        usada8          r0,  \p2, \p1, r0
        eor             \p1, \p3, \n3
        uhadd8          \p3, \p3, \n3
        and             \p1, \p1, lr
        ldr             \p0,  [r1, #12]
        uadd8           \p3, \p3, \p1
        add             r1,  r1,  r3
        usada8          r0,  \p3, \p0,  r0
        add             r2,  r2,  r3
.endm
/*
 * ff_pix_abs16_y2_armv6
 *
 * Returns the sum of absolute differences between a block 16 bytes wide
 * read through r1 and a vertically interpolated version of the block read
 * through r2.  Each interpolated byte is the rounded-up average of a byte
 * and the byte one row below it.
 *
 *   r0    incoming value is ignored; used as the accumulator and result
 *   r1    first block
 *   r2    second block; one more row is read than is read through r1
 *   r3    byte offset from one row to the next, applied to r1 and r2
 *   [sp]  row count, read from the first stack word before the push; two
 *         rows are handled per pass (subs #2 / bgt)
 *
 * The two usad_y2 expansions swap register groups: the row loaded into
 * r8-r11 by the first becomes the "previous row" of the second, which in
 * turn reloads r4-r7 for the next pass.  The macro leaves the condition
 * flags alone, so bgt tests the subs placed between the two expansions.
 *
 * r4-r11 and lr are saved; lr is used as the 0x01010101 mask and r12 as the
 * row counter.
 */
function ff_pix_abs16_y2_armv6, export=1
        pld             [r1]
        pld             [r2]
        ldr             r12, [sp]                       /* row count */
        push            {r4-r11, lr}
        mov             r0,  #0                         /* SAD accumulator */
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101: bit 0 of every byte */
        ldr             r4,  [r2]                       /* first row of the second block */
        ldr             r5,  [r2, #4]
        ldr             r6,  [r2, #8]
        ldr             r7,  [r2, #12]
        add             r2,  r2,  r3
1:
        usad_y2         r4,  r5,  r6,  r7,  r8,  r9,  r10, r11
        subs            r12, r12, #2
        usad_y2         r8,  r9,  r10, r11, r4,  r5,  r6,  r7
        bgt             1b

        pop             {r4-r11, pc}
.endfunc

    
487
function ff_pix_abs8_armv6, export=1
488
        pld             [r2, r3]
489
        ldr             r12, [sp]
490
        push            {r4-r9, lr}
491
        mov             r0,  #0
492
        mov             lr,  #0
493
        ldrd            r4,  r5,  [r1], r3
494
1:
495
        subs            r12, r12, #2
496
        ldr             r7,  [r2, #4]
497
        ldr             r6,  [r2], r3
498
        ldrd            r8,  r9,  [r1], r3
499
        usada8          r0,  r4,  r6,  r0
500
        pld             [r2, r3]
501
        usada8          lr,  r5,  r7,  lr
502
        ldr             r7,  [r2, #4]
503
        ldr             r6,  [r2], r3
504
        beq             2f
505
        ldrd            r4,  r5,  [r1], r3
506
        usada8          r0,  r8,  r6,  r0
507
        pld             [r2, r3]
508
        usada8          lr,  r9,  r7,  lr
509
        b               1b
510
2:
511
        usada8          r0,  r8,  r6,  r0
512
        usada8          lr,  r9,  r7,  lr
513
        add             r0,  r0,  lr
514
        pop             {r4-r9, pc}
515
.endfunc