Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / dsputil_armv6.S @ 39a760f6

History | View | Annotate | Download (12.3 KB)

1 153f4957 Måns Rullgård
/*
2
 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20
21
#include "asm.S"
22
23
        .text
24
25 38e016a7 Måns Rullgård
/*
 * call_2x_pixels type, subp
 *
 * Emits ff_<type>_pixels16<subp>_armv6 as a thin wrapper around the
 * 8-byte-wide routine ff_<type>_pixels8<subp>_armv6.  The 8-wide routine is
 * run twice: first with the incoming r0-r3, then again with the same r2/r3
 * and with r0 and r1 each moved forward by 8 bytes.
 *
 * The second run is a tail call, so the 8-wide routine returns directly to
 * the caller of the 16-wide wrapper.
 */
.macro  call_2x_pixels  type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
        stmfd           sp!, {r0-r3, lr}        @ arguments and return address must survive the call
        bl              ff_\type\()_pixels8\subp\()_armv6
        ldmfd           sp!, {r0-r3, lr}        @ original arguments again, lr points at our caller
        add             r0,  r0,  #8            @ second half starts 8 bytes further in both buffers
        add             r1,  r1,  #8
        b               ff_\type\()_pixels8\subp\()_armv6
.endfunc
.endm

@ 16-wide wrappers; each one needs the matching pixels8 routine to exist.
call_2x_pixels          avg
call_2x_pixels          put, _x2
call_2x_pixels          put, _y2
call_2x_pixels          put, _x2_no_rnd
call_2x_pixels          put, _y2_no_rnd
41
42
/*
 * ff_put_pixels16_armv6
 *
 * Copies a block that is 16 bytes wide from r1 to r0, two rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  row stride in bytes, applied to both pointers
 *   r3  row count; 2 is subtracted per pass and the loop only exits on
 *       exactly zero, so this has to be a positive even number
 *
 * NOTE(review): the stores are strd, so the destination rows are presumably
 * expected to be doubleword aligned -- confirm against the callers.
 */
function ff_put_pixels16_armv6, export=1
        stmfd           sp!, {r4-r11}
1:
        ldr             r5,  [r1, #4]           @ first row, bytes 4-15 ...
        ldr             r6,  [r1, #8]
        ldr             r7,  [r1, #12]
        ldr             r4,  [r1], r2           @ ... bytes 0-3 last, post-increment steps to the next row
        strd            r6,  r7,  [r0, #8]      @ upper 8 bytes of the first row
        ldr             r9,  [r1, #4]           @ second row loads are interleaved with the stores
        strd            r4,  r5,  [r0],  r2     @ lower 8 bytes, then step the destination
        ldr             r10, [r1, #8]
        ldr             r11, [r1, #12]
        ldr             r8,  [r1], r2
        strd            r10, r11, [r0, #8]
        subs            r3,  r3,  #2            @ two rows done
        strd            r8,  r9,  [r0],  r2     @ strd leaves the flags from subs intact
        bne             1b

        ldmfd           sp!, {r4-r11}
        bx              lr
.endfunc
63
64
/*
 * ff_put_pixels8_armv6
 *
 * Copies a block that is 8 bytes wide from r1 to r0, two rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after every row
 *   r1  source pointer, advanced by r2 after every row
 *   r2  row stride in bytes, applied to both pointers
 *   r3  row count; must be a positive even number because the loop subtracts
 *       2 per pass and only exits on exactly zero
 */
function ff_put_pixels8_armv6, export=1
        stmfd           sp!, {r4-r7}
1:
        ldr             r5,  [r1, #4]           @ first row, bytes 4-7
        ldr             r4,  [r1], r2           @ first row, bytes 0-3; r1 moves to the second row
        ldr             r7,  [r1, #4]           @ second row, bytes 4-7
        strd            r4,  r5,  [r0],  r2     @ write the first row, step the destination
        ldr             r6,  [r1], r2           @ second row, bytes 0-3; r1 moves on again
        subs            r3,  r3,  #2
        strd            r6,  r7,  [r0],  r2     @ flags from subs are still valid for the branch
        bne             1b

        ldmfd           sp!, {r4-r7}
        bx              lr
.endfunc
79
80
/*
 * ff_put_pixels8_x2_armv6
 *
 * For every row writes 8 bytes to r0, each being the rounded average of a
 * source byte and its right-hand neighbour: (src[x] + src[x+1] + 1) >> 1.
 * Nine source bytes are read per row.  Two rows are handled per loop pass.
 *
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row
 *   r2  row stride in bytes
 *   r3  row count, positive and even (loop subtracts 2, exits on zero)
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped low bit is
 * (a ^ b) & 1, which is masked with 0x01010101 (r12) and added back by uadd8.
 * Byte lane descriptions below assume little-endian word loads.
 */
function ff_put_pixels8_x2_armv6, export=1
        stmfd           sp!, {r4-r11, lr}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101, one rounding bit per byte
1:
        ldr             r4,  [r1]               @ row A bytes 0-3
        subs            r3,  r3,  #2            @ nothing below writes N/Z/C/V before the bne
        ldr             r5,  [r1, #4]           @ row A bytes 4-7
        ldr             r7,  [r1, #5]           @ row A bytes 5-8
        mov             r6,  r4,  lsr #8
        ldr             r8,  [r1, r2]!          @ row B bytes 0-3, r1 now points at row B
        orr             r6,  r6,  r5,  lsl #24  @ r6 = row A bytes 1-4
        ldr             r9,  [r1, #4]           @ row B bytes 4-7
        ldr             r11, [r1, #5]           @ row B bytes 5-8
        mov             r10, r8,  lsr #8
        add             r1,  r1,  r2            @ r1 -> first row of the next pass
        orr             r10, r10, r9,  lsl #24  @ r10 = row B bytes 1-4
        eor             lr,  r4,  r6
        uhadd8          r4,  r4,  r6
        eor             r6,  r5,  r7
        uhadd8          r5,  r5,  r7
        and             lr,  lr,  r12
        and             r6,  r6,  r12
        uadd8           r4,  r4,  lr            @ row A result, bytes 0-3
        eor             lr,  r8,  r10
        uadd8           r5,  r5,  r6            @ row A result, bytes 4-7
        eor             r6,  r9,  r11
        uhadd8          r8,  r8,  r10
        and             lr,  lr,  r12
        uhadd8          r9,  r9,  r11
        and             r6,  r6,  r12
        uadd8           r8,  r8,  lr            @ row B result, bytes 0-3
        strd            r4,  r5,  [r0],  r2
        uadd8           r9,  r9,  r6            @ row B result, bytes 4-7
        strd            r8,  r9,  [r0],  r2
        bne             1b

        ldmfd           sp!, {r4-r11, pc}
.endfunc
120
121
/*
 * ff_put_pixels8_y2_armv6
 *
 * For every row writes 8 bytes to r0, each being the rounded average of a
 * source byte and the byte one row below it:
 * (src[y][x] + src[y+1][x] + 1) >> 1.  Two output rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer; r3 + 1 rows of 8 bytes are read from it
 *   r2  row stride in bytes
 *   r3  row count, positive and even (loop subtracts 2, exits on zero)
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped low bit is
 * (a ^ b) & 1, which is masked with 0x01010101 (r12) and added back by uadd8.
 *
 * The row that is only needed by the following pass is fetched with
 * conditional loads (ne), so the final pass never touches memory below the
 * last row that actually contributes to the output.  The condition comes
 * from the subs at the top of the loop; none of the instructions in between
 * (uhadd8, uadd8, eor, and, ldr, strd) modify N/Z/C/V.
 */
function ff_put_pixels8_y2_armv6, export=1
        push            {r4-r11}
        mov             r12, #1
        orr             r12, r12, r12, lsl #8
        orr             r12, r12, r12, lsl #16  @ r12 = 0x01010101, one rounding bit per byte
        ldr             r4,  [r1]               @ r4/r5 = row 0
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!          @ r6/r7 = row 1, r1 follows the newest row
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ Z set here means this is the last pass
        uhadd8          r8,  r4,  r6
        eor             r10, r4,  r6
        uhadd8          r9,  r5,  r7
        eor             r11, r5,  r7
        and             r10, r10, r12
        ldr             r4,  [r1, r2]!          @ r4/r5 = row n+2, needed by this pass
        uadd8           r8,  r8,  r10           @ r8/r9 = avg(row n, row n+1)
        and             r11, r11, r12
        uadd8           r9,  r9,  r11
        ldr             r5,  [r1, #4]
        uhadd8          r10, r4,  r6
        eor             r6,  r4,  r6
        uhadd8          r11, r5,  r7
        and             r6,  r6,  r12
        eor             r7,  r5,  r7
        uadd8           r10, r10, r6            @ r10/r11 = avg(row n+1, row n+2)
        and             r7,  r7,  r12
        ldrne           r6,  [r1, r2]!          @ row n+3 only if another pass follows
        uadd8           r11, r11, r7
        strd            r8,  r9,  [r0],  r2
        ldrne           r7,  [r1, #4]
        strd            r10, r11, [r0],  r2
        bne             1b

        pop             {r4-r11}
        bx              lr
.endfunc
159
160
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For every row writes 8 bytes to r0, each being the truncating average of a
 * source byte and its right-hand neighbour: (src[x] + src[x+1]) >> 1.
 * Nine source bytes are read per row.  Two rows are handled per loop pass.
 *
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row
 *   r2  row stride in bytes
 *   r3  row count, positive and even (loop subtracts 2, exits on zero)
 *
 * Byte lane descriptions below assume little-endian word loads.
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
        stmfd           sp!, {r4-r9, lr}
1:
        subs            r3,  r3,  #2            @ flags stay untouched until the bne
        ldr             r4,  [r1]               @ row A bytes 0-3
        ldr             r5,  [r1, #4]           @ row A bytes 4-7
        ldr             r7,  [r1, #5]           @ row A bytes 5-8
        ldr             r8,  [r1, r2]!          @ row B bytes 0-3, r1 now points at row B
        ldr             r9,  [r1, #4]           @ row B bytes 4-7
        ldr             lr,  [r1, #5]           @ row B bytes 5-8
        add             r1,  r1,  r2            @ r1 -> first row of the next pass
        mov             r6,  r4,  lsr #8
        orr             r6,  r6,  r5,  lsl #24  @ r6 = row A bytes 1-4
        mov             r12, r8,  lsr #8
        orr             r12, r12, r9,  lsl #24  @ r12 = row B bytes 1-4
        uhadd8          r4,  r4,  r6            @ per-byte (a + b) >> 1, no rounding term
        uhadd8          r5,  r5,  r7
        uhadd8          r8,  r8,  r12
        uhadd8          r9,  r9,  lr
        stmia           r0,  {r4,r5}            @ row A out
        add             r0,  r0,  r2
        stmia           r0,  {r8,r9}            @ row B out
        add             r0,  r0,  r2
        bne             1b

        ldmfd           sp!, {r4-r9, pc}
.endfunc
187
188
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For every row writes 8 bytes to r0, each being the truncating average of a
 * source byte and the byte one row below it:
 * (src[y][x] + src[y+1][x]) >> 1.  Two output rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 per row
 *   r1  source pointer; r3 + 1 rows of 8 bytes are read from it
 *   r2  row stride in bytes
 *   r3  row count, positive and even (loop subtracts 2, exits on zero)
 *
 * The row that is only needed by the following pass is fetched with
 * conditional loads (ne), so the final pass never touches memory below the
 * last row that actually contributes to the output.  The condition comes
 * from the subs at the top of the loop; uhadd8, ldr, stm and add (without
 * the s suffix) leave N/Z/C/V alone.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
        push            {r4-r9, lr}
        ldr             r4,  [r1]               @ r4/r5 = row 0
        ldr             r5,  [r1, #4]
        ldr             r6,  [r1, r2]!          @ r6/r7 = row 1, r1 follows the newest row
        ldr             r7,  [r1, #4]
1:
        subs            r3,  r3,  #2            @ Z set here means this is the last pass
        uhadd8          r8,  r4,  r6            @ r8/r9 = avg(row n, row n+1)
        ldr             r4,  [r1, r2]!          @ r4/r5 = row n+2, needed by this pass
        uhadd8          r9,  r5,  r7
        ldr             r5,  [r1, #4]
        uhadd8          r12, r4,  r6            @ r12/r14 = avg(row n+1, row n+2)
        ldrne           r6,  [r1, r2]!          @ row n+3 only if another pass follows
        uhadd8          r14, r5,  r7
        ldrne           r7,  [r1, #4]
        stm             r0,  {r8,r9}
        add             r0,  r0,  r2
        stm             r0,  {r12,r14}
        add             r0,  r0,  r2
        bne             1b

        pop             {r4-r9, pc}
.endfunc
212
213
/*
 * ff_avg_pixels8_armv6
 *
 * For every row replaces 8 bytes at r0 with the rounded average of what is
 * already stored there and the 8 bytes at r1: (dst + src + 1) >> 1 per byte.
 *
 *   r0  destination pointer (read and written), advanced by r2 per row
 *   r1  source pointer, advanced by r2 per row
 *   r2  row stride in bytes
 *   r3  row count, positive and even (2 is subtracted per row pair and the
 *       exit test is for exactly zero)
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped low bit is
 * (a ^ b) & 1, which is masked with 0x01010101 (lr) and added back by uadd8.
 *
 * The loop is software-pipelined: the inputs of a row are loaded while the
 * previous row is still being computed.  Rows alternate between r4/r5 (dst)
 * and r6/r7 (dst), with r9/r10 holding the matching source words.  Label 2
 * finishes the last row without issuing loads for a following one.
 */
function ff_avg_pixels8_armv6, export=1
        pld             [r1, r2]                @ prefetch hint for the second source row
        stmfd           sp!, {r4-r10, lr}
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101, one rounding bit per byte
        ldrd            r4,  r5,  [r0]          @ dst row 0
        ldr             r10, [r1, #4]           @ src row 0, bytes 4-7
        ldr             r9,  [r1], r2           @ src row 0, bytes 0-3; r1 -> row 1
        subs            r3,  r3,  #2            @ Z set: the first pair is also the last
1:
        pld             [r1, r2]
        eor             r8,  r4,  r9
        uhadd8          r4,  r4,  r9
        eor             r12, r5,  r10
        ldrd            r6,  r7,  [r0, r2]      @ dst of the odd row, r0 still at the even row
        uhadd8          r5,  r5,  r10
        and             r8,  r8,  lr
        ldr             r10, [r1, #4]           @ src of the odd row
        and             r12, r12, lr
        uadd8           r4,  r4,  r8            @ even row result, bytes 0-3
        ldr             r9,  [r1], r2
        eor             r8,  r6,  r9
        uadd8           r5,  r5,  r12           @ even row result, bytes 4-7
        pld             [r1, r2,  lsl #1]
        eor             r12, r7,  r10
        uhadd8          r6,  r6,  r9
        strd            r4,  r5,  [r0], r2      @ store the even row, r0 -> odd row
        uhadd8          r7,  r7,  r10
        beq             2f                      @ tests the most recent subs: no rows left to preload
        and             r8,  r8,  lr
        ldrd            r4,  r5,  [r0, r2]      @ dst of the next even row
        uadd8           r6,  r6,  r8
        ldr             r10, [r1, #4]           @ src of the next even row
        and             r12, r12, lr
        subs            r3,  r3,  #2
        uadd8           r7,  r7,  r12
        ldr             r9,  [r1], r2
        strd            r6,  r7,  [r0], r2      @ store the odd row
        b               1b
2:
        and             r8,  r8,  lr            @ finish the final odd row
        and             r12, r12, lr
        uadd8           r6,  r6,  r8
        uadd8           r7,  r7,  r12
        strd            r6,  r7,  [r0], r2

        ldmfd           sp!, {r4-r10, pc}
.endfunc
262
263 153f4957 Måns Rullgård
/*
 * ff_add_pixels_clamped_armv6
 *
 * Adds an 8x8 block of signed 16-bit values to an 8x8 block of bytes and
 * writes the sums back to the byte block, saturated to 0..255.
 *
 *   r0  pointer to the 16-bit values, read linearly (8 per row, 64 in total)
 *   r1  pointer to the byte rows, read and written 8 bytes at a time
 *   r2  stride in bytes between byte rows
 *
 * The row count is fixed at 8; any value passed in r3 is overwritten.
 *
 * Per row the eight 16-bit values are regrouped into even/odd pairs so that
 * uxtab16 can add bytes 0/2 and (after ror #8) bytes 1/3 of each pixel word.
 * usat16 #8 then clamps both halfwords of each register to 0..255, and the
 * even/odd results are merged back into byte order with orr ... lsl #8.
 * Lane numbering below assumes little-endian loads.
 */
function ff_add_pixels_clamped_armv6, export=1
        stmfd           sp!, {r4-r8,lr}
        mov             r3,  #8                 @ rows remaining
1:
        ldmia           r0!, {r4,r5,r12,lr}     @ eight 16-bit values: c0..c7
        ldrd            r6,  r7,  [r1]          @ eight pixels: p0..p7
        pkhbt           r8,  r4,  r5,  lsl #16  @ r8 = c2:c0
        pkhtb           r5,  r5,  r4,  asr #16  @ r5 = c3:c1
        pkhbt           r4,  r12, lr,  lsl #16  @ r4 = c6:c4
        pkhtb           lr,  lr,  r12, asr #16  @ lr = c7:c5
        pld             [r1, r2]                @ prefetch hint for the next pixel row
        uxtab16         r8,  r8,  r6            @ + p2:p0
        uxtab16         r5,  r5,  r6,  ror #8   @ + p3:p1
        uxtab16         r4,  r4,  r7            @ + p6:p4
        uxtab16         lr,  lr,  r7,  ror #8   @ + p7:p5
        usat16          r8,  #8,  r8            @ clamp each halfword to 0..255
        usat16          r5,  #8,  r5
        usat16          r4,  #8,  r4
        usat16          lr,  #8,  lr
        orr             r6,  r8,  r5,  lsl #8   @ interleave back to p3..p0
        orr             r7,  r4,  lr,  lsl #8   @ interleave back to p7..p4
        subs            r3,  r3,  #1
        strd            r6,  r7,  [r1],  r2     @ write the row, step to the next one
        bgt             1b
        ldmfd           sp!, {r4-r8,pc}
.endfunc
289 e6056a90 Måns Rullgård
290
/*
 * ff_pix_abs16_armv6
 *
 * Sum of absolute differences between two blocks that are 16 bytes wide.
 *
 *   r0    not used; it is overwritten with the row count right away
 *   r1    first block, read with ldm (four words per row)
 *   r2    second block, read with four word loads per row
 *   r3    row stride in bytes, applied to both blocks
 *   [sp]  row count (first stack argument); must be at least 1 because the
 *         loop decrements before it tests for zero
 *
 * Returns the sum in r0.  Two accumulators (r12 and lr) are used alternately
 * and added together at the end.
 */
function ff_pix_abs16_armv6, export=1
        ldr             r0,  [sp]               @ row count, fetched before sp moves
        stmfd           sp!, {r4-r9, lr}
        mov             r12, #0                 @ accumulator for words 0 and 2
        mov             lr,  #0                 @ accumulator for words 1 and 3
        ldmia           r1,  {r4-r7}            @ first row of block 1
        ldr             r8,  [r2]               @ block 2, word 0
1:
        ldr             r9,  [r2, #4]
        pld             [r1, r3]                @ prefetch hints for the next rows
        usada8          r12, r4,  r8,  r12      @ r12 += sum of |a - b| over 4 bytes
        ldr             r8,  [r2, #8]
        pld             [r2, r3]
        usada8          lr,  r5,  r9,  lr
        ldr             r9,  [r2, #12]
        usada8          r12, r6,  r8,  r12
        subs            r0,  r0,  #1
        usada8          lr,  r7,  r9,  lr       @ usada8 does not disturb the flags from subs
        beq             2f                      @ last row: skip loading another one
        add             r1,  r1,  r3
        ldmia           r1,  {r4-r7}
        add             r2,  r2,  r3
        ldr             r8,  [r2]
        b               1b
2:
        add             r0,  r12, lr            @ combine both partial sums
        ldmfd           sp!, {r4-r9, pc}
.endfunc
318 39a760f6 Måns Rullgård
319
/*
 * ff_pix_abs16_x2_armv6
 *
 * Sum of absolute differences between a 16-byte-wide block and the
 * horizontally interpolated version of a second block, where every
 * interpolated byte is (b[x] + b[x+1] + 1) >> 1.
 *
 *   r0    not used as an input; it is cleared and used as the accumulator
 *   r1    first block, four word loads per row
 *   r2    second block, 17 bytes read per row (four words plus one byte)
 *   r3    row stride in bytes, applied to both blocks
 *   [sp]  row count (first stack argument), kept in r12
 *
 * Returns the sum in r0.
 *
 * Rounding: uhadd8 yields (a + b) >> 1 per byte; the dropped low bit is
 * (a ^ b) & 1, which is masked with 0x01010101 (lr) and added back by uadd8.
 * The shifted-by-one-pixel words are built with lsr #8 / lsl #24, so the
 * lane descriptions below assume little-endian word loads.
 */
function ff_pix_abs16_x2_armv6, export=1
        ldr             r12, [sp]               @ row count, fetched before sp moves
        stmfd           sp!, {r4-r11, lr}
        mov             r0,  #0                 @ running sum
        mov             lr,  #1
        orr             lr,  lr,  lr,  lsl #8
        orr             lr,  lr,  lr,  lsl #16  @ lr = 0x01010101, one rounding bit per byte
1:
        ldr             r8,  [r2]               @ b[0..3]
        ldr             r9,  [r2, #4]           @ b[4..7]
        mov             r10, r8,  lsr #8
        ldr             r4,  [r1]               @ a[0..3]
        mov             r6,  r9,  lsr #8
        orr             r10, r10, r9,  lsl #24  @ r10 = b[1..4]
        ldr             r5,  [r2, #8]           @ b[8..11]
        eor             r11, r8,  r10
        uhadd8          r7,  r8,  r10
        orr             r6,  r6,  r5,  lsl #24  @ r6 = b[5..8]
        and             r11, r11, lr
        uadd8           r7,  r7,  r11           @ interpolated bytes 0-3
        ldr             r8,  [r1, #4]           @ a[4..7]
        usada8          r0,  r4,  r7,  r0
        eor             r7,  r9,  r6
        mov             r10, r5,  lsr #8
        and             r7,  r7,  lr
        uhadd8          r4,  r9,  r6
        ldr             r6,  [r2, #12]          @ b[12..15]
        uadd8           r4,  r4,  r7            @ interpolated bytes 4-7
        pld             [r1, r3]                @ prefetch hint for the next row of block 1
        orr             r10, r10, r6,  lsl #24  @ r10 = b[9..12]
        usada8          r0,  r8,  r4,  r0
        ldr             r4,  [r1, #8]           @ a[8..11]
        eor             r11, r5,  r10
        ldrb            r7,  [r2, #16]          @ b[16], the seventeenth byte
        and             r11, r11, lr
        uhadd8          r8,  r5,  r10
        ldr             r5,  [r1, #12]          @ a[12..15]
        uadd8           r8,  r8,  r11           @ interpolated bytes 8-11
        pld             [r2, r3]                @ prefetch hint for the next row of block 2
        mov             r10, r6,  lsr #8
        usada8          r0,  r4,  r8,  r0
        orr             r10, r10, r7,  lsl #24  @ r10 = b[13..16]
        subs            r12, r12, #1            @ flags are consumed by the bgt below
        eor             r11, r6,  r10
        add             r1,  r1,  r3
        uhadd8          r9,  r6,  r10
        and             r11, r11, lr
        uadd8           r9,  r9,  r11           @ interpolated bytes 12-15
        add             r2,  r2,  r3
        usada8          r0,  r5,  r9,  r0
        bgt             1b

        ldmfd           sp!, {r4-r11, pc}
.endfunc