Revision 38e016a7 libavcodec/arm/dsputil_armv6.S

View differences:

libavcodec/arm/dsputil_armv6.S
22 22

  
23 23
        .text
24 24

  
25
/*
 * call_2x_pixels type, subp
 *
 * Emits an exported function named ff_<type>_pixels16<subp>_armv6 that
 * handles a 16-byte-wide block by running the matching 8-byte-wide routine
 * ff_<type>_pixels8<subp>_armv6 twice: first on the arguments exactly as
 * received, then with r0 and r1 both advanced by 8.
 *
 * r0-r3 and lr are saved around the first call so the second run starts
 * from the original arguments.  The second run is entered with a plain
 * branch, so the 8-wide routine returns straight to the original caller.
 *
 * subp may be omitted, giving ff_<type>_pixels16_armv6.
 *
 * NOTE(review): `function` looks like a helper macro defined outside this
 * view -- confirm its expansion (symbol export, alignment).
 * NOTE(review): five words are pushed, so sp is not kept at a multiple of 8
 * across the bl.  The 8-wide routines visible in this file make no
 * sp-relative doubleword accesses, but confirm nothing else depends on it.
 */
.macro  call_2x_pixels  type, subp
26
function ff_\type\()_pixels16\subp\()_armv6, export=1
27
        push            {r0-r3, lr}                     /* keep the original arguments and return address */
28
        bl              ff_\type\()_pixels8\subp\()_armv6
29
        pop             {r0-r3, lr}                     /* restore them for the second half */
30
        add             r0,  r0,  #8                    /* second half starts 8 bytes further on */
31
        add             r1,  r1,  #8
32
        b               ff_\type\()_pixels8\subp\()_armv6
33
.endfunc
34
.endm
35

  
36
/*
 * Instantiate the 16-byte-wide entry points.  Each line expands
 * call_2x_pixels into one exported function that runs the corresponding
 * 8-byte-wide routine twice.
 */
call_2x_pixels          avg                     /* ff_avg_pixels16_armv6 */
37
call_2x_pixels          put, _x2                /* ff_put_pixels16_x2_armv6 */
38
call_2x_pixels          put, _y2                /* ff_put_pixels16_y2_armv6 */
39
call_2x_pixels          put, _x2_no_rnd         /* ff_put_pixels16_x2_no_rnd_armv6 */
40
call_2x_pixels          put, _y2_no_rnd         /* ff_put_pixels16_y2_no_rnd_armv6 */
41

  
42
/*
 * ff_put_pixels16_armv6
 *
 * Copies 16 bytes per row from the buffer at r1 to the buffer at r0,
 * two rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, advanced by r2 after each loaded row
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * r4-r11 are saved on entry and restored before returning through bx lr.
 *
 * NOTE(review): presumably implements the C-side (block, pixels,
 * line_size, h) pixel-op signature -- confirm against the caller.
 * NOTE(review): the destination is written with strd; confirm callers
 * guarantee whatever alignment strd needs on the target cores.
 */
function ff_put_pixels16_armv6, export=1
43
        push            {r4-r11}
44
1:
45
        /* first row -> r4-r7; the post-indexed load of word 0 goes last so
           the #4/#8/#12 offsets are still relative to the start of the row */
        ldr             r5,  [r1, #4]
46
        ldr             r6,  [r1, #8]
47
        ldr             r7,  [r1, #12]
48
        ldr             r4,  [r1], r2
49
        strd            r6,  r7,  [r0, #8]
50
        ldr             r9,  [r1, #4]           /* second row -> r8-r11, interleaved with the stores */
51
        strd            r4,  r5,  [r0],  r2
52
        ldr             r10, [r1, #8]
53
        ldr             r11, [r1, #12]
54
        ldr             r8,  [r1], r2
55
        strd            r10, r11, [r0, #8]
56
        subs            r3,  r3,  #2            /* two rows done; sets Z for the bne below */
57
        strd            r8,  r9,  [r0],  r2
58
        bne             1b
59

  
60
        pop             {r4-r11}
61
        bx              lr
62
.endfunc
63

  
64
/*
 * ff_put_pixels8_armv6
 *
 * Copies 8 bytes per row from the buffer at r1 to the buffer at r0,
 * two rows per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, advanced by r2 after each loaded row
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * r4-r7 are saved on entry and restored before returning through bx lr.
 *
 * NOTE(review): presumably implements the C-side (block, pixels,
 * line_size, h) pixel-op signature -- confirm against the caller.
 */
function ff_put_pixels8_armv6, export=1
65
        push            {r4-r7}
66
1:
67
        ldr             r5,  [r1, #4]           /* first row -> r4:r5 */
68
        ldr             r4,  [r1], r2           /* word 0 last, then step to the next row */
69
        ldr             r7,  [r1, #4]           /* second row -> r6:r7 */
70
        strd            r4,  r5,  [r0],  r2
71
        ldr             r6,  [r1], r2
72
        subs            r3,  r3,  #2            /* two rows done; sets Z for the bne below */
73
        strd            r6,  r7,  [r0],  r2
74
        bne             1b
75

  
76
        pop             {r4-r7}
77
        bx              lr
78
.endfunc
79

  
80
/*
 * ff_put_pixels8_x2_armv6
 *
 * For every row, stores 8 bytes to r0 where each output byte is the
 * rounded-up average of a source byte and its right-hand neighbour:
 *     dst[i] = (src[i] + src[i + 1] + 1) >> 1,   i = 0..7
 * Nine source bytes are therefore read per row.  Two rows are handled per
 * loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, advanced by r2 per row
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * Rounding idiom: uhadd8 gives the per-byte truncated average (a + b) >> 1;
 * (a ^ b) & 0x01 is the bit that truncation dropped, and uadd8 adds it
 * back per byte.
 *
 * r12 holds the constant 0x01010101 and r14 (lr) is used as scratch, which
 * is why lr is pushed and the function returns by popping pc.
 *
 * NOTE(review): building r6/r10 with lsr/orr yields bytes 1..4 of the row
 * only for little-endian data layout -- confirm big-endian is not a target.
 * NOTE(review): the loads at offset #5 are not word aligned whenever r1 is;
 * presumably relies on ARMv6 unaligned word access -- confirm.
 */
function ff_put_pixels8_x2_armv6, export=1
81
        push            {r4-r11, lr}
82
        mov             r12, #1
83
        orr             r12, r12, r12, lsl #8
84
        orr             r12, r12, r12, lsl #16          /* r12 = 0x01010101, one low bit per byte lane */
85
1:
86
        ldr             r4,  [r1]                       /* row A bytes 0..3 */
87
        subs            r3,  r3,  #2                    /* flags consumed by the bne at the bottom */
88
        ldr             r5,  [r1, #4]                   /* row A bytes 4..7 */
89
        ldr             r7,  [r1, #5]                   /* row A bytes 5..8 (neighbours of r5) */
90
        lsr             r6,  r4,  #8
91
        ldr             r8,  [r1, r2]!                  /* step to row B, bytes 0..3 */
92
        orr             r6,  r6,  r5,  lsl #24          /* r6 = row A bytes 1..4 (neighbours of r4) */
93
        ldr             r9,  [r1, #4]                   /* row B bytes 4..7 */
94
        ldr             r11, [r1, #5]                   /* row B bytes 5..8 */
95
        lsr             r10, r8,  #8
96
        add             r1,  r1,  r2                    /* r1 -> first row of the next pass */
97
        orr             r10, r10, r9,  lsl #24          /* r10 = row B bytes 1..4 */
98
        eor             r14, r4,  r6
99
        uhadd8          r4,  r4,  r6
100
        eor             r6,  r5,  r7
101
        uhadd8          r5,  r5,  r7
102
        and             r14, r14, r12                   /* per-byte rounding bit, row A low word */
103
        and             r6,  r6,  r12                   /* per-byte rounding bit, row A high word */
104
        uadd8           r4,  r4,  r14
105
        eor             r14, r8,  r10
106
        uadd8           r5,  r5,  r6
107
        eor             r6,  r9,  r11
108
        uhadd8          r8,  r8,  r10
109
        and             r14, r14, r12
110
        uhadd8          r9,  r9,  r11
111
        and             r6,  r6,  r12
112
        uadd8           r8,  r8,  r14
113
        strd            r4,  r5,  [r0],  r2
114
        uadd8           r9,  r9,  r6
115
        strd            r8,  r9,  [r0],  r2
116
        /* N/Z/C/V are untouched since the subs at the top of the loop
           (uadd8 writes only the GE bits), so Z still reflects r3 */
        bne             1b
117

  
118
        pop             {r4-r11, pc}
119
.endfunc
120

  
121
/*
 * ff_put_pixels8_y2_armv6
 *
 * For every row, stores 8 bytes to r0 where each output byte is the
 * rounded-up average of a source byte and the byte one row below it:
 *     dst_row[n][i] = (src_row[n][i] + src_row[n + 1][i] + 1) >> 1
 * Two output rows are produced per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, pre-incremented by r2 for every new row loaded
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * Rows alternate between r4:r5 and r6:r7 so each loaded row is reused as
 * the lower row of one output and the upper row of the next.
 *
 * Rounding idiom: uhadd8 gives the per-byte truncated average; the low bit
 * of a ^ b (masked with r12 = 0x01010101) is added back with uadd8.
 *
 * r4-r11 are saved on entry and restored before returning through bx lr.
 *
 * NOTE(review): two rows are loaded before the loop and two more on every
 * pass, so the final pass loads one row (8 bytes at row r3 + 1) whose value
 * is never used.  Confirm callers always have that row mapped.
 */
function ff_put_pixels8_y2_armv6, export=1
122
        push            {r4-r11}
123
        mov             r12, #1
124
        orr             r12, r12, r12, lsl #8
125
        orr             r12, r12, r12, lsl #16          /* r12 = 0x01010101, one low bit per byte lane */
126
        ldr             r4,  [r1]                       /* row 0 -> r4:r5 */
127
        ldr             r5,  [r1, #4]
128
        ldr             r6,  [r1, r2]!                  /* row 1 -> r6:r7 */
129
        ldr             r7,  [r1, #4]
130
1:
131
        subs            r3,  r3,  #2                    /* flags consumed by the bne at the bottom */
132
        uhadd8          r8,  r4,  r6                    /* first output row: rows in r4:r5 and r6:r7 */
133
        eor             r10, r4,  r6
134
        uhadd8          r9,  r5,  r7
135
        eor             r11, r5,  r7
136
        and             r10, r10, r12
137
        ldr             r4,  [r1, r2]!                  /* next row replaces the older one in r4:r5 */
138
        uadd8           r8,  r8,  r10
139
        and             r11, r11, r12
140
        uadd8           r9,  r9,  r11
141
        ldr             r5,  [r1, #4]
142
        uhadd8          r10, r4,  r6                    /* second output row: r6:r7 with the new r4:r5 */
143
        eor             r6,  r4,  r6
144
        uhadd8          r11, r5,  r7
145
        and             r6,  r6,  r12
146
        eor             r7,  r5,  r7
147
        uadd8           r10, r10, r6
148
        and             r7,  r7,  r12
149
        ldr             r6,  [r1, r2]!                  /* row for the next pass; unused after the last pass */
150
        uadd8           r11, r11, r7
151
        strd            r8,  r9,  [r0],  r2
152
        ldr             r7,  [r1, #4]
153
        strd            r10, r11, [r0],  r2
154
        /* nothing since the subs writes N/Z/C/V (uadd8 writes only GE) */
        bne             1b
155

  
156
        pop             {r4-r11}
157
        bx              lr
158
.endfunc
159

  
160
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * For every row, stores 8 bytes to r0 where each output byte is the
 * truncated average of a source byte and its right-hand neighbour:
 *     dst[i] = (src[i] + src[i + 1]) >> 1,   i = 0..7
 * Nine source bytes are read per row.  Two rows are handled per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, advanced by r2 per row
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * uhadd8 alone supplies the truncated per-byte average, so no rounding
 * constant is needed.  r12 and r14 (lr) are used as scratch, which is why
 * lr is pushed and the function returns by popping pc.
 *
 * NOTE(review): building r6/r12 with lsr/orr yields bytes 1..4 of the row
 * only for little-endian data layout -- confirm big-endian is not a target.
 * NOTE(review): the loads at offset #5 are not word aligned whenever r1 is;
 * presumably relies on ARMv6 unaligned word access -- confirm.
 */
function ff_put_pixels8_x2_no_rnd_armv6, export=1
161
        push            {r4-r9, lr}
162
1:
163
        subs            r3,  r3,  #2                    /* flags consumed by the bne at the bottom */
164
        ldr             r4,  [r1]                       /* row A bytes 0..3 */
165
        ldr             r5,  [r1, #4]                   /* row A bytes 4..7 */
166
        ldr             r7,  [r1, #5]                   /* row A bytes 5..8 */
167
        ldr             r8,  [r1, r2]!                  /* step to row B, bytes 0..3 */
168
        ldr             r9,  [r1, #4]                   /* row B bytes 4..7 */
169
        ldr             r14, [r1, #5]                   /* row B bytes 5..8 */
170
        add             r1,  r1,  r2                    /* r1 -> first row of the next pass */
171
        lsr             r6,  r4,  #8
172
        orr             r6,  r6,  r5,  lsl #24          /* r6 = row A bytes 1..4 */
173
        lsr             r12, r8,  #8
174
        orr             r12, r12, r9,  lsl #24          /* r12 = row B bytes 1..4 */
175
        uhadd8          r4,  r4,  r6
176
        uhadd8          r5,  r5,  r7
177
        uhadd8          r8,  r8,  r12
178
        uhadd8          r9,  r9,  r14
179
        stm             r0,  {r4,r5}                    /* no writeback; r0 is stepped explicitly below */
180
        add             r0,  r0,  r2
181
        stm             r0,  {r8,r9}
182
        add             r0,  r0,  r2
183
        /* the add instructions above have no S suffix, so Z is still from the subs */
        bne             1b
184

  
185
        pop             {r4-r9, pc}
186
.endfunc
187

  
188
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * For every row, stores 8 bytes to r0 where each output byte is the
 * truncated average of a source byte and the byte one row below it:
 *     dst_row[n][i] = (src_row[n][i] + src_row[n + 1][i]) >> 1
 * Two output rows are produced per loop pass.
 *
 *   r0  destination pointer, advanced by r2 after each stored row
 *   r1  source pointer, pre-incremented by r2 for every new row loaded
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 per pass and the loop exits only when
 *       it reaches exactly zero, so it must be a positive even number
 *
 * Rows alternate between r4:r5 and r6:r7 so each loaded row is reused as
 * the lower row of one output and the upper row of the next.  r12 and r14
 * (lr) hold the second output row, which is why lr is pushed and the
 * function returns by popping pc.
 *
 * NOTE(review): two rows are loaded before the loop and two more on every
 * pass, so the final pass loads one row (8 bytes at row r3 + 1) whose value
 * is never used.  Confirm callers always have that row mapped.
 * NOTE(review): stm is presumably used for the stores because r12/r14 are
 * not a consecutive register pair as strd would need -- confirm.
 */
function ff_put_pixels8_y2_no_rnd_armv6, export=1
189
        push            {r4-r9, lr}
190
        ldr             r4,  [r1]                       /* row 0 -> r4:r5 */
191
        ldr             r5,  [r1, #4]
192
        ldr             r6,  [r1, r2]!                  /* row 1 -> r6:r7 */
193
        ldr             r7,  [r1, #4]
194
1:
195
        subs            r3,  r3,  #2                    /* flags consumed by the bne at the bottom */
196
        uhadd8          r8,  r4,  r6                    /* first output row, low word */
197
        ldr             r4,  [r1, r2]!                  /* next row replaces the older one in r4:r5 */
198
        uhadd8          r9,  r5,  r7                    /* first output row, high word */
199
        ldr             r5,  [r1, #4]
200
        uhadd8          r12, r4,  r6                    /* second output row, low word */
201
        ldr             r6,  [r1, r2]!                  /* row for the next pass; unused after the last pass */
202
        uhadd8          r14, r5,  r7                    /* second output row, high word */
203
        ldr             r7,  [r1, #4]
204
        stm             r0,  {r8,r9}                    /* no writeback; r0 is stepped explicitly below */
205
        add             r0,  r0,  r2
206
        stm             r0,  {r12,r14}
207
        add             r0,  r0,  r2
208
        /* the add instructions above have no S suffix, so Z is still from the subs */
        bne             1b
209

  
210
        pop             {r4-r9, pc}
211
.endfunc
212

  
213
/*
 * ff_avg_pixels8_armv6
 *
 * For every row, replaces the 8 bytes at r0 with the rounded-up per-byte
 * average of what is already there and the 8 source bytes at r1:
 *     dst[i] = (dst[i] + src[i] + 1) >> 1,   i = 0..7
 *
 *   r0  destination pointer (read with ldrd, written with strd), advanced
 *       by r2 after each stored row
 *   r1  source pointer, advanced by r2 after each loaded row
 *   r2  row stride in bytes (used for both source and destination)
 *   r3  row count; decremented by 2 before the loop and again on every
 *       pass that continues.  The exit test is "equal to zero", so it must
 *       be a positive even number
 *
 * Structure: the loop is software pipelined.  While one row is being
 * averaged, the destination and source words for the following row are
 * already being loaded.  Each pass finishes one row, starts the next, and
 * either leaves through label 2 to finish that row or loops to load more.
 *
 * Rounding idiom: uhadd8 gives the per-byte truncated average; the low bit
 * of a ^ b (masked with lr = 0x01010101) is added back with uadd8.
 *
 * lr is used for the constant, so it is pushed and the function returns by
 * popping pc.  r12 is used as scratch.  The pld instructions are preload
 * hints for upcoming source rows.
 *
 * NOTE(review): the destination is accessed with ldrd/strd; confirm callers
 * guarantee whatever alignment those need on the target cores.
 */
function ff_avg_pixels8_armv6, export=1
214
        pld             [r1, r2]
215
        push            {r4-r10, lr}
216
        mov             lr,  #1
217
        orr             lr,  lr,  lr,  lsl #8
218
        orr             lr,  lr,  lr,  lsl #16          /* lr = 0x01010101, one low bit per byte lane */
219
        ldrd            r4,  r5,  [r0]                  /* destination row 0 */
220
        ldr             r10, [r1, #4]                   /* source row 0, high word */
221
        ldr             r9,  [r1], r2                   /* source row 0, low word; r1 -> next row */
222
        subs            r3,  r3,  #2                    /* Z set here is tested by the first beq */
223
1:
224
        pld             [r1, r2]
225
        eor             r8,  r4,  r9
226
        uhadd8          r4,  r4,  r9
227
        eor             r12, r5,  r10
228
        ldrd            r6,  r7,  [r0, r2]              /* destination of the following row (r0 not yet advanced) */
229
        uhadd8          r5,  r5,  r10
230
        and             r8,  r8,  lr
231
        ldr             r10, [r1, #4]                   /* source of the following row, high word */
232
        and             r12, r12, lr
233
        uadd8           r4,  r4,  r8
234
        ldr             r9,  [r1], r2                   /* source of the following row, low word */
235
        eor             r8,  r6,  r9
236
        uadd8           r5,  r5,  r12
237
        pld             [r1, r2,  lsl #1]
238
        eor             r12, r7,  r10
239
        uhadd8          r6,  r6,  r9
240
        strd            r4,  r5,  [r0], r2              /* current row finished */
241
        uhadd8          r7,  r7,  r10
242
        /* Z comes from the most recent subs (before the loop or at the end
           of the previous pass); nothing in between writes N/Z/C/V */
        beq             2f
243
        and             r8,  r8,  lr
244
        ldrd            r4,  r5,  [r0, r2]              /* destination row for the next pass */
245
        uadd8           r6,  r6,  r8
246
        ldr             r10, [r1, #4]                   /* source row for the next pass, high word */
247
        and             r12, r12, lr
248
        subs            r3,  r3,  #2
249
        uadd8           r7,  r7,  r12
250
        ldr             r9,  [r1], r2                   /* source row for the next pass, low word */
251
        strd            r6,  r7,  [r0], r2              /* second row of this pass finished */
252
        b               1b
253
2:
254
        /* last row: same rounding fix-up and store as above, with no further loads */
        and             r8,  r8,  lr
255
        and             r12, r12, lr
256
        uadd8           r6,  r6,  r8
257
        uadd8           r7,  r7,  r12
258
        strd            r6,  r7,  [r0], r2
259

  
260
        pop             {r4-r10, pc}
261
.endfunc
262

  
25 263
function ff_add_pixels_clamped_armv6, export=1
26 264
        push            {r4-r8,lr}
27 265
        mov             r3,  #8

Also available in: Unified diff