Revision 2e823300

View differences:

libavcodec/arm/dsputil_arm_s.S
107 107
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108 108
        @ block = word aligned, pixels = unaligned
109 109
        pld             [r1]
110
        stmfd           sp!, {r4-r11, lr} @ R14 is also called LR
110
        push            {r4-r11, lr}
111 111
        JMP_ALIGN       r5,  r1
112 112
1:
113
        ldmia           r1,  {r4-r7}
113
        ldm             r1,  {r4-r7}
114 114
        add             r1,  r1,  r2
115
        stmia           r0,  {r4-r7}
115
        stm             r0,  {r4-r7}
116 116
        pld             [r1]
117 117
        subs            r3,  r3,  #1
118 118
        add             r0,  r0,  r2
119 119
        bne             1b
120
        ldmfd           sp!, {r4-r11, pc}
120
        pop             {r4-r11, pc}
121 121
        .align 5
122 122
2:
123
        ldmia           r1,  {r4-r8}
123
        ldm             r1,  {r4-r8}
124 124
        add             r1,  r1,  r2
125 125
        ADJ_ALIGN_QUADWORD_D 1, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
126 126
        pld             [r1]
127 127
        subs            r3,  r3,  #1
128
        stmia           r0,  {r9-r12}
128
        stm             r0,  {r9-r12}
129 129
        add             r0,  r0,  r2
130 130
        bne             2b
131
        ldmfd           sp!, {r4-r11, pc}
131
        pop             {r4-r11, pc}
132 132
        .align 5
133 133
3:
134
        ldmia           r1,  {r4-r8}
134
        ldm             r1,  {r4-r8}
135 135
        add             r1,  r1,  r2
136 136
        ADJ_ALIGN_QUADWORD_D 2, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
137 137
        pld             [r1]
138 138
        subs            r3,  r3,  #1
139
        stmia           r0,  {r9-r12}
139
        stm             r0,  {r9-r12}
140 140
        add             r0,  r0,  r2
141 141
        bne             3b
142
        ldmfd           sp!, {r4-r11, pc}
142
        pop             {r4-r11, pc}
143 143
        .align 5
144 144
4:
145
        ldmia           r1,  {r4-r8}
145
        ldm             r1,  {r4-r8}
146 146
        add             r1,  r1,  r2
147 147
        ADJ_ALIGN_QUADWORD_D 3, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
148 148
        pld             [r1]
149 149
        subs            r3,  r3,  #1
150
        stmia           r0,  {r9-r12}
150
        stm             r0,  {r9-r12}
151 151
        add             r0,  r0,  r2
152 152
        bne             4b
153
        ldmfd           sp!, {r4-r11,pc}
153
        pop             {r4-r11,pc}
154 154
        .endfunc
155 155

  
156 156
@ ----------------------------------------------------------------
......
159 159
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160 160
        @ block = word aligned, pixels = unaligned
161 161
        pld             [r1]
162
        stmfd           sp!, {r4-r5,lr} @ R14 is also called LR
162
        push            {r4-r5,lr}
163 163
        JMP_ALIGN       r5,  r1
164 164
1:
165
        ldmia           r1,  {r4-r5}
165
        ldm             r1,  {r4-r5}
166 166
        add             r1,  r1,  r2
167 167
        subs            r3,  r3,  #1
168 168
        pld             [r1]
169
        stmia           r0,  {r4-r5}
169
        stm             r0,  {r4-r5}
170 170
        add             r0,  r0,  r2
171 171
        bne             1b
172
        ldmfd           sp!, {r4-r5,pc}
172
        pop             {r4-r5,pc}
173 173
        .align 5
174 174
2:
175
        ldmia           r1,  {r4-r5, r12}
175
        ldm             r1,  {r4-r5, r12}
176 176
        add             r1,  r1,  r2
177 177
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r12
178 178
        pld             [r1]
179 179
        subs            r3,  r3,  #1
180
        stmia           r0,  {r4-r5}
180
        stm             r0,  {r4-r5}
181 181
        add             r0,  r0,  r2
182 182
        bne             2b
183
        ldmfd           sp!, {r4-r5,pc}
183
        pop             {r4-r5,pc}
184 184
        .align 5
185 185
3:
186
        ldmia           r1,  {r4-r5, r12}
186
        ldm             r1,  {r4-r5, r12}
187 187
        add             r1,  r1,  r2
188 188
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r12
189 189
        pld             [r1]
190 190
        subs            r3,  r3,  #1
191
        stmia           r0,  {r4-r5}
191
        stm             r0,  {r4-r5}
192 192
        add             r0,  r0,  r2
193 193
        bne             3b
194
        ldmfd           sp!, {r4-r5,pc}
194
        pop             {r4-r5,pc}
195 195
        .align 5
196 196
4:
197
        ldmia           r1,  {r4-r5, r12}
197
        ldm             r1,  {r4-r5, r12}
198 198
        add             r1,  r1,  r2
199 199
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r12
200 200
        pld             [r1]
201 201
        subs            r3,  r3,  #1
202
        stmia           r0,  {r4-r5}
202
        stm             r0,  {r4-r5}
203 203
        add             r0,  r0,  r2
204 204
        bne             4b
205
        ldmfd           sp!, {r4-r5,pc}
205
        pop             {r4-r5,pc}
206 206
        .endfunc
207 207

  
208 208
@ ----------------------------------------------------------------
......
211 211
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212 212
        @ block = word aligned, pixels = unaligned
213 213
        pld             [r1]
214
        stmfd           sp!, {r4-r10,lr} @ R14 is also called LR
214
        push            {r4-r10,lr}
215 215
        ldr             r12, =0xfefefefe
216 216
        JMP_ALIGN       r5,  r1
217 217
1:
218
        ldmia           r1,  {r4-r5, r10}
218
        ldm             r1,  {r4-r5, r10}
219 219
        add             r1,  r1,  r2
220 220
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
221 221
        pld             [r1]
222 222
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
223 223
        subs            r3,  r3,  #1
224
        stmia           r0,  {r8-r9}
224
        stm             r0,  {r8-r9}
225 225
        add             r0,  r0,  r2
226 226
        bne             1b
227
        ldmfd           sp!, {r4-r10,pc}
227
        pop             {r4-r10,pc}
228 228
        .align 5
229 229
2:
230
        ldmia           r1,  {r4-r5, r10}
230
        ldm             r1,  {r4-r5, r10}
231 231
        add             r1,  r1,  r2
232 232
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
233 233
        ADJ_ALIGN_DOUBLEWORD_D 2, r8,  r9,  r4,  r5,  r10
234 234
        pld             [r1]
235 235
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
236 236
        subs            r3,  r3,  #1
237
        stmia           r0,  {r4-r5}
237
        stm             r0,  {r4-r5}
238 238
        add             r0,  r0,  r2
239 239
        bne             2b
240
        ldmfd           sp!, {r4-r10,pc}
240
        pop             {r4-r10,pc}
241 241
        .align 5
242 242
3:
243
        ldmia           r1,  {r4-r5, r10}
243
        ldm             r1,  {r4-r5, r10}
244 244
        add             r1,  r1,  r2
245 245
        ADJ_ALIGN_DOUBLEWORD_D 2, r6,  r7,  r4,  r5,  r10
246 246
        ADJ_ALIGN_DOUBLEWORD_D 3, r8,  r9,  r4,  r5,  r10
247 247
        pld             [r1]
248 248
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
249 249
        subs            r3,  r3,  #1
250
        stmia           r0,  {r4-r5}
250
        stm             r0,  {r4-r5}
251 251
        add             r0,  r0,  r2
252 252
        bne             3b
253
        ldmfd           sp!, {r4-r10,pc}
253
        pop             {r4-r10,pc}
254 254
        .align 5
255 255
4:
256
        ldmia           r1,  {r4-r5, r10}
256
        ldm             r1,  {r4-r5, r10}
257 257
        add             r1,  r1,  r2
258 258
        ADJ_ALIGN_DOUBLEWORD_D 3, r6,  r7,  r4,  r5,  r10
259 259
        pld             [r1]
260 260
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
261 261
        subs            r3,  r3,  #1
262
        stmia           r0,  {r8-r9}
262
        stm             r0,  {r8-r9}
263 263
        add             r0,  r0,  r2
264 264
        bne             4b
265
        ldmfd           sp!, {r4-r10,pc} @@ update PC with LR content.
265
        pop             {r4-r10,pc}
266 266
        .endfunc
267 267

  
268 268
        .align 5
......
270 270
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271 271
        @ block = word aligned, pixels = unaligned
272 272
        pld             [r1]
273
        stmfd           sp!, {r4-r10,lr} @ R14 is also called LR
273
        push            {r4-r10,lr}
274 274
        ldr             r12, =0xfefefefe
275 275
        JMP_ALIGN       r5,  r1
276 276
1:
277
        ldmia           r1,  {r4-r5, r10}
277
        ldm             r1,  {r4-r5, r10}
278 278
        add             r1,  r1,  r2
279 279
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
280 280
        pld             [r1]
281 281
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
282 282
        subs            r3,  r3,  #1
283
        stmia           r0,  {r8-r9}
283
        stm             r0,  {r8-r9}
284 284
        add             r0,  r0,  r2
285 285
        bne             1b
286
        ldmfd           sp!, {r4-r10,pc}
286
        pop             {r4-r10,pc}
287 287
        .align 5
288 288
2:
289
        ldmia           r1,  {r4-r5, r10}
289
        ldm             r1,  {r4-r5, r10}
290 290
        add             r1,  r1,  r2
291 291
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
292 292
        ADJ_ALIGN_DOUBLEWORD_D 2, r8,  r9,  r4,  r5,  r10
293 293
        pld             [r1]
294 294
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
295 295
        subs            r3,  r3,  #1
296
        stmia           r0,  {r4-r5}
296
        stm             r0,  {r4-r5}
297 297
        add             r0,  r0,  r2
298 298
        bne             2b
299
        ldmfd           sp!, {r4-r10,pc}
299
        pop             {r4-r10,pc}
300 300
        .align 5
301 301
3:
302
        ldmia           r1,  {r4-r5, r10}
302
        ldm             r1,  {r4-r5, r10}
303 303
        add             r1,  r1,  r2
304 304
        ADJ_ALIGN_DOUBLEWORD_D 2, r6,  r7,  r4,  r5,  r10
305 305
        ADJ_ALIGN_DOUBLEWORD_D 3, r8,  r9,  r4,  r5,  r10
306 306
        pld             [r1]
307 307
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
308 308
        subs            r3,  r3,  #1
309
        stmia           r0,  {r4-r5}
309
        stm             r0,  {r4-r5}
310 310
        add             r0,  r0,  r2
311 311
        bne             3b
312
        ldmfd           sp!, {r4-r10,pc}
312
        pop             {r4-r10,pc}
313 313
        .align 5
314 314
4:
315
        ldmia           r1,  {r4-r5, r10}
315
        ldm             r1,  {r4-r5, r10}
316 316
        add             r1,  r1,  r2
317 317
        ADJ_ALIGN_DOUBLEWORD_D 3, r6,  r7,  r4,  r5,  r10
318 318
        pld             [r1]
319 319
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
320 320
        subs            r3,  r3,  #1
321
        stmia           r0,  {r8-r9}
321
        stm             r0,  {r8-r9}
322 322
        add             r0,  r0,  r2
323 323
        bne             4b
324
        ldmfd           sp!, {r4-r10,pc} @@ update PC with LR content.
324
        pop             {r4-r10,pc}
325 325
        .endfunc
326 326

  
327 327

  
......
331 331
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332 332
        @ block = word aligned, pixels = unaligned
333 333
        pld             [r1]
334
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
334
        push            {r4-r11,lr}
335 335
        mov             r3,  r3,  lsr #1
336 336
        ldr             r12, =0xfefefefe
337 337
        JMP_ALIGN       r5,  r1
338 338
1:
339
        ldmia           r1,  {r4-r5}
339
        ldm             r1,  {r4-r5}
340 340
        add             r1,  r1,  r2
341
6:      ldmia           r1,  {r6-r7}
341
6:      ldm             r1,  {r6-r7}
342 342
        add             r1,  r1,  r2
343 343
        pld             [r1]
344 344
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
345
        ldmia           r1,  {r4-r5}
345
        ldm             r1,  {r4-r5}
346 346
        add             r1,  r1,  r2
347
        stmia           r0,  {r8-r9}
347
        stm             r0,  {r8-r9}
348 348
        add             r0,  r0,  r2
349 349
        pld             [r1]
350 350
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
351 351
        subs            r3,  r3,  #1
352
        stmia           r0,  {r8-r9}
352
        stm             r0,  {r8-r9}
353 353
        add             r0,  r0,  r2
354 354
        bne             6b
355
        ldmfd           sp!, {r4-r11,pc}
355
        pop             {r4-r11,pc}
356 356
        .align 5
357 357
2:
358
        ldmia           r1,  {r4-r6}
358
        ldm             r1,  {r4-r6}
359 359
        add             r1,  r1,  r2
360 360
        pld             [r1]
361 361
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
362
6:      ldmia           r1,  {r7-r9}
362
6:      ldm             r1,  {r7-r9}
363 363
        add             r1,  r1,  r2
364 364
        pld             [r1]
365 365
        ADJ_ALIGN_DOUBLEWORD 1, r7,  r8,  r9
366 366
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
367
        stmia           r0,  {r10-r11}
367
        stm             r0,  {r10-r11}
368 368
        add             r0,  r0,  r2
369
        ldmia           r1,  {r4-r6}
369
        ldm             r1,  {r4-r6}
370 370
        add             r1,  r1,  r2
371 371
        pld             [r1]
372 372
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
373 373
        subs            r3,  r3,  #1
374 374
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
375
        stmia           r0,  {r10-r11}
375
        stm             r0,  {r10-r11}
376 376
        add             r0,  r0,  r2
377 377
        bne             6b
378
        ldmfd           sp!, {r4-r11,pc}
378
        pop             {r4-r11,pc}
379 379
        .align 5
380 380
3:
381
        ldmia           r1,  {r4-r6}
381
        ldm             r1,  {r4-r6}
382 382
        add             r1,  r1,  r2
383 383
        pld             [r1]
384 384
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
385
6:      ldmia           r1,  {r7-r9}
385
6:      ldm             r1,  {r7-r9}
386 386
        add             r1,  r1,  r2
387 387
        pld             [r1]
388 388
        ADJ_ALIGN_DOUBLEWORD 2, r7,  r8,  r9
389 389
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
390
        stmia           r0,  {r10-r11}
390
        stm             r0,  {r10-r11}
391 391
        add             r0,  r0,  r2
392
        ldmia           r1,  {r4-r6}
392
        ldm             r1,  {r4-r6}
393 393
        add             r1,  r1,  r2
394 394
        pld             [r1]
395 395
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
396 396
        subs            r3,  r3,  #1
397 397
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
398
        stmia           r0,  {r10-r11}
398
        stm             r0,  {r10-r11}
399 399
        add             r0,  r0,  r2
400 400
        bne             6b
401
        ldmfd           sp!, {r4-r11,pc}
401
        pop             {r4-r11,pc}
402 402
        .align 5
403 403
4:
404
        ldmia           r1,  {r4-r6}
404
        ldm             r1,  {r4-r6}
405 405
        add             r1,  r1,  r2
406 406
        pld             [r1]
407 407
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
408
6:      ldmia           r1,  {r7-r9}
408
6:      ldm             r1,  {r7-r9}
409 409
        add             r1,  r1,  r2
410 410
        pld             [r1]
411 411
        ADJ_ALIGN_DOUBLEWORD 3, r7,  r8,  r9
412 412
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
413
        stmia           r0,  {r10-r11}
413
        stm             r0,  {r10-r11}
414 414
        add             r0,  r0,  r2
415
        ldmia           r1,  {r4-r6}
415
        ldm             r1,  {r4-r6}
416 416
        add             r1,  r1,  r2
417 417
        pld             [r1]
418 418
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
419 419
        subs            r3,  r3,  #1
420 420
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
421
        stmia           r0,  {r10-r11}
421
        stm             r0,  {r10-r11}
422 422
        add             r0,  r0,  r2
423 423
        bne             6b
424
        ldmfd           sp!, {r4-r11,pc}
424
        pop             {r4-r11,pc}
425 425
        .endfunc
426 426

  
427 427
        .align 5
......
429 429
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430 430
        @ block = word aligned, pixels = unaligned
431 431
        pld             [r1]
432
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
432
        push            {r4-r11,lr}
433 433
        mov             r3,  r3,  lsr #1
434 434
        ldr             r12, =0xfefefefe
435 435
        JMP_ALIGN       r5,  r1
436 436
1:
437
        ldmia           r1,  {r4-r5}
437
        ldm             r1,  {r4-r5}
438 438
        add             r1,  r1,  r2
439
6:      ldmia           r1,  {r6-r7}
439
6:      ldm             r1,  {r6-r7}
440 440
        add             r1,  r1,  r2
441 441
        pld             [r1]
442 442
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
443
        ldmia           r1,  {r4-r5}
443
        ldm             r1,  {r4-r5}
444 444
        add             r1,  r1,  r2
445
        stmia           r0,  {r8-r9}
445
        stm             r0,  {r8-r9}
446 446
        add             r0,  r0,  r2
447 447
        pld             [r1]
448 448
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
449 449
        subs            r3,  r3,  #1
450
        stmia           r0,  {r8-r9}
450
        stm             r0,  {r8-r9}
451 451
        add             r0,  r0,  r2
452 452
        bne             6b
453
        ldmfd           sp!, {r4-r11,pc}
453
        pop             {r4-r11,pc}
454 454
        .align 5
455 455
2:
456
        ldmia           r1,  {r4-r6}
456
        ldm             r1,  {r4-r6}
457 457
        add             r1,  r1,  r2
458 458
        pld             [r1]
459 459
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
460
6:      ldmia           r1,  {r7-r9}
460
6:      ldm             r1,  {r7-r9}
461 461
        add             r1,  r1,  r2
462 462
        pld             [r1]
463 463
        ADJ_ALIGN_DOUBLEWORD 1, r7,  r8,  r9
464 464
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
465
        stmia           r0,  {r10-r11}
465
        stm             r0,  {r10-r11}
466 466
        add             r0,  r0,  r2
467
        ldmia           r1,  {r4-r6}
467
        ldm             r1,  {r4-r6}
468 468
        add             r1,  r1,  r2
469 469
        pld             [r1]
470 470
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
471 471
        subs            r3,  r3,  #1
472 472
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
473
        stmia           r0,  {r10-r11}
473
        stm             r0,  {r10-r11}
474 474
        add             r0,  r0,  r2
475 475
        bne             6b
476
        ldmfd           sp!, {r4-r11,pc}
476
        pop             {r4-r11,pc}
477 477
        .align 5
478 478
3:
479
        ldmia           r1,  {r4-r6}
479
        ldm             r1,  {r4-r6}
480 480
        add             r1,  r1,  r2
481 481
        pld             [r1]
482 482
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
483
6:      ldmia           r1,  {r7-r9}
483
6:      ldm             r1,  {r7-r9}
484 484
        add             r1,  r1,  r2
485 485
        pld             [r1]
486 486
        ADJ_ALIGN_DOUBLEWORD 2, r7,  r8,  r9
487 487
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
488
        stmia           r0,  {r10-r11}
488
        stm             r0,  {r10-r11}
489 489
        add             r0,  r0,  r2
490
        ldmia           r1,  {r4-r6}
490
        ldm             r1,  {r4-r6}
491 491
        add             r1,  r1,  r2
492 492
        pld             [r1]
493 493
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
494 494
        subs            r3,  r3,  #1
495 495
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
496
        stmia           r0,  {r10-r11}
496
        stm             r0,  {r10-r11}
497 497
        add             r0,  r0,  r2
498 498
        bne             6b
499
        ldmfd           sp!, {r4-r11,pc}
499
        pop             {r4-r11,pc}
500 500
        .align 5
501 501
4:
502
        ldmia           r1,  {r4-r6}
502
        ldm             r1,  {r4-r6}
503 503
        add             r1,  r1,  r2
504 504
        pld             [r1]
505 505
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
506
6:      ldmia           r1,  {r7-r9}
506
6:      ldm             r1,  {r7-r9}
507 507
        add             r1,  r1,  r2
508 508
        pld             [r1]
509 509
        ADJ_ALIGN_DOUBLEWORD 3, r7,  r8,  r9
510 510
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
511
        stmia           r0,  {r10-r11}
511
        stm             r0,  {r10-r11}
512 512
        add             r0,  r0,  r2
513
        ldmia           r1,  {r4-r6}
513
        ldm             r1,  {r4-r6}
514 514
        add             r1,  r1,  r2
515 515
        pld             [r1]
516 516
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
517 517
        subs            r3,  r3,  #1
518 518
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
519
        stmia           r0,  {r10-r11}
519
        stm             r0,  {r10-r11}
520 520
        add             r0,  r0,  r2
521 521
        bne             6b
522
        ldmfd           sp!, {r4-r11,pc}
522
        pop             {r4-r11,pc}
523 523
        .endfunc
524 524

  
525 525
        .ltorg
......
529 529
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
530 530
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531 531
.if \align == 0
532
        ldmia           r1,  {r6-r8}
532
        ldm             r1,  {r6-r8}
533 533
.elseif \align == 3
534
        ldmia           r1,  {r5-r7}
534
        ldm             r1,  {r5-r7}
535 535
.else
536
        ldmia           r1,  {r8-r10}
536
        ldm             r1,  {r8-r10}
537 537
.endif
538 538
        add             r1,  r1,  r2
539 539
        pld             [r1]
......
571 571

  
572 572
.macro RND_XY2_EXPAND align, rnd
573 573
        RND_XY2_IT      \align, \rnd
574
6:      stmfd           sp!, {r8-r11}
574
6:      push            {r8-r11}
575 575
        RND_XY2_IT      \align, \rnd
576
        ldmfd           sp!, {r4-r7}
576
        pop             {r4-r7}
577 577
        add             r4,  r4,  r8
578 578
        add             r5,  r5,  r9
579 579
        ldr             r14, =0x0f0f0f0f
......
583 583
        and             r5,  r14, r5,  lsr #2
584 584
        add             r4,  r4,  r6
585 585
        add             r5,  r5,  r7
586
        stmia           r0,  {r4-r5}
586
        stm             r0,  {r4-r5}
587 587
        add             r0,  r0,  r2
588 588
        bge             6b
589
        ldmfd           sp!, {r4-r11,pc}
589
        pop             {r4-r11,pc}
590 590
.endm
591 591

  
592 592
        .align 5
......
594 594
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595 595
        @ block = word aligned, pixels = unaligned
596 596
        pld             [r1]
597
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
597
        push            {r4-r11,lr} @ R14 is also called LR
598 598
        JMP_ALIGN       r5,  r1
599 599
1:
600 600
        RND_XY2_EXPAND  0, lsl
......
617 617
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
618 618
        @ block = word aligned, pixels = unaligned
619 619
        pld             [r1]
620
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
620
        push            {r4-r11,lr}
621 621
        JMP_ALIGN       r5,  r1
622 622
1:
623 623
        RND_XY2_EXPAND  0, lsr

Also available in: Unified diff