Revision abff992d

View differences:

libavcodec/arm/dsputil_arm_s.S
31 31

  
32 32
#if HAVE_ARMV5TE
33 33
function ff_prefetch_arm, export=1
34
        subs    r2, r2, #1
35
        pld     [r0]
36
        add     r0, r0, r1
37
        bne     ff_prefetch_arm
38
        bx      lr
34
        subs            r2,  r2,  #1
35
        pld             [r0]
36
        add             r0,  r0,  r1
37
        bne             ff_prefetch_arm
38
        bx              lr
39 39
        .endfunc
40 40
#endif
41 41

  
42 42
.macro  ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
43
        mov \Rd0, \Rn0, lsr #(\shift * 8)
44
        mov \Rd1, \Rn1, lsr #(\shift * 8)
45
        mov \Rd2, \Rn2, lsr #(\shift * 8)
46
        mov \Rd3, \Rn3, lsr #(\shift * 8)
47
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
48
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
49
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
50
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
43
        mov             \Rd0, \Rn0, lsr #(\shift * 8)
44
        mov             \Rd1, \Rn1, lsr #(\shift * 8)
45
        mov             \Rd2, \Rn2, lsr #(\shift * 8)
46
        mov             \Rd3, \Rn3, lsr #(\shift * 8)
47
        orr             \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
48
        orr             \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
49
        orr             \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
50
        orr             \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
51 51
.endm
52 52
.macro  ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
53
        mov \R0, \R0, lsr #(\shift * 8)
54
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
55
        mov \R1, \R1, lsr #(\shift * 8)
56
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
53
        mov             \R0, \R0, lsr #(\shift * 8)
54
        orr             \R0, \R0, \R1, lsl #(32 - \shift * 8)
55
        mov             \R1, \R1, lsr #(\shift * 8)
56
        orr             \R1, \R1, \R2, lsl #(32 - \shift * 8)
57 57
.endm
58 58
.macro  ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
59
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
60
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
61
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
62
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
59
        mov             \Rdst0, \Rsrc0, lsr #(\shift * 8)
60
        mov             \Rdst1, \Rsrc1, lsr #(\shift * 8)
61
        orr             \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
62
        orr             \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
63 63
.endm
64 64

  
65 65
.macro  RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
66 66
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
67 67
        @ Rmask = 0xFEFEFEFE
68 68
        @ Rn is destroyed (overwritten with Rn | Rm)
69
        eor \Rd0, \Rn0, \Rm0
70
        eor \Rd1, \Rn1, \Rm1
71
        orr \Rn0, \Rn0, \Rm0
72
        orr \Rn1, \Rn1, \Rm1
73
        and \Rd0, \Rd0, \Rmask
74
        and \Rd1, \Rd1, \Rmask
75
        sub \Rd0, \Rn0, \Rd0, lsr #1
76
        sub \Rd1, \Rn1, \Rd1, lsr #1
69
        eor             \Rd0, \Rn0, \Rm0
70
        eor             \Rd1, \Rn1, \Rm1
71
        orr             \Rn0, \Rn0, \Rm0
72
        orr             \Rn1, \Rn1, \Rm1
73
        and             \Rd0, \Rd0, \Rmask
74
        and             \Rd1, \Rd1, \Rmask
75
        sub             \Rd0, \Rn0, \Rd0, lsr #1
76
        sub             \Rd1, \Rn1, \Rd1, lsr #1
77 77
.endm
78 78

  
79 79
.macro  NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
80 80
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
81 81
        @ Rmask = 0xFEFEFEFE
82 82
        @ Rn is destroyed (overwritten with Rn & Rm)
83
        eor \Rd0, \Rn0, \Rm0
84
        eor \Rd1, \Rn1, \Rm1
85
        and \Rn0, \Rn0, \Rm0
86
        and \Rn1, \Rn1, \Rm1
87
        and \Rd0, \Rd0, \Rmask
88
        and \Rd1, \Rd1, \Rmask
89
        add \Rd0, \Rn0, \Rd0, lsr #1
90
        add \Rd1, \Rn1, \Rd1, lsr #1
83
        eor             \Rd0, \Rn0, \Rm0
84
        eor             \Rd1, \Rn1, \Rm1
85
        and             \Rn0, \Rn0, \Rm0
86
        and             \Rn1, \Rn1, \Rm1
87
        and             \Rd0, \Rd0, \Rmask
88
        and             \Rd1, \Rd1, \Rmask
89
        add             \Rd0, \Rn0, \Rd0, lsr #1
90
        add             \Rd1, \Rn1, \Rd1, lsr #1
91 91
.endm
92 92

  
93 93
.macro  JMP_ALIGN tmp, reg
94
        ands \tmp, \reg, #3
95
        bic  \reg, \reg, #3
96
        beq  1f
97
        subs \tmp, \tmp, #1
98
        beq  2f
99
        subs \tmp, \tmp, #1
100
        beq  3f
94
        ands            \tmp, \reg, #3
95
        bic             \reg, \reg, #3
96
        beq             1f
97
        subs            \tmp, \tmp, #1
98
        beq             2f
99
        subs            \tmp, \tmp, #1
100
        beq             3f
101 101
        b    4f
102 102
.endm
103 103

  
......
106 106
function put_pixels16_arm, export=1
107 107
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108 108
        @ block = word aligned, pixels = unaligned
109
        pld [r1]
110
        stmfd sp!, {r4-r11, lr} @ R14 is also called LR
111
        JMP_ALIGN r5, r1
109
        pld             [r1]
110
        stmfd           sp!, {r4-r11, lr} @ R14 is also called LR
111
        JMP_ALIGN       r5,  r1
112 112
1:
113
        ldmia r1, {r4-r7}
114
        add r1, r1, r2
115
        stmia r0, {r4-r7}
116
        pld [r1]
117
        subs r3, r3, #1
118
        add r0, r0, r2
119
        bne 1b
120
        ldmfd sp!, {r4-r11, pc}
113
        ldmia           r1,  {r4-r7}
114
        add             r1,  r1,  r2
115
        stmia           r0,  {r4-r7}
116
        pld             [r1]
117
        subs            r3,  r3,  #1
118
        add             r0,  r0,  r2
119
        bne             1b
120
        ldmfd           sp!, {r4-r11, pc}
121 121
        .align 5
122 122
2:
123
        ldmia r1, {r4-r8}
124
        add r1, r1, r2
125
        ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
126
        pld [r1]
127
        subs r3, r3, #1
128
        stmia r0, {r9-r12}
129
        add r0, r0, r2
130
        bne 2b
131
        ldmfd sp!, {r4-r11, pc}
123
        ldmia           r1,  {r4-r8}
124
        add             r1,  r1,  r2
125
        ADJ_ALIGN_QUADWORD_D 1, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
126
        pld             [r1]
127
        subs            r3,  r3,  #1
128
        stmia           r0,  {r9-r12}
129
        add             r0,  r0,  r2
130
        bne             2b
131
        ldmfd           sp!, {r4-r11, pc}
132 132
        .align 5
133 133
3:
134
        ldmia r1, {r4-r8}
135
        add r1, r1, r2
136
        ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
137
        pld [r1]
138
        subs r3, r3, #1
139
        stmia r0, {r9-r12}
140
        add r0, r0, r2
141
        bne 3b
142
        ldmfd sp!, {r4-r11, pc}
134
        ldmia           r1,  {r4-r8}
135
        add             r1,  r1,  r2
136
        ADJ_ALIGN_QUADWORD_D 2, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
137
        pld             [r1]
138
        subs            r3,  r3,  #1
139
        stmia           r0,  {r9-r12}
140
        add             r0,  r0,  r2
141
        bne             3b
142
        ldmfd           sp!, {r4-r11, pc}
143 143
        .align 5
144 144
4:
145
        ldmia r1, {r4-r8}
146
        add r1, r1, r2
147
        ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
148
        pld [r1]
149
        subs r3, r3, #1
150
        stmia r0, {r9-r12}
151
        add r0, r0, r2
152
        bne 4b
153
        ldmfd sp!, {r4-r11,pc}
145
        ldmia           r1,  {r4-r8}
146
        add             r1,  r1,  r2
147
        ADJ_ALIGN_QUADWORD_D 3, r9,  r10, r11, r12, r4,  r5,  r6,  r7,  r8
148
        pld             [r1]
149
        subs            r3,  r3,  #1
150
        stmia           r0,  {r9-r12}
151
        add             r0,  r0,  r2
152
        bne             4b
153
        ldmfd           sp!, {r4-r11,pc}
154 154
        .endfunc
155 155

  
156 156
@ ----------------------------------------------------------------
......
158 158
function put_pixels8_arm, export=1
159 159
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
160 160
        @ block = word aligned, pixels = unaligned
161
        pld [r1]
162
        stmfd sp!, {r4-r5,lr} @ R14 is also called LR
163
        JMP_ALIGN r5, r1
161
        pld             [r1]
162
        stmfd           sp!, {r4-r5,lr} @ R14 is also called LR
163
        JMP_ALIGN       r5,  r1
164 164
1:
165
        ldmia r1, {r4-r5}
166
        add r1, r1, r2
167
        subs r3, r3, #1
168
        pld [r1]
169
        stmia r0, {r4-r5}
170
        add r0, r0, r2
171
        bne 1b
172
        ldmfd sp!, {r4-r5,pc}
165
        ldmia           r1,  {r4-r5}
166
        add             r1,  r1,  r2
167
        subs            r3,  r3,  #1
168
        pld             [r1]
169
        stmia           r0,  {r4-r5}
170
        add             r0,  r0,  r2
171
        bne             1b
172
        ldmfd           sp!, {r4-r5,pc}
173 173
        .align 5
174 174
2:
175
        ldmia r1, {r4-r5, r12}
176
        add r1, r1, r2
177
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
178
        pld [r1]
179
        subs r3, r3, #1
180
        stmia r0, {r4-r5}
181
        add r0, r0, r2
182
        bne 2b
183
        ldmfd sp!, {r4-r5,pc}
175
        ldmia           r1,  {r4-r5, r12}
176
        add             r1,  r1,  r2
177
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r12
178
        pld             [r1]
179
        subs            r3,  r3,  #1
180
        stmia           r0,  {r4-r5}
181
        add             r0,  r0,  r2
182
        bne             2b
183
        ldmfd           sp!, {r4-r5,pc}
184 184
        .align 5
185 185
3:
186
        ldmia r1, {r4-r5, r12}
187
        add r1, r1, r2
188
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
189
        pld [r1]
190
        subs r3, r3, #1
191
        stmia r0, {r4-r5}
192
        add r0, r0, r2
193
        bne 3b
194
        ldmfd sp!, {r4-r5,pc}
186
        ldmia           r1,  {r4-r5, r12}
187
        add             r1,  r1,  r2
188
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r12
189
        pld             [r1]
190
        subs            r3,  r3,  #1
191
        stmia           r0,  {r4-r5}
192
        add             r0,  r0,  r2
193
        bne             3b
194
        ldmfd           sp!, {r4-r5,pc}
195 195
        .align 5
196 196
4:
197
        ldmia r1, {r4-r5, r12}
198
        add r1, r1, r2
199
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
200
        pld [r1]
201
        subs r3, r3, #1
202
        stmia r0, {r4-r5}
203
        add r0, r0, r2
204
        bne 4b
205
        ldmfd sp!, {r4-r5,pc}
197
        ldmia           r1,  {r4-r5, r12}
198
        add             r1,  r1,  r2
199
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r12
200
        pld             [r1]
201
        subs            r3,  r3,  #1
202
        stmia           r0,  {r4-r5}
203
        add             r0,  r0,  r2
204
        bne             4b
205
        ldmfd           sp!, {r4-r5,pc}
206 206
        .endfunc
207 207

  
208 208
@ ----------------------------------------------------------------
......
210 210
function put_pixels8_x2_arm, export=1
211 211
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
212 212
        @ block = word aligned, pixels = unaligned
213
        pld [r1]
214
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
215
        ldr r12, =0xfefefefe
216
        JMP_ALIGN r5, r1
213
        pld             [r1]
214
        stmfd           sp!, {r4-r10,lr} @ R14 is also called LR
215
        ldr             r12, =0xfefefefe
216
        JMP_ALIGN       r5,  r1
217 217
1:
218
        ldmia r1, {r4-r5, r10}
219
        add r1, r1, r2
220
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
221
        pld [r1]
222
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
223
        subs r3, r3, #1
224
        stmia r0, {r8-r9}
225
        add r0, r0, r2
226
        bne 1b
227
        ldmfd sp!, {r4-r10,pc}
218
        ldmia           r1,  {r4-r5, r10}
219
        add             r1,  r1,  r2
220
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
221
        pld             [r1]
222
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
223
        subs            r3,  r3,  #1
224
        stmia           r0,  {r8-r9}
225
        add             r0,  r0,  r2
226
        bne             1b
227
        ldmfd           sp!, {r4-r10,pc}
228 228
        .align 5
229 229
2:
230
        ldmia r1, {r4-r5, r10}
231
        add r1, r1, r2
232
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
233
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
234
        pld [r1]
235
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
236
        subs r3, r3, #1
237
        stmia r0, {r4-r5}
238
        add r0, r0, r2
239
        bne 2b
240
        ldmfd sp!, {r4-r10,pc}
230
        ldmia           r1,  {r4-r5, r10}
231
        add             r1,  r1,  r2
232
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
233
        ADJ_ALIGN_DOUBLEWORD_D 2, r8,  r9,  r4,  r5,  r10
234
        pld             [r1]
235
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
236
        subs            r3,  r3,  #1
237
        stmia           r0,  {r4-r5}
238
        add             r0,  r0,  r2
239
        bne             2b
240
        ldmfd           sp!, {r4-r10,pc}
241 241
        .align 5
242 242
3:
243
        ldmia r1, {r4-r5, r10}
244
        add r1, r1, r2
245
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
246
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
247
        pld [r1]
248
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
249
        subs r3, r3, #1
250
        stmia r0, {r4-r5}
251
        add r0, r0, r2
252
        bne 3b
253
        ldmfd sp!, {r4-r10,pc}
243
        ldmia           r1,  {r4-r5, r10}
244
        add             r1,  r1,  r2
245
        ADJ_ALIGN_DOUBLEWORD_D 2, r6,  r7,  r4,  r5,  r10
246
        ADJ_ALIGN_DOUBLEWORD_D 3, r8,  r9,  r4,  r5,  r10
247
        pld             [r1]
248
        RND_AVG32       r4,  r5,  r6,  r7,  r8,  r9,  r12
249
        subs            r3,  r3,  #1
250
        stmia           r0,  {r4-r5}
251
        add             r0,  r0,  r2
252
        bne             3b
253
        ldmfd           sp!, {r4-r10,pc}
254 254
        .align 5
255 255
4:
256
        ldmia r1, {r4-r5, r10}
257
        add r1, r1, r2
258
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
259
        pld [r1]
260
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
261
        subs r3, r3, #1
262
        stmia r0, {r8-r9}
263
        add r0, r0, r2
264
        bne 4b
265
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
256
        ldmia           r1,  {r4-r5, r10}
257
        add             r1,  r1,  r2
258
        ADJ_ALIGN_DOUBLEWORD_D 3, r6,  r7,  r4,  r5,  r10
259
        pld             [r1]
260
        RND_AVG32       r8,  r9,  r6,  r7,  r5,  r10, r12
261
        subs            r3,  r3,  #1
262
        stmia           r0,  {r8-r9}
263
        add             r0,  r0,  r2
264
        bne             4b
265
        ldmfd           sp!, {r4-r10,pc} @@ update PC with LR content.
266 266
        .endfunc
267 267

  
268 268
        .align 5
269 269
function put_no_rnd_pixels8_x2_arm, export=1
270 270
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
271 271
        @ block = word aligned, pixels = unaligned
272
        pld [r1]
273
        stmfd sp!, {r4-r10,lr} @ R14 is also called LR
274
        ldr r12, =0xfefefefe
275
        JMP_ALIGN r5, r1
272
        pld             [r1]
273
        stmfd           sp!, {r4-r10,lr} @ R14 is also called LR
274
        ldr             r12, =0xfefefefe
275
        JMP_ALIGN       r5,  r1
276 276
1:
277
        ldmia r1, {r4-r5, r10}
278
        add r1, r1, r2
279
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
280
        pld [r1]
281
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
282
        subs r3, r3, #1
283
        stmia r0, {r8-r9}
284
        add r0, r0, r2
285
        bne 1b
286
        ldmfd sp!, {r4-r10,pc}
277
        ldmia           r1,  {r4-r5, r10}
278
        add             r1,  r1,  r2
279
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
280
        pld             [r1]
281
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
282
        subs            r3,  r3,  #1
283
        stmia           r0,  {r8-r9}
284
        add             r0,  r0,  r2
285
        bne             1b
286
        ldmfd           sp!, {r4-r10,pc}
287 287
        .align 5
288 288
2:
289
        ldmia r1, {r4-r5, r10}
290
        add r1, r1, r2
291
        ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
292
        ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
293
        pld [r1]
294
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
295
        subs r3, r3, #1
296
        stmia r0, {r4-r5}
297
        add r0, r0, r2
298
        bne 2b
299
        ldmfd sp!, {r4-r10,pc}
289
        ldmia           r1,  {r4-r5, r10}
290
        add             r1,  r1,  r2
291
        ADJ_ALIGN_DOUBLEWORD_D 1, r6,  r7,  r4,  r5,  r10
292
        ADJ_ALIGN_DOUBLEWORD_D 2, r8,  r9,  r4,  r5,  r10
293
        pld             [r1]
294
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
295
        subs            r3,  r3,  #1
296
        stmia           r0,  {r4-r5}
297
        add             r0,  r0,  r2
298
        bne             2b
299
        ldmfd           sp!, {r4-r10,pc}
300 300
        .align 5
301 301
3:
302
        ldmia r1, {r4-r5, r10}
303
        add r1, r1, r2
304
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
305
        ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
306
        pld [r1]
307
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
308
        subs r3, r3, #1
309
        stmia r0, {r4-r5}
310
        add r0, r0, r2
311
        bne 3b
312
        ldmfd sp!, {r4-r10,pc}
302
        ldmia           r1,  {r4-r5, r10}
303
        add             r1,  r1,  r2
304
        ADJ_ALIGN_DOUBLEWORD_D 2, r6,  r7,  r4,  r5,  r10
305
        ADJ_ALIGN_DOUBLEWORD_D 3, r8,  r9,  r4,  r5,  r10
306
        pld             [r1]
307
        NO_RND_AVG32    r4,  r5,  r6,  r7,  r8,  r9,  r12
308
        subs            r3,  r3,  #1
309
        stmia           r0,  {r4-r5}
310
        add             r0,  r0,  r2
311
        bne             3b
312
        ldmfd           sp!, {r4-r10,pc}
313 313
        .align 5
314 314
4:
315
        ldmia r1, {r4-r5, r10}
316
        add r1, r1, r2
317
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
318
        pld [r1]
319
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
320
        subs r3, r3, #1
321
        stmia r0, {r8-r9}
322
        add r0, r0, r2
323
        bne 4b
324
        ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
315
        ldmia           r1,  {r4-r5, r10}
316
        add             r1,  r1,  r2
317
        ADJ_ALIGN_DOUBLEWORD_D 3, r6,  r7,  r4,  r5,  r10
318
        pld             [r1]
319
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r5,  r10, r12
320
        subs            r3,  r3,  #1
321
        stmia           r0,  {r8-r9}
322
        add             r0,  r0,  r2
323
        bne             4b
324
        ldmfd           sp!, {r4-r10,pc} @@ update PC with LR content.
325 325
        .endfunc
326 326

  
327 327

  
......
330 330
function put_pixels8_y2_arm, export=1
331 331
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332 332
        @ block = word aligned, pixels = unaligned
333
        pld [r1]
334
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
335
        mov r3, r3, lsr #1
336
        ldr r12, =0xfefefefe
337
        JMP_ALIGN r5, r1
333
        pld             [r1]
334
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
335
        mov             r3,  r3,  lsr #1
336
        ldr             r12, =0xfefefefe
337
        JMP_ALIGN       r5,  r1
338 338
1:
339
        ldmia r1, {r4-r5}
340
        add r1, r1, r2
341
6:      ldmia r1, {r6-r7}
342
        add r1, r1, r2
343
        pld [r1]
344
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
345
        ldmia r1, {r4-r5}
346
        add r1, r1, r2
347
        stmia r0, {r8-r9}
348
        add r0, r0, r2
349
        pld [r1]
350
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
351
        subs r3, r3, #1
352
        stmia r0, {r8-r9}
353
        add r0, r0, r2
354
        bne 6b
355
        ldmfd sp!, {r4-r11,pc}
339
        ldmia           r1,  {r4-r5}
340
        add             r1,  r1,  r2
341
6:      ldmia           r1,  {r6-r7}
342
        add             r1,  r1,  r2
343
        pld             [r1]
344
        RND_AVG32       r8,  r9,  r4,  r5,  r6,  r7,  r12
345
        ldmia           r1,  {r4-r5}
346
        add             r1,  r1,  r2
347
        stmia           r0,  {r8-r9}
348
        add             r0,  r0,  r2
349
        pld             [r1]
350
        RND_AVG32       r8,  r9,  r6,  r7,  r4,  r5,  r12
351
        subs            r3,  r3,  #1
352
        stmia           r0,  {r8-r9}
353
        add             r0,  r0,  r2
354
        bne             6b
355
        ldmfd           sp!, {r4-r11,pc}
356 356
        .align 5
357 357
2:
358
        ldmia r1, {r4-r6}
359
        add r1, r1, r2
360
        pld [r1]
361
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
362
6:      ldmia r1, {r7-r9}
363
        add r1, r1, r2
364
        pld [r1]
365
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
366
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
367
        stmia r0, {r10-r11}
368
        add r0, r0, r2
369
        ldmia r1, {r4-r6}
370
        add r1, r1, r2
371
        pld [r1]
372
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
373
        subs r3, r3, #1
374
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
375
        stmia r0, {r10-r11}
376
        add r0, r0, r2
377
        bne 6b
378
        ldmfd sp!, {r4-r11,pc}
358
        ldmia           r1,  {r4-r6}
359
        add             r1,  r1,  r2
360
        pld             [r1]
361
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
362
6:      ldmia           r1,  {r7-r9}
363
        add             r1,  r1,  r2
364
        pld             [r1]
365
        ADJ_ALIGN_DOUBLEWORD 1, r7,  r8,  r9
366
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
367
        stmia           r0,  {r10-r11}
368
        add             r0,  r0,  r2
369
        ldmia           r1,  {r4-r6}
370
        add             r1,  r1,  r2
371
        pld             [r1]
372
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
373
        subs            r3,  r3,  #1
374
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
375
        stmia           r0,  {r10-r11}
376
        add             r0,  r0,  r2
377
        bne             6b
378
        ldmfd           sp!, {r4-r11,pc}
379 379
        .align 5
380 380
3:
381
        ldmia r1, {r4-r6}
382
        add r1, r1, r2
383
        pld [r1]
384
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
385
6:      ldmia r1, {r7-r9}
386
        add r1, r1, r2
387
        pld [r1]
388
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
389
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
390
        stmia r0, {r10-r11}
391
        add r0, r0, r2
392
        ldmia r1, {r4-r6}
393
        add r1, r1, r2
394
        pld [r1]
395
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
396
        subs r3, r3, #1
397
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
398
        stmia r0, {r10-r11}
399
        add r0, r0, r2
400
        bne 6b
401
        ldmfd sp!, {r4-r11,pc}
381
        ldmia           r1,  {r4-r6}
382
        add             r1,  r1,  r2
383
        pld             [r1]
384
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
385
6:      ldmia           r1,  {r7-r9}
386
        add             r1,  r1,  r2
387
        pld             [r1]
388
        ADJ_ALIGN_DOUBLEWORD 2, r7,  r8,  r9
389
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
390
        stmia           r0,  {r10-r11}
391
        add             r0,  r0,  r2
392
        ldmia           r1,  {r4-r6}
393
        add             r1,  r1,  r2
394
        pld             [r1]
395
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
396
        subs            r3,  r3,  #1
397
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
398
        stmia           r0,  {r10-r11}
399
        add             r0,  r0,  r2
400
        bne             6b
401
        ldmfd           sp!, {r4-r11,pc}
402 402
        .align 5
403 403
4:
404
        ldmia r1, {r4-r6}
405
        add r1, r1, r2
406
        pld [r1]
407
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
408
6:      ldmia r1, {r7-r9}
409
        add r1, r1, r2
410
        pld [r1]
411
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
412
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
413
        stmia r0, {r10-r11}
414
        add r0, r0, r2
415
        ldmia r1, {r4-r6}
416
        add r1, r1, r2
417
        pld [r1]
418
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
419
        subs r3, r3, #1
420
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
421
        stmia r0, {r10-r11}
422
        add r0, r0, r2
423
        bne 6b
424
        ldmfd sp!, {r4-r11,pc}
404
        ldmia           r1,  {r4-r6}
405
        add             r1,  r1,  r2
406
        pld             [r1]
407
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
408
6:      ldmia           r1,  {r7-r9}
409
        add             r1,  r1,  r2
410
        pld             [r1]
411
        ADJ_ALIGN_DOUBLEWORD 3, r7,  r8,  r9
412
        RND_AVG32       r10, r11, r4,  r5,  r7,  r8,  r12
413
        stmia           r0,  {r10-r11}
414
        add             r0,  r0,  r2
415
        ldmia           r1,  {r4-r6}
416
        add             r1,  r1,  r2
417
        pld             [r1]
418
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
419
        subs            r3,  r3,  #1
420
        RND_AVG32       r10, r11, r7,  r8,  r4,  r5,  r12
421
        stmia           r0,  {r10-r11}
422
        add             r0,  r0,  r2
423
        bne             6b
424
        ldmfd           sp!, {r4-r11,pc}
425 425
        .endfunc
426 426

  
427 427
        .align 5
428 428
function put_no_rnd_pixels8_y2_arm, export=1
429 429
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
430 430
        @ block = word aligned, pixels = unaligned
431
        pld [r1]
432
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
433
        mov r3, r3, lsr #1
434
        ldr r12, =0xfefefefe
435
        JMP_ALIGN r5, r1
431
        pld             [r1]
432
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
433
        mov             r3,  r3,  lsr #1
434
        ldr             r12, =0xfefefefe
435
        JMP_ALIGN       r5,  r1
436 436
1:
437
        ldmia r1, {r4-r5}
438
        add r1, r1, r2
439
6:      ldmia r1, {r6-r7}
440
        add r1, r1, r2
441
        pld [r1]
442
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
443
        ldmia r1, {r4-r5}
444
        add r1, r1, r2
445
        stmia r0, {r8-r9}
446
        add r0, r0, r2
447
        pld [r1]
448
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
449
        subs r3, r3, #1
450
        stmia r0, {r8-r9}
451
        add r0, r0, r2
452
        bne 6b
453
        ldmfd sp!, {r4-r11,pc}
437
        ldmia           r1,  {r4-r5}
438
        add             r1,  r1,  r2
439
6:      ldmia           r1,  {r6-r7}
440
        add             r1,  r1,  r2
441
        pld             [r1]
442
        NO_RND_AVG32    r8,  r9,  r4,  r5,  r6,  r7,  r12
443
        ldmia           r1,  {r4-r5}
444
        add             r1,  r1,  r2
445
        stmia           r0,  {r8-r9}
446
        add             r0,  r0,  r2
447
        pld             [r1]
448
        NO_RND_AVG32    r8,  r9,  r6,  r7,  r4,  r5,  r12
449
        subs            r3,  r3,  #1
450
        stmia           r0,  {r8-r9}
451
        add             r0,  r0,  r2
452
        bne             6b
453
        ldmfd           sp!, {r4-r11,pc}
454 454
        .align 5
455 455
2:
456
        ldmia r1, {r4-r6}
457
        add r1, r1, r2
458
        pld [r1]
459
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
460
6:      ldmia r1, {r7-r9}
461
        add r1, r1, r2
462
        pld [r1]
463
        ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
464
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
465
        stmia r0, {r10-r11}
466
        add r0, r0, r2
467
        ldmia r1, {r4-r6}
468
        add r1, r1, r2
469
        pld [r1]
470
        ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
471
        subs r3, r3, #1
472
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
473
        stmia r0, {r10-r11}
474
        add r0, r0, r2
475
        bne 6b
476
        ldmfd sp!, {r4-r11,pc}
456
        ldmia           r1,  {r4-r6}
457
        add             r1,  r1,  r2
458
        pld             [r1]
459
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
460
6:      ldmia           r1,  {r7-r9}
461
        add             r1,  r1,  r2
462
        pld             [r1]
463
        ADJ_ALIGN_DOUBLEWORD 1, r7,  r8,  r9
464
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
465
        stmia           r0,  {r10-r11}
466
        add             r0,  r0,  r2
467
        ldmia           r1,  {r4-r6}
468
        add             r1,  r1,  r2
469
        pld             [r1]
470
        ADJ_ALIGN_DOUBLEWORD 1, r4,  r5,  r6
471
        subs            r3,  r3,  #1
472
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
473
        stmia           r0,  {r10-r11}
474
        add             r0,  r0,  r2
475
        bne             6b
476
        ldmfd           sp!, {r4-r11,pc}
477 477
        .align 5
478 478
3:
479
        ldmia r1, {r4-r6}
480
        add r1, r1, r2
481
        pld [r1]
482
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
483
6:      ldmia r1, {r7-r9}
484
        add r1, r1, r2
485
        pld [r1]
486
        ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
487
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
488
        stmia r0, {r10-r11}
489
        add r0, r0, r2
490
        ldmia r1, {r4-r6}
491
        add r1, r1, r2
492
        pld [r1]
493
        ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
494
        subs r3, r3, #1
495
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
496
        stmia r0, {r10-r11}
497
        add r0, r0, r2
498
        bne 6b
499
        ldmfd sp!, {r4-r11,pc}
479
        ldmia           r1,  {r4-r6}
480
        add             r1,  r1,  r2
481
        pld             [r1]
482
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
483
6:      ldmia           r1,  {r7-r9}
484
        add             r1,  r1,  r2
485
        pld             [r1]
486
        ADJ_ALIGN_DOUBLEWORD 2, r7,  r8,  r9
487
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
488
        stmia           r0,  {r10-r11}
489
        add             r0,  r0,  r2
490
        ldmia           r1,  {r4-r6}
491
        add             r1,  r1,  r2
492
        pld             [r1]
493
        ADJ_ALIGN_DOUBLEWORD 2, r4,  r5,  r6
494
        subs            r3,  r3,  #1
495
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
496
        stmia           r0,  {r10-r11}
497
        add             r0,  r0,  r2
498
        bne             6b
499
        ldmfd           sp!, {r4-r11,pc}
500 500
        .align 5
501 501
4:
502
        ldmia r1, {r4-r6}
503
        add r1, r1, r2
504
        pld [r1]
505
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
506
6:      ldmia r1, {r7-r9}
507
        add r1, r1, r2
508
        pld [r1]
509
        ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
510
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
511
        stmia r0, {r10-r11}
512
        add r0, r0, r2
513
        ldmia r1, {r4-r6}
514
        add r1, r1, r2
515
        pld [r1]
516
        ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
517
        subs r3, r3, #1
518
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
519
        stmia r0, {r10-r11}
520
        add r0, r0, r2
521
        bne 6b
522
        ldmfd sp!, {r4-r11,pc}
502
        ldmia           r1,  {r4-r6}
503
        add             r1,  r1,  r2
504
        pld             [r1]
505
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
506
6:      ldmia           r1,  {r7-r9}
507
        add             r1,  r1,  r2
508
        pld             [r1]
509
        ADJ_ALIGN_DOUBLEWORD 3, r7,  r8,  r9
510
        NO_RND_AVG32    r10, r11, r4,  r5,  r7,  r8,  r12
511
        stmia           r0,  {r10-r11}
512
        add             r0,  r0,  r2
513
        ldmia           r1,  {r4-r6}
514
        add             r1,  r1,  r2
515
        pld             [r1]
516
        ADJ_ALIGN_DOUBLEWORD 3, r4,  r5,  r6
517
        subs            r3,  r3,  #1
518
        NO_RND_AVG32    r10, r11, r7,  r8,  r4,  r5,  r12
519
        stmia           r0,  {r10-r11}
520
        add             r0,  r0,  r2
521
        bne             6b
522
        ldmfd           sp!, {r4-r11,pc}
523 523
        .endfunc
524 524

  
525 525
        .ltorg
......
529 529
        @ l1=  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
530 530
        @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
531 531
.if \align == 0
532
        ldmia r1, {r6-r8}
532
        ldmia           r1,  {r6-r8}
533 533
.elseif \align == 3
534
        ldmia r1, {r5-r7}
534
        ldmia           r1,  {r5-r7}
535 535
.else
536
        ldmia r1, {r8-r10}
536
        ldmia           r1,  {r8-r10}
537 537
.endif
538
        add r1, r1, r2
539
        pld [r1]
538
        add             r1,  r1,  r2
539
        pld             [r1]
540 540
.if \align == 0
541
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
541
        ADJ_ALIGN_DOUBLEWORD_D 1, r4,  r5,  r6,  r7,  r8
542 542
.elseif \align == 1
543
        ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
544
        ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
543
        ADJ_ALIGN_DOUBLEWORD_D 1, r4,  r5,  r8,  r9,  r10
544
        ADJ_ALIGN_DOUBLEWORD_D 2, r6,  r7,  r8,  r9,  r10
545 545
.elseif \align == 2
546
        ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
547
        ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
546
        ADJ_ALIGN_DOUBLEWORD_D 2, r4,  r5,  r8,  r9,  r10
547
        ADJ_ALIGN_DOUBLEWORD_D 3, r6,  r7,  r8,  r9,  r10
548 548
.elseif \align == 3
549
        ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
549
        ADJ_ALIGN_DOUBLEWORD_D 3, r4,  r5,  r5,  r6,  r7
550 550
.endif
551
        ldr r14, =0x03030303
552
        tst r3, #1
553
        and r8, r4, r14
554
        and r9, r5, r14
555
        and r10, r6, r14
556
        and r11, r7, r14
557
        andeq r14, r14, r14, \rnd #1
558
        add r8, r8, r10
559
        add r9, r9, r11
560
        ldr r12, =0xfcfcfcfc >> 2
561
        addeq r8, r8, r14
562
        addeq r9, r9, r14
563
        and r4, r12, r4, lsr #2
564
        and r5, r12, r5, lsr #2
565
        and r6, r12, r6, lsr #2
566
        and r7, r12, r7, lsr #2
567
        add r10, r4, r6
568
        add r11, r5, r7
569
        subs r3, r3, #1
551
        ldr             r14, =0x03030303
552
        tst             r3,  #1
553
        and             r8,  r4,  r14
554
        and             r9,  r5,  r14
555
        and             r10, r6,  r14
556
        and             r11, r7,  r14
557
        andeq           r14, r14, r14, \rnd #1
558
        add             r8,  r8,  r10
559
        add             r9,  r9,  r11
560
        ldr             r12, =0xfcfcfcfc >> 2
561
        addeq           r8,  r8,  r14
562
        addeq           r9,  r9,  r14
563
        and             r4,  r12, r4,  lsr #2
564
        and             r5,  r12, r5,  lsr #2
565
        and             r6,  r12, r6,  lsr #2
566
        and             r7,  r12, r7,  lsr #2
567
        add             r10, r4,  r6
568
        add             r11, r5,  r7
569
        subs            r3,  r3,  #1
570 570
.endm
571 571

  
572 572
.macro RND_XY2_EXPAND align, rnd
573
        RND_XY2_IT \align, \rnd
574
6:      stmfd sp!, {r8-r11}
575
        RND_XY2_IT \align, \rnd
576
        ldmfd sp!, {r4-r7}
577
        add r4, r4, r8
578
        add r5, r5, r9
579
        ldr r14, =0x0f0f0f0f
580
        add r6, r6, r10
581
        add r7, r7, r11
582
        and r4, r14, r4, lsr #2
583
        and r5, r14, r5, lsr #2
584
        add r4, r4, r6
585
        add r5, r5, r7
586
        stmia r0, {r4-r5}
587
        add r0, r0, r2
588
        bge 6b
589
        ldmfd sp!, {r4-r11,pc}
573
        RND_XY2_IT      \align, \rnd
574
6:      stmfd           sp!, {r8-r11}
575
        RND_XY2_IT      \align, \rnd
576
        ldmfd           sp!, {r4-r7}
577
        add             r4,  r4,  r8
578
        add             r5,  r5,  r9
579
        ldr             r14, =0x0f0f0f0f
580
        add             r6,  r6,  r10
581
        add             r7,  r7,  r11
582
        and             r4,  r14, r4,  lsr #2
583
        and             r5,  r14, r5,  lsr #2
584
        add             r4,  r4,  r6
585
        add             r5,  r5,  r7
586
        stmia           r0,  {r4-r5}
587
        add             r0,  r0,  r2
588
        bge             6b
589
        ldmfd           sp!, {r4-r11,pc}
590 590
.endm
591 591

  
592 592
        .align 5
593 593
function put_pixels8_xy2_arm, export=1
594 594
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
595 595
        @ block = word aligned, pixels = unaligned
596
        pld [r1]
597
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
598
        JMP_ALIGN r5, r1
596
        pld             [r1]
597
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
598
        JMP_ALIGN       r5,  r1
599 599
1:
600
        RND_XY2_EXPAND 0, lsl
600
        RND_XY2_EXPAND  0, lsl
601 601

  
602 602
        .align 5
603 603
2:
604
        RND_XY2_EXPAND 1, lsl
604
        RND_XY2_EXPAND  1, lsl
605 605

  
606 606
        .align 5
607 607
3:
608
        RND_XY2_EXPAND 2, lsl
608
        RND_XY2_EXPAND  2, lsl
609 609

  
610 610
        .align 5
611 611
4:
612
        RND_XY2_EXPAND 3, lsl
612
        RND_XY2_EXPAND  3, lsl
613 613
        .endfunc
614 614

  
615 615
        .align 5
616 616
function put_no_rnd_pixels8_xy2_arm, export=1
617 617
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
618 618
        @ block = word aligned, pixels = unaligned
619
        pld [r1]
620
        stmfd sp!, {r4-r11,lr} @ R14 is also called LR
621
        JMP_ALIGN r5, r1
619
        pld             [r1]
620
        stmfd           sp!, {r4-r11,lr} @ R14 is also called LR
621
        JMP_ALIGN       r5,  r1
622 622
1:
623
        RND_XY2_EXPAND 0, lsr
623
        RND_XY2_EXPAND  0, lsr
624 624

  
625 625
        .align 5
626 626
2:
627
        RND_XY2_EXPAND 1, lsr
627
        RND_XY2_EXPAND  1, lsr
628 628

  
629 629
        .align 5
630 630
3:
631
        RND_XY2_EXPAND 2, lsr
631
        RND_XY2_EXPAND  2, lsr
632 632

  
633 633
        .align 5
634 634
4:
635
        RND_XY2_EXPAND 3, lsr
635
        RND_XY2_EXPAND  3, lsr
636 636
        .endfunc
637 637

  
638 638
        .align 5
......
657 657
        movne           r8,  r7,  lsr #24
658 658
        mov             r9,  r6
659 659
        ldrsh           r5,  [r0, #4]           /* moved from [A] */
660
        orr             r9,  r9,  r8, lsl #8
660
        orr             r9,  r9,  r8,  lsl #8
661 661
        /* block[2] and block[3] */
662 662
        /* [A] */
663 663
        ldrsh           r7,  [r0, #6]
664 664
        and             r6,  r4,  #0xFF0000
665 665
        and             r8,  r4,  #0xFF000000
666
        add             r6,  r5,  r6, lsr #16
667
        add             r8,  r7,  r8, lsr #24
666
        add             r6,  r5,  r6,  lsr #16
667
        add             r8,  r7,  r8,  lsr #24
668 668
        mvn             r5,  r5
669 669
        mvn             r7,  r7
670 670
        tst             r6,  #0x100
671 671
        movne           r6,  r5,  lsr #24
672 672
        tst             r8,  #0x100
673 673
        movne           r8,  r7,  lsr #24
674
        orr             r9,  r9,  r6, lsl #16
674
        orr             r9,  r9,  r6,  lsl #16
675 675
        ldr             r4,  [r1, #4]           /* moved from [B] */
676
        orr             r9,  r9,  r8, lsl #24
676
        orr             r9,  r9,  r8,  lsl #24
677 677
        /* store dest */
678 678
        ldrsh           r5,  [r0, #8]           /* moved from [C] */
679 679
        str             r9,  [r1]
......
686 686
        and             r6,  r4,  #0xFF
687 687
        and             r8,  r4,  #0xFF00
688 688
        add             r6,  r5,  r6
689
        add             r8,  r7,  r8, lsr #8
689
        add             r8,  r7,  r8,  lsr #8
690 690
        mvn             r5,  r5
691 691
        mvn             r7,  r7
692 692
        tst             r6,  #0x100
......
695 695
        movne           r8,  r7,  lsr #24
696 696
        mov             r9,  r6
697 697
        ldrsh           r5,  [r0, #12]          /* moved from [D] */
698
        orr             r9,  r9,  r8, lsl #8
698
        orr             r9,  r9,  r8,  lsl #8
699 699
        /* block[6] and block[7] */
700 700
        /* [D] */
701 701
        ldrsh           r7,  [r0, #14]
702 702
        and             r6,  r4,  #0xFF0000
703 703
        and             r8,  r4,  #0xFF000000
704
        add             r6,  r5,  r6, lsr #16
705
        add             r8,  r7,  r8, lsr #24
704
        add             r6,  r5,  r6,  lsr #16
705
        add             r8,  r7,  r8,  lsr #24
706 706
        mvn             r5,  r5
707 707
        mvn             r7,  r7
708 708
        tst             r6,  #0x100
709 709
        movne           r6,  r5,  lsr #24
710 710
        tst             r8,  #0x100
711 711
        movne           r8,  r7,  lsr #24
712
        orr             r9,  r9,  r6, lsl #16
712
        orr             r9,  r9,  r6,  lsl #16
713 713
        add             r0,  r0,  #16           /* moved from [E] */
714
        orr             r9,  r9,  r8, lsl #24
714
        orr             r9,  r9,  r8,  lsl #24
715 715
        subs            r10, r10, #1            /* moved from [F] */
716 716
        /* store dest */
717 717
        str             r9,  [r1, #4]

Also available in: Unified diff