Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / simple_idct_armv6.S @ 2912e87a

History | View | Annotate | Download (13.1 KB)

/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 * Copyright (c) 2007 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23
24 c130bedc Måns Rullgård
#include "asm.S"
25
26 7d42886b Måns Rullgård
/*
 * Fixed-point IDCT coefficients.  Wi follows the formula given on each
 * line; note that W4 is stored as 16383, one less than the 16384 the
 * formula yields.
 */
#define W1  22725   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W2  21407   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W3  19266   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W4  16383   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W5  12873   /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W6  8867    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define W7  4520    /* cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */
#define ROW_SHIFT 11    /* shift used by the row pass (idct_row_armv6) */
#define COL_SHIFT 20    /* shift used by the column pass (idct_col_*_armv6) */

/*
 * Coefficient pairs packed into one word for the dual 16-bit multiplies:
 * Wxy = Wx in the low halfword, Wy in the high halfword.
 * W42n holds the negated pair; the low half is masked to 16 bits so the
 * sign extension of -W4 does not spill into the high half.
 */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W42 (W4 | (W2 << 16))
#define W42n ((-W4 & 0xffff) | (-W2 << 16))
#define W46 (W4 | (W6 << 16))
#define W57 (W5 | (W7 << 16))

        .text
        .align
/* Literal pool: packed coefficient words loaded with "ldr rX, wNN". */
w13:    .long W13
w26:    .long W26
w42:    .long W42
w42n:   .long W42n
w46:    .long W46
w57:    .long W57
51
52
/*
  Compute partial IDCT of single row.
  shift = shift amount; used here only to seed the even-part accumulators
          with the rounding bias 1 << (shift-1)
  r0 = source address (row[6,4] is read from [r0, #4], row[7,5] from [r0, #12])
  r2 = row[2,0] <= 2 cycles
  r3 = row[3,1]
  ip = w42      <= 2 cycles

  row[b,a] denotes a word with element a in the low half and b in the
  high half.

  Output in registers r4--r11:
    r4 = A0, r5 = A1, r6 = A2, r7 = A3 (each including the rounding bias)
    r8 = B0, r9 = -B1, r10 = B2, r11 = B3
  Clobbers r1, r2, r3, ip and lr.
*/
        .macro idct_row shift
        ldr    lr, w46               /* lr  = W4 | (W6 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  = A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  = A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, w13               /* ip  = W1 | (W3 << 16) */
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
        smlad  r5, r2, lr, r1        /* r5  = A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  = A2 = W4*row[0] - W6*row[2] + bias */

        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        ldr    lr, [r0, #12]         /* lr  =  row[7,5] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smlad  r8, lr, r10,r8        /* B0  +=      W5*row[5] + W7*row[7] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */

        ldr    r3, w42n              /* r3 =  -W4 | (-W2 << 16) */
        smlad  r10,lr, r2, r10       /* B2 +=  W7*row[5] + W3*row[7] */
        ldr    r2, [r0, #4]          /* r2 =   row[6,4] */
        smlsdx r11,lr, ip, r11       /* B3 +=  W3*row[5] - W1*row[7] */
        ldr    ip, w46               /* ip =   W4 | (W6 << 16) */
        smlad  r9, lr, r1, r9        /* B1 -=  W1*row[5] + W5*row[7] */

        smlad  r5, r2, r3, r5        /* A1 += -W4*row[4] - W2*row[6] */
        smlsd  r6, r2, r3, r6        /* A2 += -W4*row[4] + W2*row[6] */
        smlad  r4, r2, ip, r4        /* A0 +=  W4*row[4] + W6*row[6] */
        smlsd  r7, r2, ip, r7        /* A3 +=  W4*row[4] - W6*row[6] */
        .endm
93
94
/*
  Compute partial IDCT of half row.
  Only row[0..3] (passed in r2 and r3) contribute; the macro does not read
  the source row from memory.
  shift = shift amount; used here only to seed the even-part accumulators
          with the rounding bias 1 << (shift-1)
  r2 = row[2,0]
  r3 = row[3,1]
  ip = w42

  Output in registers r4--r11:
    r4 = A0, r5 = A1, r6 = A2, r7 = A3 (each including the rounding bias)
    r8 = B0, r9 = -B1, r10 = B2, r11 = B3
  Clobbers r1, r2, ip and lr.
*/
        .macro idct_row4 shift
        ldr    lr, w46               /* lr =  W4 | (W6 << 16) */
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
        mov    r1, #(1<<(\shift-1))  /* r1  = rounding bias */
        smlad  r4, r2, ip, r1        /* r4  = A0 = W4*row[0] + W2*row[2] + bias */
        smlsd  r7, r2, ip, r1        /* r7  = A3 = W4*row[0] - W2*row[2] + bias */
        ldr    ip, w13               /* ip =  W1 | (W3 << 16) */
        smlad  r5, r2, lr, r1        /* r5  = A1 = W4*row[0] + W6*row[2] + bias */
        smlsd  r6, r2, lr, r1        /* r6  = A2 = W4*row[0] - W6*row[2] + bias */
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
        .endm
119
120
/*
  Compute final part of IDCT single row without shift.
  Input in registers r4--r11
    (r4--r7 = A0..A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3)
  Output in registers ip, r4--r6, lr, r8--r10
    ip = A0 + B0    lr  = A0 - B0
    r4 = A1 + B1    r8  = A1 - B1
    r5 = A2 + B2    r9  = A2 - B2
    r6 = A3 + B3    r10 = A3 - B3
  r7 and r11 are left unchanged.
*/
        .macro idct_finish
        add    ip, r4, r8            /* ip  = A0 + B0 */
        sub    lr, r4, r8            /* lr  = A0 - B0 */
        sub    r4, r5, r9            /* r4  = A1 + B1 (r9 holds -B1) */
        add    r8, r5, r9            /* r8  = A1 - B1 */
        add    r5, r6, r10           /* r5  = A2 + B2 */
        sub    r9, r6, r10           /* r9  = A2 - B2 */
        add    r6, r7, r11           /* r6  = A3 + B3 */
        sub    r10,r7, r11           /* r10 = A3 - B3 */
        .endm
135
136
/*
  Compute final part of IDCT single row.
  shift = right-shift amount (arithmetic shift applied to every result)
  Input/output in registers r4--r11
    in:  r4--r7 = A0..A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
    out: r4 = (A0 + B0) >> shift    r8  = (A0 - B0) >> shift
         r5 = (A1 + B1) >> shift    r9  = (A1 - B1) >> shift
         r6 = (A2 + B2) >> shift    r10 = (A2 - B2) >> shift
         r7 = (A3 + B3) >> shift    r11 = (A3 - B3) >> shift
  Clobbers r2 and r3.
*/
        .macro idct_finish_shift shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    r2, r4, r8            /* r2 = A0 - B0 */
        mov    r4, r3, asr #\shift
        mov    r8, r2, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    r2, r5, r9            /* r2 = A1 - B1 */
        mov    r5, r3, asr #\shift
        mov    r9, r2, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    r2, r6, r10           /* r2 = A2 - B2 */
        mov    r6, r3, asr #\shift
        mov    r10,r2, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    r2, r7, r11           /* r2 = A3 - B3 */
        mov    r7, r3, asr #\shift
        mov    r11,r2, asr #\shift
        .endm
162
163
/*
  Compute final part of IDCT single row, saturating results at 8 bits.
  shift = right-shift amount (arithmetic shift applied before saturation)
  Input/output in registers r4--r11
    in:  r4--r7 = A0..A3, r8 = B0, r9 = -B1, r10 = B2, r11 = B3
    out: r4 = A0 + B0    r8  = A0 - B0
         r5 = A1 + B1    r9  = A1 - B1
         r6 = A2 + B2    r10 = A2 - B2
         r7 = A3 + B3    r11 = A3 - B3
         each shifted right by "shift" and saturated to unsigned 8 bits
  Clobbers r3 and ip.
*/
        .macro idct_finish_shift_sat shift
        add    r3, r4, r8            /* r3 = A0 + B0 */
        sub    ip, r4, r8            /* ip = A0 - B0 */
        usat   r4, #8, r3, asr #\shift
        usat   r8, #8, ip, asr #\shift

        sub    r3, r5, r9            /* r3 = A1 + B1 (r9 holds -B1) */
        add    ip, r5, r9            /* ip = A1 - B1 */
        usat   r5, #8, r3, asr #\shift
        usat   r9, #8, ip, asr #\shift

        add    r3, r6, r10           /* r3 = A2 + B2 */
        sub    ip, r6, r10           /* ip = A2 - B2 */
        usat   r6, #8, r3, asr #\shift
        usat   r10,#8, ip, asr #\shift

        add    r3, r7, r11           /* r3 = A3 + B3 */
        sub    ip, r7, r11           /* ip = A3 - B3 */
        usat   r7, #8, r3, asr #\shift
        usat   r11,#8, ip, asr #\shift
        .endm
189
190
/*
  Compute IDCT of single row, storing as column.
  r0 = source (eight 16-bit coefficients; preserved)
  r1 = dest   (results are written as halfwords 16 bytes apart; preserved)

  Three paths are taken depending on which coefficients are zero:
    - everything except row[0] is zero: all eight outputs are row[0] << 3
    - row[4..7] are zero:               idct_row4
    - otherwise:                        idct_row
  Clobbers r2--r11 and ip; lr is saved and used to return.
*/
function idct_row_armv6
        push   {lr}

        ldr    lr, [r0, #12]         /* lr = row[7,5] */
        ldr    ip, [r0, #4]          /* ip = row[6,4] */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        ldr    r2, [r0]              /* r2 = row[2,0] */
        orrs   lr, lr, ip            /* lr = row[7,5] | row[6,4]; Z if row[4..7] == 0 */
        cmpeq  lr, r3                /* ... and row[3,1] == 0 */
        cmpeq  lr, r2, lsr #16       /* ... and row[2] == 0 */
        beq    1f                    /* only row[0] can be non-zero */
        push   {r1}                  /* the idct_row macros use r1 as scratch */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        cmp    lr, #0
        beq    2f                    /* row[4..7] == 0: half-row version */

        idct_row   ROW_SHIFT
        b      3f

2:      idct_row4  ROW_SHIFT

3:      pop    {r1}
        idct_finish_shift ROW_SHIFT

        /* r4--r7 = A0+B0 .. A3+B3, r11--r8 = A3-B3 .. A0-B0 */
        strh   r4, [r1]
        strh   r5, [r1, #(16*2)]
        strh   r6, [r1, #(16*4)]
        strh   r7, [r1, #(16*6)]
        strh   r11,[r1, #(16*1)]
        strh   r10,[r1, #(16*3)]
        strh   r9, [r1, #(16*5)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}

        /* Shortcut: the upper half of r2 is zero here, so the low 16 bits
           of r2 << 3 are row[0] << 3; strh stores only those bits. */
1:      mov    r2, r2, lsl #3
        strh   r2, [r1]
        strh   r2, [r1, #(16*2)]
        strh   r2, [r1, #(16*4)]
        strh   r2, [r1, #(16*6)]
        strh   r2, [r1, #(16*1)]
        strh   r2, [r1, #(16*3)]
        strh   r2, [r1, #(16*5)]
        strh   r2, [r1, #(16*7)]
        pop    {pc}
endfunc
241 7d42886b Måns Rullgård
242
/*
  Compute IDCT of single column, read as row.
  r0 = source (eight contiguous 16-bit values; preserved)
  r1 = dest   (results are written as halfwords 16 bytes apart; preserved)

  Clobbers r2--r11 and ip; lr is saved and used to return.
*/
function idct_col_armv6
        push   {r1, lr}              /* idct_row uses r1 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1}
        idct_finish_shift COL_SHIFT

        /* r4--r7 = A0+B0 .. A3+B3, r11--r8 = A3-B3 .. A0-B0 */
        strh   r4, [r1]
        strh   r5, [r1, #(16*1)]
        strh   r6, [r1, #(16*2)]
        strh   r7, [r1, #(16*3)]
        strh   r11,[r1, #(16*4)]
        strh   r10,[r1, #(16*5)]
        strh   r9, [r1, #(16*6)]
        strh   r8, [r1, #(16*7)]

        pop    {pc}
endfunc
268 7d42886b Måns Rullgård
269
/*
  Compute IDCT of single column, read as row, store saturated 8-bit.
  r0 = source (eight contiguous 16-bit values; preserved)
  r1 = dest   (one byte is written per line; preserved)
  r2 = line size (preserved)

  Clobbers r3--r11 and ip; lr is saved and used to return.
*/
function idct_col_put_armv6
        push   {r1, r2, lr}          /* idct_row uses r1 and r2 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish_shift_sat COL_SHIFT

        /* Post-indexed stores: each one advances r1 by one line. */
        strb   r4, [r1], r2
        strb   r5, [r1], r2
        strb   r6, [r1], r2
        strb   r7, [r1], r2
        strb   r11,[r1], r2
        strb   r10,[r1], r2
        strb   r9, [r1], r2
        strb   r8, [r1], r2

        sub    r1, r1, r2, lsl #3    /* undo the eight increments of r1 */

        pop    {pc}
endfunc
298 7d42886b Måns Rullgård
299
/*
  Compute IDCT of single column, read as row, add/store saturated 8-bit.
  r0 = source (eight contiguous 16-bit values; preserved)
  r1 = dest   (one byte is read and rewritten per line; preserved)
  r2 = line size (preserved)

  Each result is shifted right by COL_SHIFT, added to the destination byte
  of its line and saturated to unsigned 8 bits.  Loads of later lines are
  interleaved with the arithmetic and stores of earlier ones; every
  destination byte is loaded exactly once.
  Clobbers r3--r11 and ip; lr is saved and used to return.
*/
function idct_col_add_armv6
        push   {r1, r2, lr}          /* idct_row uses r1 and r2 as scratch */

        ldr    r2, [r0]              /* r2 = row[2,0] */
        ldr    ip, w42               /* ip = W4 | (W2 << 16) */
        ldr    r3, [r0, #8]          /* r3 = row[3,1] */
        idct_row COL_SHIFT
        pop    {r1, r2}
        idct_finish

        /* Results for lines 0..7 are in ip, r4, r5, r6, r10, r9, r8, lr. */
        ldrb   r3, [r1]              /* r3  = dest line 0 */
        ldrb   r7, [r1, r2]          /* r7  = dest line 1 */
        add    ip, r3, ip, asr #COL_SHIFT
        usat   ip, #8, ip
        add    r4, r7, r4, asr #COL_SHIFT
        strb   ip, [r1], r2          /* store line 0; r1 -> line 1 */
        ldrb   ip, [r1, r2]          /* ip  = dest line 2 */
        usat   r4, #8, r4
        ldrb   r11,[r1, r2, lsl #2]  /* r11 = dest line 5 */
        add    r5, ip, r5, asr #COL_SHIFT
        usat   r5, #8, r5
        strb   r4, [r1], r2          /* store line 1; r1 -> line 2 */
        ldrb   r3, [r1, r2]          /* r3  = dest line 3 */
        ldrb   ip, [r1, r2, lsl #2]  /* ip  = dest line 6 */
        strb   r5, [r1], r2          /* store line 2; r1 -> line 3 */
        ldrb   r7, [r1, r2]          /* r7  = dest line 4 */
        ldrb   r4, [r1, r2, lsl #2]  /* r4  = dest line 7 */
        add    r6, r3, r6, asr #COL_SHIFT
        usat   r6, #8, r6
        add    r10,r7, r10,asr #COL_SHIFT
        usat   r10,#8, r10
        add    r9, r11,r9, asr #COL_SHIFT
        usat   r9, #8, r9
        add    r8, ip, r8, asr #COL_SHIFT
        usat   r8, #8, r8
        add    lr, r4, lr, asr #COL_SHIFT
        usat   lr, #8, lr
        strb   r6, [r1], r2          /* lines 3..7 */
        strb   r10,[r1], r2
        strb   r9, [r1], r2
        strb   r8, [r1], r2
        strb   lr, [r1], r2

        sub    r1, r1, r2, lsl #3    /* undo the eight increments of r1 */

        pop    {pc}
endfunc
353 7d42886b Måns Rullgård
354
/*
  Compute 8 IDCT row transforms.
  func = IDCT row->col function
  width = width of columns in bytes

  func is called eight times with r0 = source row and r1 = dest; it must
  leave r0 and r1 unchanged.  Rows are visited at byte offsets
  0, 32, 64, 96, 16, 48, 80, 112 from the initial r0, and r1 advances by
  "width" between calls.
  On exit r0 is back at its initial value and r1 has advanced by 7*width.
  lr is overwritten by the calls.
*/
        .macro idct_rows func width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        sub    r0, r0, #(16*5)       /* from offset 96 back to offset 16 */
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func
        add    r0, r0, #(16*2)
        add    r1, r1, #\width
        bl     \func

        sub    r0, r0, #(16*7)       /* from offset 112 back to the start */
        .endm
385
386
/* void ff_simple_idct_armv6(DCTELEM *data); */
/*
  In-place 8x8 IDCT.
  r0 = data
  The row pass writes its results to a 128-byte temporary on the stack;
  the column pass reads that temporary and writes back to data.
*/
function ff_simple_idct_armv6, export=1
        push   {r4-r11, lr}
        sub    sp, sp, #128          /* temporary for the row-pass output */

        mov    r1, sp
        idct_rows idct_row_armv6, 2  /* data -> temporary; r0 is restored */
        mov    r1, r0                /* dest   = data */
        mov    r0, sp                /* source = temporary */
        idct_rows idct_col_armv6, 2

        add    sp, sp, #128
        pop    {r4-r11, pc}
endfunc
400 7d42886b Måns Rullgård
401
/* void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
/*
  8x8 IDCT of data, added to dest with unsigned 8-bit saturation.
  r0 = dest, r1 = line_size, r2 = data
  Stack layout after the prologue:
    [sp, #0..127]  temporary for the row-pass output
    [sp, #128]     saved dest
    [sp, #132]     saved line_size
*/
function ff_simple_idct_add_armv6, export=1
        push   {r0, r1, r4-r11, lr}  /* r0/r1 are kept for the column pass */
        sub    sp, sp, #128

        mov    r0, r2                /* source = data */
        mov    r1, sp                /* dest   = temporary */
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* source = temporary */
        ldr    r1, [sp, #128]        /* r1 = dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = line_size */
        idct_rows idct_col_add_armv6, 1

        add    sp, sp, #(128+8)      /* drop the temporary and saved r0/r1 */
        pop    {r4-r11, pc}
endfunc
417 7d42886b Måns Rullgård
418
/* void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data); */
/*
  8x8 IDCT of data, stored to dest with unsigned 8-bit saturation.
  r0 = dest, r1 = line_size, r2 = data
  Stack layout after the prologue:
    [sp, #0..127]  temporary for the row-pass output
    [sp, #128]     saved dest
    [sp, #132]     saved line_size
*/
function ff_simple_idct_put_armv6, export=1
        push   {r0, r1, r4-r11, lr}  /* r0/r1 are kept for the column pass */
        sub    sp, sp, #128

        mov    r0, r2                /* source = data */
        mov    r1, sp                /* dest   = temporary */
        idct_rows idct_row_armv6, 2
        mov    r0, sp                /* source = temporary */
        ldr    r1, [sp, #128]        /* r1 = dest */
        ldr    r2, [sp, #(128+4)]    /* r2 = line_size */
        idct_rows idct_col_put_armv6, 1

        add    sp, sp, #(128+8)      /* drop the temporary and saved r0/r1 */
        pop    {r4-r11, pc}
endfunc