Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / jrevdct_arm.S @ a2fc0f6a

History | View | Annotate | Download (13.6 KB)

1
/*
2
   C-like prototype :
3
        void j_rev_dct_ARM(DCTBLOCK data)
4

    
5
   With DCTBLOCK being a pointer to an array of 64 'signed shorts'
6

    
7
   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)
8

    
9
   Permission is hereby granted, free of charge, to any person obtaining a copy
10
   of this software and associated documentation files (the "Software"), to deal
11
   in the Software without restriction, including without limitation the rights
12
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
   copies of the Software, and to permit persons to whom the Software is
14
   furnished to do so, subject to the following conditions:
15

    
16
   The above copyright notice and this permission notice shall be included in
17
   all copies or substantial portions of the Software.
18

    
19
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
22
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25

    
26
*/
27

    
28
#include "asm.S"
29

    
30
/*
 * Fixed-point multipliers. Each FIX_x_yyyyyyyyy is the decimal value
 * x.yyyyyyyyy multiplied by 2^13 (8192) and rounded to an integer; the
 * FIX_M_ variants hold the negated value. FIX_0xFFFF is a 16-bit mask
 * used by the empty-row shortcut.
 */
#define FIX_0_298631336 2446
31
#define FIX_0_541196100 4433
32
#define FIX_0_765366865 6270
33
#define FIX_1_175875602 9633
34
#define FIX_1_501321110 12299
35
#define FIX_2_053119869 16819
36
#define FIX_3_072711026 25172
37
#define FIX_M_0_390180644 -3196
38
#define FIX_M_0_899976223 -7373
39
#define FIX_M_1_847759065 -15137
40
#define FIX_M_1_961570560 -16069
41
#define FIX_M_2_562915447 -20995
42
#define FIX_0xFFFF 0xFFFF
43

    
44
/*
 * Byte offsets of the matching 32-bit entries inside const_array
 * (4 bytes per .word, same order as the table). They are used as the
 * immediate offset of ldr instructions based on r11.
 */
#define FIX_0_298631336_ID      0
45
#define FIX_0_541196100_ID      4
46
#define FIX_0_765366865_ID      8
47
#define FIX_1_175875602_ID     12
48
#define FIX_1_501321110_ID     16
49
#define FIX_2_053119869_ID     20
50
#define FIX_3_072711026_ID     24
51
#define FIX_M_0_390180644_ID   28
52
#define FIX_M_0_899976223_ID   32
53
#define FIX_M_1_847759065_ID   36
54
#define FIX_M_1_961570560_ID   40
55
#define FIX_M_2_562915447_ID   44
56
#define FIX_0xFFFF_ID          48
57
        .text
58
        .align
59

    
60
@ void j_rev_dct_ARM(DCTBLOCK data)
@
@ In:    r0 = pointer to the block of 64 signed 16-bit coefficients,
@             transformed in place (8 rows of 16 bytes).
@ Out:   nothing in registers; the block is overwritten.
@ Stack: 10 registers pushed (r4-r12, lr) plus one 4-byte slot that keeps
@        the block pointer for the column pass; the_end releases both.
@ Roles set up here: lr = current row, r12 = row counter,
@        r11 = base address of const_array.
function j_rev_dct_ARM, export=1
61
        stmdb   sp!, { r4 - r12, lr }   @ save r4-r12 and the return address
62

    
63
        sub sp, sp, #4                  @ reserve some space on the stack
64
        str r0, [ sp ]                  @ save the DCT pointer to the stack
65

    
66
        mov lr, r0                      @ lr = pointer to the current row
67
        mov r12, #8                     @ r12 = row-counter
68
        adr r11, const_array            @ r11 = base pointer to the constants array
                                        @ (adr lets the assembler compute the
                                        @ PC-relative offset instead of a
                                        @ hand-written pc + label - . - 8)
69
@ Pass 1 (rows): transform the row at lr in place.
@ A row is 8 signed halfwords (16 bytes). Loop state: lr = current row,
@ r12 = rows left, r11 = constants base. r0-r10 are scratch.
@ Two shortcuts are taken before the full computation:
@   - all 8 halfwords zero        -> end_of_row_loop (row left untouched)
@   - only the first one non-zero -> empty_row
@ Otherwise the even part (tmp10..tmp13) is computed and parked on the
@ stack while the odd part (tmp0..tmp3) is computed, then the eight
@ results are rounded, shifted right by 11 and stored back.
row_loop:
70
        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
71
        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
72

    
73
        @ Optimization for rows that have all items except the first set to 0
74
        @ (this works as the DCTELEMS are always 4-byte aligned)
75
        ldr r5, [lr, # 0]               @ r5 = halfwords 0-1 read as one word
76
        ldr r6, [lr, # 4]               @ r6 = halfwords 2-3
77
        ldr r3, [lr, # 8]               @ r3 = halfwords 4-5
78
        ldr r4, [lr, #12]               @ r4 = halfwords 6-7
79
        orr r3, r3, r4
80
        orr r3, r3, r6                  @ r3 = OR of halfwords 2-7
81
        orrs r5, r3, r5                 @ Z set when the whole row is zero
82
        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
83
        orrs r3, r3, r2                 @ Z set when halfwords 1-7 are all zero
84
        beq empty_row
85

    
86
        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
87
        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
88
        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
89

    
        @ Even part: z1 = (d2 + d6) * FIX_0_541196100, then
        @ tmp2 = z1 + d6 * FIX_M_1_847759065, tmp3 = z1 + d2 * FIX_0_765366865
90
        ldr r3, [r11, #FIX_0_541196100_ID]
91
        add r7, r2, r6
92
        ldr r5, [r11, #FIX_M_1_847759065_ID]
93
        mul r7, r3, r7                      @ r7 = z1
94
        ldr r3, [r11, #FIX_0_765366865_ID]
95
        mla r6, r5, r6, r7                  @ r6 = tmp2
96
        add r5, r0, r4                      @ r5 = tmp0
97
        mla r2, r3, r2, r7                  @ r2 = tmp3
98
        sub r3, r0, r4                      @ r3 = tmp1
99

    
        @ tmp0 and tmp1 are scaled by 2^13 (lsl #13) to match the products
100
        add r0, r2, r5, lsl #13             @ r0 = tmp10
101
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
102
        add r4, r6, r3, lsl #13             @ r4 = tmp11
103
        rsb r3, r6, r3, lsl #13             @ r3 = tmp12
104

    
105
        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
106

    
107
        ldrsh r3, [lr, #10]             @ r3 = 'd3'
108
        ldrsh r5, [lr, #12]             @ r5 = 'd5'
109
        ldrsh r7, [lr, #14]             @ r7 = 'd7'
110

    
        @ Odd part; constant loads are interleaved with the multiplies
111
        add r0, r3, r5                        @ r0 = 'z2'
112
        add r2, r1, r7                  @ r2 = 'z1'
113
        add r4, r3, r7                  @ r4 = 'z3'
114
        add r6, r1, r5                  @ r6 = 'z4'
115
        ldr r9, [r11, #FIX_1_175875602_ID]
116
        add r8, r4, r6                  @ r8 = z3 + z4
117
        ldr r10, [r11, #FIX_M_0_899976223_ID]
118
        mul r8, r9, r8                  @ r8 = 'z5'
119
        ldr r9, [r11, #FIX_M_2_562915447_ID]
120
        mul r2, r10, r2                 @ r2 = 'z1'
121
        ldr r10, [r11, #FIX_M_1_961570560_ID]
122
        mul r0, r9, r0                  @ r0 = 'z2'
123
        ldr r9, [r11, #FIX_M_0_390180644_ID]
124
        mla r4, r10, r4, r8             @ r4 = 'z3'
125
        ldr r10, [r11, #FIX_0_298631336_ID]
126
        mla r6, r9, r6, r8              @ r6 = 'z4'
127
        ldr r9, [r11, #FIX_2_053119869_ID]
128
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
129
        ldr r10, [r11, #FIX_3_072711026_ID]
130
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
131
        ldr r9, [r11, #FIX_1_501321110_ID]
132
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
133
        add r7, r7, r4                  @ r7 = tmp0
134
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
135
        add r5,        r5, r6                  @ r5 = tmp1
136
        add r3, r3, r4                  @ r3 = tmp2
137
        add r1, r1, r6                  @ r1 = tmp3
138

    
        @ Reload the even part; the values saved from r3 and r4 come back
        @ in r4 and r6 respectively
139
        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
140
                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
141

    
        @ Each result below is rounded by adding 1<<10 and then shifted
        @ right arithmetically by 11 before the low halfword is stored
142
        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
143
        add r8, r0, r1
144
        add r8, r8, #(1<<10)
145
        mov r8, r8, asr #11
146
        strh r8, [lr, # 0]
147

    
148
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
149
        sub r8, r0, r1
150
        add r8, r8, #(1<<10)
151
        mov r8, r8, asr #11
152
        strh r8, [lr, #14]
153

    
154
        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
155
        add r8, r6, r3
156
        add r8, r8, #(1<<10)
157
        mov r8, r8, asr #11
158
        strh r8, [lr, # 2]
159

    
160
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
161
        sub r8, r6, r3
162
        add r8, r8, #(1<<10)
163
        mov r8, r8, asr #11
164
        strh r8, [lr, #12]
165

    
166
        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
167
        add r8, r4, r5
168
        add r8, r8, #(1<<10)
169
        mov r8, r8, asr #11
170
        strh r8, [lr, # 4]
171

    
172
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
173
        sub r8, r4, r5
174
        add r8, r8, #(1<<10)
175
        mov r8, r8, asr #11
176
        strh r8, [lr, #10]
177

    
178
        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
179
        add r8, r2, r7
180
        add r8, r8, #(1<<10)
181
        mov r8, r8, asr #11
182
        strh r8, [lr, # 6]
183

    
184
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
185
        sub r8, r2, r7
186
        add r8, r8, #(1<<10)
187
        mov r8, r8, asr #11
188
        strh r8, [lr, # 8]
189

    
190
        @ End of row loop
191
        add lr, lr, #16                 @ advance to the next row (16 bytes)
192
        subs r12, r12, #1
193
        bne row_loop
194
        beq start_column_loop           @ always taken here: skips the shortcut code below
195

    
196
@ Row shortcut: only the first halfword of the row is non-zero.
@ r0 still holds d0 from the top of row_loop. Every element of the row is
@ set to the low 16 bits of (d0 << 2): the value is duplicated into both
@ halves of r0 and written with four word stores.
empty_row:
197
        ldr r1, [r11, #FIX_0xFFFF_ID]   @ r1 = 16-bit mask
198
        mov r0, r0, lsl #2
199
        and r0, r0, r1                  @ keep the low halfword only
200
        add r0, r0, r0, lsl #16         @ copy it into the high halfword too
201
        str r0, [lr, # 0]
202
        str r0, [lr, # 4]
203
        str r0, [lr, # 8]
204
        str r0, [lr, #12]
205

    
@ Shared loop tail for the shortcut paths; an all-zero row arrives here
@ directly and is left unmodified. Falls through to the column pass once
@ r12 reaches zero.
206
end_of_row_loop:
207
        @ End of loop
208
        add lr, lr, #16                 @ advance to the next row (16 bytes)
209
        subs r12, r12, #1
210
        bne row_loop
211

    
212
@ Pass 2 (columns): transform each of the 8 columns in place.
@ lr is reloaded from the stack slot written at function entry and then
@ advanced by 2 bytes per column; consecutive elements of a column are
@ 16 bytes apart, hence the (n*8) offsets. r12 counts the columns left,
@ r11 is still the constants base, r0-r10 are scratch.
@ Results are rounded by adding 1<<17 and shifted right by 18.
start_column_loop:
213
        @ Start of column loop
214
        ldr lr, [ sp ]                  @ lr = block pointer saved at entry
215
        mov r12, #8                     @ r12 = column counter
216
column_loop:
217
        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
218
        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
219
        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
220
        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
221

    
        @ Even part, same arithmetic as in the row pass
222
        ldr r3, [r11, #FIX_0_541196100_ID]
223
        add r1, r2, r6
224
        ldr r5, [r11, #FIX_M_1_847759065_ID]
225
        mul r1, r3, r1                      @ r1 = z1
226
        ldr r3, [r11, #FIX_0_765366865_ID]
227
        mla r6, r5, r6, r1                  @ r6 = tmp2
228
        add r5, r0, r4                      @ r5 = tmp0
229
        mla r2, r3, r2, r1                  @ r2 = tmp3
230
        sub r3, r0, r4                      @ r3 = tmp1
231

    
232
        add r0, r2, r5, lsl #13             @ r0 = tmp10
233
        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
234
        add r4, r6, r3, lsl #13             @ r4 = tmp11
235
        rsb r6, r6, r3, lsl #13             @ r6 = tmp12
236

    
237
        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
238
        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
239
        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
240
        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
241

    
242
        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
243
        orr r9, r1, r3
244
        orr r10, r5, r7
245
        orrs r10, r9, r10               @ Z set when d1, d3, d5 and d7 are all zero
246
        beq empty_odd_column
247

    
248
        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
249

    
        @ Odd part; constant loads are interleaved with the multiplies
250
        add r0, r3, r5                  @ r0 = 'z2'
251
        add r2, r1, r7                  @ r2 = 'z1'
252
        add r4, r3, r7                  @ r4 = 'z3'
253
        add r6, r1, r5                  @ r6 = 'z4'
254
        ldr r9, [r11, #FIX_1_175875602_ID]
255
        add r8, r4, r6                  @ r8 = z3 + z4
256
        ldr r10, [r11, #FIX_M_0_899976223_ID]
257
        mul r8, r9, r8                  @ r8 = 'z5'
258
        ldr r9, [r11, #FIX_M_2_562915447_ID]
259
        mul r2, r10, r2                 @ r2 = 'z1'
260
        ldr r10, [r11, #FIX_M_1_961570560_ID]
261
        mul r0, r9, r0                  @ r0 = 'z2'
262
        ldr r9, [r11, #FIX_M_0_390180644_ID]
263
        mla r4, r10, r4, r8             @ r4 = 'z3'
264
        ldr r10, [r11, #FIX_0_298631336_ID]
265
        mla r6, r9, r6, r8              @ r6 = 'z4'
266
        ldr r9, [r11, #FIX_2_053119869_ID]
267
        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
268
        ldr r10, [r11, #FIX_3_072711026_ID]
269
        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
270
        ldr r9, [r11, #FIX_1_501321110_ID]
271
        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
272
        add r7, r7, r4                  @ r7 = tmp0
273
        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
274
        add r5,        r5, r6                  @ r5 = tmp1
275
        add r3, r3, r4                  @ r3 = tmp2
276
        add r1, r1, r6                  @ r1 = tmp3
277

    
278
        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
279
                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
280

    
281
        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
282
        add r8, r0, r1
283
        add r8, r8, #(1<<17)
284
        mov r8, r8, asr #18
285
        strh r8, [lr, #( 0*8)]
286

    
287
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
288
        sub r8, r0, r1
289
        add r8, r8, #(1<<17)
290
        mov r8, r8, asr #18
291
        strh r8, [lr, #(14*8)]
292

    
293
        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
294
        add r8, r4, r3
295
        add r8, r8, #(1<<17)
296
        mov r8, r8, asr #18
297
        strh r8, [lr, #( 2*8)]
298

    
299
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
300
        sub r8, r4, r3
301
        add r8, r8, #(1<<17)
302
        mov r8, r8, asr #18
303
        strh r8, [lr, #(12*8)]
304

    
305
        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
306
        add r8, r6, r5
307
        add r8, r8, #(1<<17)
308
        mov r8, r8, asr #18
309
        strh r8, [lr, #( 4*8)]
310

    
311
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
312
        sub r8, r6, r5
313
        add r8, r8, #(1<<17)
314
        mov r8, r8, asr #18
315
        strh r8, [lr, #(10*8)]
316

    
317
        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
318
        add r8, r2, r7
319
        add r8, r8, #(1<<17)
320
        mov r8, r8, asr #18
321
        strh r8, [lr, #( 6*8)]
322

    
323
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
324
        sub r8, r2, r7
325
        add r8, r8, #(1<<17)
326
        mov r8, r8, asr #18
327
        strh r8, [lr, #( 8*8)]
328

    
329
        @ End of column loop
330
        add lr, lr, #2                  @ advance to the next column (one halfword)
331
        subs r12, r12, #1
332
        bne column_loop
333
        beq the_end                     @ always taken here: skips the shortcut code below
334

    
335
@ Column shortcut: d1, d3, d5 and d7 are all zero, so the odd part
@ (tmp0..tmp3) would be zero and each sum/difference pair gives the same
@ value. Only the even part is rounded, shifted and stored twice.
@ On entry: r0 = tmp10, r4 = tmp11, r6 = tmp12, r2 = tmp13.
@ Falls through to the_end once r12 reaches zero.
empty_odd_column:
336
        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
337
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
338
        add r0, r0, #(1<<17)
339
        mov r0, r0, asr #18
340
        strh r0, [lr, #( 0*8)]
341
        strh r0, [lr, #(14*8)]
342

    
343
        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
344
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
345
        add r4, r4, #(1<<17)
346
        mov r4, r4, asr #18
347
        strh r4, [lr, #( 2*8)]
348
        strh r4, [lr, #(12*8)]
349

    
350
        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
351
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
352
        add r6, r6, #(1<<17)
353
        mov r6, r6, asr #18
354
        strh r6, [lr, #( 4*8)]
355
        strh r6, [lr, #(10*8)]
356

    
357
        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
358
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
359
        add r2, r2, #(1<<17)
360
        mov r2, r2, asr #18
361
        strh r2, [lr, #( 6*8)]
362
        strh r2, [lr, #( 8*8)]
363

    
364
        @ End of column loop
365
        add lr, lr, #2                  @ advance to the next column (one halfword)
366
        subs r12, r12, #1
367
        bne column_loop
368

    
369
@ Function exit: undo the entry sequence in reverse order.
the_end:
370
        @ The end....
371
        add sp, sp, #4                  @ drop the slot that held the block pointer
372
        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
                                        @ (the saved lr value is loaded into pc)
373

    
374
@ Constant table read through r11. Entries are 32-bit words; their order
@ must match the *_ID byte offsets defined at the top of the file.
@ NOTE(review): the label is placed before .align, so it would point at
@ padding if the directive ever inserted any. Presumably none is inserted
@ because the table directly follows 4-byte instructions - confirm before
@ moving this table.
const_array:
375
        .align
376
        .word FIX_0_298631336
377
        .word FIX_0_541196100
378
        .word FIX_0_765366865
379
        .word FIX_1_175875602
380
        .word FIX_1_501321110
381
        .word FIX_2_053119869
382
        .word FIX_3_072711026
383
        .word FIX_M_0_390180644
384
        .word FIX_M_0_899976223
385
        .word FIX_M_1_847759065
386
        .word FIX_M_1_961570560
387
        .word FIX_M_2_562915447
388
        .word FIX_0xFFFF