Statistics
| Branch: | Revision:

ffmpeg / libavcodec / arm / jrevdct_arm.S @ 2912e87a

History | View | Annotate | Download (13.6 KB)

1 115329f1 Diego Biurrun
/*
   C-like prototype :
        void ff_j_rev_dct_arm(DCTBLOCK data)

   With DCTBLOCK being a pointer to an array of 64 'signed shorts'

   Copyright (c) 2001 Lionel Ulmer (lionel.ulmer@free.fr / bbrox@bbrox.org)

   Permission is hereby granted, free of charge, to any person obtaining a copy
   of this software and associated documentation files (the "Software"), to deal
   in the Software without restriction, including without limitation the rights
   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   copies of the Software, and to permit persons to whom the Software is
   furnished to do so, subject to the following conditions:

   The above copyright notice and this permission notice shall be included in
   all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
   COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
   IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

*/
27 c130bedc Måns Rullgård
28
#include "asm.S"
29
30 92651f67 Fabrice Bellard
/*
 * Fixed-point multipliers used by the inverse DCT.
 *
 * Each FIX_a_bcdefghij is the real constant a.bcdefghij scaled by 2^13 and
 * rounded (for example 0.541196100 * 8192 ~= 4433).  The FIX_M_ variants
 * hold the negated constant.  FIX_0xFFFF is a 16-bit mask.
 */
#define FIX_0_298631336 2446
#define FIX_0_541196100 4433
#define FIX_0_765366865 6270
#define FIX_1_175875602 9633
#define FIX_1_501321110 12299
#define FIX_2_053119869 16819
#define FIX_3_072711026 25172
#define FIX_M_0_390180644 -3196
#define FIX_M_0_899976223 -7373
#define FIX_M_1_847759065 -15137
#define FIX_M_1_961570560 -16069
#define FIX_M_2_562915447 -20995
#define FIX_0xFFFF 0xFFFF

/*
 * Byte offsets of the constants above inside const_array.  Every entry of
 * the table is one 32-bit word, so the offsets advance by 4 and must stay
 * in the same order as the .word list of const_array.
 */
#define FIX_0_298631336_ID      0
#define FIX_0_541196100_ID      4
#define FIX_0_765366865_ID      8
#define FIX_1_175875602_ID     12
#define FIX_1_501321110_ID     16
#define FIX_2_053119869_ID     20
#define FIX_3_072711026_ID     24
#define FIX_M_0_390180644_ID   28
#define FIX_M_0_899976223_ID   32
#define FIX_M_1_847759065_ID   36
#define FIX_M_1_961570560_ID   40
#define FIX_M_2_562915447_ID   44
#define FIX_0xFFFF_ID          48

        .text
        .align
59 115329f1 Diego Biurrun
60 2ad4c241 Måns Rullgård
@-----------------------------------------------------------------------
@ void ff_j_rev_dct_arm(DCTBLOCK data)
@
@ In-place two-pass integer inverse DCT on a block of 64 signed 16-bit
@ coefficients (8 rows of 8 elements, 16 bytes per row).
@
@ In:    r0 = pointer to the block (read as 32-bit words in the row pass,
@             so it has to be 4-byte aligned)
@ Out:   the block is overwritten with the result; nothing is returned
@ Saved: r4 - r12 and lr are pushed on entry and restored on exit
@ Used:  r0 - r3 and the flags are modified
@ Stack: 40 bytes of saved registers + 4 bytes for the block pointer,
@        plus 16 bytes pushed temporarily for tmp10..tmp13
@
@ Register roles inside both loops:
@        lr  = pointer to the current row / column
@        r12 = remaining rows / columns (counts down from 8)
@        r11 = base of const_array, indexed with the FIX_*_ID offsets
@
@ Pass 1 (row_loop) works on each row and descales by 11 bits
@ (CONST_BITS-PASS1_BITS); pass 2 (column_loop) works on each column and
@ descales by 18 bits (CONST_BITS+PASS1_BITS+3).  Both round by adding
@ half of the divisor before the arithmetic shift.
@
@ Within a row the inputs are read in the order
@ d0, d2, d4, d6, d1, d3, d5, d7 (byte offsets 0, 2, ... 14) while the
@ results are written in natural order.
@-----------------------------------------------------------------------
function ff_j_rev_dct_arm, export=1
        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs

        sub     sp, sp, #4              @ reserve some space on the stack
        str     r0, [ sp ]              @ save the DCT pointer to the stack

        mov     lr, r0                  @ lr = pointer to the current row
        mov     r12, #8                 @ r12 = row-counter
        adr     r11, const_array        @ r11 = base pointer to the constants array
row_loop:
        ldrsh   r0, [lr, # 0]           @ r0 = 'd0'
        ldrsh   r2, [lr, # 2]           @ r2 = 'd2'

        @ Fast paths for sparse rows: the row is also fetched as four
        @ 32-bit words so that it can be tested for zero with a few ORs
        @ (this works as the DCTELEMS are always 4-byte aligned)
        ldr     r5, [lr, # 0]
        ldr     r6, [lr, # 4]
        ldr     r3, [lr, # 8]
        ldr     r4, [lr, #12]
        orr     r3, r3, r4
        orr     r3, r3, r6              @ r3 = OR of elements 2..7
        orrs    r5, r3, r5              @ Z set when the whole row is zero
        beq     end_of_row_loop         @ nothing to be done as ALL of them are '0'
        orrs    r3, r3, r2              @ Z set when only element 0 is non-zero
        beq     empty_row

        ldrsh   r1, [lr, # 8]           @ r1 = 'd1'
        ldrsh   r4, [lr, # 4]           @ r4 = 'd4'
        ldrsh   r6, [lr, # 6]           @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r7, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r7, r3, r7              @ r7 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r7          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r7          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r3, r6, r3, lsl #13     @ r3 = tmp12

        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11

        @ Odd part
        ldrsh   r3, [lr, #10]           @ r3 = 'd3'
        ldrsh   r5, [lr, #12]           @ r5 = 'd5'
        ldrsh   r7, [lr, #14]           @ r7 = 'd7'

        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
        add     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 0]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
        sub     r8, r0, r1
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #14]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
        add     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 2]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
        sub     r8, r6, r3
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #12]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
        add     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 4]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
        sub     r8, r4, r5
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, #10]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
        add     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 6]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
        sub     r8, r2, r7
        add     r8, r8, #(1<<10)
        mov     r8, r8, asr #11
        strh    r8, [lr, # 8]

        @ End of row loop
        add     lr, lr, #16             @ next row (8 elements of 2 bytes)
        subs    r12, r12, #1
        bne     row_loop
        beq     start_column_loop       @ always taken here: Z is still set

empty_row:
        @ Only element 0 of the row is non-zero: every output of the row
        @ is d0 << 2, so build that 16-bit value once, duplicate it into
        @ both halves of a word and store four words.
        ldr     r1, [r11, #FIX_0xFFFF_ID]
        mov     r0, r0, lsl #2
        and     r0, r0, r1              @ keep the low 16 bits only
        add     r0, r0, r0, lsl #16
        str     r0, [lr, # 0]
        str     r0, [lr, # 4]
        str     r0, [lr, # 8]
        str     r0, [lr, #12]

end_of_row_loop:
        @ End of loop (also the target for rows that are entirely zero)
        add     lr, lr, #16
        subs    r12, r12, #1
        bne     row_loop

start_column_loop:
        @ Start of column loop
        ldr     lr, [ sp ]              @ lr = block pointer saved on entry
        mov     r12, #8                 @ r12 = column-counter
column_loop:
        @ Offsets are written as (2*row)*8 bytes, i.e. 16 bytes per row
        ldrsh   r0, [lr, #( 0*8)]       @ r0 = 'd0'
        ldrsh   r2, [lr, #( 4*8)]       @ r2 = 'd2'
        ldrsh   r4, [lr, #( 8*8)]       @ r4 = 'd4'
        ldrsh   r6, [lr, #(12*8)]       @ r6 = 'd6'

        @ Even part
        ldr     r3, [r11, #FIX_0_541196100_ID]
        add     r1, r2, r6
        ldr     r5, [r11, #FIX_M_1_847759065_ID]
        mul     r1, r3, r1              @ r1 = z1
        ldr     r3, [r11, #FIX_0_765366865_ID]
        mla     r6, r5, r6, r1          @ r6 = tmp2
        add     r5, r0, r4              @ r5 = tmp0
        mla     r2, r3, r2, r1          @ r2 = tmp3
        sub     r3, r0, r4              @ r3 = tmp1

        add     r0, r2, r5, lsl #13     @ r0 = tmp10
        rsb     r2, r2, r5, lsl #13     @ r2 = tmp13
        add     r4, r6, r3, lsl #13     @ r4 = tmp11
        rsb     r6, r6, r3, lsl #13     @ r6 = tmp12

        ldrsh   r1, [lr, #( 2*8)]       @ r1 = 'd1'
        ldrsh   r3, [lr, #( 6*8)]       @ r3 = 'd3'
        ldrsh   r5, [lr, #(10*8)]       @ r5 = 'd5'
        ldrsh   r7, [lr, #(14*8)]       @ r7 = 'd7'

        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
        orr     r9, r1, r3
        orr     r10, r5, r7
        orrs    r10, r9, r10
        beq     empty_odd_column

        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12

        @ Odd part
        add     r0, r3, r5              @ r0 = 'z2'
        add     r2, r1, r7              @ r2 = 'z1'
        add     r4, r3, r7              @ r4 = 'z3'
        add     r6, r1, r5              @ r6 = 'z4'
        ldr     r9, [r11, #FIX_1_175875602_ID]
        add     r8, r4, r6              @ r8 = z3 + z4
        ldr     r10, [r11, #FIX_M_0_899976223_ID]
        mul     r8, r9, r8              @ r8 = 'z5'
        ldr     r9, [r11, #FIX_M_2_562915447_ID]
        mul     r2, r10, r2             @ r2 = 'z1'
        ldr     r10, [r11, #FIX_M_1_961570560_ID]
        mul     r0, r9, r0              @ r0 = 'z2'
        ldr     r9, [r11, #FIX_M_0_390180644_ID]
        mla     r4, r10, r4, r8         @ r4 = 'z3'
        ldr     r10, [r11, #FIX_0_298631336_ID]
        mla     r6, r9, r6, r8          @ r6 = 'z4'
        ldr     r9, [r11, #FIX_2_053119869_ID]
        mla     r7, r10, r7, r2         @ r7 = tmp0 + z1
        ldr     r10, [r11, #FIX_3_072711026_ID]
        mla     r5, r9, r5, r0          @ r5 = tmp1 + z2
        ldr     r9, [r11, #FIX_1_501321110_ID]
        mla     r3, r10, r3, r0         @ r3 = tmp2 + z2
        add     r7, r7, r4              @ r7 = tmp0
        mla     r1, r9, r1, r2          @ r1 = tmp3 + z1
        add     r5, r5, r6              @ r5 = tmp1
        add     r3, r3, r4              @ r3 = tmp2
        add     r1, r1, r6              @ r1 = tmp3

        ldmia   sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
                                        @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        add     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 0*8)]

        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        sub     r8, r0, r1
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        add     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 2*8)]

        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        sub     r8, r4, r3
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        add     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 4*8)]

        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        sub     r8, r6, r5
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        add     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 6*8)]

        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        sub     r8, r2, r7
        add     r8, r8, #(1<<17)
        mov     r8, r8, asr #18
        strh    r8, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2              @ next column (one 16-bit element to the right)
        subs    r12, r12, #1
        bne     column_loop
        beq     the_end                 @ always taken here: Z is still set

empty_odd_column:
        @ d1, d3, d5 and d7 are all zero, so the odd part (tmp0..tmp3)
        @ vanishes and each sum / difference pair yields the same value:
        @ descale every even-part term once and store it twice.

        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
        add     r0, r0, #(1<<17)
        mov     r0, r0, asr #18
        strh    r0, [lr, #( 0*8)]
        strh    r0, [lr, #(14*8)]

        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
        add     r4, r4, #(1<<17)
        mov     r4, r4, asr #18
        strh    r4, [lr, #( 2*8)]
        strh    r4, [lr, #(12*8)]

        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
        add     r6, r6, #(1<<17)
        mov     r6, r6, asr #18
        strh    r6, [lr, #( 4*8)]
        strh    r6, [lr, #(10*8)]

        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
        add     r2, r2, #(1<<17)
        mov     r2, r2, asr #18
        strh    r2, [lr, #( 6*8)]
        strh    r2, [lr, #( 8*8)]

        @ End of column loop
        add     lr, lr, #2
        subs    r12, r12, #1
        bne     column_loop

the_end:
        @ The end....
        add     sp, sp, #4              @ drop the saved block pointer
        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
373 92651f67 Fabrice Bellard
374
@ Table of the fixed-point multipliers, one 32-bit word each.  The code
@ loads them with [r11, FIX_*_ID] where r11 = const_array, so the order
@ of the words below must match the FIX_*_ID byte offsets.
@
@ The alignment directive comes before the label so that, should any
@ padding ever be emitted, the label still names the first word.
        .align
const_array:
        .word FIX_0_298631336
        .word FIX_0_541196100
        .word FIX_0_765366865
        .word FIX_1_175875602
        .word FIX_1_501321110
        .word FIX_2_053119869
        .word FIX_3_072711026
        .word FIX_M_0_390180644
        .word FIX_M_0_899976223
        .word FIX_M_1_847759065
        .word FIX_M_1_961570560
        .word FIX_M_2_562915447
        .word FIX_0xFFFF