ffmpeg / libavcodec / arm / mpegvideo_neon.S @ 451b4b86
History | View | Annotate | Download (3.65 KB)
/*
 * Copyright (c) 2010 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 |
|
21 |
#include "asm.S" |
22 |
#include "asm-offsets.h" |
23 |
|
24 |
/*
 * ff_dct_unquantize_h263_inter_neon
 *
 * Argument set-up stub for ff_dct_unquantize_h263_neon.
 *
 * In:   r0 = base pointer that the BLOCK_LAST_INDEX and
 *            INTER_SCANTAB_RASTER_END offsets are applied to
 *            (presumably the codec context; confirm against the C prototype)
 *       r1 = coefficient block pointer, not touched here
 *       r2 = block number, used as a word index into the table at
 *            BLOCK_LAST_INDEX
 *       r3 = quantiser scale
 * Out:  r0 = qmul  = r3 << 1
 *       r2 = qadd  = (r3 - 1) | 1
 *       r3 = count = (byte read from the INTER_SCANTAB_RASTER_END table at
 *                     the index loaded from the BLOCK_LAST_INDEX table) + 1
 * Clobbers: r12
 *
 * There is no return instruction: execution runs off the end of this
 * function into the code that follows it, which in this file is
 * ff_dct_unquantize_h263_neon. The two must stay adjacent and in this order.
 */
function ff_dct_unquantize_h263_inter_neon, export=1
        add             r12, r0,  #BLOCK_LAST_INDEX
        add             r0,  r0,  #INTER_SCANTAB_RASTER_END
        ldr             r12, [r12, r2, lsl #2]          @ r12 = last index of block r2 (32-bit entries)
        ldrb            r12, [r0,  r12]                 @ r12 = raster end for that index (8-bit entries)
        sub             r2,  r3,  #1
        orr             r2,  r2,  #1                    @ r2 = qadd = (qscale - 1) | 1
        lsl             r0,  r3,  #1                    @ r0 = qmul = 2 * qscale
        add             r3,  r12, #1                    @ r3 = number of coefficients to cover
endfunc
34 |
|
35 |
/*
 * ff_dct_unquantize_h263_neon
 *
 * In-place dequantisation of a run of signed 16-bit coefficients:
 *
 *     x == 0 :  left unchanged
 *     x  < 0 :  x * qmul - qadd
 *     x  > 0 :  x * qmul + qadd
 *
 * The multiply and add are plain 16-bit lane operations, so results wrap
 * modulo 2^16 (no saturation).
 *
 * In:   r0 = qmul
 *       r1 = coefficient pointer; the :128 address hints in the main loop
 *            require 16-byte alignment
 *       r2 = qadd
 *       r3 = number of coefficients that must be covered
 * Clobbers: r0, r1, r3, flags, q0-q3, q8-q11, q13-q15
 *
 * Reached by fall-through from ff_dct_unquantize_h263_inter_neon and by
 * bl from ff_dct_unquantize_h263_intra_neon.
 *
 * Coverage is rounded up: the main loop handles 16 coefficients per pass
 * and the tail handles 4. Coefficients beyond the requested count are
 * therefore also run through the formula; zero ones stay zero.
 *
 * The loop-exit test uses the same threshold (4) as the entry test. With a
 * larger exit threshold, a remainder of 5..8 would drop into the 4-wide tail
 * and leave up to four requested coefficients unprocessed. Taking one more
 * 16-wide pass instead touches no more memory than the entry path already
 * does for a count of 5..8.
 * NOTE(review): whether any caller can produce such a remainder depends on
 * the scan tables, which are not visible in this file.
 */
function ff_dct_unquantize_h263_neon, export=1
        vdup.16         q15, r0                 @ qmul in every lane
        vdup.16         q14, r2                 @ +qadd in every lane
        vneg.s16        q13, q14                @ -qadd in every lane
        cmp             r3,  #4
        mov             r0,  r1                 @ r0 = load pointer, r1 = store pointer
        ble             2f                      @ 4 or fewer: tail only
1:
        vld1.16         {q0},     [r0,:128]!
        vclt.s16        q3,  q0,  #0            @ q3  = lane mask, x < 0
        vld1.16         {q8},     [r0,:128]!
        vceq.s16        q1,  q0,  #0            @ q1  = lane mask, x == 0
        vmul.s16        q2,  q0,  q15
        vclt.s16        q11, q8,  #0
        vmul.s16        q10, q8,  q15
        vbsl            q3,  q13, q14           @ q3  = x < 0 ? -qadd : +qadd
        vbsl            q11, q13, q14
        vadd.s16        q2,  q2,  q3            @ q2  = x * qmul +/- qadd
        vceq.s16        q9,  q8,  #0
        vadd.s16        q10, q10, q11
        vbif            q0,  q2,  q1            @ take the result only in non-zero lanes
        vbif            q8,  q10, q9
        subs            r3,  r3,  #16
        vst1.16         {q0},     [r1,:128]!
        vst1.16         {q8},     [r1,:128]!
        bxle            lr                      @ everything requested is done
        cmp             r3,  #4                 @ same threshold as on entry
        bgt             1b                      @ more than the tail can cover
2:
        @ Tail: 1..4 coefficients left, handled as one 64-bit vector.
        @ d30, d26 and d28 are the low halves of q15, q13 and q14.
        vld1.16         {d0},     [r0,:64]
        vclt.s16        d3,  d0,  #0
        vceq.s16        d1,  d0,  #0
        vmul.s16        d2,  d0,  d30
        vbsl            d3,  d26, d28
        vadd.s16        d2,  d2,  d3
        vbif            d0,  d2,  d1
        vst1.16         {d0},     [r1,:64]
        bx              lr
endfunc
74 |
|
75 |
/*
 * ff_dct_unquantize_h263_intra_neon
 *
 * Intra-block wrapper around ff_dct_unquantize_h263_neon. It works out
 * qmul, qadd and the coefficient count, computes the DC value separately,
 * calls the shared kernel on the whole block and then writes the DC value
 * over the first coefficient.
 *
 * In:   r0 = base pointer that the BLOCK_LAST_INDEX, AC_PRED,
 *            INTER_SCANTAB_RASTER_END, H263_AIC and Y_DC_SCALE offsets are
 *            applied to (presumably the codec context; confirm against the
 *            C prototype)
 *       r1 = pointer to the signed 16-bit coefficient block
 *       r2 = block number
 *       r3 = quantiser scale
 *
 * Count: 64 when the word at AC_PRED is non-zero, otherwise the
 *        raster-end table byte for the last index of block r2, plus 1.
 * DC:    when the word at H263_AIC is non-zero, the first coefficient is
 *        kept as is and qadd is 0. Otherwise it is multiplied (16 x 16 bit,
 *        signed) by the word at Y_DC_SCALE, or by the word 4 bytes after it
 *        when r2 >= 4 (presumably the chroma scale; confirm against the
 *        structure layout), and qadd is (r3 - 1) | 1.
 *
 * r4-r6 and lr are saved on entry and restored on exit (16 bytes of stack).
 */
function ff_dct_unquantize_h263_intra_neon, export=1
        push            {r4-r6,lr}
        ldr             r6,  [r0, #AC_PRED]
        ldrsh           r4,  [r1]                       @ r4 = first coefficient, sign-extended
        add             r12, r0,  #BLOCK_LAST_INDEX
        add             lr,  r0,  #INTER_SCANTAB_RASTER_END
        cmp             r6,  #0
        movne           r12, #63                        @ AC_PRED set: cover all 64 coefficients
        bne             1f
        ldr             r12, [r12, r2, lsl #2]          @ r12 = last index of block r2
        ldrb            r12, [lr, r12]                  @ r12 = raster end for that index
1:
        ldr             r6,  [r0, #H263_AIC]
        mov             r5,  r1                         @ keep the block pointer; the kernel advances r1
        cmp             r6,  #0
        movne           r2,  #0                         @ H263_AIC set: qadd = 0, DC not scaled
        bne             2f
        cmp             r2,  #4
        addge           r0,  r0,  #4                    @ blocks 4 and up use the following scale word
        sub             r2,  r3,  #1
        ldr             r6,  [r0, #Y_DC_SCALE]
        orr             r2,  r2,  #1                    @ r2 = qadd = (qscale - 1) | 1
        smulbb          r4,  r4,  r6                    @ r4 = DC * dc_scale
2:
        lsl             r0,  r3,  #1                    @ r0 = qmul = 2 * qscale
        add             r3,  r12, #1                    @ r3 = number of coefficients to cover
        bl              ff_dct_unquantize_h263_neon
        vmov.16         d0[0], r4
        vst1.16         {d0[0]},  [r5]                  @ replace the first coefficient with the DC value
        pop             {r4-r6,pc}
endfunc