/*
 * Source: ffmpeg / libavcodec / arm / vp56dsp_neon.S @ 2912e87a
 */

/*
 * Copyright (c) 2010 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "asm.S"
/*
 * vp6_edge_filter: filter core shared by the vertical and horizontal
 * edge filters.  Processes 12 pixels at once: pixels 0-7 in full q
 * registers, pixels 8-11 in the d-register instructions interleaved
 * with them.
 *
 * In:
 *   r2  = t (threshold), broadcast to every 16-bit lane
 *   q8  = p[-2*s]   (d16 = pixels 0-7, low 4 bytes of d17 = pixels 8-11)
 *   q9  = p[-s]     (d18 / d19, same layout)
 *   q10 = p[0]      (d20 / d21, same layout)
 *   q11 = p[s]      (d22 / d23, same layout)
 *
 * Out:
 *   d18, low 4 bytes of d19 = saturate_u8(p[-s] + v)
 *   d20, low 4 bytes of d21 = saturate_u8(p[0]  - v)
 *   The upper 4 bytes of d19 and d21 do not hold filtered results and
 *   must not be stored.
 *
 * Per pixel, with sgn = v >> 15 (0 or -1) and V = abs(v):
 *   v = (3*(p[0]-p[-s]) + p[-2*s]-p[s]) >> 3, rounded (vrshr)
 *   if the unsigned compare (V-t-1 >= t-1) is false:
 *       v = ((2*t - V) + sgn) ^ sgn       i.e. 2*t - V with the sign of v
 *
 * Clobbers: q0-q3, q8, q12-q15.  q11 is only read.
 */
.macro  vp6_edge_filter
        vdup.16         q3,  r2                 @ t
        vmov.i16        q13, #1
        vsubl.u8        q0,  d20, d18           @ p[   0] - p[-s]
        vsubl.u8        q1,  d16, d22           @ p[-2*s] - p[ s]
        vsubl.u8        q14, d21, d19           @ same differences, pixels 8-11 in d28
        vsubl.u8        q15, d17, d23           @ ... and d30
        vadd.i16        q2,  q0,  q0            @ 2*(p[0]-p[-s])
        vadd.i16        d29, d28, d28
        vadd.i16        q0,  q0,  q1            @    p[0]-p[-s]  + p[-2*s]-p[s]
        vadd.i16        d28, d28, d30
        vadd.i16        q0,  q0,  q2            @ 3*(p[0]-p[-s]) + p[-2*s]-p[s]
        vadd.i16        d28, d28, d29
        vrshr.s16       q0,  q0,  #3            @ v
        vrshr.s16       d28, d28, #3
        vsub.i16        q8,  q3,  q13           @ t-1 (p[-2*s] in q8 is no longer needed)
        vabs.s16        q1,  q0                 @ V
        vshr.s16        q2,  q0,  #15           @ sgn: 0 or -1 per lane
        vabs.s16        d30, d28
        vshr.s16        d29, d28, #15
        vsub.i16        q12, q1,  q3            @ V-t
        vsub.i16        d31, d30, d6
        vsub.i16        q12, q12, q13           @ V-t-1
        vsub.i16        d31, d31, d26
        vcge.u16        q12, q12, q8            @ mask: V-t-1 >= t-1 (unsigned)
        vcge.u16        d31, d31, d16
        vadd.i16        q13, q3,  q3            @ 2*t
        vadd.i16        d16, d6,  d6
        vsub.i16        q13, q13, q1            @ 2*t - V
        vsub.i16        d16, d16, d30
        vadd.i16        q13, q13, q2            @ += sgn
        vadd.i16        d16, d16, d29
        veor            q13, q13, q2            @ ^= sgn
        veor            d16, d16, d29
        vbif            q0,  q13, q12           @ keep v where mask set, else adjusted value
        vbif            d28, d16, d31
        vmovl.u8        q1,  d20                @ widen p[0]
        vmovl.u8        q15, d21
        vaddw.u8        q2,  q0,  d18           @ p[-s] + v
        vaddw.u8        q3,  q14, d19
        vsub.i16        q1,  q1,  q0            @ p[0] - v
        vsub.i16        d30, d30, d28
        vqmovun.s16     d18, q2                 @ saturate back to unsigned bytes
        vqmovun.s16     d19, q3
        vqmovun.s16     d20, q1
        vqmovun.s16     d21, q15
.endm
/*
 * ff_vp6_edge_filter_ver_neon
 *
 * Applies vp6_edge_filter to 12 horizontally adjacent pixels, using the
 * four rows at -2, -1, 0 and +1 line strides from r0.  Rows -1 and 0
 * are rewritten with the filtered values.
 *
 * In:
 *   r0 = address of pixel 0 in row 0
 *   r1 = line stride in bytes
 *   r2 = t (threshold, consumed by vp6_edge_filter)
 *   Presumably called from C as (uint8_t *yuv, int stride, int t);
 *   the prototype is not visible here, confirm against the caller.
 *
 * Memory: reads 16 bytes from each of the four rows although only the
 * first 12 are filtered; writes exactly 12 bytes to rows -1 and 0.
 *
 * Clobbers: r0, r1, plus the NEON registers listed for vp6_edge_filter
 * and q9-q11.
 */
function ff_vp6_edge_filter_ver_neon, export=1
        sub             r0,  r0,  r1,  lsl #1   @ r0 -> row -2
        vld1.8          {q8},     [r0], r1      @ p[-2*s]
        vld1.8          {q9},     [r0], r1      @ p[-s]
        vld1.8          {q10},    [r0], r1      @ p[0]
        vld1.8          {q11},    [r0]          @ p[s]; r0 stays at row +1
        vp6_edge_filter
        sub             r0,  r0,  r1,  lsl #1   @ r0 -> row -1
        sub             r1,  r1,  #8            @ stride less the 8 bytes advanced by writeback
        vst1.8          {d18},    [r0]!         @ row -1, pixels 0-7
        vst1.32         {d19[0]}, [r0], r1      @ row -1, pixels 8-11; r0 -> row 0
        vst1.8          {d20},    [r0]!         @ row  0, pixels 0-7
        vst1.32         {d21[0]}, [r0]          @ row  0, pixels 8-11
        bx              lr
endfunc
86

    
87
/*
 * ff_vp6_edge_filter_hor_neon
 *
 * Applies vp6_edge_filter to 12 vertically adjacent pixels, using the
 * four bytes at offsets -2, -1, 0 and +1 from r0 in each of 12 rows.
 * The bytes at offsets -1 and 0 of every row are rewritten.
 *
 * In:
 *   r0 = address of pixel 0 in the first row
 *   r1 = line stride in bytes
 *   r2 = t (threshold, consumed by vp6_edge_filter)
 *   Presumably called from C as (uint8_t *yuv, int stride, int t);
 *   the prototype is not visible here, confirm against the caller.
 *
 * Layout: rows 0-3 go to 32-bit lane 0 of d16/d18/d20/d22, rows 4-7 to
 * lane 1 of the same registers, rows 8-11 to lane 0 of d17/d19/d21/d23.
 * The vtrn.8/vtrn.16 sequence transposes each 4x4 byte group so that
 * q8..q11 hold the columns p[-2], p[-1], p[0], p[1] as the macro expects.
 * Lane 1 of d17/d19/d21/d23 is never loaded and its results are not stored.
 *
 * Clobbers: r0, r3, plus the NEON registers listed for vp6_edge_filter
 * and q9-q11.
 */
function ff_vp6_edge_filter_hor_neon, export=1
        sub             r3,  r0,  #1            @ store pointer: offset -1 of row 0
        sub             r0,  r0,  #2            @ load pointer:  offset -2 of row 0
        vld1.32         {d16[0]}, [r0], r1      @ rows 0-3
        vld1.32         {d18[0]}, [r0], r1
        vld1.32         {d20[0]}, [r0], r1
        vld1.32         {d22[0]}, [r0], r1
        vld1.32         {d16[1]}, [r0], r1      @ rows 4-7
        vld1.32         {d18[1]}, [r0], r1
        vld1.32         {d20[1]}, [r0], r1
        vld1.32         {d22[1]}, [r0], r1
        vld1.32         {d17[0]}, [r0], r1      @ rows 8-11
        vld1.32         {d19[0]}, [r0], r1
        vld1.32         {d21[0]}, [r0], r1
        vld1.32         {d23[0]}, [r0], r1
        vtrn.8          q8,  q9                 @ 4x4 byte transpose within each 32-bit lane
        vtrn.8          q10, q11
        vtrn.16         q8,  q10
        vtrn.16         q9,  q11
        vp6_edge_filter
        vtrn.8          q9,  q10                @ pair new p[-1], p[0] per row: even rows in q9, odd in q10
        vst1.16         {d18[0]}, [r3], r1      @ row 0
        vst1.16         {d20[0]}, [r3], r1      @ row 1
        vst1.16         {d18[1]}, [r3], r1      @ row 2
        vst1.16         {d20[1]}, [r3], r1      @ row 3
        vst1.16         {d18[2]}, [r3], r1      @ row 4
        vst1.16         {d20[2]}, [r3], r1      @ row 5
        vst1.16         {d18[3]}, [r3], r1      @ row 6
        vst1.16         {d20[3]}, [r3], r1      @ row 7
        vst1.16         {d19[0]}, [r3], r1      @ row 8
        vst1.16         {d21[0]}, [r3], r1      @ row 9
        vst1.16         {d19[1]}, [r3], r1      @ row 10
        vst1.16         {d21[1]}, [r3], r1      @ row 11
        bx              lr
endfunc