Revision c73d99e6 libavcodec/arm/dsputil_vfp.S

View differences:

libavcodec/arm/dsputil_vfp.S
131 131
        vpop            {d8-d15}
132 132
        bx              lr
133 133
endfunc
134

  
135
#if HAVE_ARMV6
136
/**
137
 * ARM VFP optimized float to int16 conversion.
138
 * Assume that len is a positive number and is a multiple of 8, destination
139
 * buffer is at least 4 bytes aligned (8 bytes alignment is better for
140
 * performance), and little-endian byte order.
141
 */
142
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
143
@ Converts floats from src to saturated 16-bit integers in dst, 8 samples
@ per loop iteration.
@
@ Register roles (arguments as in the prototype: r0 = dst, r1 = src, r2 = len):
@   r0              output pointer, advanced by 16 bytes per stmia
@   r1              input pointer, advanced by 32 bytes per 8-float load
@   r2              remaining sample count, decremented by 8 per iteration
@   s16-s23         the 8 floats most recently loaded from src
@   s0-s7           those 8 samples converted to signed 32-bit integers
@   r3-r8, ip, lr   integer samples while they are saturated and packed
@
@ The loop is software pipelined: the first batch is loaded and converted
@ before the loop, and inside the loop the load/convert of the next batch is
@ interleaved with saturating, packing and storing the current batch.
function ff_float_to_int16_vfp, export=1
144
        @ Save the core registers used as scratch below; lr is scratch too and
        @ is restored straight into pc by the final pop.
        push            {r4-r8,lr}
145
        @ s16-s23 overlay d8-d11, which a callee must preserve.
        vpush           {d8-d11}
146
        @ Prologue of the pipeline: load the first 8 floats and advance src.
        vldmia          r1!, {s16-s23}
147
        @ Convert the first batch to signed 32-bit integers (this form of
        @ vcvt rounds toward zero).
        vcvt.s32.f32    s0,  s16
148
        vcvt.s32.f32    s1,  s17
149
        vcvt.s32.f32    s2,  s18
150
        vcvt.s32.f32    s3,  s19
151
        vcvt.s32.f32    s4,  s20
152
        vcvt.s32.f32    s5,  s21
153
        vcvt.s32.f32    s6,  s22
154
        vcvt.s32.f32    s7,  s23
155
1:
156
        @ Account for the batch currently held in s0-s7. No instruction between
        @ here and the bgt at the bottom writes the N/Z/C/V flags, so every
        @ "gt" condition below means "more samples remain after this batch".
        subs            r2,  r2,  #8
157
        @ Move the 8 converted samples to core registers, freeing s0-s7 for
        @ the next batch.
        vmov            r3,  r4,  s0, s1
158
        vmov            r5,  r6,  s2, s3
159
        vmov            r7,  r8,  s4, s5
160
        vmov            ip,  lr,  s6, s7
161
        @ Fetch the next 8 floats only if there are any, so src is never read
        @ past len elements.
        vldmiagt        r1!, {s16-s23}
162
        @ Clamp samples 0-3 to the signed 16-bit range.
        ssat            r4,  #16, r4
163
        ssat            r3,  #16, r3
164
        ssat            r6,  #16, r6
165
        ssat            r5,  #16, r5
166
        @ Pack pairs into one word: even sample in the low halfword, odd sample
        @ in the high halfword (sample order in memory for little endian).
        pkhbt           r3,  r3,  r4, lsl #16
167
        pkhbt           r4,  r5,  r6, lsl #16
168
        @ Convert the next batch (if one was loaded) while the integer side
        @ finishes the current one.
        vcvtgt.s32.f32  s0,  s16
169
        vcvtgt.s32.f32  s1,  s17
170
        vcvtgt.s32.f32  s2,  s18
171
        vcvtgt.s32.f32  s3,  s19
172
        vcvtgt.s32.f32  s4,  s20
173
        vcvtgt.s32.f32  s5,  s21
174
        vcvtgt.s32.f32  s6,  s22
175
        vcvtgt.s32.f32  s7,  s23
176
        @ Clamp samples 4-7 to the signed 16-bit range.
        ssat            r8,  #16, r8
177
        ssat            r7,  #16, r7
178
        ssat            lr,  #16, lr
179
        ssat            ip,  #16, ip
180
        @ r5 and r6 were consumed by the pkhbt into r4 above, so they can now
        @ receive the packed pairs for samples 4-5 and 6-7.
        pkhbt           r5,  r7,  r8, lsl #16
181
        pkhbt           r6,  ip,  lr, lsl #16
182
        @ Store 8 packed int16 samples (16 bytes) and advance dst.
        stmia           r0!, {r3-r6}
183
        @ Loop while samples remain (flags still from the subs above).
        bgt             1b
184

  
185
        @ Restore the preserved VFP and core registers; popping the saved lr
        @ into pc returns to the caller.
        vpop            {d8-d11}
186
        pop             {r4-r8,pc}
187
endfunc
188
#endif

Also available in: Unified diff