Revision 015f9f1a libavcodec/arm/dsputil_vfp.S

View differences:

libavcodec/arm/dsputil_vfp.S
41 41
 * ARM VFP optimized implementation of 'vector_fmul_c' function.
42 42
 * Assume that len is a positive number and is a multiple of 8
43 43
 */
44
@ void ff_vector_fmul_vfp(float *dst, const float *src, int len)
44
@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
45 45
function ff_vector_fmul_vfp, export=1
46 46
        vpush           {d8-d15}
47
        mov             r3,  r0
48 47
        fmrx            r12, fpscr
49 48
        orr             r12, r12, #(3 << 16) /* set vector size to 4 */
50 49
        fmxr            fpscr, r12
51 50

  
52
        vldmia          r3!, {s0-s3}
53
        vldmia          r1!, {s8-s11}
54
        vldmia          r3!, {s4-s7}
55
        vldmia          r1!, {s12-s15}
51
        vldmia          r1!, {s0-s3}
52
        vldmia          r2!, {s8-s11}
53
        vldmia          r1!, {s4-s7}
54
        vldmia          r2!, {s12-s15}
56 55
        vmul.f32        s8,  s0,  s8
57 56
1:
58
        subs            r2,  r2,  #16
57
        subs            r3,  r3,  #16
59 58
        vmul.f32        s12, s4,  s12
60
        vldmiage        r3!, {s16-s19}
61
        vldmiage        r1!, {s24-s27}
62
        vldmiage        r3!, {s20-s23}
63
        vldmiage        r1!, {s28-s31}
59
        vldmiage        r1!, {s16-s19}
60
        vldmiage        r2!, {s24-s27}
61
        vldmiage        r1!, {s20-s23}
62
        vldmiage        r2!, {s28-s31}
64 63
        vmulge.f32      s24, s16, s24
65 64
        vstmia          r0!, {s8-s11}
66 65
        vstmia          r0!, {s12-s15}
67 66
        vmulge.f32      s28, s20, s28
68
        vldmiagt        r3!, {s0-s3}
69
        vldmiagt        r1!, {s8-s11}
70
        vldmiagt        r3!, {s4-s7}
71
        vldmiagt        r1!, {s12-s15}
67
        vldmiagt        r1!, {s0-s3}
68
        vldmiagt        r2!, {s8-s11}
69
        vldmiagt        r1!, {s4-s7}
70
        vldmiagt        r2!, {s12-s15}
72 71
        vmulge.f32      s8,  s0,  s8
73 72
        vstmiage        r0!, {s24-s27}
74 73
        vstmiage        r0!, {s28-s31}

Also available in: Unified diff