Revision 015f9f1a libavcodec/arm/dsputil_neon.S

View differences:

libavcodec/arm/dsputil_neon.S
738 738
endfunc
739 739

  
740 740
@ NOTE(review): annotation added while reviewing this diff view; these "@"
@ comments are not part of either revision shown below.
@
@ ff_vector_fmul_neon: loads f32 vectors through two pointers, multiplies
@ them lane by lane (vmul.f32) and stores the products.
@
@ How to read this listing: two revisions of the routine are interleaved.
@ A row with two numbers ("745 744") introduces a line present in both
@ revisions.  A row with a single number introduces a line present in only
@ one of them; within each changed hunk the first run of single-numbered
@ lines continues the left-hand numbering and the second run continues the
@ right-hand numbering.  Labels 2: and 3: therefore appear twice, once per
@ revision.
@
@ Register roles, left-numbered revision:
@   r0 = first load pointer; its entry value is copied to r3
@   r1 = second load pointer
@   r2 = element count
@   r3 = store pointer (starts equal to r0, so products overwrite the first
@        input)
@ Register roles, right-numbered revision:
@   r0 = store pointer
@   r1 = first load pointer
@   r2 = second load pointer
@   r3 = element count
@ (presumably the C prototype gained a separate destination argument --
@ confirm against the caller's declaration)
@
@ Both revisions use ip as the main-loop counter and q0-q3 / q8-q11 as data
@ registers.  Every load and store carries the :128 alignment qualifier and
@ post-increments its pointer.
@
@ Count handling: 8 is subtracted before the first multiply, the main loop
@ consumes 16 elements per iteration, and the tail at 2: handles exactly 8
@ more.  This presumably requires the count to be a multiple of 8 and at
@ least 8 -- TODO confirm with callers.
function ff_vector_fmul_neon, export=1
741
        mov             r3,  r0
742
        subs            r2,  r2,  #8
743
        vld1.64         {d0-d3},  [r0,:128]!
744
        vld1.64         {d4-d7},  [r1,:128]!
741
        subs            r3,  r3,  #8
742
        vld1.64         {d0-d3},  [r1,:128]!
743
        vld1.64         {d4-d7},  [r2,:128]!
745 744
        vmul.f32        q8,  q0,  q2
746 745
        vmul.f32        q9,  q1,  q3
747 746
        beq             3f                      @ flags still from the subs above: nothing left after the first 8
748
        bics            ip,  r2,  #15
747
        bics            ip,  r3,  #15
749 748
        beq             2f                      @ fewer than 16 left: skip the main loop
750 749
1:      subs            ip,  ip,  #16           @ flags set here are consumed by the bne at the loop end
751
        vld1.64         {d0-d1},  [r0,:128]!
752
        vld1.64         {d4-d5},  [r1,:128]!
750
        vld1.64         {d0-d1},  [r1,:128]!
751
        vld1.64         {d4-d5},  [r2,:128]!
753 752
        vmul.f32        q10, q0,  q2
754
        vld1.64         {d2-d3},  [r0,:128]!
755
        vld1.64         {d6-d7},  [r1,:128]!
753
        vld1.64         {d2-d3},  [r1,:128]!
754
        vld1.64         {d6-d7},  [r2,:128]!
756 755
        vmul.f32        q11, q1,  q3
757
        vst1.64         {d16-d19},[r3,:128]!
758
        vld1.64         {d0-d1},  [r0,:128]!
759
        vld1.64         {d4-d5},  [r1,:128]!
756
        vst1.64         {d16-d19},[r0,:128]!    @ q8/q9 hold products from before the loop or the previous iteration
757
        vld1.64         {d0-d1},  [r1,:128]!
758
        vld1.64         {d4-d5},  [r2,:128]!
760 759
        vmul.f32        q8,  q0,  q2
761
        vld1.64         {d2-d3},  [r0,:128]!
762
        vld1.64         {d6-d7},  [r1,:128]!
760
        vld1.64         {d2-d3},  [r1,:128]!
761
        vld1.64         {d6-d7},  [r2,:128]!
763 762
        vmul.f32        q9,  q1,  q3
764
        vst1.64         {d20-d23},[r3,:128]!
763
        vst1.64         {d20-d23},[r0,:128]!    @ q10/q11 were computed earlier in this iteration
765 764
        bne             1b
766
        ands            r2,  r2,  #15
765
        ands            r3,  r3,  #15
767 766
        beq             3f                      @ no remainder: only the pending q8/q9 store is left
768
2:      vld1.64         {d0-d1},  [r0,:128]!
769
        vld1.64         {d4-d5},  [r1,:128]!
770
        vst1.64         {d16-d17},[r3,:128]!
767
2:      vld1.64         {d0-d1},  [r1,:128]!
768
        vld1.64         {d4-d5},  [r2,:128]!
769
        vst1.64         {d16-d17},[r0,:128]!    @ write out the old q8 before the next vmul overwrites it
771 770
        vmul.f32        q8,  q0,  q2
772
        vld1.64         {d2-d3},  [r0,:128]!
773
        vld1.64         {d6-d7},  [r1,:128]!
774
        vst1.64         {d18-d19},[r3,:128]!
771
        vld1.64         {d2-d3},  [r1,:128]!
772
        vld1.64         {d6-d7},  [r2,:128]!
773
        vst1.64         {d18-d19},[r0,:128]!    @ likewise for the old q9
775 774
        vmul.f32        q9,  q1,  q3
776
3:      vst1.64         {d16-d19},[r3,:128]!
775
3:      vst1.64         {d16-d19},[r0,:128]!    @ final store of the last products in q8/q9
777 776
        bx              lr
778 777
endfunc
779 778

  

Also available in: Unified diff