Revision 04e7f6d2 libavcodec/arm/h264dsp_neon.S

View differences:

libavcodec/arm/h264dsp_neon.S
706 706
        b               put_h264_qpel8_h_lowpass_neon
707 707
        .endfunc
708 708

  
709
function put_h264_qpel16_h_lowpass_neon
709
        .macro h264_qpel_h_lowpass type
710
function \type\()_h264_qpel16_h_lowpass_neon
710 711
        push            {lr}
711 712
        mov             ip,  #16
712
        bl              put_h264_qpel8_h_lowpass_neon
713
        bl              \type\()_h264_qpel8_h_lowpass_neon
713 714
        sub             r0,  r0,  r3, lsl #4
714 715
        sub             r1,  r1,  r2, lsl #4
715 716
        add             r0,  r0,  #8
......
718 719
        pop             {lr}
719 720
        .endfunc
720 721

  
721
function put_h264_qpel8_h_lowpass_neon
722
function \type\()_h264_qpel8_h_lowpass_neon
722 723
1:      vld1.64         {d0, d1},  [r1], r2
723 724
        vld1.64         {d16,d17}, [r1], r2
724 725
        subs            ip,  ip,  #2
725 726
        lowpass_8       d0,  d1,  d16, d17, d0,  d16
727
.ifc \type,avg
728
        vld1.8          {d2},     [r0,:64], r3
729
        vrhadd.u8       d0,  d0,  d2
730
        vld1.8          {d3},     [r0,:64]
731
        vrhadd.u8       d16, d16, d3
732
        sub             r0,  r0,  r3
733
.endif
726 734
        vst1.64         {d0},     [r0,:64], r3
727 735
        vst1.64         {d16},    [r0,:64], r3
728 736
        bne             1b
729 737
        bx              lr
730 738
        .endfunc
739
        .endm
740

  
741
        h264_qpel_h_lowpass put
742
        h264_qpel_h_lowpass avg
731 743

  
732
function put_h264_qpel16_h_lowpass_l2_neon
744
        .macro h264_qpel_h_lowpass_l2 type
745
function \type\()_h264_qpel16_h_lowpass_l2_neon
733 746
        push            {lr}
734 747
        mov             ip,  #16
735
        bl              put_h264_qpel8_h_lowpass_l2_neon
748
        bl              \type\()_h264_qpel8_h_lowpass_l2_neon
736 749
        sub             r0,  r0,  r2, lsl #4
737 750
        sub             r1,  r1,  r2, lsl #4
738 751
        sub             r3,  r3,  r2, lsl #4
......
743 756
        pop             {lr}
744 757
        .endfunc
745 758

  
746
function put_h264_qpel8_h_lowpass_l2_neon
759
function \type\()_h264_qpel8_h_lowpass_l2_neon
747 760
1:      vld1.64         {d0, d1},  [r1], r2
748 761
        vld1.64         {d16,d17}, [r1], r2
749 762
        vld1.64         {d28},     [r3], r2
......
751 764
        subs            ip,  ip,  #2
752 765
        lowpass_8       d0,  d1,  d16, d17, d0,  d1
753 766
        vrhadd.u8       q0,  q0,  q14
767
.ifc \type,avg
768
        vld1.8          {d2},      [r0,:64], r2
769
        vrhadd.u8       d0,  d0,  d2
770
        vld1.8          {d3},      [r0,:64]
771
        vrhadd.u8       d1,  d1,  d3
772
        sub             r0,  r0,  r2
773
.endif
754 774
        vst1.64         {d0},      [r0,:64], r2
755 775
        vst1.64         {d1},      [r0,:64], r2
756 776
        bne             1b
757 777
        bx              lr
758 778
        .endfunc
779
        .endm
780

  
781
        h264_qpel_h_lowpass_l2 put
782
        h264_qpel_h_lowpass_l2 avg
759 783

  
760 784
function put_h264_qpel16_v_lowpass_neon_packed
761 785
        mov             r4,  lr
......
772 796
        b               put_h264_qpel8_v_lowpass_neon
773 797
        .endfunc
774 798

  
775
function put_h264_qpel16_v_lowpass_neon
799
        .macro h264_qpel_v_lowpass type
800
function \type\()_h264_qpel16_v_lowpass_neon
776 801
        mov             r4,  lr
777
        bl              put_h264_qpel8_v_lowpass_neon
802
        bl              \type\()_h264_qpel8_v_lowpass_neon
778 803
        sub             r1,  r1,  r3, lsl #2
779
        bl              put_h264_qpel8_v_lowpass_neon
804
        bl              \type\()_h264_qpel8_v_lowpass_neon
780 805
        sub             r0,  r0,  r2, lsl #4
781 806
        add             r0,  r0,  #8
782 807
        sub             r1,  r1,  r3, lsl #4
783 808
        sub             r1,  r1,  r3, lsl #2
784 809
        add             r1,  r1,  #8
785
        bl              put_h264_qpel8_v_lowpass_neon
810
        bl              \type\()_h264_qpel8_v_lowpass_neon
786 811
        sub             r1,  r1,  r3, lsl #2
787 812
        mov             lr,  r4
788 813
        .endfunc
789 814

  
790
function put_h264_qpel8_v_lowpass_neon
815
function \type\()_h264_qpel8_v_lowpass_neon
791 816
        vld1.64         {d8},  [r1], r3
792 817
        vld1.64         {d10}, [r1], r3
793 818
        vld1.64         {d12}, [r1], r3
......
809 834
        lowpass_8       d26, d27, d28, d29, d26, d28
810 835
        transpose_8x8   d8,  d10, d12, d14, d22, d24, d26, d28
811 836

  
837
.ifc \type,avg
838
        vld1.8          {d9},  [r0,:64], r2
839
        vrhadd.u8       d8,  d8,  d9
840
        vld1.8          {d11}, [r0,:64], r2
841
        vrhadd.u8       d10, d10, d11
842
        vld1.8          {d13}, [r0,:64], r2
843
        vrhadd.u8       d12, d12, d13
844
        vld1.8          {d15}, [r0,:64], r2
845
        vrhadd.u8       d14, d14, d15
846
        vld1.8          {d23}, [r0,:64], r2
847
        vrhadd.u8       d22, d22, d23
848
        vld1.8          {d25}, [r0,:64], r2
849
        vrhadd.u8       d24, d24, d25
850
        vld1.8          {d27}, [r0,:64], r2
851
        vrhadd.u8       d26, d26, d27
852
        vld1.8          {d29}, [r0,:64], r2
853
        vrhadd.u8       d28, d28, d29
854
        sub             r0,  r0,  r2,  lsl #3
855
.endif
856

  
812 857
        vst1.64         {d8},  [r0,:64], r2
813 858
        vst1.64         {d10}, [r0,:64], r2
814 859
        vst1.64         {d12}, [r0,:64], r2
......
820 865

  
821 866
        bx              lr
822 867
        .endfunc
868
        .endm
869

  
870
        h264_qpel_v_lowpass put
871
        h264_qpel_v_lowpass avg
823 872

  
824
function put_h264_qpel16_v_lowpass_l2_neon
873
        .macro h264_qpel_v_lowpass_l2 type
874
function \type\()_h264_qpel16_v_lowpass_l2_neon
825 875
        mov             r4,  lr
826
        bl              put_h264_qpel8_v_lowpass_l2_neon
876
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
827 877
        sub             r1,  r1,  r3, lsl #2
828
        bl              put_h264_qpel8_v_lowpass_l2_neon
878
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
829 879
        sub             r0,  r0,  r3, lsl #4
830 880
        sub             ip,  ip,  r2, lsl #4
831 881
        add             r0,  r0,  #8
......
833 883
        sub             r1,  r1,  r3, lsl #4
834 884
        sub             r1,  r1,  r3, lsl #2
835 885
        add             r1,  r1,  #8
836
        bl              put_h264_qpel8_v_lowpass_l2_neon
886
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
837 887
        sub             r1,  r1,  r3, lsl #2
838 888
        mov             lr,  r4
839 889
        .endfunc
840 890

  
841
function put_h264_qpel8_v_lowpass_l2_neon
891
function \type\()_h264_qpel8_v_lowpass_l2_neon
842 892
        vld1.64         {d8},  [r1], r3
843 893
        vld1.64         {d10}, [r1], r3
844 894
        vld1.64         {d12}, [r1], r3
......
871 921
        vld1.64         {d10}, [ip], r2
872 922
        vrhadd.u8       q2,  q2,  q11
873 923
        vld1.64         {d11}, [ip], r2
924
        vrhadd.u8       q5,  q5,  q13
925

  
926
.ifc \type,avg
927
        vld1.8          {d16}, [r0,:64], r3
928
        vrhadd.u8       d0,  d0,  d16
929
        vld1.8          {d17}, [r0,:64], r3
930
        vrhadd.u8       d1,  d1,  d17
931
        vld1.8          {d16}, [r0,:64], r3
932
        vrhadd.u8       d2,  d2,  d16
933
        vld1.8          {d17}, [r0,:64], r3
934
        vrhadd.u8       d3,  d3,  d17
935
        vld1.8          {d16}, [r0,:64], r3
936
        vrhadd.u8       d4,  d4,  d16
937
        vld1.8          {d17}, [r0,:64], r3
938
        vrhadd.u8       d5,  d5,  d17
939
        vld1.8          {d16}, [r0,:64], r3
940
        vrhadd.u8       d10, d10, d16
941
        vld1.8          {d17}, [r0,:64], r3
942
        vrhadd.u8       d11, d11, d17
943
        sub             r0,  r0,  r3,  lsl #3
944
.endif
874 945

  
875 946
        vst1.64         {d0},  [r0,:64], r3
876 947
        vst1.64         {d1},  [r0,:64], r3
877
        vrhadd.u8       q5,  q5,  q13
878 948
        vst1.64         {d2},  [r0,:64], r3
879 949
        vst1.64         {d3},  [r0,:64], r3
880 950
        vst1.64         {d4},  [r0,:64], r3
......
884 954

  
885 955
        bx              lr
886 956
        .endfunc
957
        .endm
958

  
959
        h264_qpel_v_lowpass_l2 put
960
        h264_qpel_v_lowpass_l2 avg
887 961

  
888 962
function put_h264_qpel8_hv_lowpass_neon_top
889 963
        lowpass_const   ip
......
951 1025
        bx              lr
952 1026
        .endfunc
953 1027

  
954
function put_h264_qpel8_hv_lowpass_neon
1028
        .macro h264_qpel8_hv_lowpass type
1029
function \type\()_h264_qpel8_hv_lowpass_neon
955 1030
        mov             r10, lr
956 1031
        bl              put_h264_qpel8_hv_lowpass_neon_top
1032
.ifc \type,avg
1033
        vld1.8          {d0},      [r0,:64], r2
1034
        vrhadd.u8       d12, d12, d0
1035
        vld1.8          {d1},      [r0,:64], r2
1036
        vrhadd.u8       d13, d13, d1
1037
        vld1.8          {d2},      [r0,:64], r2
1038
        vrhadd.u8       d14, d14, d2
1039
        vld1.8          {d3},      [r0,:64], r2
1040
        vrhadd.u8       d15, d15, d3
1041
        vld1.8          {d4},      [r0,:64], r2
1042
        vrhadd.u8       d8,  d8,  d4
1043
        vld1.8          {d5},      [r0,:64], r2
1044
        vrhadd.u8       d9,  d9,  d5
1045
        vld1.8          {d6},      [r0,:64], r2
1046
        vrhadd.u8       d10, d10, d6
1047
        vld1.8          {d7},      [r0,:64], r2
1048
        vrhadd.u8       d11, d11, d7
1049
        sub             r0,  r0,  r2,  lsl #3
1050
.endif
957 1051
        vst1.64         {d12},     [r0,:64], r2
958 1052
        vst1.64         {d13},     [r0,:64], r2
959 1053
        vst1.64         {d14},     [r0,:64], r2
......
966 1060
        mov             lr,  r10
967 1061
        bx              lr
968 1062
        .endfunc
1063
        .endm
1064

  
1065
        h264_qpel8_hv_lowpass put
1066
        h264_qpel8_hv_lowpass avg
969 1067

  
970
function put_h264_qpel8_hv_lowpass_l2_neon
1068
        .macro h264_qpel8_hv_lowpass_l2 type
1069
function \type\()_h264_qpel8_hv_lowpass_l2_neon
971 1070
        mov             r10, lr
972 1071
        bl              put_h264_qpel8_hv_lowpass_neon_top
973 1072

  
......
978 1077
        vrhadd.u8       q1,  q1,  q7
979 1078
        vld1.64         {d6, d7},  [r2,:128]!
980 1079
        vrhadd.u8       q2,  q2,  q4
981

  
982
        vst1.64         {d0},      [r0,:64], r3
983 1080
        vrhadd.u8       q3,  q3,  q5
1081
.ifc \type,avg
1082
        vld1.8          {d16},     [r0,:64], r3
1083
        vrhadd.u8       d0,  d0,  d16
1084
        vld1.8          {d17},     [r0,:64], r3
1085
        vrhadd.u8       d1,  d1,  d17
1086
        vld1.8          {d18},     [r0,:64], r3
1087
        vrhadd.u8       d2,  d2,  d18
1088
        vld1.8          {d19},     [r0,:64], r3
1089
        vrhadd.u8       d3,  d3,  d19
1090
        vld1.8          {d20},     [r0,:64], r3
1091
        vrhadd.u8       d4,  d4,  d20
1092
        vld1.8          {d21},     [r0,:64], r3
1093
        vrhadd.u8       d5,  d5,  d21
1094
        vld1.8          {d22},     [r0,:64], r3
1095
        vrhadd.u8       d6,  d6,  d22
1096
        vld1.8          {d23},     [r0,:64], r3
1097
        vrhadd.u8       d7,  d7,  d23
1098
        sub             r0,  r0,  r3,  lsl #3
1099
.endif
1100
        vst1.64         {d0},      [r0,:64], r3
984 1101
        vst1.64         {d1},      [r0,:64], r3
985 1102
        vst1.64         {d2},      [r0,:64], r3
986 1103
        vst1.64         {d3},      [r0,:64], r3
......
992 1109
        mov             lr,  r10
993 1110
        bx              lr
994 1111
        .endfunc
1112
        .endm
1113

  
1114
        h264_qpel8_hv_lowpass_l2 put
1115
        h264_qpel8_hv_lowpass_l2 avg
995 1116

  
996
function put_h264_qpel16_hv_lowpass_neon
1117
        .macro h264_qpel16_hv type
1118
function \type\()_h264_qpel16_hv_lowpass_neon
997 1119
        mov             r9,  lr
998
        bl              put_h264_qpel8_hv_lowpass_neon
1120
        bl              \type\()_h264_qpel8_hv_lowpass_neon
999 1121
        sub             r1,  r1,  r3, lsl #2
1000
        bl              put_h264_qpel8_hv_lowpass_neon
1122
        bl              \type\()_h264_qpel8_hv_lowpass_neon
1001 1123
        sub             r1,  r1,  r3, lsl #4
1002 1124
        sub             r1,  r1,  r3, lsl #2
1003 1125
        add             r1,  r1,  #8
1004 1126
        sub             r0,  r0,  r2, lsl #4
1005 1127
        add             r0,  r0,  #8
1006
        bl              put_h264_qpel8_hv_lowpass_neon
1128
        bl              \type\()_h264_qpel8_hv_lowpass_neon
1007 1129
        sub             r1,  r1,  r3, lsl #2
1008 1130
        mov             lr,  r9
1009
        b               put_h264_qpel8_hv_lowpass_neon
1131
        b               \type\()_h264_qpel8_hv_lowpass_neon
1010 1132
        .endfunc
1011 1133

  
1012
function put_h264_qpel16_hv_lowpass_l2_neon
1134
function \type\()_h264_qpel16_hv_lowpass_l2_neon
1013 1135
        mov             r9,  lr
1014 1136
        sub             r2,  r4,  #256
1015
        bl              put_h264_qpel8_hv_lowpass_l2_neon
1137
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
1016 1138
        sub             r1,  r1,  r3, lsl #2
1017
        bl              put_h264_qpel8_hv_lowpass_l2_neon
1139
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
1018 1140
        sub             r1,  r1,  r3, lsl #4
1019 1141
        sub             r1,  r1,  r3, lsl #2
1020 1142
        add             r1,  r1,  #8
1021 1143
        sub             r0,  r0,  r3, lsl #4
1022 1144
        add             r0,  r0,  #8
1023
        bl              put_h264_qpel8_hv_lowpass_l2_neon
1145
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
1024 1146
        sub             r1,  r1,  r3, lsl #2
1025 1147
        mov             lr,  r9
1026
        b               put_h264_qpel8_hv_lowpass_l2_neon
1148
        b               \type\()_h264_qpel8_hv_lowpass_l2_neon
1027 1149
        .endfunc
1150
        .endm
1028 1151

  
1029
function ff_put_h264_qpel8_mc10_neon, export=1
1152
        h264_qpel16_hv put
1153
        h264_qpel16_hv avg
1154

  
1155
        .macro h264_qpel8 type
1156
function ff_\type\()_h264_qpel8_mc10_neon, export=1
1030 1157
        lowpass_const   r3
1031 1158
        mov             r3,  r1
1032 1159
        sub             r1,  r1,  #2
1033 1160
        mov             ip,  #8
1034
        b               put_h264_qpel8_h_lowpass_l2_neon
1161
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
1035 1162
        .endfunc
1036 1163

  
1037
function ff_put_h264_qpel8_mc20_neon, export=1
1164
function ff_\type\()_h264_qpel8_mc20_neon, export=1
1038 1165
        lowpass_const   r3
1039 1166
        sub             r1,  r1,  #2
1040 1167
        mov             r3,  r2
1041 1168
        mov             ip,  #8
1042
        b               put_h264_qpel8_h_lowpass_neon
1169
        b               \type\()_h264_qpel8_h_lowpass_neon
1043 1170
        .endfunc
1044 1171

  
1045
function ff_put_h264_qpel8_mc30_neon, export=1
1172
function ff_\type\()_h264_qpel8_mc30_neon, export=1
1046 1173
        lowpass_const   r3
1047 1174
        add             r3,  r1,  #1
1048 1175
        sub             r1,  r1,  #2
1049 1176
        mov             ip,  #8
1050
        b               put_h264_qpel8_h_lowpass_l2_neon
1177
        b               \type\()_h264_qpel8_h_lowpass_l2_neon
1051 1178
        .endfunc
1052 1179

  
1053
function ff_put_h264_qpel8_mc01_neon, export=1
1180
function ff_\type\()_h264_qpel8_mc01_neon, export=1
1054 1181
        push            {lr}
1055 1182
        mov             ip,  r1
1056
put_h264_qpel8_mc01:
1183
\type\()_h264_qpel8_mc01:
1057 1184
        lowpass_const   r3
1058 1185
        mov             r3,  r2
1059 1186
        sub             r1,  r1,  r2, lsl #1
1060 1187
        vpush           {d8-d15}
1061
        bl              put_h264_qpel8_v_lowpass_l2_neon
1188
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
1062 1189
        vpop            {d8-d15}
1063 1190
        pop             {pc}
1064 1191
        .endfunc
1065 1192

  
1066
function ff_put_h264_qpel8_mc11_neon, export=1
1193
function ff_\type\()_h264_qpel8_mc11_neon, export=1
1067 1194
        push            {r0, r1, r11, lr}
1068
put_h264_qpel8_mc11:
1195
\type\()_h264_qpel8_mc11:
1069 1196
        lowpass_const   r3
1070 1197
        mov             r11, sp
1071 1198
        bic             sp,  sp,  #15
......
1081 1208
        add             ip,  sp,  #64
1082 1209
        sub             r1,  r1,  r2, lsl #1
1083 1210
        mov             r2,  #8
1084
        bl              put_h264_qpel8_v_lowpass_l2_neon
1211
        bl              \type\()_h264_qpel8_v_lowpass_l2_neon
1085 1212
        vpop            {d8-d15}
1086 1213
        add             sp,  r11, #8
1087 1214
        pop             {r11, pc}
1088 1215
        .endfunc
1089 1216

  
1090
function ff_put_h264_qpel8_mc21_neon, export=1
1217
function ff_\type\()_h264_qpel8_mc21_neon, export=1
1091 1218
        push            {r0, r1, r4, r10, r11, lr}
1092
put_h264_qpel8_mc21:
1219
\type\()_h264_qpel8_mc21:
1093 1220
        lowpass_const   r3
1094 1221
        mov             r11, sp
1095 1222
        bic             sp,  sp,  #15
......
1106 1233
        sub             r1,  r1,  #2
1107 1234
        mov             r3,  r2
1108 1235
        sub             r2,  r4,  #64
1109
        bl              put_h264_qpel8_hv_lowpass_l2_neon
1236
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
1110 1237
        vpop            {d8-d15}
1111 1238
        add             sp,  r11,  #8
1112 1239
        pop             {r4, r10, r11, pc}
1113 1240
        .endfunc
1114 1241

  
1115
function ff_put_h264_qpel8_mc31_neon, export=1
1242
function ff_\type\()_h264_qpel8_mc31_neon, export=1
1116 1243
        add             r1,  r1,  #1
1117 1244
        push            {r0, r1, r11, lr}
1118 1245
        sub             r1,  r1,  #1
1119
        b               put_h264_qpel8_mc11
1246
        b               \type\()_h264_qpel8_mc11
1120 1247
        .endfunc
1121 1248

  
1122
function ff_put_h264_qpel8_mc02_neon, export=1
1249
function ff_\type\()_h264_qpel8_mc02_neon, export=1
1123 1250
        push            {lr}
1124 1251
        lowpass_const   r3
1125 1252
        sub             r1,  r1,  r2, lsl #1
1126 1253
        mov             r3,  r2
1127 1254
        vpush           {d8-d15}
1128
        bl              put_h264_qpel8_v_lowpass_neon
1255
        bl              \type\()_h264_qpel8_v_lowpass_neon
1129 1256
        vpop            {d8-d15}
1130 1257
        pop             {pc}
1131 1258
        .endfunc
1132 1259

  
1133
function ff_put_h264_qpel8_mc12_neon, export=1
1260
function ff_\type\()_h264_qpel8_mc12_neon, export=1
1134 1261
        push            {r0, r1, r4, r10, r11, lr}
1135
put_h264_qpel8_mc12:
1262
\type\()_h264_qpel8_mc12:
1136 1263
        lowpass_const   r3
1137 1264
        mov             r11, sp
1138 1265
        bic             sp,  sp,  #15
......
1148 1275
        sub             r1,  r1,  r3, lsl #1
1149 1276
        sub             r1,  r1,  #2
1150 1277
        sub             r2,  r4,  #64
1151
        bl              put_h264_qpel8_hv_lowpass_l2_neon
1278
        bl              \type\()_h264_qpel8_hv_lowpass_l2_neon
1152 1279
        vpop            {d8-d15}
1153 1280
        add             sp,  r11,  #8
1154 1281
        pop             {r4, r10, r11, pc}
1155 1282
        .endfunc
1156 1283

  
1157
function ff_put_h264_qpel8_mc22_neon, export=1
1284
function ff_\type\()_h264_qpel8_mc22_neon, export=1
1158 1285
        push            {r4, r10, r11, lr}
1159 1286
        mov             r11, sp
1160 1287
        bic             sp,  sp,  #15
......
1164 1291
        sub             sp,  sp,  #(16*12)
1165 1292
        mov             r4,  sp
1166 1293
        vpush           {d8-d15}
1167
        bl              put_h264_qpel8_hv_lowpass_neon
1294
        bl              \type\()_h264_qpel8_hv_lowpass_neon
1168 1295
        vpop            {d8-d15}
1169 1296
        mov             sp,  r11
1170 1297
        pop             {r4, r10, r11, pc}
1171 1298
        .endfunc
1172 1299

  
1173
function ff_put_h264_qpel8_mc32_neon, export=1
1300
function ff_\type\()_h264_qpel8_mc32_neon, export=1
1174 1301
        push            {r0, r1, r4, r10, r11, lr}
1175 1302
        add             r1,  r1,  #1
1176
        b               put_h264_qpel8_mc12
1303
        b               \type\()_h264_qpel8_mc12
1177 1304
        .endfunc
1178 1305

  
1179
function ff_put_h264_qpel8_mc03_neon, export=1
1306
function ff_\type\()_h264_qpel8_mc03_neon, export=1
1180 1307
        push            {lr}
1181 1308
        add             ip,  r1,  r2
1182
        b               put_h264_qpel8_mc01
1309
        b               \type\()_h264_qpel8_mc01
1183 1310
        .endfunc
1184 1311

  
1185
function ff_put_h264_qpel8_mc13_neon, export=1
1312
function ff_\type\()_h264_qpel8_mc13_neon, export=1
1186 1313
        push            {r0, r1, r11, lr}
1187 1314
        add             r1,  r1,  r2
1188
        b               put_h264_qpel8_mc11
1315
        b               \type\()_h264_qpel8_mc11
1189 1316
        .endfunc
1190 1317

  
1191
function ff_put_h264_qpel8_mc23_neon, export=1
1318
function ff_\type\()_h264_qpel8_mc23_neon, export=1
1192 1319
        push            {r0, r1, r4, r10, r11, lr}
1193 1320
        add             r1,  r1,  r2
1194
        b               put_h264_qpel8_mc21
1321
        b               \type\()_h264_qpel8_mc21
1195 1322
        .endfunc
1196 1323

  
1197
function ff_put_h264_qpel8_mc33_neon, export=1
1324
function ff_\type\()_h264_qpel8_mc33_neon, export=1
1198 1325
        add             r1,  r1,  #1
1199 1326
        push            {r0, r1, r11, lr}
1200 1327
        add             r1,  r1,  r2
1201 1328
        sub             r1,  r1,  #1
1202
        b               put_h264_qpel8_mc11
1329
        b               \type\()_h264_qpel8_mc11
1203 1330
        .endfunc
1331
        .endm
1332

  
1333
        h264_qpel8 put
1334
        h264_qpel8 avg
1204 1335

  
1205
function ff_put_h264_qpel16_mc10_neon, export=1
1336
        .macro h264_qpel16 type
1337
function ff_\type\()_h264_qpel16_mc10_neon, export=1
1206 1338
        lowpass_const   r3
1207 1339
        mov             r3,  r1
1208 1340
        sub             r1,  r1,  #2
1209
        b               put_h264_qpel16_h_lowpass_l2_neon
1341
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
1210 1342
        .endfunc
1211 1343

  
1212
function ff_put_h264_qpel16_mc20_neon, export=1
1344
function ff_\type\()_h264_qpel16_mc20_neon, export=1
1213 1345
        lowpass_const   r3
1214 1346
        sub             r1,  r1,  #2
1215 1347
        mov             r3,  r2
1216
        b               put_h264_qpel16_h_lowpass_neon
1348
        b               \type\()_h264_qpel16_h_lowpass_neon
1217 1349
        .endfunc
1218 1350

  
1219
function ff_put_h264_qpel16_mc30_neon, export=1
1351
function ff_\type\()_h264_qpel16_mc30_neon, export=1
1220 1352
        lowpass_const   r3
1221 1353
        add             r3,  r1,  #1
1222 1354
        sub             r1,  r1,  #2
1223
        b               put_h264_qpel16_h_lowpass_l2_neon
1355
        b               \type\()_h264_qpel16_h_lowpass_l2_neon
1224 1356
        .endfunc
1225 1357

  
1226
function ff_put_h264_qpel16_mc01_neon, export=1
1358
function ff_\type\()_h264_qpel16_mc01_neon, export=1
1227 1359
        push            {r4, lr}
1228 1360
        mov             ip,  r1
1229
put_h264_qpel16_mc01:
1361
\type\()_h264_qpel16_mc01:
1230 1362
        lowpass_const   r3
1231 1363
        mov             r3,  r2
1232 1364
        sub             r1,  r1,  r2, lsl #1
1233 1365
        vpush           {d8-d15}
1234
        bl              put_h264_qpel16_v_lowpass_l2_neon
1366
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
1235 1367
        vpop            {d8-d15}
1236 1368
        pop             {r4, pc}
1237 1369
        .endfunc
1238 1370

  
1239
function ff_put_h264_qpel16_mc11_neon, export=1
1371
function ff_\type\()_h264_qpel16_mc11_neon, export=1
1240 1372
        push            {r0, r1, r4, r11, lr}
1241
put_h264_qpel16_mc11:
1373
\type\()_h264_qpel16_mc11:
1242 1374
        lowpass_const   r3
1243 1375
        mov             r11, sp
1244 1376
        bic             sp,  sp,  #15
......
1253 1385
        add             ip,  sp,  #64
1254 1386
        sub             r1,  r1,  r2, lsl #1
1255 1387
        mov             r2,  #16
1256
        bl              put_h264_qpel16_v_lowpass_l2_neon
1388
        bl              \type\()_h264_qpel16_v_lowpass_l2_neon
1257 1389
        vpop            {d8-d15}
1258 1390
        add             sp,  r11, #8
1259 1391
        pop             {r4, r11, pc}
1260 1392
        .endfunc
1261 1393

  
1262
function ff_put_h264_qpel16_mc21_neon, export=1
1394
function ff_\type\()_h264_qpel16_mc21_neon, export=1
1263 1395
        push            {r0, r1, r4-r5, r9-r11, lr}
1264
put_h264_qpel16_mc21:
1396
\type\()_h264_qpel16_mc21:
1265 1397
        lowpass_const   r3
1266 1398
        mov             r11, sp
1267 1399
        bic             sp,  sp,  #15
......
1275 1407
        sub             r1,  r1,  r2, lsl #1
1276 1408
        sub             r1,  r1,  #2
1277 1409
        mov             r3,  r2
1278
        bl              put_h264_qpel16_hv_lowpass_l2_neon
1410
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
1279 1411
        vpop            {d8-d15}
1280 1412
        add             sp,  r11,  #8
1281 1413
        pop             {r4-r5, r9-r11, pc}
1282 1414
        .endfunc
1283 1415

  
1284
function ff_put_h264_qpel16_mc31_neon, export=1
1416
function ff_\type\()_h264_qpel16_mc31_neon, export=1
1285 1417
        add             r1,  r1,  #1
1286 1418
        push            {r0, r1, r4, r11, lr}
1287 1419
        sub             r1,  r1,  #1
1288
        b               put_h264_qpel16_mc11
1420
        b               \type\()_h264_qpel16_mc11
1289 1421
        .endfunc
1290 1422

  
1291
function ff_put_h264_qpel16_mc02_neon, export=1
1423
function ff_\type\()_h264_qpel16_mc02_neon, export=1
1292 1424
        push            {r4, lr}
1293 1425
        lowpass_const   r3
1294 1426
        sub             r1,  r1,  r2, lsl #1
1295 1427
        mov             r3,  r2
1296 1428
        vpush           {d8-d15}
1297
        bl              put_h264_qpel16_v_lowpass_neon
1429
        bl              \type\()_h264_qpel16_v_lowpass_neon
1298 1430
        vpop            {d8-d15}
1299 1431
        pop             {r4, pc}
1300 1432
        .endfunc
1301 1433

  
1302
function ff_put_h264_qpel16_mc12_neon, export=1
1434
function ff_\type\()_h264_qpel16_mc12_neon, export=1
1303 1435
        push            {r0, r1, r4-r5, r9-r11, lr}
1304
put_h264_qpel16_mc12:
1436
\type\()_h264_qpel16_mc12:
1305 1437
        lowpass_const   r3
1306 1438
        mov             r11, sp
1307 1439
        bic             sp,  sp,  #15
......
1316 1448
        sub             r1,  r1,  r3, lsl #1
1317 1449
        sub             r1,  r1,  #2
1318 1450
        mov             r2,  r3
1319
        bl              put_h264_qpel16_hv_lowpass_l2_neon
1451
        bl              \type\()_h264_qpel16_hv_lowpass_l2_neon
1320 1452
        vpop            {d8-d15}
1321 1453
        add             sp,  r11,  #8
1322 1454
        pop             {r4-r5, r9-r11, pc}
1323 1455
        .endfunc
1324 1456

  
1325
function ff_put_h264_qpel16_mc22_neon, export=1
1457
function ff_\type\()_h264_qpel16_mc22_neon, export=1
1326 1458
        push            {r4, r9-r11, lr}
1327 1459
        lowpass_const   r3
1328 1460
        mov             r11, sp
......
1333 1465
        sub             sp,  sp,  #(16*12)
1334 1466
        mov             r4,  sp
1335 1467
        vpush           {d8-d15}
1336
        bl              put_h264_qpel16_hv_lowpass_neon
1468
        bl              \type\()_h264_qpel16_hv_lowpass_neon
1337 1469
        vpop            {d8-d15}
1338 1470
        mov             sp,  r11
1339 1471
        pop             {r4, r9-r11, pc}
1340 1472
        .endfunc
1341 1473

  
1342
function ff_put_h264_qpel16_mc32_neon, export=1
1474
function ff_\type\()_h264_qpel16_mc32_neon, export=1
1343 1475
        push            {r0, r1, r4-r5, r9-r11, lr}
1344 1476
        add             r1,  r1,  #1
1345
        b               put_h264_qpel16_mc12
1477
        b               \type\()_h264_qpel16_mc12
1346 1478
        .endfunc
1347 1479

  
1348
function ff_put_h264_qpel16_mc03_neon, export=1
1480
function ff_\type\()_h264_qpel16_mc03_neon, export=1
1349 1481
        push            {r4, lr}
1350 1482
        add             ip,  r1,  r2
1351
        b               put_h264_qpel16_mc01
1483
        b               \type\()_h264_qpel16_mc01
1352 1484
        .endfunc
1353 1485

  
1354
function ff_put_h264_qpel16_mc13_neon, export=1
1486
function ff_\type\()_h264_qpel16_mc13_neon, export=1
1355 1487
        push            {r0, r1, r4, r11, lr}
1356 1488
        add             r1,  r1,  r2
1357
        b               put_h264_qpel16_mc11
1489
        b               \type\()_h264_qpel16_mc11
1358 1490
        .endfunc
1359 1491

  
1360
function ff_put_h264_qpel16_mc23_neon, export=1
1492
function ff_\type\()_h264_qpel16_mc23_neon, export=1
1361 1493
        push            {r0, r1, r4-r5, r9-r11, lr}
1362 1494
        add             r1,  r1,  r2
1363
        b               put_h264_qpel16_mc21
1495
        b               \type\()_h264_qpel16_mc21
1364 1496
        .endfunc
1365 1497

  
1366
function ff_put_h264_qpel16_mc33_neon, export=1
1498
function ff_\type\()_h264_qpel16_mc33_neon, export=1
1367 1499
        add             r1,  r1,  #1
1368 1500
        push            {r0, r1, r4, r11, lr}
1369 1501
        add             r1,  r1,  r2
1370 1502
        sub             r1,  r1,  #1
1371
        b               put_h264_qpel16_mc11
1503
        b               \type\()_h264_qpel16_mc11
1372 1504
        .endfunc
1505
        .endm
1506

  
1507
        h264_qpel16 put
1508
        h264_qpel16 avg
1373 1509

  
1374 1510
@ Biweighted prediction
1375 1511

  

Also available in: Unified diff