Revision 8dbe5856 libavcodec/dsputil.c

View differences:

libavcodec/dsputil.c
43 43
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
44 44
uint32_t ff_squareTbl[512] = {0, };
45 45

  
46
#define BIT_DEPTH 9
47
#include "dsputil_internal.h"
48
#undef BIT_DEPTH
49

  
50
#define BIT_DEPTH 10
51
#include "dsputil_internal.h"
52
#undef BIT_DEPTH
53

  
54
#define BIT_DEPTH 8
46 55
#include "dsputil_internal.h"
47 56

  
48 57
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
......
619 628

  
620 629
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
621 630
    switch(width){
622
    case 2: put_pixels2_c (dst, src, stride, height); break;
623
    case 4: put_pixels4_c (dst, src, stride, height); break;
624
    case 8: put_pixels8_c (dst, src, stride, height); break;
625
    case 16:put_pixels16_c(dst, src, stride, height); break;
631
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
632
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
633
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
634
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
626 635
    }
627 636
}
628 637

  
......
716 725

  
717 726
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
718 727
    switch(width){
719
    case 2: avg_pixels2_c (dst, src, stride, height); break;
720
    case 4: avg_pixels4_c (dst, src, stride, height); break;
721
    case 8: avg_pixels8_c (dst, src, stride, height); break;
722
    case 16:avg_pixels16_c(dst, src, stride, height); break;
728
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
729
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
730
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
731
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
723 732
    }
724 733
}
725 734

  
......
953 962
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
954 963
    uint8_t half[64];\
955 964
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
956
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
965
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
957 966
}\
958 967
\
959 968
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
......
963 972
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
964 973
    uint8_t half[64];\
965 974
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
966
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
975
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
967 976
}\
968 977
\
969 978
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
......
971 980
    uint8_t half[64];\
972 981
    copy_block9(full, src, 16, stride, 9);\
973 982
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
974
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
983
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
975 984
}\
976 985
\
977 986
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
......
985 994
    uint8_t half[64];\
986 995
    copy_block9(full, src, 16, stride, 9);\
987 996
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
988
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
997
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
989 998
}\
990 999
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
991 1000
    uint8_t full[16*9];\
......
996 1005
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
997 1006
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
998 1007
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
999
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1008
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1000 1009
}\
1001 1010
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1002 1011
    uint8_t full[16*9];\
......
1004 1013
    uint8_t halfHV[64];\
1005 1014
    copy_block9(full, src, 16, stride, 9);\
1006 1015
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1007
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1016
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1008 1017
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1009
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1018
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1010 1019
}\
1011 1020
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1012 1021
    uint8_t full[16*9];\
......
1017 1026
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1018 1027
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1019 1028
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1020
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1029
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1021 1030
}\
1022 1031
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1023 1032
    uint8_t full[16*9];\
......
1025 1034
    uint8_t halfHV[64];\
1026 1035
    copy_block9(full, src, 16, stride, 9);\
1027 1036
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1028
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1037
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1029 1038
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1030
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1039
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1031 1040
}\
1032 1041
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1033 1042
    uint8_t full[16*9];\
......
1038 1047
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1039 1048
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1040 1049
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1041
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1050
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1042 1051
}\
1043 1052
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1044 1053
    uint8_t full[16*9];\
......
1046 1055
    uint8_t halfHV[64];\
1047 1056
    copy_block9(full, src, 16, stride, 9);\
1048 1057
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1049
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1058
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1050 1059
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1051
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1060
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1052 1061
}\
1053 1062
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1054 1063
    uint8_t full[16*9];\
......
1059 1068
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1060 1069
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1061 1070
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1062
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1071
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1063 1072
}\
1064 1073
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1065 1074
    uint8_t full[16*9];\
......
1067 1076
    uint8_t halfHV[64];\
1068 1077
    copy_block9(full, src, 16, stride, 9);\
1069 1078
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1070
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1079
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1071 1080
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1072
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1081
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1073 1082
}\
1074 1083
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1075 1084
    uint8_t halfH[72];\
1076 1085
    uint8_t halfHV[64];\
1077 1086
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1078 1087
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1079
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1088
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1080 1089
}\
1081 1090
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1082 1091
    uint8_t halfH[72];\
1083 1092
    uint8_t halfHV[64];\
1084 1093
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1085 1094
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1086
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1095
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1087 1096
}\
1088 1097
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1089 1098
    uint8_t full[16*9];\
......
1094 1103
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1095 1104
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1096 1105
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1097
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1106
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1098 1107
}\
1099 1108
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1100 1109
    uint8_t full[16*9];\
1101 1110
    uint8_t halfH[72];\
1102 1111
    copy_block9(full, src, 16, stride, 9);\
1103 1112
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1104
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1113
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1105 1114
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1106 1115
}\
1107 1116
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
......
1113 1122
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1114 1123
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1115 1124
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1116
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1125
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1117 1126
}\
1118 1127
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1119 1128
    uint8_t full[16*9];\
1120 1129
    uint8_t halfH[72];\
1121 1130
    copy_block9(full, src, 16, stride, 9);\
1122 1131
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1123
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1132
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1124 1133
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1125 1134
}\
1126 1135
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
......
1132 1141
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1133 1142
    uint8_t half[256];\
1134 1143
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1144
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1136 1145
}\
1137 1146
\
1138 1147
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
......
1142 1151
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1143 1152
    uint8_t half[256];\
1144 1153
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1145
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1154
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1146 1155
}\
1147 1156
\
1148 1157
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
......
1150 1159
    uint8_t half[256];\
1151 1160
    copy_block17(full, src, 24, stride, 17);\
1152 1161
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1153
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1162
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1154 1163
}\
1155 1164
\
1156 1165
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
......
1164 1173
    uint8_t half[256];\
1165 1174
    copy_block17(full, src, 24, stride, 17);\
1166 1175
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1167
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1176
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1168 1177
}\
1169 1178
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1170 1179
    uint8_t full[24*17];\
......
1175 1184
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1176 1185
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1177 1186
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1178
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1187
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1179 1188
}\
1180 1189
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1181 1190
    uint8_t full[24*17];\
......
1183 1192
    uint8_t halfHV[256];\
1184 1193
    copy_block17(full, src, 24, stride, 17);\
1185 1194
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1186
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1195
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1187 1196
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1188
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1197
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1189 1198
}\
1190 1199
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1191 1200
    uint8_t full[24*17];\
......
1196 1205
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1197 1206
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1198 1207
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1199
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1208
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1200 1209
}\
1201 1210
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1202 1211
    uint8_t full[24*17];\
......
1204 1213
    uint8_t halfHV[256];\
1205 1214
    copy_block17(full, src, 24, stride, 17);\
1206 1215
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1207
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1216
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1208 1217
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1209
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1218
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1210 1219
}\
1211 1220
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1212 1221
    uint8_t full[24*17];\
......
1217 1226
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1218 1227
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1219 1228
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1220
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1229
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1221 1230
}\
1222 1231
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1223 1232
    uint8_t full[24*17];\
......
1225 1234
    uint8_t halfHV[256];\
1226 1235
    copy_block17(full, src, 24, stride, 17);\
1227 1236
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1228
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1237
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1229 1238
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1230
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1239
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1231 1240
}\
1232 1241
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1233 1242
    uint8_t full[24*17];\
......
1238 1247
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1239 1248
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240 1249
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1241
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1250
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1242 1251
}\
1243 1252
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1244 1253
    uint8_t full[24*17];\
......
1246 1255
    uint8_t halfHV[256];\
1247 1256
    copy_block17(full, src, 24, stride, 17);\
1248 1257
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1249
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1258
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1250 1259
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1251
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1260
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1252 1261
}\
1253 1262
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1254 1263
    uint8_t halfH[272];\
1255 1264
    uint8_t halfHV[256];\
1256 1265
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1257 1266
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1258
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1267
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1259 1268
}\
1260 1269
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1261 1270
    uint8_t halfH[272];\
1262 1271
    uint8_t halfHV[256];\
1263 1272
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1264 1273
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1265
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1274
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1266 1275
}\
1267 1276
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1268 1277
    uint8_t full[24*17];\
......
1273 1282
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1274 1283
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1275 1284
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1276
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1285
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1277 1286
}\
1278 1287
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1279 1288
    uint8_t full[24*17];\
1280 1289
    uint8_t halfH[272];\
1281 1290
    copy_block17(full, src, 24, stride, 17);\
1282 1291
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1283
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1292
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1284 1293
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1285 1294
}\
1286 1295
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
......
1292 1301
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1293 1302
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1294 1303
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1295
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
1304
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1296 1305
}\
1297 1306
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1298 1307
    uint8_t full[24*17];\
1299 1308
    uint8_t halfH[272];\
1300 1309
    copy_block17(full, src, 24, stride, 17);\
1301 1310
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1302
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1311
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1303 1312
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1304 1313
}\
1305 1314
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
......
1327 1336
#define put_qpel16_mc00_c ff_put_pixels16x16_c
1328 1337
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1329 1338
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
1330
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1339
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1331 1340

  
1332 1341
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1333 1342
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
......
1349 1358

  
1350 1359
#if CONFIG_RV40_DECODER
1351 1360
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1352
    put_pixels16_xy2_c(dst, src, stride, 16);
1361
    put_pixels16_xy2_8_c(dst, src, stride, 16);
1353 1362
}
1354 1363
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1355
    avg_pixels16_xy2_c(dst, src, stride, 16);
1364
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
1356 1365
}
1357 1366
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1358
    put_pixels8_xy2_c(dst, src, stride, 8);
1367
    put_pixels8_xy2_8_c(dst, src, stride, 8);
1359 1368
}
1360 1369
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1361
    avg_pixels8_xy2_c(dst, src, stride, 8);
1370
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
1362 1371
}
1363 1372
#endif /* CONFIG_RV40_DECODER */
1364 1373

  
......
1394 1403
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1395 1404
    uint8_t half[64];
1396 1405
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1397
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
1406
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1398 1407
}
1399 1408

  
1400 1409
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
......
1404 1413
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1405 1414
    uint8_t half[64];
1406 1415
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1407
    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
1416
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1408 1417
}
1409 1418

  
1410 1419
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
......
1418 1427
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1419 1428
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1420 1429
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1421
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1430
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1422 1431
}
1423 1432
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1424 1433
    uint8_t halfH[88];
......
1427 1436
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1428 1437
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1429 1438
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1430
    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
1439
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1431 1440
}
1432 1441
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1433 1442
    uint8_t halfH[88];
......
2870 2879
            c->idct_put= ff_jref_idct4_put;
2871 2880
            c->idct_add= ff_jref_idct4_add;
2872 2881
        }else{
2873
            c->idct_put= ff_h264_lowres_idct_put_c;
2874
            c->idct_add= ff_h264_lowres_idct_add_c;
2882
            if (avctx->codec_id != CODEC_ID_H264) {
2883
                c->idct_put= ff_h264_lowres_idct_put_8_c;
2884
                c->idct_add= ff_h264_lowres_idct_add_8_c;
2885
            } else {
2886
                switch (avctx->bits_per_raw_sample) {
2887
                    case 9:
2888
                        c->idct_put= ff_h264_lowres_idct_put_9_c;
2889
                        c->idct_add= ff_h264_lowres_idct_add_9_c;
2890
                        break;
2891
                    case 10:
2892
                        c->idct_put= ff_h264_lowres_idct_put_10_c;
2893
                        c->idct_add= ff_h264_lowres_idct_add_10_c;
2894
                        break;
2895
                    default:
2896
                        c->idct_put= ff_h264_lowres_idct_put_8_c;
2897
                        c->idct_add= ff_h264_lowres_idct_add_8_c;
2898
                }
2899
            }
2875 2900
        }
2876 2901
        c->idct    = j_rev_dct4;
2877 2902
        c->idct_permutation_type= FF_NO_IDCT_PERM;
......
2929 2954
    c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
2930 2955
    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
2931 2956
    c->add_pixels_clamped = ff_add_pixels_clamped_c;
2932
    c->add_pixels8 = add_pixels8_c;
2933
    c->add_pixels4 = add_pixels4_c;
2934 2957
    c->sum_abs_dctelem = sum_abs_dctelem_c;
2935
    c->emulated_edge_mc = ff_emulated_edge_mc;
2936 2958
    c->gmc1 = gmc1_c;
2937 2959
    c->gmc = ff_gmc_c;
2938
    c->clear_block = clear_block_c;
2939
    c->clear_blocks = clear_blocks_c;
2940 2960
    c->pix_sum = pix_sum_c;
2941 2961
    c->pix_norm1 = pix_norm1_c;
2942 2962

  
......
2954 2974
    c->pix_abs[1][2] = pix_abs8_y2_c;
2955 2975
    c->pix_abs[1][3] = pix_abs8_xy2_c;
2956 2976

  
2957
#define dspfunc(PFX, IDX, NUM) \
2958
    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
2959
    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
2960
    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
2961
    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
2962

  
2963
    dspfunc(put, 0, 16);
2964
    dspfunc(put_no_rnd, 0, 16);
2965
    dspfunc(put, 1, 8);
2966
    dspfunc(put_no_rnd, 1, 8);
2967
    dspfunc(put, 2, 4);
2968
    dspfunc(put, 3, 2);
2969

  
2970
    dspfunc(avg, 0, 16);
2971
    dspfunc(avg_no_rnd, 0, 16);
2972
    dspfunc(avg, 1, 8);
2973
    dspfunc(avg_no_rnd, 1, 8);
2974
    dspfunc(avg, 2, 4);
2975
    dspfunc(avg, 3, 2);
2976
#undef dspfunc
2977

  
2978
    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
2979
    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
2980

  
2981 2977
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
2982 2978
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
2983 2979
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
......
3028 3024
    dspfunc(avg_qpel, 1, 8);
3029 3025
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */
3030 3026

  
3031
    dspfunc(put_h264_qpel, 0, 16);
3032
    dspfunc(put_h264_qpel, 1, 8);
3033
    dspfunc(put_h264_qpel, 2, 4);
3034
    dspfunc(put_h264_qpel, 3, 2);
3035
    dspfunc(avg_h264_qpel, 0, 16);
3036
    dspfunc(avg_h264_qpel, 1, 8);
3037
    dspfunc(avg_h264_qpel, 2, 4);
3038

  
3039 3027
#undef dspfunc
3040
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3041
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3042
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3043
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3044
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3045
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
3046

  
3047
    c->draw_edges = draw_edges_c;
3048 3028

  
3049 3029
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
3050 3030
    ff_mlp_init(c, avctx);
......
3169 3149
    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
3170 3150
    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
3171 3151

  
3152
#undef FUNC
3153
#undef FUNCC
3154
#define FUNC(f, depth) f ## _ ## depth
3155
#define FUNCC(f, depth) f ## _ ## depth ## _c
3156

  
3157
#define dspfunc1(PFX, IDX, NUM, depth)\
3158
    c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM        , depth);\
3159
    c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
3160
    c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
3161
    c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
3162

  
3163
#define dspfunc2(PFX, IDX, NUM, depth)\
3164
    c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
3165
    c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
3166
    c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
3167
    c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
3168
    c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
3169
    c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
3170
    c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
3171
    c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
3172
    c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
3173
    c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
3174
    c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
3175
    c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
3176
    c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
3177
    c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
3178
    c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
3179
    c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
3180

  
3181

  
3182
#define BIT_DEPTH_FUNCS(depth)\
3183
    c->draw_edges                    = FUNCC(draw_edges            , depth);\
3184
    c->emulated_edge_mc              = FUNC (ff_emulated_edge_mc   , depth);\
3185
    c->clear_block                   = FUNCC(clear_block           , depth);\
3186
    c->clear_blocks                  = FUNCC(clear_blocks          , depth);\
3187
    c->add_pixels8                   = FUNCC(add_pixels8           , depth);\
3188
    c->add_pixels4                   = FUNCC(add_pixels4           , depth);\
3189
    c->put_no_rnd_pixels_l2[0]       = FUNCC(put_no_rnd_pixels16_l2, depth);\
3190
    c->put_no_rnd_pixels_l2[1]       = FUNCC(put_no_rnd_pixels8_l2 , depth);\
3191
\
3192
    c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8   , depth);\
3193
    c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4   , depth);\
3194
    c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2   , depth);\
3195
    c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8   , depth);\
3196
    c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4   , depth);\
3197
    c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2   , depth);\
3198
\
3199
    dspfunc1(put       , 0, 16, depth);\
3200
    dspfunc1(put       , 1,  8, depth);\
3201
    dspfunc1(put       , 2,  4, depth);\
3202
    dspfunc1(put       , 3,  2, depth);\
3203
    dspfunc1(put_no_rnd, 0, 16, depth);\
3204
    dspfunc1(put_no_rnd, 1,  8, depth);\
3205
    dspfunc1(avg       , 0, 16, depth);\
3206
    dspfunc1(avg       , 1,  8, depth);\
3207
    dspfunc1(avg       , 2,  4, depth);\
3208
    dspfunc1(avg       , 3,  2, depth);\
3209
    dspfunc1(avg_no_rnd, 0, 16, depth);\
3210
    dspfunc1(avg_no_rnd, 1,  8, depth);\
3211
\
3212
    dspfunc2(put_h264_qpel, 0, 16, depth);\
3213
    dspfunc2(put_h264_qpel, 1,  8, depth);\
3214
    dspfunc2(put_h264_qpel, 2,  4, depth);\
3215
    dspfunc2(put_h264_qpel, 3,  2, depth);\
3216
    dspfunc2(avg_h264_qpel, 0, 16, depth);\
3217
    dspfunc2(avg_h264_qpel, 1,  8, depth);\
3218
    dspfunc2(avg_h264_qpel, 2,  4, depth);
3219

  
3220
    if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
3221
        BIT_DEPTH_FUNCS(8)
3222
    } else {
3223
        switch (avctx->bits_per_raw_sample) {
3224
            case 9:
3225
                BIT_DEPTH_FUNCS(9)
3226
                break;
3227
            case 10:
3228
                BIT_DEPTH_FUNCS(10)
3229
                break;
3230
            default:
3231
                av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
3232
                BIT_DEPTH_FUNCS(8)
3233
                break;
3234
        }
3235
    }
3236

  
3237

  
3172 3238
    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
3173 3239
    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
3174 3240
    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);

Also available in: Unified diff