Revision 5137235e libavcodec/ppc/dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c | ||
---|---|---|
20 | 20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
21 | 21 |
*/ |
22 | 22 |
|
23 |
#include "config.h" |
|
24 |
#if HAVE_ALTIVEC_H |
|
25 |
#include <altivec.h> |
|
26 |
#endif |
|
23 | 27 |
#include "libavcodec/dsputil.h" |
24 |
|
|
25 |
#include "gcc_fixes.h" |
|
26 |
|
|
27 | 28 |
#include "dsputil_ppc.h" |
28 | 29 |
#include "util_altivec.h" |
29 | 30 |
#include "types_altivec.h" |
... | ... | |
1124 | 1125 |
static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { |
1125 | 1126 |
int sum; |
1126 | 1127 |
register vector signed short |
1127 |
temp0 REG_v(v0),
|
|
1128 |
temp1 REG_v(v1),
|
|
1129 |
temp2 REG_v(v2),
|
|
1130 |
temp3 REG_v(v3),
|
|
1131 |
temp4 REG_v(v4),
|
|
1132 |
temp5 REG_v(v5),
|
|
1133 |
temp6 REG_v(v6),
|
|
1134 |
temp7 REG_v(v7);
|
|
1128 |
temp0 __asm__ ("v0"),
|
|
1129 |
temp1 __asm__ ("v1"),
|
|
1130 |
temp2 __asm__ ("v2"),
|
|
1131 |
temp3 __asm__ ("v3"),
|
|
1132 |
temp4 __asm__ ("v4"),
|
|
1133 |
temp5 __asm__ ("v5"),
|
|
1134 |
temp6 __asm__ ("v6"),
|
|
1135 |
temp7 __asm__ ("v7");
|
|
1135 | 1136 |
register vector signed short |
1136 |
temp0S REG_v(v8),
|
|
1137 |
temp1S REG_v(v9),
|
|
1138 |
temp2S REG_v(v10),
|
|
1139 |
temp3S REG_v(v11),
|
|
1140 |
temp4S REG_v(v12),
|
|
1141 |
temp5S REG_v(v13),
|
|
1142 |
temp6S REG_v(v14),
|
|
1143 |
temp7S REG_v(v15);
|
|
1144 |
register const vector unsigned char vzero REG_v(v31)=
|
|
1137 |
temp0S __asm__ ("v8"),
|
|
1138 |
temp1S __asm__ ("v9"),
|
|
1139 |
temp2S __asm__ ("v10"),
|
|
1140 |
temp3S __asm__ ("v11"),
|
|
1141 |
temp4S __asm__ ("v12"),
|
|
1142 |
temp5S __asm__ ("v13"),
|
|
1143 |
temp6S __asm__ ("v14"),
|
|
1144 |
temp7S __asm__ ("v15");
|
|
1145 |
register const vector unsigned char vzero __asm__ ("v31") =
|
|
1145 | 1146 |
(const vector unsigned char)vec_splat_u8(0); |
1146 | 1147 |
{ |
1147 |
register const vector signed short vprod1 REG_v(v16)=
|
|
1148 |
register const vector signed short vprod1 __asm__ ("v16") =
|
|
1148 | 1149 |
(const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; |
1149 |
register const vector signed short vprod2 REG_v(v17)=
|
|
1150 |
register const vector signed short vprod2 __asm__ ("v17") =
|
|
1150 | 1151 |
(const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; |
1151 |
register const vector signed short vprod3 REG_v(v18)=
|
|
1152 |
register const vector signed short vprod3 __asm__ ("v18") =
|
|
1152 | 1153 |
(const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; |
1153 |
register const vector unsigned char perm1 REG_v(v19)=
|
|
1154 |
register const vector unsigned char perm1 __asm__ ("v19") =
|
|
1154 | 1155 |
(const vector unsigned char) |
1155 | 1156 |
{0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, |
1156 | 1157 |
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; |
1157 |
register const vector unsigned char perm2 REG_v(v20)=
|
|
1158 |
register const vector unsigned char perm2 __asm__ ("v20") =
|
|
1158 | 1159 |
(const vector unsigned char) |
1159 | 1160 |
{0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, |
1160 | 1161 |
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; |
1161 |
register const vector unsigned char perm3 REG_v(v21)=
|
|
1162 |
register const vector unsigned char perm3 __asm__ ("v21") =
|
|
1162 | 1163 |
(const vector unsigned char) |
1163 | 1164 |
{0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, |
1164 | 1165 |
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; |
1165 | 1166 |
|
1166 |
#define ONEITERBUTTERFLY(i, res1, res2) \
|
|
1167 |
{ \
|
|
1168 |
register vector unsigned char src1 REG_v(v22), \
|
|
1169 |
src2 REG_v(v23), \
|
|
1170 |
dst1 REG_v(v24), \
|
|
1171 |
dst2 REG_v(v25), \
|
|
1172 |
srcO REG_v(v22), \
|
|
1173 |
dstO REG_v(v23); \
|
|
1167 |
#define ONEITERBUTTERFLY(i, res1, res2) \ |
|
1168 |
{ \ |
|
1169 |
register vector unsigned char src1 __asm__ ("v22"), \
|
|
1170 |
src2 __asm__ ("v23"), \
|
|
1171 |
dst1 __asm__ ("v24"), \
|
|
1172 |
dst2 __asm__ ("v25"), \
|
|
1173 |
srcO __asm__ ("v22"), \
|
|
1174 |
dstO __asm__ ("v23"); \
|
|
1174 | 1175 |
\ |
1175 |
register vector signed short srcV REG_v(v24), \
|
|
1176 |
dstV REG_v(v25), \
|
|
1177 |
srcW REG_v(v26), \
|
|
1178 |
dstW REG_v(v27), \
|
|
1179 |
but0 REG_v(v28), \
|
|
1180 |
but0S REG_v(v29), \
|
|
1181 |
op1 REG_v(v30), \
|
|
1182 |
but1 REG_v(v22), \
|
|
1183 |
op1S REG_v(v23), \
|
|
1184 |
but1S REG_v(v24), \
|
|
1185 |
op2 REG_v(v25), \
|
|
1186 |
but2 REG_v(v26), \
|
|
1187 |
op2S REG_v(v27), \
|
|
1188 |
but2S REG_v(v28), \
|
|
1189 |
op3 REG_v(v29), \
|
|
1190 |
op3S REG_v(v30); \
|
|
1176 |
register vector signed short srcV __asm__ ("v24"), \
|
|
1177 |
dstV __asm__ ("v25"), \
|
|
1178 |
srcW __asm__ ("v26"), \
|
|
1179 |
dstW __asm__ ("v27"), \
|
|
1180 |
but0 __asm__ ("v28"), \
|
|
1181 |
but0S __asm__ ("v29"), \
|
|
1182 |
op1 __asm__ ("v30"), \
|
|
1183 |
but1 __asm__ ("v22"), \
|
|
1184 |
op1S __asm__ ("v23"), \
|
|
1185 |
but1S __asm__ ("v24"), \
|
|
1186 |
op2 __asm__ ("v25"), \
|
|
1187 |
but2 __asm__ ("v26"), \
|
|
1188 |
op2S __asm__ ("v27"), \
|
|
1189 |
but2S __asm__ ("v28"), \
|
|
1190 |
op3 __asm__ ("v29"), \
|
|
1191 |
op3S __asm__ ("v30"); \
|
|
1191 | 1192 |
\ |
1192 | 1193 |
src1 = vec_ld(stride * i, src); \ |
1193 | 1194 |
src2 = vec_ld((stride * i) + 16, src); \ |
Also available in: Unified diff