Revision f2677d6b libavcodec/ppc/dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c  

24  24 
#include <sys/sysctl.h> 
25  25 
#endif 
26  26  
27 
/**
 * Sum of absolute differences between a 16x16 block and the horizontal
 * half-pel interpolation of a second block.
 *
 * For each of 16 rows, every pixel of pix1 is compared against the
 * rounded-up average of pix2[x] and pix2[x + 1].
 *
 * @param pix1      top-left pixel of the first block (any alignment)
 * @param pix2      top-left pixel of the second block (any alignment);
 *                  17 pixels per row are read
 * @param line_size distance in bytes between two consecutive rows
 * @return the accumulated sum of absolute differences
 */
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    vector unsigned char zero;
    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    s = 0;
    zero = vec_splat_u8(0);
    sad = vec_splat_u32(0);

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v:  pix1[0]..pix1[15]
           pix2v:  pix2[0]..pix2[15]
           pix2iv: pix2[1]..pix2[16]

           vec_ld() ignores the low four address bits, so the two loads
           fetch the aligned blocks holding the first and the last wanted
           byte. Using offset 15 (not 16) for the second load means no
           memory past the last wanted byte's block is ever touched.
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(15, pix1),
                         vec_lvsl(0, pix1));

        pix2v = vec_perm(vec_ld(0, &pix2[0]), vec_ld(15, &pix2[0]),
                         vec_lvsl(0, &pix2[0]));

        pix2iv = vec_perm(vec_ld(0, &pix2[1]), vec_ld(15, &pix2[1]),
                          vec_lvsl(0, &pix2[1]));

        /* Calculate the average vector (vec_avg rounds up) */
        avgv = vec_avg(pix2v, pix2iv);

        /* Calculate a vector of absolute differences: max - min */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    /* The total is in element 3; replicate it so that vec_ste stores it
       whichever element the address of s selects. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}

72  
73 
/**
 * Sum of absolute differences between a 16x16 block and the vertical
 * half-pel interpolation of a second block.
 *
 * For each of 16 rows, every pixel of pix1 is compared against the
 * rounded-up average of the pix2 pixel and the pixel one row below it.
 *
 * @param pix1      top-left pixel of the first block (any alignment)
 * @param pix2      top-left pixel of the second block (any alignment);
 *                  17 rows are read
 * @param line_size distance in bytes between two consecutive rows
 * @return the accumulated sum of absolute differences
 */
int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    vector unsigned char zero;
    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    zero = vec_splat_u8(0);
    sad = vec_splat_u32(0);

    /*
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, each
       time around the loop.
       Read unaligned pixels into our vector:
       pix2v: pix2[0]..pix2[15]

       vec_ld() ignores the low four address bits, so the two loads fetch
       the aligned blocks holding the first and the last wanted byte.
       Using offset 15 (not 16) for the second load means no memory past
       the last wanted byte's block is ever touched.
    */
    pix2v = vec_perm(vec_ld(0, &pix2[0]), vec_ld(15, &pix2[0]),
                     vec_lvsl(0, &pix2[0]));

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]..pix1[15]
           pix3v: pix3[0]..pix3[15]
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(15, pix1),
                         vec_lvsl(0, pix1));

        pix3v = vec_perm(vec_ld(0, &pix3[0]), vec_ld(15, &pix3[0]),
                         vec_lvsl(0, &pix3[0]));

        /* Calculate the average vector (vec_avg rounds up) */
        avgv = vec_avg(pix2v, pix3v);

        /* Calculate a vector of absolute differences: max - min */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        /* This row's lower line is the next row's upper line */
        pix2v = pix3v;
        pix3 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    /* The total is in element 3; replicate it so that vec_ste stores it
       whichever element the address of s selects. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}

131  
132 
/**
 * Sum of absolute differences between a 16x16 block and the diagonal
 * (horizontal + vertical) half-pel interpolation of a second block.
 *
 * For each of 16 rows, every pixel of pix1 is compared against
 * (a + b + c + d + 2) >> 2, where a, b are pix2[x], pix2[x + 1] and
 * c, d are the two pixels directly below them.
 *
 * @param pix1      top-left pixel of the first block (any alignment)
 * @param pix2      top-left pixel of the second block (any alignment);
 *                  17 rows of 17 pixels are read
 * @param line_size distance in bytes between two consecutive rows
 * @return the accumulated sum of absolute differences
 */
int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;
    vector unsigned char avgv, t5, zero;
    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
    vector unsigned short avghv, avglv, two;
    vector unsigned short t1, t2, t3, t4;
    vector unsigned int sad;
    vector signed int sumdiffs;

    zero = vec_splat_u8(0);
    two = vec_splat_u16(2);
    sad = vec_splat_u32(0);

    s = 0;

    /*
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, as well
       as some splitting, and vector addition each time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v:  pix2[0]..pix2[15]
       pix2iv: pix2[1]..pix2[16]
       Split the pixel vectors into shorts.

       vec_ld() ignores the low four address bits, so the two loads fetch
       the aligned blocks holding the first and the last wanted byte.
       Using offset 15 (not 16) for the second load means no memory past
       the last wanted byte's block is ever touched.
    */
    pix2v = vec_perm(vec_ld(0, &pix2[0]), vec_ld(15, &pix2[0]),
                     vec_lvsl(0, &pix2[0]));

    pix2iv = vec_perm(vec_ld(0, &pix2[1]), vec_ld(15, &pix2[1]),
                      vec_lvsl(0, &pix2[1]));

    /* Interleaving with zero bytes widens each pixel to a 16-bit value */
    pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
    pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
    t1 = vec_add(pix2hv, pix2ihv);
    t2 = vec_add(pix2lv, pix2ilv);

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v:  pix1[0]..pix1[15]
           pix3v:  pix3[0]..pix3[15]
           pix3iv: pix3[1]..pix3[16]
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(15, pix1),
                         vec_lvsl(0, pix1));

        pix3v = vec_perm(vec_ld(0, &pix3[0]), vec_ld(15, &pix3[0]),
                         vec_lvsl(0, &pix3[0]));

        pix3iv = vec_perm(vec_ld(0, &pix3[1]), vec_ld(15, &pix3[1]),
                          vec_lvsl(0, &pix3[1]));

        /*
           Note that Altivec does have vec_avg, but this works on vector pairs
           and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
           would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
           Instead, we have to split the pixel vectors into vectors of shorts,
           and do the averaging by hand.
        */

        /* Split the pixel vectors into shorts */
        pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
        pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);

        /* Do the averaging on them */
        t3 = vec_add(pix3hv, pix3ihv);
        t4 = vec_add(pix3lv, pix3ilv);

        /*
           (sum + 2) >> 2, shifted per 16-bit element with vec_sr.
           vec_srl must not be used here: it shifts the whole 128-bit
           register and requires every byte of the shift operand to carry
           the same count, which a splat of the short value 2 does not.
        */
        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);

        /* Pack the shorts back into a result */
        avgv = vec_pack(avghv, avglv);

        /* Calculate a vector of absolute differences: max - min */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix3 += line_size;
        /* Transfer the calculated values for pix3 into pix2 */
        t1 = t3;
        t2 = t4;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    /* The total is in element 3; replicate it so that vec_ste stores it
       whichever element the address of s selects. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}

236  
27  237 
int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) 
28  238 
{ 
29  239 
int i, s; 
...  ...  
108  318 
return s; 
109  319 
} 
110  320  
321 
/**
 * Sum of the squares of all pixels in a 16x16 block.
 *
 * @param pix       top-left pixel of the block (any alignment)
 * @param line_size distance in bytes between two consecutive rows
 * @return the sum of pix[x]^2 over the 16 rows of 16 pixels
 */
int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int s, i;
    vector unsigned char pixv;
    vector unsigned int sv;
    vector signed int sum, zero;

    zero = vec_splat_s32(0);
    sv = vec_splat_u32(0);

    s = 0;
    for (i = 0; i < 16; i++) {
        /*
           Read in the potentially unaligned pixels. vec_ld() ignores the
           low four address bits, so the two loads fetch the aligned
           blocks holding the first and the last wanted byte; using
           offset 15 (not 16) for the second load means no memory past
           the last wanted byte's block is ever touched.
        */
        pixv = vec_perm(vec_ld(0, pix), vec_ld(15, pix), vec_lvsl(0, pix));

        /*
           Square the values and add them to our sum. vec_msum on bytes
           multiplies element-wise and adds each group of four products
           to the matching 32-bit word, so no widening to shorts is needed.
        */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sum = vec_sums((vector signed int) sv, zero);
    /* The total is in element 3; replicate it so that vec_ste stores it
       whichever element the address of s selects. */
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &s);

    return s;
}

363  
111  364 
int pix_sum_altivec(UINT8 * pix, int line_size) 
112  365 
{ 
113  366 
Also available in: Unified diff