Revision 9c76bd48 libavcodec/ppc/dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c  

137  137 
vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; 
138  138 
vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; 
139  139 
vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; 
140 
vector unsigned short avghv, avglv, two, shift_mask;


140 
vector unsigned short avghv, avglv, two; 

141  141 
vector unsigned short t1, t2, t3, t4; 
142  142 
vector unsigned int sad; 
143  143 
vector signed int sumdiffs; 
144  144  
145 
shift_mask = (vector unsigned short) (0x3fff, 0x3fff, 0x3fff, 0x3fff, 

146 
0x3fff, 0x3fff, 0x3fff, 0x3fff); 

147  145 
zero = vec_splat_u8(0); 
148  146 
two = vec_splat_u16(2); 
149  147 
sad = vec_splat_u32(0); 
...  ...  
205  203 
t3 = vec_add(pix3hv, pix3ihv); 
206  204 
t4 = vec_add(pix3lv, pix3ilv); 
207  205  
208 
avghv = vec_add(vec_add(t1, t3), two); 

209 
avghv= vec_and(vec_srl(avghv, two), shift_mask); 

210  
211 
avglv = vec_add(vec_add(t2, t4), two); 

212 
avglv = vec_and(vec_srl(avglv, two), shift_mask); 

206 
avghv = vec_sr(vec_add(vec_add(t1, t3), two), two); 

207 
avglv = vec_sr(vec_add(vec_add(t2, t4), two), two); 

213  208  
214  209 
/* Pack the shorts back into a result */ 
215  210 
avgv = vec_pack(avghv, avglv); 
...  ...  
323  318 
int s, i; 
324  319 
vector unsigned char *tv, zero; 
325  320 
vector unsigned char pixv; 
326 
vector unsigned short pixlv, pixhv, zeros; 

327  321 
vector unsigned int sv; 
328  322 
vector signed int sum; 
329 
vector unsigned char perm_stoint_h = (vector unsigned char) 

330 
(16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7); 

331 


332 
vector unsigned char perm_stoint_l = (vector unsigned char) 

333 
(16, 16, 8, 9, 16, 16, 10, 11, 16, 16, 12, 13, 16, 16, 14, 15); 

334  323 

335  324 
zero = vec_splat_u8(0); 
336 
zeros = vec_splat_u16(0); 

337  325 
sv = vec_splat_u32(0); 
338  326 

339  327 
s = 0; 
...  ...  
342  330 
tv = (vector unsigned char *) pix; 
343  331 
pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); 
344  332  
345 
/* Split them into two vectors of shorts */ 

346 
pixhv = (vector unsigned short) vec_mergeh(zero, pixv); 

347 
pixlv = (vector unsigned short) vec_mergel(zero, pixv); 

348  
349 


350 
/* Square the values and add them to our sum */ 

351 
sv = vec_msum(pixhv, pixhv, sv); 

352 
sv = vec_msum(pixlv, pixlv, sv); 

333 
/* Square the values, and add them to our sum */ 

334 
sv = vec_msum(pixv, pixv, sv); 

353  335  
354  336 
pix += line_size; 
355  337 
} 
...  ...  
361  343 
return s; 
362  344 
} 
363  345  
346  
347 
/*
 * pix_norm_altivec: sum of squared differences between two pixel blocks.
 *
 * Runs 16 iterations. Each iteration loads one 16-byte vector from pix1 and
 * one from pix2 (either pointer may be unaligned), takes the per-byte
 * absolute difference, squares it and accumulates the result. Both pointers
 * are advanced by line_size after every iteration.
 *
 * pix1, pix2: start of the two blocks being compared
 * line_size:  byte offset added to both pointers after each iteration
 *
 * Returns the accumulated sum as an int.
 */
int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) 

348 
{ 

349 
int s, i; 

350 
vector unsigned char *tv, zero; 

351 
vector unsigned char pix1v, pix2v, t5; 

352 
vector unsigned int sv; 

353 
vector signed int sum; 

354  
355 
zero = vec_splat_u8(0); 

356 
/* sv holds four 32-bit partial sums, all starting at zero */
sv = vec_splat_u32(0); 

357 
s = 0; 

358 
for (i = 0; i < 16; i++) { 

359 
/* Read in the potentially unaligned pixels: two consecutive vectors
   are fetched through tv and vec_perm, driven by the vec_lvsl
   alignment mask, extracts the 16 bytes that start at the pointer.
   NOTE(review): tv[1] is read even when the pointer is already
   16-byte aligned -- confirm the caller's buffers tolerate that. */ 

360 
tv = (vector unsigned char *) pix1; 

361 
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); 

362  
363 
tv = (vector unsigned char *) pix2; 

364 
pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix2)); 

365  
366 
/* 

367 
Since we want to use unsigned chars, we can take advantage 

368 
of the fact that abs(a-b)^2 = (a-b)^2. 

369 
*/ 

370 


371 
/* Calculate a vector of absolute differences: max - min never
   underflows, so the result stays within an unsigned char */ 

372 
t5 = vec_sub(vec_max(pix1v, pix2v), vec_min(pix1v, pix2v)); 

373  
374 
/* Square the values and add them to our sum: vec_msum multiplies
   the bytes pairwise and adds each group of four products to the
   matching 32-bit element of sv */ 

375 
sv = vec_msum(t5, t5, sv); 

376 


377 
pix1 += line_size; 

378 
pix2 += line_size; 

379 
} 

380 
/* Sum up the four partial sums, and put the result into s.
   vec_sums leaves the total in element 3; splatting that element
   makes every element hold the total, so the single-element store
   done by vec_ste writes the total into s whichever element it picks. */ 

381 
sum = vec_sums((vector signed int) sv, (vector signed int) zero); 

382 
sum = vec_splat(sum, 3); 

383 
vec_ste(sum, 0, &s); 

384 
return s; 

385 
} 

386  
387  
364  388 
int pix_sum_altivec(UINT8 * pix, int line_size) 
365  389 
{ 
366  390 
