Revision 9c76bd48 libavcodec/ppc/dsputil_altivec.c

View differences:

libavcodec/ppc/dsputil_altivec.c
137 137
    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
138 138
    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
139 139
    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
140
    vector unsigned short avghv, avglv, two, shift_mask;
140
    vector unsigned short avghv, avglv, two;
141 141
    vector unsigned short t1, t2, t3, t4;
142 142
    vector unsigned int sad;
143 143
    vector signed int sumdiffs;
144 144

  
145
    shift_mask = (vector unsigned short) (0x3fff, 0x3fff, 0x3fff, 0x3fff,
146
                                          0x3fff, 0x3fff, 0x3fff, 0x3fff);
147 145
    zero = vec_splat_u8(0);
148 146
    two = vec_splat_u16(2);
149 147
    sad = vec_splat_u32(0);
......
205 203
        t3 = vec_add(pix3hv, pix3ihv);
206 204
        t4 = vec_add(pix3lv, pix3ilv);
207 205

  
208
        avghv = vec_add(vec_add(t1, t3), two);
209
        avghv= vec_and(vec_srl(avghv, two), shift_mask);
210

  
211
        avglv = vec_add(vec_add(t2, t4), two);
212
        avglv = vec_and(vec_srl(avglv, two), shift_mask);
206
        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
207
        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);
213 208

  
214 209
        /* Pack the shorts back into a result */
215 210
        avgv = vec_pack(avghv, avglv);
......
323 318
    int s, i;
324 319
    vector unsigned char *tv, zero;
325 320
    vector unsigned char pixv;
326
    vector unsigned short pixlv, pixhv, zeros;
327 321
    vector unsigned int sv;
328 322
    vector signed int sum;
329
    vector unsigned char perm_stoint_h = (vector unsigned char)
330
        (16, 16, 0, 1, 16, 16, 2, 3, 16, 16, 4, 5, 16, 16, 6, 7);
331
    
332
    vector unsigned char perm_stoint_l = (vector unsigned char)
333
        (16, 16, 8, 9, 16, 16, 10, 11, 16, 16, 12, 13, 16, 16, 14, 15);
334 323
        
335 324
    zero = vec_splat_u8(0);
336
    zeros = vec_splat_u16(0);
337 325
    sv = vec_splat_u32(0);
338 326
    
339 327
    s = 0;
......
342 330
        tv = (vector unsigned char *) pix;
343 331
        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
344 332

  
345
        /* Split them into two vectors of shorts */
346
        pixhv = (vector unsigned short) vec_mergeh(zero, pixv);
347
        pixlv = (vector unsigned short) vec_mergel(zero, pixv);
348

  
349
        
350
        /* Square the values and add them to our sum */
351
        sv = vec_msum(pixhv, pixhv, sv);
352
        sv = vec_msum(pixlv, pixlv, sv);
333
        /* Square the values, and add them to our sum */
334
        sv = vec_msum(pixv, pixv, sv);
353 335

  
354 336
        pix += line_size;
355 337
    }
......
361 343
    return s;
362 344
}
363 345

  
346

  
347
int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
348
{
349
    int s, i;
350
    vector unsigned char *tv, zero;
351
    vector unsigned char pix1v, pix2v, t5;
352
    vector unsigned int sv;
353
    vector signed int sum;
354

  
355
    zero = vec_splat_u8(0);
356
    sv = vec_splat_u32(0);
357
    s = 0;
358
    for (i = 0; i < 16; i++) {
359
        /* Read in the potentially unaligned pixels */
360
        tv = (vector unsigned char *) pix1;
361
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
362

  
363
        tv = (vector unsigned char *) pix2;
364
        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix2));
365

  
366
        /*
367
           Since we want to use unsigned chars, we can take advantage
368
           of the fact that abs(a-b)^2 = (a-b)^2.
369
        */
370
        
371
        /* Calculate a sum of abs differences vector */
372
        t5 = vec_sub(vec_max(pix1v, pix2v), vec_min(pix1v, pix2v));
373

  
374
        /* Square the values and add them to our sum */
375
        sv = vec_msum(t5, t5, sv);
376
        
377
        pix1 += line_size;
378
        pix2 += line_size;
379
    }
380
    /* Sum up the four partial sums, and put the result into s */
381
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
382
    sum = vec_splat(sum, 3);
383
    vec_ste(sum, 0, &s);
384
    return s;
385
}
386

  
387

  
364 388
int pix_sum_altivec(UINT8 * pix, int line_size)
365 389
{
366 390

  

Also available in: Unified diff