Revision 2a5a1bda

View differences:

libavcodec/ppc/dsputil_altivec.c
1647 1647
#endif /* CONFIG_DARWIN */
1648 1648
    return 0;
1649 1649
}
1650

  
1651
/*
 * avg_pixels8_xy2_altivec: for h rows of 8 pixels, compute the rounded
 * average of each 2x2 neighbourhood of the source,
 *     (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2,
 * then average that value with the byte already stored in block and write
 * the result back to block.
 *
 *   block     - destination, read and written; advanced by line_size per row
 *   pixels    - source; rows 0..h are read (one more row than is written),
 *               and each row is read one byte past the 8 output columns
 *   line_size - byte stride applied to both block and pixels
 *   h         - number of rows to produce
 *
 * Two implementations are selected at compile time: a scalar reference
 * version (ALTIVEC_USE_REFERENCE_C_CODE) and the AltiVec version.
 */
/* next one assumes that ((line_size % 8) == 0) */
1652
void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h)
1653
{
1654
POWERPC_PERF_DECLARE(altivec_avg_pixels8_xy2_num, 1);
1655
#ifdef ALTIVEC_USE_REFERENCE_C_CODE
1656

  
1657
    int j;
1658
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
1659
 /* The 8-pixel row is handled as two independent 4-byte columns (j = 0, 1). */
 for (j = 0; j < 2; j++) {
1660
   int             i;
1661
   const uint32_t  a = (((const struct unaligned_32 *) (pixels))->l);
1662
   const uint32_t  b = (((const struct unaligned_32 *) (pixels + 1))->l);
1663
   /* Four bytes are summed in parallel inside one 32-bit word: the low 2
      bits (l*) and the high 6 bits pre-shifted by 2 (h*) of every byte are
      accumulated separately so no carry crosses a byte boundary.  The
      0x02 per byte is the rounding term of the final >> 2. */
   uint32_t        l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1664
   uint32_t        h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1665
   uint32_t        l1, h1;
1666
   pixels += line_size;
1667
   /* Two output rows per iteration: l0/h0 and l1/h1 alternate as the
      "previous row" sums, and only l0 carries the rounding term.  Because
      of the step of 2, an odd h would write h + 1 rows while the pointer
      rewind after the loop assumes exactly h. */
   for (i = 0; i < h; i += 2) {
1668
     uint32_t        a = (((const struct unaligned_32 *) (pixels))->l);
1669
     uint32_t        b = (((const struct unaligned_32 *) (pixels + 1))->l);
1670
     l1 = (a & 0x03030303UL) + (b & 0x03030303UL);
1671
     h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1672
     /* NOTE(review): rnd_avg32 is defined elsewhere; presumably a per-byte
        rounding average of its two arguments - confirm. */
     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1673
     pixels += line_size;
1674
     block += line_size;
1675
     a = (((const struct unaligned_32 *) (pixels))->l);
1676
     b = (((const struct unaligned_32 *) (pixels + 1))->l);
1677
     l0 = (a & 0x03030303UL) + (b & 0x03030303UL) + 0x02020202UL;
1678
     h0 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2);
1679
     *((uint32_t *) block) = rnd_avg32(*((uint32_t *) block), h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));
1680
     pixels += line_size;
1681
     block += line_size;
1682
   /* Rewind both pointers to the top row and step 4 bytes right for the
      second column; pixels was advanced one row more than block. */
   } pixels += 4 - line_size * (h + 1);
1683
   block += 4 - line_size * h;
1684
 }
1685
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1686
#else /* ALTIVEC_USE_REFERENCE_C_CODE */
1687
   register int i;
1688
   register vector unsigned char
1689
     pixelsv1, pixelsv2,
1690
     pixelsavg;
1691
   register vector unsigned char
1692
     blockv, temp1, temp2, blocktemp;
1693
   register vector unsigned short
1694
     pixelssum1, pixelssum2, temp3;
1695
   register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
1696
   register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
1697
   
1698
   /* Prime pixelssum1 with the first source row.  vec_ld only loads aligned
      quadwords, so the two quadwords covering the row are loaded and
      vec_perm with the vec_lvsl mask extracts the unaligned bytes. */
   temp1 = vec_ld(0, pixels);
1699
   temp2 = vec_ld(16, pixels);
1700
   pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
1701
   /* When pixels + 1 is 16-byte aligned, vec_lvsl(1, pixels) yields the
      identity mask and the permute would return temp1; the bytes wanted
      are exactly temp2, so it is used directly. */
   if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F)
1702
   {
1703
     pixelsv2 = temp2;
1704
   }
1705
   else
1706
   {
1707
     pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
1708
   }
1709
   /* Merging with zero widens the first 8 bytes to 16-bit lanes (see the
      casts to vector unsigned short below) so the sums cannot overflow. */
   pixelsv1 = vec_mergeh(vczero, pixelsv1);
1710
   pixelsv2 = vec_mergeh(vczero, pixelsv2);
1711
   pixelssum1 = vec_add((vector unsigned short)pixelsv1,
1712
                        (vector unsigned short)pixelsv2);
1713
   /* pixelssum1 = row[x] + row[x+1] + 2, the +2 being the rounding term. */
   pixelssum1 = vec_add(pixelssum1, vctwo);
1714
   
1715
POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); 
1716
   for (i = 0; i < h ; i++) {
1717
     /* Non-zero when block is not 16-byte aligned.
        NOTE(review): presumably block is then 8-byte aligned, so the 8
        output bytes are the second half of the quadword - confirm against
        callers. */
     int rightside = ((unsigned long)block & 0x0000000F);
1718
     blockv = vec_ld(0, block);
1719

  
1720
     /* Load the next source row (offset line_size) the same way as above. */
     temp1 = vec_ld(line_size, pixels);
1721
     temp2 = vec_ld(line_size + 16, pixels);
1722
     pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
1723
     if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F)
1724
     {
1725
       pixelsv2 = temp2;
1726
     }
1727
     else
1728
     {
1729
       pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
1730
     }
1731

  
1732
     pixelsv1 = vec_mergeh(vczero, pixelsv1);
1733
     pixelsv2 = vec_mergeh(vczero, pixelsv2);
1734
     pixelssum2 = vec_add((vector unsigned short)pixelsv1,
1735
                          (vector unsigned short)pixelsv2);
1736
     /* (previous row sum + 2 + this row sum) >> 2 is the 2x2 average. */
     temp3 = vec_add(pixelssum1, pixelssum2);
1737
     temp3 = vec_sra(temp3, vctwo);
1738
     /* This row becomes the "previous row" of the next iteration. */
     pixelssum1 = vec_add(pixelssum2, vctwo);
1739
     /* Narrow back to bytes; the 8 averages land in the first 8 bytes. */
     pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
1740
     
1741
     /* Build a quadword that holds the 8 new averages in the half addressed
        by block and the existing destination bytes in the other half.
        NOTE(review): vcprm is defined elsewhere; presumably it builds a
        32-bit-word permute mask where N selects word N of blockv and sN
        word N of pixelsavg - confirm. */
     if (rightside)
1742
     {
1743
       blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
1744
     }
1745
     else
1746
     {
1747
       blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
1748
     }
1749
     
1750
     /* In the half where blocktemp equals blockv, the average of a byte
        with itself leaves it unchanged; the other half becomes the rounded
        average of the old destination byte and the new 2x2 average. */
     blockv = vec_avg(blocktemp, blockv);
1751
     /* Stores the whole aligned quadword, including the unchanged half. */
     vec_st(blockv, 0, block);
1752
     
1753
     block += line_size;
1754
     pixels += line_size;
1755
   }
1756
   
1757
POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
1758
#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
1759
}
libavcodec/ppc/dsputil_altivec.h
48 48
extern void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, int line_size, int h);
49 49
extern int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
50 50
extern int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h);
51
/* 8-pixel-wide 2x2 ("xy2") average, itself averaged into block; defined in dsputil_altivec.c */
extern void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
51 52

  
52 53
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder);
53 54

  
libavcodec/ppc/dsputil_ppc.c
62 62
  "put_no_rnd_pixels16_xy2_altivec",
63 63
  "hadamard8_diff8x8_altivec",
64 64
  "hadamard8_diff16_altivec",
65
  "avg_pixels8_xy2_altivec",
65 66
  "clear_blocks_dcbz32_ppc",
66 67
  "clear_blocks_dcbz128_ppc"
67 68
};
......
268 269
        /* the two functions do the same thing, so use the same code */
269 270
        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
270 271
        c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
271
// next one disabled as it's untested.
272
#if 0
273 272
        c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
274
#endif /* 0 */
273
	c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
275 274
        c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
276 275
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
277 276
        c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
libavcodec/ppc/dsputil_ppc.h
52 52
  altivec_put_no_rnd_pixels16_xy2_num,
53 53
  altivec_hadamard8_diff8x8_num,
54 54
  altivec_hadamard8_diff16_num,
55
  altivec_avg_pixels8_xy2_num,
55 56
  powerpc_clear_blocks_dcbz32,
56 57
  powerpc_clear_blocks_dcbz128,
57 58
  powerpc_perf_total

Also available in: Unified diff