Revision c663cb0d
libavcodec/ppc/h264_altivec.c  

596  596 
ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); 
597  597 
} 
598  598  
599 
// TODO: implement this in AltiVec 

600 
static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride) { 

601 
int i, j; 

602 
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; 

603 
int dc = (block[0] + 32) >> 6; 

604 
for( j = 0; j < 8; j++ ) 

605 
{ 

606 
for( i = 0; i < 8; i++ ) 

607 
dst[i] = cm[ dst[i] + dc ]; 

608 
dst += stride; 

599 
/**
 * Add the (rounded) DC term of an IDCT block to a size x size region of dst,
 * clipping each result to [0,255], using AltiVec.
 *
 * @param dst    destination pixel block
 * @param block  coefficient block; only block[0] (the DC coefficient) is read
 * @param stride byte stride between destination rows
 * @param size   edge length of the block, 4 or 8 (compile-time constant,
 *               function is always_inline so the size==4 branch folds away)
 */
static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
{
    vec_s16 dc16;
    vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
    LOAD_ZERO;
    DECLARE_ALIGNED_16(int, dc);  // aligned so vec_lde can load it directly
    int i;

    // Scalar DC value with H.264 rounding (add 32, shift right by 6).
    dc = (block[0] + 32) >> 6;
    // Load the int into a vector and splat its low 16 bits across all 8 lanes.
    dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);

    if (size == 4)
        // For 4x4 blocks only the first 4 bytes per row are touched; shift so
        // the upper half of the vector becomes zero (adding/subtracting 0
        // leaves the untouched pixels unchanged when stored back).
        dc16 = vec_sld(dc16, zero_s16v, 8);
    // Unsigned pixels cannot take a signed add directly, so split the DC into
    // two non-negative parts via saturating packs:
    //   dcplus  = max(dc, 0)   (packsu clamps negatives to 0)
    //   dcminus = max(-dc, 0)
    // Then dst + dcplus - dcminus == dst + dc, with vec_adds/vec_subs also
    // providing the [0,255] clipping for free.
    dcplus = vec_packsu(dc16, zero_s16v);
    dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);

    // Rotate the DC bytes so they line up with dst's offset within its
    // 16-byte-aligned load/store window (vec_ld/vec_st ignore the low 4
    // address bits). NOTE(review): the bytes of the vector outside the
    // rotated DC region are zero, so the adjacent pixels covered by the
    // 16-byte store are read and written back unchanged — this assumes no
    // concurrent writer touches those bytes; confirm against callers.
    aligner = vec_lvsr(0, dst);
    dcplus = vec_perm(dcplus, dcplus, aligner);
    dcminus = vec_perm(dcminus, dcminus, aligner);

    // Process 4 rows per iteration: 1 pass for size==4, 2 passes for size==8.
    for (i = 0; i < size; i += 4) {
        v0 = vec_ld(0, dst+0*stride);
        v1 = vec_ld(0, dst+1*stride);
        v2 = vec_ld(0, dst+2*stride);
        v3 = vec_ld(0, dst+3*stride);

        // Saturating add of the non-negative part of dc...
        v0 = vec_adds(v0, dcplus);
        v1 = vec_adds(v1, dcplus);
        v2 = vec_adds(v2, dcplus);
        v3 = vec_adds(v3, dcplus);

        // ...then saturating subtract of the non-negative part of -dc.
        v0 = vec_subs(v0, dcminus);
        v1 = vec_subs(v1, dcminus);
        v2 = vec_subs(v2, dcminus);
        v3 = vec_subs(v3, dcminus);

        vec_st(v0, 0, dst+0*stride);
        vec_st(v1, 0, dst+1*stride);
        vec_st(v2, 0, dst+2*stride);
        vec_st(v3, 0, dst+3*stride);

        dst += 4*stride;
    }
}
611  643  
644 
/** DC-only IDCT add for a 4x4 block (AltiVec). */
static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 4);
}

648  
649 
/** DC-only IDCT add for an 8x8 block (AltiVec). */
static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 8);
}

653  
612  654 
static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){ 
613  655 
int i; 
614  656 
for(i=0; i<16; i+=4){ 
...  ...  
903  945 
h264_idct_add16, h264_idct_add16intra, h264_idct_add8 are implemented 
904  946 
c->h264_idct_add = ff_h264_idct_add_altivec;
905  947 
*/ 
948 
c->h264_idct_dc_add = h264_idct_dc_add_altivec;

949 
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec;

906  950 
c->h264_idct8_add = ff_h264_idct8_add_altivec;
907  951 
c->h264_idct8_add4 = ff_h264_idct8_add4_altivec;
908  952 
c->h264_v_loop_filter_luma = h264_v_loop_filter_luma_altivec;
Also available in: Unified diff