Revision fe2ff6d2 libavcodec/ppc/float_altivec.c
libavcodec/ppc/float_altivec.c  

122  122 
} 
123  123 
} 
124  124  
125 
static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len) 

126 
{ 

127 
union { 

128 
vector float v; 

129 
float s[4]; 

130 
} mul_u; 

131 
int i; 

132 
vector float src1, src2, dst1, dst2, mul_v, zero; 

133  
134 
zero = (vector float)vec_splat_u32(0); 

135 
mul_u.s[0] = mul; 

136 
mul_v = vec_splat(mul_u.v, 0); 

137  
138 
for(i=0; i<len; i+=8) { 

139 
src1 = vec_ctf(vec_ld(0, src+i), 0); 

140 
src2 = vec_ctf(vec_ld(16, src+i), 0); 

141 
dst1 = vec_madd(src1, mul_v, zero); 

142 
dst2 = vec_madd(src2, mul_v, zero); 

143 
vec_st(dst1, 0, dst+i); 

144 
vec_st(dst2, 16, dst+i); 

145 
} 

146 
} 

147  
148  
149 
static vector signed short 

150 
float_to_int16_one_altivec(const float *src) 

151 
{ 

152 
vector float s0 = vec_ld(0, src); 

153 
vector float s1 = vec_ld(16, src); 

154 
vector signed int t0 = vec_cts(s0, 0); 

155 
vector signed int t1 = vec_cts(s1, 0); 

156 
return vec_packs(t0,t1); 

157 
} 

158  
159 
static void float_to_int16_altivec(int16_t *dst, const float *src, long len) 

160 
{ 

161 
int i; 

162 
vector signed short d0, d1, d; 

163 
vector unsigned char align; 

164 
if(((long)dst)&15) //FIXME 

165 
for(i=0; i<len7; i+=8) { 

166 
d0 = vec_ld(0, dst+i); 

167 
d = float_to_int16_one_altivec(src+i); 

168 
d1 = vec_ld(15, dst+i); 

169 
d1 = vec_perm(d1, d0, vec_lvsl(0,dst+i)); 

170 
align = vec_lvsr(0, dst+i); 

171 
d0 = vec_perm(d1, d, align); 

172 
d1 = vec_perm(d, d1, align); 

173 
vec_st(d0, 0, dst+i); 

174 
vec_st(d1,15, dst+i); 

175 
} 

176 
else 

177 
for(i=0; i<len7; i+=8) { 

178 
d = float_to_int16_one_altivec(src+i); 

179 
vec_st(d, 0, dst+i); 

180 
} 

181 
} 

182  
183 
static void 

184 
float_to_int16_interleave_altivec(int16_t *dst, const float **src, 

185 
long len, int channels) 

186 
{ 

187 
int i; 

188 
vector signed short d0, d1, d2, c0, c1, t0, t1; 

189 
vector unsigned char align; 

190 
if(channels == 1) 

191 
float_to_int16_altivec(dst, src[0], len); 

192 
else 

193 
if (channels == 2) { 

194 
if(((long)dst)&15) 

195 
for(i=0; i<len7; i+=8) { 

196 
d0 = vec_ld(0, dst + i); 

197 
t0 = float_to_int16_one_altivec(src[0] + i); 

198 
d1 = vec_ld(31, dst + i); 

199 
t1 = float_to_int16_one_altivec(src[1] + i); 

200 
c0 = vec_mergeh(t0, t1); 

201 
c1 = vec_mergel(t0, t1); 

202 
d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i)); 

203 
align = vec_lvsr(0, dst + i); 

204 
d0 = vec_perm(d2, c0, align); 

205 
d1 = vec_perm(c0, c1, align); 

206 
vec_st(d0, 0, dst + i); 

207 
d0 = vec_perm(c1, d2, align); 

208 
vec_st(d1, 15, dst + i); 

209 
vec_st(d0, 31, dst + i); 

210 
dst+=8; 

211 
} 

212 
else 

213 
for(i=0; i<len7; i+=8) { 

214 
t0 = float_to_int16_one_altivec(src[0] + i); 

215 
t1 = float_to_int16_one_altivec(src[1] + i); 

216 
d0 = vec_mergeh(t0, t1); 

217 
d1 = vec_mergel(t0, t1); 

218 
vec_st(d0, 0, dst + i); 

219 
vec_st(d1, 16, dst + i); 

220 
dst+=8; 

221 
} 

222 
} else { 

223 
DECLARE_ALIGNED(16, int16_t, tmp)[len]; 

224 
int c, j; 

225 
for (c = 0; c < channels; c++) { 

226 
float_to_int16_altivec(tmp, src[c], len); 

227 
for (i = 0, j = c; i < len; i++, j+=channels) { 

228 
dst[j] = tmp[i]; 

229 
} 

230 
} 

231 
} 

232 
} 

233  
234  125 
/**
 * Install the AltiVec float routines into a DSPContext.
 *
 * vector_fmul, vector_fmul_reverse, vector_fmul_add and
 * int32_to_float_fmul_scalar are always set. vector_fmul_window,
 * float_to_int16 and float_to_int16_interleave are set only when
 * CODEC_FLAG_BITEXACT is not present in avctx->flags.
 * NOTE(review): presumably those three do not produce results bit-identical
 * to the reference implementations -- confirm.
 *
 * @param c     DSP context whose function pointers are filled in
 * @param avctx codec context; only its flags field is read here
 */
void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
    c->vector_fmul = vector_fmul_altivec;
    c->vector_fmul_reverse = vector_fmul_reverse_altivec;
    c->vector_fmul_add = vector_fmul_add_altivec;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;
    if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
        c->vector_fmul_window = vector_fmul_window_altivec;
        c->float_to_int16 = float_to_int16_altivec;
        c->float_to_int16_interleave = float_to_int16_interleave_altivec;
    }
}
Also available in: Unified diff