Revision c73d99e6 libavcodec/ppc/float_altivec.c

View differences:

libavcodec/ppc/float_altivec.c
122 122
    }
123 123
}
124 124

  
125
/**
 * Convert 32-bit integers to float and scale each one by a constant.
 *
 * Computes dst[k] = (float)src[k] * mul, eight elements per iteration
 * (two 4-wide vectors).
 *
 * @param dst destination float buffer, written with vec_st
 * @param src source integer buffer, read with vec_ld
 * @param mul scale factor applied to every converted element
 * @param len number of elements; the loop advances in steps of 8, so a
 *            length that is not a multiple of 8 is effectively rounded up
 *
 * NOTE(review): vec_ld/vec_st are used without any realignment, so both
 * buffers are presumably required to be 16-byte aligned -- confirm with
 * the callers.
 */
static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len)
{
    union {
        vector float v;
        float s[4];
    } scale;
    const vector float zero = (vector float)vec_splat_u32(0);
    vector float lo, hi, factor;
    int pos;

    /* Only lane 0 is filled in; the splat below reads just that lane. */
    scale.s[0] = mul;
    factor     = vec_splat(scale.v, 0);

    for (pos = 0; pos < len; pos += 8) {
        const int *in  = src + pos;
        float     *out = dst + pos;

        /* Both loads are issued before either store, as in-place use
         * (dst overlapping src) would otherwise observe partial results. */
        lo = vec_ctf(vec_ld( 0, in), 0);
        hi = vec_ctf(vec_ld(16, in), 0);

        /* x * factor + 0: multiply expressed as a fused multiply-add. */
        vec_st(vec_madd(lo, factor, zero),  0, out);
        vec_st(vec_madd(hi, factor, zero), 16, out);
    }
}
147

  
148

  
149
/**
 * Convert eight consecutive floats to signed 16-bit integers.
 *
 * Loads two 4-float vectors at byte offsets 0 and 16 from src, converts
 * each to signed 32-bit integers with vec_cts (scale exponent 0), and
 * narrows the pair to eight 16-bit values with vec_packs.
 *
 * @param src pointer to at least eight floats, read with vec_ld
 * @return the eight converted samples, first vector's lanes first
 */
static vector signed short
float_to_int16_one_altivec(const float *src)
{
    const vector float lo = vec_ld( 0, src);
    const vector float hi = vec_ld(16, src);

    return vec_packs(vec_cts(lo, 0), vec_cts(hi, 0));
}
158

  
159
/**
 * Convert a float buffer to signed 16-bit samples.
 *
 * Samples are processed in groups of eight via
 * float_to_int16_one_altivec(); the loop bound is len-7, so up to seven
 * trailing samples are left unconverted.
 *
 * @param dst destination buffer; may be misaligned with respect to 16 bytes
 * @param src source floats, passed straight to float_to_int16_one_altivec()
 * @param len number of samples
 */
static void float_to_int16_altivec(int16_t *dst, const float *src, long len)
{
    int i;
    vector signed short head, tail, edges, samples;
    vector unsigned char shift;

    if (!(((long)dst) & 15)) {
        /* 16-byte aligned destination: store each group directly. */
        for (i = 0; i < len - 7; i += 8) {
            samples = float_to_int16_one_altivec(src + i);
            vec_st(samples, 0, dst + i);
        }
        return;
    }

    /* Misaligned destination (marked FIXME by the original author):
     * read-modify-write of the two aligned 16-byte blocks that the
     * 16 output bytes straddle. */
    for (i = 0; i < len - 7; i += 8) {
        int16_t *out = dst + i;

        head    = vec_ld(0, out);
        samples = float_to_int16_one_altivec(src + i);
        tail    = vec_ld(15, out);
        /* Gather the bytes of both blocks that lie outside the output
         * window so they can be written back unchanged. */
        edges   = vec_perm(tail, head, vec_lvsl(0, out));
        shift   = vec_lvsr(0, out);
        head    = vec_perm(edges, samples, shift);
        tail    = vec_perm(samples, edges, shift);
        vec_st(head,  0, out);
        vec_st(tail, 15, out);
    }
}
182

  
183
/**
 * Convert planar float channels to interleaved signed 16-bit samples.
 *
 * - channels == 1: forwards to float_to_int16_altivec().
 * - channels == 2: converts eight samples of each channel at a time and
 *   interleaves them with vec_mergeh/vec_mergel; a misaligned dst is
 *   handled by a read-modify-write of the surrounding aligned blocks.
 * - otherwise: converts eight samples of one channel at a time into a
 *   small aligned scratch buffer and scatters them with stride
 *   `channels`.
 *
 * In every branch only complete groups of eight samples per channel are
 * converted (loop bound len-7); up to seven trailing samples per channel
 * are left untouched in dst.
 *
 * @param dst      interleaved output, channels * len samples
 * @param src      array of `channels` pointers to planar float input,
 *                 each passed to float_to_int16_one_altivec()
 * @param len      number of samples per channel
 * @param channels number of channels
 */
static void
float_to_int16_interleave_altivec(int16_t *dst, const float **src,
                                  long len, int channels)
{
    long i; /* same width as len so the comparison cannot overflow */
    vector signed short d0, d1, d2, c0, c1, t0, t1;
    vector unsigned char align;

    if (channels == 1) {
        float_to_int16_altivec(dst, src[0], len);
    } else if (channels == 2) {
        if (((long)dst) & 15) {
            for (i = 0; i < len - 7; i += 8) {
                d0 = vec_ld(0, dst + i);
                t0 = float_to_int16_one_altivec(src[0] + i);
                d1 = vec_ld(31, dst + i);
                t1 = float_to_int16_one_altivec(src[1] + i);
                c0 = vec_mergeh(t0, t1);
                c1 = vec_mergel(t0, t1);
                /* d2 keeps the bytes before and after the 32-byte output
                 * window so they are written back unchanged. */
                d2 = vec_perm(d1, d0, vec_lvsl(0, dst + i));
                align = vec_lvsr(0, dst + i);
                d0 = vec_perm(d2, c0, align);
                d1 = vec_perm(c0, c1, align);
                vec_st(d0,  0, dst + i);
                d0 = vec_perm(c1, d2, align);
                vec_st(d1, 15, dst + i);
                vec_st(d0, 31, dst + i);
                /* i advances by 8 as well: 16 output samples per pass. */
                dst += 8;
            }
        } else {
            for (i = 0; i < len - 7; i += 8) {
                t0 = float_to_int16_one_altivec(src[0] + i);
                t1 = float_to_int16_one_altivec(src[1] + i);
                d0 = vec_mergeh(t0, t1);
                d1 = vec_mergel(t0, t1);
                vec_st(d0,  0, dst + i);
                vec_st(d1, 16, dst + i);
                /* i advances by 8 as well: 16 output samples per pass. */
                dst += 8;
            }
        }
    } else {
        /* Fixed-size scratch instead of a len-sized stack array: stack
         * use no longer depends on len, and only samples that were
         * actually converted are ever copied to dst. */
        DECLARE_ALIGNED(16, int16_t, tmp)[8];
        int c, k;

        for (c = 0; c < channels; c++) {
            int16_t *out = dst + c;

            for (i = 0; i < len - 7; i += 8) {
                vec_st(float_to_int16_one_altivec(src[c] + i), 0, tmp);
                for (k = 0; k < 8; k++, out += channels)
                    *out = tmp[k];
            }
        }
    }
}
233

  
234 125
/**
 * Install the AltiVec float routines into a DSPContext.
 *
 * vector_fmul, vector_fmul_reverse, vector_fmul_add and
 * int32_to_float_fmul_scalar are always set. vector_fmul_window,
 * float_to_int16 and float_to_int16_interleave are set only when
 * CODEC_FLAG_BITEXACT is not present in avctx->flags.
 *
 * @param c     DSP function table to fill in
 * @param avctx codec context whose flags are inspected
 */
void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
    c->vector_fmul                = vector_fmul_altivec;
    c->vector_fmul_reverse        = vector_fmul_reverse_altivec;
    c->vector_fmul_add            = vector_fmul_add_altivec;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;

    /* Bit-exact mode requested: leave the remaining pointers alone. */
    if (avctx->flags & CODEC_FLAG_BITEXACT)
        return;

    c->vector_fmul_window        = vector_fmul_window_altivec;
    c->float_to_int16            = float_to_int16_altivec;
    c->float_to_int16_interleave = float_to_int16_interleave_altivec;
}

Also available in: Unified diff