Revision b1159ad9 libavcodec/ppc/int_altivec.c

libavcodec/ppc/int_altivec.c
@@ -79,34 +79,6 @@
     return u.score[3];
 }
 
-static void add_int16_altivec(int16_t * v1, int16_t * v2, int order)
-{
-    int i;
-    register vec_s16 vec, *pv;
-
-    for(i = 0; i < order; i += 8){
-        pv = (vec_s16*)v2;
-        vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
-        vec_st(vec_add(vec_ld(0, v1), vec), 0, v1);
-        v1 += 8;
-        v2 += 8;
-    }
-}
-
-static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
-{
-    int i;
-    register vec_s16 vec, *pv;
-
-    for(i = 0; i < order; i += 8){
-        pv = (vec_s16*)v2;
-        vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
-        vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
-        v1 += 8;
-        v2 += 8;
-    }
-}
-
 static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift)
 {
     int i;
@@ -137,10 +109,44 @@
     return ires;
 }
 
+static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+{
+    LOAD_ZERO;
+    vec_s16 *pv1 = (vec_s16*)v1;
+    vec_s16 *pv2 = (vec_s16*)v2;
+    vec_s16 *pv3 = (vec_s16*)v3;
+    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};
+    register vec_s16 t0, t1, i0, i1;
+    register vec_s16 i2 = pv2[0], i3 = pv3[0];
+    register vec_s32 res = zero_s32v;
+    register vec_u8 align = vec_lvsl(0, v2);
+    int32_t ires;
+    order >>= 4;
+    do {
+        t0 = vec_perm(i2, pv2[1], align);
+        i2 = pv2[2];
+        t1 = vec_perm(pv2[1], i2, align);
+        i0 = pv1[0];
+        i1 = pv1[1];
+        res = vec_msum(t0, i0, res);
+        res = vec_msum(t1, i1, res);
+        t0 = vec_perm(i3, pv3[1], align);
+        i3 = pv3[2];
+        t1 = vec_perm(pv3[1], i3, align);
+        pv1[0] = vec_mladd(t0, muls, i0);
+        pv1[1] = vec_mladd(t1, muls, i1);
+        pv1 += 2;
+        pv2 += 2;
+        pv3 += 2;
+    } while(--order);
+    res = vec_splat(vec_sums(res, zero_s32v), 3);
+    vec_ste(res, 0, &ires);
+    return ires;
+}
+
 void int_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
     c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
-    c->add_int16 = add_int16_altivec;
-    c->sub_int16 = sub_int16_altivec;
     c->scalarproduct_int16 = scalarproduct_int16_altivec;
+    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
 }
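Note on the new hook: scalarproduct_and_madd_int16_altivec processes 16 samples per loop iteration and fuses two passes over the data, accumulating the dot product of the old v1 with v2 while rewriting v1 in place as v1 + mul*v3. A minimal scalar sketch of that contract follows; the _ref name and exact prototype are illustrative only, inferred from the AltiVec body above (which additionally assumes order is a multiple of 16 and v1 is 16-byte aligned).

static int32_t scalarproduct_and_madd_int16_ref(int16_t *v1, const int16_t *v2,
                                                const int16_t *v3, int order, int mul)
{
    int32_t res = 0;
    int i;

    for (i = 0; i < order; i++) {
        res   += v1[i] * v2[i];   /* dot product uses the old v1 value                */
        v1[i] += mul * v3[i];     /* in-place madd; low 16 bits kept, as vec_mladd does */
    }
    return res;
}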

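The vec_lvsl/vec_perm pattern used by both the deleted helpers and the new routine is the standard AltiVec idiom for reading from a pointer that may not be 16-byte aligned: two aligned loads are spliced together with a permute vector derived from the address. A self-contained sketch, assuming <altivec.h> and that the aligned 16-byte blocks covering the first and last element are readable (the helper name is ours, not part of this revision):

#include <altivec.h>
#include <stdint.h>

static inline vector signed short load_unaligned_s16(const int16_t *p)
{
    vector signed short  lo   = vec_ld(0, p);    /* aligned block containing p[0]          */
    vector signed short  hi   = vec_ld(15, p);   /* aligned block containing p[7]          */
    vector unsigned char perm = vec_lvsl(0, p);  /* byte-shuffle pattern from p's low bits */
    return vec_perm(lo, hi, perm);               /* splice into the 8 unaligned int16_t    */
}

The new routine avoids the second load per vector by keeping the previously loaded aligned vectors (i2, i3) live across iterations, so each 16-byte block of v2 and v3 is fetched only once.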