Revision f2677d6b

View differences:

libavcodec/ppc/dsputil_altivec.c
24 24
#include <sys/sysctl.h>
25 25
#endif
26 26

  
27
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * sampled at a horizontal half-pel position.
 *
 * Every reference sample is vec_avg(pix2[x], pix2[x + 1]), i.e. the average
 * rounded up, so 17 bytes of each reference row contribute to the result.
 *
 * @param pix1      top-left byte of the 16x16 block being compared
 * @param pix2      top-left byte of the reference block
 * @param line_size byte distance between consecutive rows of both blocks
 * @return sum over 16 rows and 16 columns of |pix1 - avg(pix2, pix2 + 1)|
 */
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    s = 0;
    sad = vec_splat_u32(0);

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16]

           vec_ld is used instead of dereferencing a casted vector pointer:
           it is the documented way to fetch the aligned quadword that
           contains an arbitrary byte address, and the vec_lvsl permute
           then extracts the 16 bytes that start at that address.
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(16, pix1),
                         vec_lvsl(0, pix1));

        pix2v = vec_perm(vec_ld(0, pix2), vec_ld(16, pix2),
                         vec_lvsl(0, pix2));

        pix2iv = vec_perm(vec_ld(0, pix2 + 1), vec_ld(16, pix2 + 1),
                          vec_lvsl(0, pix2 + 1));

        /* Calculate the average vector */
        avgv = vec_avg(pix2v, pix2iv);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, vec_splat_s32(0));
    /* vec_ste stores the element selected by the address of s, so make
       every element hold the total first. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
72

  
73
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * sampled at a vertical half-pel position.
 *
 * Every reference sample is vec_avg() of a reference byte and the byte one
 * row below it (average rounded up), so 17 reference rows are read.
 *
 * @param pix1      top-left byte of the 16x16 block being compared
 * @param pix2      top-left byte of the reference block
 * @param line_size byte distance between consecutive rows of both blocks
 * @return sum over 16 rows and 16 columns of
 *         |pix1 - avg(pix2, pix2 + line_size)|
 */
int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    sad = vec_splat_u32(0);

    /*
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, each
       time around the loop.
       Read unaligned pixels into our vector before the loop starts:
       pix2v: pix2[0]-pix2[15]

       vec_ld is used instead of dereferencing a casted vector pointer:
       it fetches the aligned quadword containing an arbitrary byte
       address, and the vec_lvsl permute extracts the wanted 16 bytes.
    */
    pix2v = vec_perm(vec_ld(0, pix2), vec_ld(16, pix2), vec_lvsl(0, pix2));

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15]
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(16, pix1),
                         vec_lvsl(0, pix1));

        pix3v = vec_perm(vec_ld(0, pix3), vec_ld(16, pix3),
                         vec_lvsl(0, pix3));

        /* Calculate the average vector */
        avgv = vec_avg(pix2v, pix3v);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        /* The lower row of this iteration is the upper row of the next */
        pix2v = pix3v;
        pix3 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, vec_splat_s32(0));
    /* vec_ste stores the element selected by the address of s, so make
       every element hold the total first. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
131

  
132
/**
 * Sum of absolute differences between a 16x16 block and a reference block
 * sampled at a diagonal (horizontal and vertical) half-pel position.
 *
 * Every reference sample is (a + b + c + d + 2) >> 2 over the 2x2
 * neighbourhood whose top-left byte is pix2[x], so 17 bytes from each of
 * 17 reference rows contribute to the result.
 *
 * @param pix1      top-left byte of the 16x16 block being compared
 * @param pix2      top-left byte of the reference block
 * @param line_size byte distance between consecutive rows of both blocks
 * @return sum over 16 rows and 16 columns of |pix1 - avg4(reference 2x2)|
 */
int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;
    vector unsigned char avgv, t5, zero;
    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
    vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
    vector unsigned short avghv, avglv, two;
    vector unsigned short t1, t2, t3, t4;
    vector unsigned int sad;
    vector signed int sumdiffs;

    zero = vec_splat_u8(0);
    two = vec_splat_u16(2);
    sad = vec_splat_u32(0);

    s = 0;

    /*
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, as well
       as some splitting, and vector addition each time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v: pix2[0]-pix2[15]  pix2iv: pix2[1]-pix2[16]
       Split the pixel vectors into shorts

       vec_ld is used instead of dereferencing a casted vector pointer:
       it fetches the aligned quadword containing an arbitrary byte
       address, and the vec_lvsl permute extracts the wanted 16 bytes.
    */
    pix2v = vec_perm(vec_ld(0, pix2), vec_ld(16, pix2), vec_lvsl(0, pix2));

    pix2iv = vec_perm(vec_ld(0, pix2 + 1), vec_ld(16, pix2 + 1),
                      vec_lvsl(0, pix2 + 1));

    pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
    pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
    t1 = vec_add(pix2hv, pix2ihv);
    t2 = vec_add(pix2lv, pix2ilv);

    for (i = 0; i < 16; i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15]      pix3iv: pix3[1]-pix3[16]
        */
        pix1v = vec_perm(vec_ld(0, pix1), vec_ld(16, pix1),
                         vec_lvsl(0, pix1));

        pix3v = vec_perm(vec_ld(0, pix3), vec_ld(16, pix3),
                         vec_lvsl(0, pix3));

        pix3iv = vec_perm(vec_ld(0, pix3 + 1), vec_ld(16, pix3 + 1),
                          vec_lvsl(0, pix3 + 1));

        /*
          Note that Altivec does have vec_avg, but this works on vector pairs
          and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
          would mean that, for example, avg(3,0,0,1) = 2, when it should be 1.
          Instead, we have to split the pixel vectors into vectors of shorts,
          and do the averaging by hand.
        */

        /* Split the pixel vectors into shorts */
        pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v);
        pix3lv = (vector unsigned short) vec_mergel(zero, pix3v);
        pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv);
        pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv);

        /* Do the averaging on them */
        t3 = vec_add(pix3hv, pix3ihv);
        t4 = vec_add(pix3lv, pix3ilv);

        /*
           vec_sr shifts every halfword independently. The whole-register
           vec_srl is only defined when all bytes of the shift operand
           agree in their low three bits, which a halfword splat of 2
           (bytes 0x00, 0x02) does not satisfy, and it would also need a
           mask to drop the bits leaking in from the neighbouring element.
        */
        avghv = vec_sr(vec_add(vec_add(t1, t3), two), two);
        avglv = vec_sr(vec_add(vec_add(t2, t4), two), two);

        /* Pack the shorts back into a result */
        avgv = vec_pack(avghv, avglv);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix3 += line_size;
        /* Transfer the calculated values for pix3 into pix2 */
        t1 = t3;
        t2 = t4;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, vec_splat_s32(0));
    /* vec_ste stores the element selected by the address of s, so make
       every element hold the total first. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
236

  
27 237
int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
28 238
{
29 239
    int i, s;
......
108 318
    return s;
109 319
}
110 320

  
321
/**
 * Sum of the squared pixel values of a 16x16 block.
 *
 * @param pix       top-left byte of the block
 * @param line_size byte distance between consecutive rows
 * @return sum of pix[y * line_size + x]^2 for 0 <= x, y < 16
 */
int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int s, i;
    vector unsigned char pixv;
    vector unsigned int sv;
    vector signed int sum;

    sv = vec_splat_u32(0);

    s = 0;
    for (i = 0; i < 16; i++) {
        /*
           Read in the potentially unaligned pixels. vec_ld fetches the
           aligned quadword containing an arbitrary byte address, and the
           vec_lvsl permute extracts the 16 bytes starting at pix.
        */
        pixv = vec_perm(vec_ld(0, pix), vec_ld(16, pix), vec_lvsl(0, pix));

        /*
           Square the values and add them to our sum. The unsigned char
           form of vec_msum multiplies the bytes pairwise and adds each
           group of four products to the matching word of sv, so the
           pixels do not have to be widened to shorts first.
        */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sum = vec_sums((vector signed int) sv, vec_splat_s32(0));
    /* vec_ste stores the element selected by the address of s, so make
       every element hold the total first. */
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &s);

    return s;
}
363

  
111 364
int pix_sum_altivec(UINT8 * pix, int line_size)
112 365
{
113 366

  
libavcodec/ppc/dsputil_altivec.h
16 16
 * License along with this library; if not, write to the Free Software
17 17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 18
 */
19
 
19

  
20
/*
 * AltiVec routines declared for the PowerPC DSP function table.
 *
 * The three half-pel variants return a sum of absolute differences between
 * the 16x16 block at pix1 and an averaged version of the block at pix2:
 *   _x2  averages each pix2 byte with its right-hand neighbour,
 *   _y2  averages each pix2 byte with the byte one line_size below it,
 *   _xy2 averages the 2x2 group formed by both of the above.
 */
extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
21
extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
22
extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
20 23
extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
21 24
extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size);
25
/* Sum of the squared pixel values over 16 rows of 16 bytes. */
extern int pix_norm1_altivec(uint8_t *pix, int line_size);
22 26
extern int pix_sum_altivec(UINT8 * pix, int line_size);
23 27
extern void diff_pixels_altivec(DCTELEM* block, const UINT8* s1, const UINT8* s2, int stride);
24 28
extern void get_pixels_altivec(DCTELEM* block, const UINT8 * pixels, int line_size);
libavcodec/ppc/dsputil_ppc.c
32 32
#if HAVE_ALTIVEC
33 33
    if (has_altivec()) {
34 34
        // Altivec specific optimisations
35
        c->pix_abs16x16_x2 = pix_abs16x16_x2_altivec;
36
        c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec;
37
	c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec;
35 38
	c->pix_abs16x16 = pix_abs16x16_altivec;
36 39
        c->pix_abs8x8 = pix_abs8x8_altivec;
40
        c->pix_norm1 = pix_norm1_altivec;
37 41
        c->pix_sum = pix_sum_altivec;
38 42
        c->diff_pixels = diff_pixels_altivec;
39 43
        c->get_pixels = get_pixels_altivec;

Also available in: Unified diff