/* ffmpeg / libavcodec / ppc / h264_altivec.c */

/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "../dsputil.h"

#include "gcc_fixes.h"

#include "dsputil_altivec.h"

/* Final store operation for the "put" variants: plain overwrite (the dst
 * operand is unused). */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
/* Final store operation for the "avg" variants: rounding average of the
 * computed pixels with the pixels already in dst. */
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/*
 * First template pass: instantiate the put_* functions.
 * h264_template_altivec.c is included twice; OP_U8_ALTIVEC selects the
 * destination store operation and the PREFIX_* macros give each generated
 * function its put_/avg_ name.  All macros are #undef'd between passes.
 */
#define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         put_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_put_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num       altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec  put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

/* Second template pass: instantiate the avg_* functions. */
#define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
#define PREFIX_h264_chroma_mc8_altivec         avg_h264_chroma_mc8_altivec
#define PREFIX_h264_chroma_mc8_num             altivec_avg_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num       altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec  avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_template_altivec.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_chroma_mc8_altivec
#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num
67

    
68
/*
 * H264_MC(OPNAME, SIZE, CODETYPE) instantiates the 16 quarter-pel motion
 * compensation entry points OPNAME##h264_qpel##SIZE##_mcXY_##CODETYPE,
 * where X is the horizontal and Y the vertical quarter-pel offset (0..3).
 * Half-pel samples come from the h/v/hv lowpass helpers generated by the
 * template passes above; the remaining quarter-pel positions are produced
 * by averaging two intermediates with OPNAME##pixels##SIZE##_l2.
 * DECLARE_ALIGNED_16 keeps the temporaries 16-byte aligned for AltiVec
 * loads/stores.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
/* (1/4, 0): average of full-pel pixels and the h-filtered half-pel. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
/* (1/2, 0): pure horizontal 6-tap filter. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
/* (3/4, 0): like mc10 but averaged with the pixel one to the right. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
/* (0, 1/4): average of full-pel pixels and the v-filtered half-pel. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
/* (0, 1/2): pure vertical 6-tap filter. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
/* (0, 3/4): like mc01 but averaged with the pixel one row below. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, half[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
/* Diagonal quarter-pel positions: average of an h-filtered and a
 * v-filtered half-pel, with +1 / +stride source offsets selecting the
 * quadrant. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
/* (1/2, 1/2): combined 2D filter; tmp holds 16-bit intermediates
 * (SIZE+8 rows for the vertical filter taps). */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
/* Quarter-pel positions adjacent to the center: average the hv-filtered
 * half-pel with an h- or v-filtered half-pel. */ \
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfH[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
    DECLARE_ALIGNED_16(uint8_t, halfV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(uint8_t, halfHV[SIZE*SIZE]);\
    DECLARE_ALIGNED_16(int16_t, tmp[SIZE*(SIZE+8)]);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
177

    
178
/*
 * Average two 16-pixel-wide blocks and write the result to dst.
 * src1 is read with stride src_stride1 and may be unaligned; src2 rows
 * are packed contiguously 16 bytes apart.  dst may be unaligned, so each
 * row is written with a read-modify-write that preserves the bytes
 * surrounding the 16 target pixels.
 * NOTE(review): the store merges with bytes outside [dst, dst+16) — not
 * safe if another thread owns those bytes (historical dsputil behavior).
 */
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int row;
    vector unsigned char pix1, pix2, avgv, lo, hi, perm, perm2, ref, shift;

    /* src2 offsets are multiples of 16, so its permute vector never changes. */
    perm2 = vec_lvsl(0, src2);

    for (row = 0; row < h; row++) {
        /* Unaligned load of one src1 row: two aligned loads + permute. */
        lo   = vec_ld(row * src_stride1, src1);
        perm = vec_lvsl(row * src_stride1, src1);
        hi   = vec_ld(row * src_stride1 + 15, src1);
        pix1 = vec_perm(lo, hi, perm);

        /* Unaligned load of one src2 row (row pitch is 16 bytes). */
        lo   = vec_ld(row * 16, src2);
        hi   = vec_ld(row * 16 + 15, src2);
        pix2 = vec_perm(lo, hi, perm2);

        /* Fetch the destination neighborhood for the merged store. */
        lo   = vec_ld(0, dst);
        perm = vec_lvsl(0, dst);
        hi   = vec_ld(15, dst);

        /* Rounding average of the two sources. */
        avgv = vec_avg(pix1, pix2);

        /* Bytes outside the 16 target pixels, gathered in store order. */
        ref = vec_perm(hi, lo, perm);

        shift = vec_lvsr(0, dst);

        /* Splice result into the surrounding bytes and store both halves. */
        lo = vec_perm(ref, avgv, shift);
        hi = vec_perm(avgv, ref, shift);

        vec_st(lo, 0 , dst);
        vec_st(hi, 15, dst);

        dst += dst_stride;
    }
}
219

    
220
/*
 * Like put_pixels16_l2_altivec, but the averaged source pixels are in turn
 * averaged with the pixels already present in dst before being stored.
 * src1 is read with stride src_stride1; src2 rows are packed 16 bytes
 * apart.  dst may be unaligned; each row uses a read-modify-write store
 * that preserves the bytes around the 16 target pixels.
 */
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int row;
    vector unsigned char pix1, pix2, avgv, lo, hi, perm, perm2, ref, shift;

    /* src2 offsets are multiples of 16, so its permute vector never changes. */
    perm2 = vec_lvsl(0, src2);

    for (row = 0; row < h; row++) {
        /* Unaligned load of one src1 row: two aligned loads + permute. */
        lo   = vec_ld(row * src_stride1, src1);
        perm = vec_lvsl(row * src_stride1, src1);
        hi   = vec_ld(row * src_stride1 + 15, src1);
        pix1 = vec_perm(lo, hi, perm);

        /* Unaligned load of one src2 row (row pitch is 16 bytes). */
        lo   = vec_ld(row * 16, src2);
        hi   = vec_ld(row * 16 + 15, src2);
        pix2 = vec_perm(lo, hi, perm2);

        /* Fetch the destination neighborhood; also needed as avg input. */
        lo   = vec_ld(0, dst);
        perm = vec_lvsl(0, dst);
        hi   = vec_ld(15, dst);

        /* avg(dst, avg(src1, src2)) — the extra average with dst is the
         * only difference from the put variant. */
        avgv = vec_avg(vec_perm(lo, hi, perm), vec_avg(pix1, pix2));

        /* Bytes outside the 16 target pixels, gathered in store order. */
        ref = vec_perm(hi, lo, perm);

        shift = vec_lvsr(0, dst);

        /* Splice result into the surrounding bytes and store both halves. */
        lo = vec_perm(ref, avgv, shift);
        hi = vec_perm(avgv, ref, shift);

        vec_st(lo, 0 , dst);
        vec_st(hi, 15, dst);

        dst += dst_stride;
    }
}
261

    
262
/* Implemented but could be faster
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
 */

/* Instantiate the 16 put_ and 16 avg_ quarter-pel MC functions for the
 * 16x16 block size (the only size this file provides). */
  H264_MC(put_, 16, altivec)
  H264_MC(avg_, 16, altivec)
269

    
270
/*
 * Install the AltiVec H.264 functions into the DSPContext function tables.
 * AltiVec support is detected at runtime via has_altivec(); when absent
 * (or compiled out), the tables are left with whatever the generic code
 * already installed.  avctx is currently unused.
 */
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {

#ifdef HAVE_ALTIVEC
  if (has_altivec()) {
    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;

/* Fill one qpel table: slot [IDX][4*Y+X] gets the mcXY function generated
 * by H264_MC above. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec

    /* Only the 16x16 (index 0) entries have AltiVec versions here. */
    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc

  } else
#endif /* HAVE_ALTIVEC */
  {
    // Non-AltiVec PPC optimisations

    // ... pending ...
  }
}