Statistics
| Branch: | Revision:

ffmpeg / libavcodec / sh4 / dsputil_align.c @ b550bfaa

History | View | Annotate | Download (14.2 KB)

1
/*
2
 * aligned/packed access motion
3
 *
4
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23

    
24
#include "avcodec.h"
25
#include "dsputil.h"
26

    
27

    
28
/*
 * Access the 32-bit word stored at address p.  The expansion is an lvalue,
 * so it can be used both to load and to store.  It is fully parenthesized so
 * that postfix/binary operators applied to LP(...) bind to the dereferenced
 * word and not to the pointer expression.
 */
#define         LP(p)           (*(uint32_t*)(p))
29

    
30

    
31
/*
 * Split two packed 32-bit words into a "high" and a "low" partial sum.
 *
 *   ph = sum of both inputs with the low two bits of every byte cleared,
 *        each shifted right by two;
 *   pl = sum of the low two bits of every byte of both inputs.
 *
 * tt0 and tt1 are evaluated exactly once.  The temporaries carry a macro
 * specific name so that callers may pass variables called t0/t1 without
 * them being captured.
 * NOTE(review): BYTE_VEC32 is defined outside this file; presumably it
 * replicates its byte argument into all four bytes of a word - confirm.
 */
#define         UNPACK(ph,pl,tt0,tt1) do { \
        uint32_t unpack_t0_ = (tt0), unpack_t1_ = (tt1); \
        (ph) = ((unpack_t0_ & ~BYTE_VEC32(0x03)) >> 2) + ((unpack_t1_ & ~BYTE_VEC32(0x03)) >> 2); \
        (pl) = (unpack_t0_ & BYTE_VEC32(0x03)) + (unpack_t1_ & BYTE_VEC32(0x03)); } while(0)
35

    
36
/*
 * Recombine two (high, low) pairs produced by UNPACK into one packed word.
 * The high parts are added directly; the low parts are added together with
 * a per-byte bias, shifted right by two and masked to two bits per byte so
 * that bits shifted in from the neighbouring byte are dropped.
 * rnd_PACK uses a bias of 2 per byte, no_rnd_PACK a bias of 1.
 * Arguments and the whole expansion are parenthesized so the macros can be
 * used inside larger expressions.
 */
#define         rnd_PACK(ph,pl,nph,npl) ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x02))>>2) & BYTE_VEC32(0x03)))
#define         no_rnd_PACK(ph,pl,nph,npl)      ((ph) + (nph) + ((((pl) + (npl) + BYTE_VEC32(0x01))>>2) & BYTE_VEC32(0x03)))
38

    
39
/* little endian */
40
/*
 * a and b are two consecutive 32-bit words (a at the lower address).
 * MERGE1 extracts the word that starts ofs bytes into a,
 * MERGE2 extracts the word that starts ofs+1 bytes into a.
 * ofs==0 (MERGE1) and ofs==3 (MERGE2) are special-cased so that a shift by
 * 32 bits is never evaluated.  Arguments and the whole expansion are
 * parenthesized so the result can be used inside larger expressions.
 */
#define         MERGE1(a,b,ofs) (((ofs)==0)?(a):( ((a)>>(8*(ofs)))|((b)<<(32-8*(ofs))) ))
#define         MERGE2(a,b,ofs) (((ofs)==3)?(b):( ((a)>>(8*((ofs)+1)))|((b)<<(32-8*((ofs)+1))) ))
42
/* big endian variant (disabled):
#define         MERGE1(a,b,ofs) (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define         MERGE2(a,b,ofs) (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
*/
46

    
47

    
48
/*
 * Store operations selected through the OP macro.
 *   put: d = s
 *   avg: d = rnd_avg32(s, d)   (rnd_avg32 is declared outside this file)
 * Both expand to a single parenthesized assignment expression.
 */
#define         put(d,s)        ((d) = (s))
#define         avg(d,s)        ((d) = rnd_avg32((s),(d)))
50

    
51
/*
 * 4-byte-wide row loop for a source that is misaligned by ofs bytes.
 * Uses the enclosing function's ref, dest, stride and height.
 * ref is first moved back by ofs; each row then loads the two words at
 * ref and ref+4, merges them with MERGE1 and stores the result through
 * OP into the word at dest.  The loop is do/while, so height must be >= 1
 * on entry and is 0 afterwards.
 * Wrapped in do { } while (0) so the macro behaves as one statement.
 */
#define         OP_C4(ofs) \
do { \
        ref -= (ofs); \
        do { \
                OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
                ref  += stride; \
                dest += stride; \
        } while (--height); \
} while (0)
58

    
59
/*
 * 4-byte-wide row loop for a source that is already word aligned.
 * Uses the enclosing function's ref, dest, stride and height; each row
 * stores the word at ref through OP into the word at dest.
 * The loop is do/while, so height must be >= 1 on entry.
 */
#define        OP_C40() \
        do { \
                OP(LP(dest),LP(ref)); \
                ref  += stride; \
                dest += stride; \
        } while (--height)
65

    
66

    
67
#define         OP      put
68

    
69
/*
 * Copy a block 4 bytes wide and height rows tall from ref to dest
 * (OP is defined as put at this point).
 *
 * dest   - destination; written with 32-bit stores, one word per row
 * ref    - source; may have any byte alignment
 * stride - byte distance between consecutive rows of both buffers
 * height - number of rows, must be >= 1 (the row loops are do/while)
 *
 * The low two address bits of ref select the row loop: the aligned case
 * copies words directly, the others rebuild each word from the two aligned
 * words that straddle it.  The address is converted through uintptr_t so
 * the test does not depend on pointers fitting in an int.
 */
static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        switch ((uintptr_t)ref & 3) {
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}
78

    
79
#undef          OP
80
#define         OP      avg
81

    
82
/*
 * Combine a block 4 bytes wide and height rows tall from ref into dest
 * (OP is defined as avg at this point, i.e. dest = rnd_avg32(src, dest)).
 *
 * dest   - destination; read and written with 32-bit accesses
 * ref    - source; may have any byte alignment
 * stride - byte distance between consecutive rows of both buffers
 * height - number of rows, must be >= 1 (the row loops are do/while)
 *
 * The low two address bits of ref select the row loop.  The address is
 * converted through uintptr_t so the test does not depend on pointers
 * fitting in an int.
 */
static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        switch ((uintptr_t)ref & 3) {
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}
91

    
92
#undef          OP
93

    
94
/*
 * Row loop without interpolation for a source misaligned by ofs bytes.
 * sz is the block width in bytes (8 or 16); avg2 is accepted only so that
 * all OP_* macros share one argument list and is not used here.
 * Uses the enclosing function's ref, dest, stride and height.
 * ref is moved back by ofs, then every row reads sz/4 + 1 aligned words
 * and stores sz/4 merged words through OP.  height must be >= 1.
 */
#define         OP_C(ofs,sz,avg2) \
do { \
        ref -= (ofs); \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
                if ((sz) == 16) { \
                        t1 = LP(ref+12); \
                        OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
                        t0 = LP(ref+16); \
                        OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
                } \
                ref  += stride; \
                dest += stride; \
        } while (--height); \
} while (0)
114

    
115
/* aligned */
116
/*
 * Row loop without interpolation for a word-aligned source.
 * sz is the block width in bytes (8 or 16); avg2 is unused and only keeps
 * the argument list parallel to the other OP_* macros.
 * Uses the enclosing function's ref, dest, stride and height; every row
 * stores sz/4 words from ref through OP into dest.  height must be >= 1.
 */
#define         OP_C0(sz,avg2) \
do { \
        do { \
                OP(LP(dest+0), LP(ref+0)); \
                OP(LP(dest+4), LP(ref+4)); \
                if ((sz) == 16) { \
                        OP(LP(dest+8), LP(ref+8)); \
                        OP(LP(dest+12), LP(ref+12)); \
                } \
                ref  += stride; \
                dest += stride; \
        } while (--height); \
} while (0)
129

    
130
/*
 * Row loop for horizontal interpolation.
 * For every output word the word starting at the source position (MERGE1)
 * and the word starting one byte later (MERGE2) are combined with avg2 and
 * stored through OP.  ofs is the byte misalignment of ref (0..3), sz the
 * block width in bytes (8 or 16), avg2 a two-operand combiner supplied by
 * the caller.
 * Uses the enclosing function's ref, dest, stride and height; ref is moved
 * back by ofs first.  Every row reads sz/4 + 1 aligned words.
 * height must be >= 1.
 */
#define         OP_X(ofs,sz,avg2) \
do { \
        ref -= (ofs); \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
                if ((sz) == 16) { \
                        t1 = LP(ref+12); \
                        OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                        t0 = LP(ref+16); \
                        OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
                } \
                ref  += stride; \
                dest += stride; \
        } while (--height); \
} while (0)
150

    
151
/* aligned */
152
/*
 * Row loop for vertical interpolation with a word-aligned source.
 * The words of the current source row are kept in t0..t3; each iteration
 * advances ref by one row, combines every kept word with the word directly
 * below it using avg2, stores the result through OP and keeps the new row
 * for the next iteration.  height + 1 source rows are therefore read.
 * sz is the block width in bytes (8 or 16); t2/t3 are only used for 16.
 * Uses the enclosing function's ref, dest, stride and height.
 * height must be >= 1.
 */
#define         OP_Y0(sz,avg2) \
do { \
        uint32_t t0,t1,t2,t3,t; \
\
        t0 = LP(ref+0); \
        t1 = LP(ref+4); \
        if ((sz) == 16) { \
                t2 = LP(ref+8); \
                t3 = LP(ref+12); \
        } \
        do { \
                ref += stride; \
\
                t = LP(ref+0); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                t = LP(ref+4); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
                if ((sz) == 16) { \
                        t = LP(ref+8); \
                        OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                        t = LP(ref+12); \
                        OP(LP(dest+12), avg2(t3,t)); t3 = t; \
                } \
                dest += stride; \
        } while (--height); \
} while (0)
178

    
179
/*
 * Row loop for vertical interpolation with a source misaligned by ofs
 * bytes.  Same scheme as the aligned variant, except that every source
 * word is first rebuilt with MERGE1 from two neighbouring aligned words
 * (w0/w1 alternate as "previous" and "next" aligned word).
 * t0..t3 hold the merged words of the previous row; each iteration reads
 * the next row, combines with avg2, stores through OP and keeps the new
 * row.  height + 1 source rows are read, sz/4 + 1 aligned words per row.
 * Uses the enclosing function's ref, dest, stride and height; ref is moved
 * back by ofs first.  height must be >= 1.
 */
#define         OP_Y(ofs,sz,avg2) \
do { \
        uint32_t t0,t1,t2,t3,t,w0,w1; \
\
        ref -= (ofs); \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        t0 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+8); \
        t1 = MERGE1(w1,w0,ofs); \
        if ((sz) == 16) { \
                w1 = LP(ref+12); \
                t2 = MERGE1(w0,w1,ofs); \
                w0 = LP(ref+16); \
                t3 = MERGE1(w1,w0,ofs); \
        } \
        do { \
                ref += stride; \
\
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                w0 = LP(ref+8); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
                if ((sz) == 16) { \
                        w1 = LP(ref+12); \
                        t = MERGE1(w0,w1,ofs); \
                        OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                        w0 = LP(ref+16); \
                        t = MERGE1(w1,w0,ofs); \
                        OP(LP(dest+12), avg2(t3,t)); t3 = t; \
                } \
                dest += stride; \
        } while (--height); \
} while (0)
216

    
217
/*
 * Aligned-source entry points for the horizontal and diagonal loops.
 * Both interpolations always need the byte following each word, so the
 * aligned case simply reuses the general macro with an offset of 0.
 */
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)
219
/*
 * Row loop for combined horizontal + vertical interpolation.
 * For every output word, UNPACK splits the horizontal pair (word at the
 * source position and word one byte later) of a row into a high and a low
 * partial sum.  a0..a7 keep these pairs for the previous row; each
 * iteration computes the pair for the next row into t2/t3, merges both
 * rows with PACK, stores the result through OP and keeps the new pair.
 * ofs is the byte misalignment of ref (0..3), sz the block width in bytes
 * (8 or 16; a4..a7 are only used for 16), PACK the four-operand combiner.
 * height + 1 source rows are read, sz/4 + 1 aligned words per row.
 * Uses the enclosing function's ref, dest, stride and height; ref is moved
 * back by ofs first.  height must be >= 1.
 */
#define         OP_XY(ofs,sz,PACK) \
do { \
        uint32_t        t2,t3,w0,w1; \
        uint32_t        a0,a1,a2,a3,a4,a5,a6,a7; \
\
        ref -= (ofs); \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+8); \
        UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        if ((sz) == 16) { \
                w1 = LP(ref+12); \
                UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                w0 = LP(ref+16); \
                UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
        } \
        do { \
                ref += stride; \
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
                a0 = t2; a1 = t3; \
                w0 = LP(ref+8); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
                a2 = t2; a3 = t3; \
                if ((sz) == 16) { \
                        w1 = LP(ref+12); \
                        UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                        OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
                        a4 = t2; a5 = t3; \
                        w0 = LP(ref+16); \
                        UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                        OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
                        a6 = t2; a7 = t3; \
                } \
                dest += stride; \
        } while (--height); \
} while (0)
260

    
261
/*
 * Define one static motion function named
 *     <op>_<rnd>_pixels<sz>_<xy>(dest, ref, stride, height).
 *
 * op      - name prefix (put / avg); the store operation actually used is
 *           whatever OP is defined as when DEFFUNC is expanded
 * rnd     - rnd / no_rnd; pasted into the function name and in front of
 *           avgfunc to pick the combiner (<rnd>_<avgfunc>)
 * xy      - name suffix (o, x, y, xy)
 * sz      - block width in bytes (8 or 16)
 * OP_N    - row-loop macro; OP_N##0 is used when ref is word aligned,
 *           OP_N(1..3, ...) for the three misaligned cases
 * avgfunc - base name of the combiner passed on to the row loop
 *
 * The low two address bits of ref select the variant.  The address is
 * converted through uintptr_t so the test does not depend on pointers
 * fitting in an int.
 */
#define         DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref, \
                                const int stride, int height) \
{ \
        switch ((uintptr_t)ref & 3) { \
        case 0: OP_N##0(sz,rnd##_##avgfunc); return; \
        case 1: OP_N(1,sz,rnd##_##avgfunc); return; \
        case 2: OP_N(2,sz,rnd##_##avgfunc); return; \
        case 3: OP_N(3,sz,rnd##_##avgfunc); return; \
        } \
}
272

    
273
#define OP put
274

    
275
DEFFUNC(put,   rnd,o,8,OP_C,avg2)
276
DEFFUNC(put,   rnd,x,8,OP_X,avg2)
277
DEFFUNC(put,no_rnd,x,8,OP_X,avg2)
278
DEFFUNC(put,   rnd,y,8,OP_Y,avg2)
279
DEFFUNC(put,no_rnd,y,8,OP_Y,avg2)
280
DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
281
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
282
DEFFUNC(put,   rnd,o,16,OP_C,avg2)
283
DEFFUNC(put,   rnd,x,16,OP_X,avg2)
284
DEFFUNC(put,no_rnd,x,16,OP_X,avg2)
285
DEFFUNC(put,   rnd,y,16,OP_Y,avg2)
286
DEFFUNC(put,no_rnd,y,16,OP_Y,avg2)
287
DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
288
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)
289

    
290
#undef OP
291
#define OP avg
292

    
293
DEFFUNC(avg,   rnd,o,8,OP_C,avg2)
294
DEFFUNC(avg,   rnd,x,8,OP_X,avg2)
295
DEFFUNC(avg,no_rnd,x,8,OP_X,avg2)
296
DEFFUNC(avg,   rnd,y,8,OP_Y,avg2)
297
DEFFUNC(avg,no_rnd,y,8,OP_Y,avg2)
298
DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
299
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
300
DEFFUNC(avg,   rnd,o,16,OP_C,avg2)
301
DEFFUNC(avg,   rnd,x,16,OP_X,avg2)
302
DEFFUNC(avg,no_rnd,x,16,OP_X,avg2)
303
DEFFUNC(avg,   rnd,y,16,OP_Y,avg2)
304
DEFFUNC(avg,no_rnd,y,16,OP_Y,avg2)
305
DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
306
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
307

    
308
#undef OP
309

    
310
/*
 * The "o" variants do no interpolation (the row loop ignores its combiner
 * argument), so the no_rnd names simply alias the rnd functions.
 */
#define         put_no_rnd_pixels8_o     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_o    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_o     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_o    avg_rnd_pixels16_o

/*
 * Map the generic *_c names onto the functions defined in this file.
 * NOTE(review): presumably these names are referenced by qpel.c, which is
 * included further down - confirm.
 */
#define         put_pixels8_c            put_rnd_pixels8_o
#define         put_pixels16_c           put_rnd_pixels16_o
#define         avg_pixels8_c            avg_rnd_pixels8_o
#define         avg_pixels16_c           avg_rnd_pixels16_o
#define         put_no_rnd_pixels8_c     put_rnd_pixels8_o
#define         put_no_rnd_pixels16_c    put_rnd_pixels16_o
#define         avg_no_rnd_pixels8_c     avg_rnd_pixels8_o
#define         avg_no_rnd_pixels16_c    avg_rnd_pixels16_o
323

    
324
/*
 * QPEL switches on the inclusion of qpel.c here and the extra table
 * setup guarded by the same symbol in dsputil_init_align().
 */
#define         QPEL

#ifdef QPEL

#include "qpel.c"

#endif
331

    
332
/*
 * Install the functions of this file into the DSP function tables of c.
 *
 * c     - context whose tables are filled in
 * avctx - not used by this function
 *
 * For the pixels tables the first index selects the block width
 * (0: 16 bytes, 1: 8 bytes) and the second the interpolation
 * (0: none, 1: x, 2: y, 3: xy).  When QPEL is defined, the qpel, h264
 * qpel/chroma, mspel and gmc entries are filled in as well.
 * NOTE(review): those functions are not defined in this file; presumably
 * they come from qpel.c included above - confirm.
 */
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
        c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
        c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
        c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
        c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
        c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
        c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
        c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
        c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;

        c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
        c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;

        c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
        c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
        c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
        c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
        c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
        c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
        c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
        c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;

        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;

#ifdef QPEL

/* Fill the 16 entries of c-><PFX>_pixels_tab[IDX] with <PFX><NUM>_mcXY_c,
 * entry index = X + 4*Y. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;

#endif
}