ffmpeg / libavcodec / sh4 / dsputil_align.c @ 41fda91d
/*
 * aligned/packed access motion
 *
 * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "../avcodec.h"
#include "../dsputil.h"

#define        LP(p)        *(uint32_t*)(p)

#define        BYTE_VEC(c)        ((c)*0x01010101UL)

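/*
 * UNPACK splits each byte lane of two packed-pixel words into a "high"
 * part (the value >> 2) and a "low" part (the two LSBs), summed into
 * ph/pl.  Keeping the sums split this way lets four pixel values be
 * accumulated per byte lane without carries spilling into neighbouring
 * lanes.  rnd_PACK / no_rnd_PACK recombine the two halves into the
 * 4-pixel average, with a rounding bias of 2 (rnd) or 1 (no_rnd).
 */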
#define        UNPACK(ph,pl,tt0,tt1) do { \
        uint32_t t0,t1; t0=tt0;t1=tt1; \
        ph = ( (t0 & ~BYTE_VEC(0x03))>>2) + ( (t1 & ~BYTE_VEC(0x03))>>2); \
        pl = (t0 & BYTE_VEC(0x03)) + (t1 & BYTE_VEC(0x03)); } while(0)

#define        rnd_PACK(ph,pl,nph,npl)        ph + nph + (((pl + npl + BYTE_VEC(0x02))>>2) & BYTE_VEC(0x03))
#define        no_rnd_PACK(ph,pl,nph,npl)        ph + nph + (((pl + npl + BYTE_VEC(0x01))>>2) & BYTE_VEC(0x03))

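/*
 * MERGE1 rebuilds one unaligned 32-bit word (four pixels) from the two
 * aligned words a (lower address) and b (upper address), where ofs is
 * the byte misalignment of the source pointer.  MERGE2 gives the same
 * pixels shifted one byte further right, i.e. each pixel's right-hand
 * neighbour, which the horizontal half-pel cases average with.
 */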
/* little endian */
#define        MERGE1(a,b,ofs)        (ofs==0)?a:( ((a)>>(8*ofs))|((b)<<(32-8*ofs)) )
#define        MERGE2(a,b,ofs)        (ofs==3)?b:( ((a)>>(8*(ofs+1)))|((b)<<(32-8*(ofs+1))) )
/* big endian
#define        MERGE1(a,b,ofs)        (ofs==0)?a:( ((a)<<(8*ofs))|((b)>>(32-8*ofs)) )
#define        MERGE2(a,b,ofs)        (ofs==3)?b:( ((a)<<(8+8*ofs))|((b)>>(32-8-8*ofs)) )
*/

#define        put(d,s)        d = s
#define        avg(d,s)        d = rnd_avg2(s,d)

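/*
 * Byte-wise averages of four packed pixels at once, using the SWAR
 * identities (a+b+1)>>1 == (a|b) - ((a^b)>>1) and
 * (a+b)>>1 == (a&b) + ((a^b)>>1); the ~BYTE_VEC(0x01) mask clears the
 * bits that would otherwise be shifted across byte-lane boundaries.
 */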
static inline uint32_t rnd_avg2(uint32_t a, uint32_t b)
{
    return (a | b) - (((a ^ b) & ~BYTE_VEC(0x01)) >> 1);
}

static inline uint32_t no_rnd_avg2(uint32_t a, uint32_t b)
{
    return (a & b) + (((a ^ b) & ~BYTE_VEC(0x01)) >> 1);
}

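/*
 * 4-pixel-wide copy/average.  OP (put or avg, bound just before each
 * expansion) writes one aligned 32-bit word per row; OP_C40 is the
 * fully aligned fast path, OP_C4(ofs) handles a source misaligned by
 * ofs bytes by merging two aligned loads.
 */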
#define        OP_C4(ofs) \
        ref-=ofs; \
        do { \
                OP(LP(dest),MERGE1(LP(ref),LP(ref+4),ofs)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height)

#define        OP_C40() \
        do { \
                OP(LP(dest),LP(ref)); \
                ref+=stride; \
                dest+=stride; \
        } while(--height)

#define        OP        put

static void put_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        switch((int)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}

#undef        OP
#define        OP        avg

static void avg_pixels4_c(uint8_t *dest,const uint8_t *ref, const int stride,int height)
{
        switch((int)ref&3){
        case 0: OP_C40(); return;
        case 1: OP_C4(1); return;
        case 2: OP_C4(2); return;
        case 3: OP_C4(3); return;
        }
}

#undef        OP

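/*
 * OP_C: plain copy/average of an 8- or 16-pixel row from a source
 * misaligned by ofs bytes.  The avg2 argument is unused here; it is
 * kept only so DEFFUNC can instantiate every variant from one template.
 */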
#define        OP_C(ofs,sz,avg2) \
{ \
        ref-=ofs; \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), MERGE1(t0,t1,ofs)); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), MERGE1(t1,t0,ofs)); \
if (sz==16) { \
                t1 = LP(ref+12); \
                OP(LP(dest+8), MERGE1(t0,t1,ofs)); \
                t0 = LP(ref+16); \
                OP(LP(dest+12), MERGE1(t1,t0,ofs)); \
} \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

/* aligned */
#define        OP_C0(sz,avg2) \
{ \
        do { \
                OP(LP(dest+0), LP(ref+0)); \
                OP(LP(dest+4), LP(ref+4)); \
if (sz==16) { \
                OP(LP(dest+8), LP(ref+8)); \
                OP(LP(dest+12), LP(ref+12)); \
} \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

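/*
 * OP_X: horizontal half-pel interpolation.  Each output pixel is the
 * avg2() of a source pixel (MERGE1) and its right-hand neighbour
 * (MERGE2), four pixels per 32-bit store.
 */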
#define        OP_X(ofs,sz,avg2) \
{ \
        ref-=ofs; \
        do { \
                uint32_t        t0,t1; \
                t0 = LP(ref+0); \
                t1 = LP(ref+4); \
                OP(LP(dest+0), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LP(ref+8); \
                OP(LP(dest+4), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
if (sz==16) { \
                t1 = LP(ref+12); \
                OP(LP(dest+8), avg2(MERGE1(t0,t1,ofs),MERGE2(t0,t1,ofs))); \
                t0 = LP(ref+16); \
                OP(LP(dest+12), avg2(MERGE1(t1,t0,ofs),MERGE2(t1,t0,ofs))); \
} \
                ref+=stride; \
                dest+= stride; \
        } while(--height); \
}

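/*
 * OP_Y0 / OP_Y: vertical half-pel interpolation.  The previous row is
 * kept in t0..t3 so every source word is loaded only once, and each
 * output row is the avg2() of two vertically adjacent rows.
 */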
/* aligned */
#define        OP_Y0(sz,avg2) \
{ \
        uint32_t t0,t1,t2,t3,t; \
\
        t0 = LP(ref+0); \
        t1 = LP(ref+4); \
if (sz==16) { \
        t2 = LP(ref+8); \
        t3 = LP(ref+12); \
} \
        do { \
                ref += stride; \
\
                t = LP(ref+0); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                t = LP(ref+4); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
if (sz==16) { \
                t = LP(ref+8); \
                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                t = LP(ref+12); \
                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
} \
                dest+= stride; \
        } while(--height); \
}

#define        OP_Y(ofs,sz,avg2) \
{ \
        uint32_t t0,t1,t2,t3,t,w0,w1; \
\
        ref-=ofs; \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        t0 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+8); \
        t1 = MERGE1(w1,w0,ofs); \
if (sz==16) { \
        w1 = LP(ref+12); \
        t2 = MERGE1(w0,w1,ofs); \
        w0 = LP(ref+16); \
        t3 = MERGE1(w1,w0,ofs); \
} \
        do { \
                ref += stride; \
\
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+0), avg2(t0,t)); t0 = t; \
                w0 = LP(ref+8); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+4), avg2(t1,t)); t1 = t; \
if (sz==16) { \
                w1 = LP(ref+12); \
                t = MERGE1(w0,w1,ofs); \
                OP(LP(dest+8), avg2(t2,t)); t2 = t; \
                w0 = LP(ref+16); \
                t = MERGE1(w1,w0,ofs); \
                OP(LP(dest+12), avg2(t3,t)); t3 = t; \
} \
                dest+=stride; \
        } while(--height); \
}

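/*
 * OP_X0 / OP_XY0 are the aligned (ofs == 0) entry points of OP_X and
 * OP_XY.  OP_XY interpolates half-pel in both directions: each output
 * byte is the average of four neighbouring source pixels, computed with
 * the UNPACK/PACK high/low split so the previous row's partial sums can
 * be carried over in a0..a7.
 */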
#define OP_X0(sz,avg2) OP_X(0,sz,avg2)
#define OP_XY0(sz,PACK) OP_XY(0,sz,PACK)
#define        OP_XY(ofs,sz,PACK) \
{ \
        uint32_t        t2,t3,w0,w1; \
        uint32_t        a0,a1,a2,a3,a4,a5,a6,a7; \
\
        ref -= ofs; \
        w0 = LP(ref+0); \
        w1 = LP(ref+4); \
        UNPACK(a0,a1,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+8); \
        UNPACK(a2,a3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
if (sz==16) { \
        w1 = LP(ref+12); \
        UNPACK(a4,a5,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
        w0 = LP(ref+16); \
        UNPACK(a6,a7,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
} \
        do { \
                ref+=stride; \
                w0 = LP(ref+0); \
                w1 = LP(ref+4); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+0),PACK(a0,a1,t2,t3)); \
                a0 = t2; a1 = t3; \
                w0 = LP(ref+8); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+4),PACK(a2,a3,t2,t3)); \
                a2 = t2; a3 = t3; \
if (sz==16) { \
                w1 = LP(ref+12); \
                UNPACK(t2,t3,MERGE1(w0,w1,ofs),MERGE2(w0,w1,ofs)); \
                OP(LP(dest+8),PACK(a4,a5,t2,t3)); \
                a4 = t2; a5 = t3; \
                w0 = LP(ref+16); \
                UNPACK(t2,t3,MERGE1(w1,w0,ofs),MERGE2(w1,w0,ofs)); \
                OP(LP(dest+12),PACK(a6,a7,t2,t3)); \
                a6 = t2; a7 = t3; \
} \
                dest+=stride; \
        } while(--height); \
}

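/*
 * DEFFUNC instantiates one pixel function per (op, rounding, direction,
 * size) combination.  It dispatches on the low two bits of ref so the
 * word-aligned case takes the OP_*0 fast path and the three misaligned
 * cases fall back to the merging variants.
 */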
#define        DEFFUNC(op,rnd,xy,sz,OP_N,avgfunc) \
static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref,        \
                                   const int stride, int height)        \
{ \
        switch((int)ref&3) { \
        case 0:OP_N##0(sz,rnd##_##avgfunc); return; \
        case 1:OP_N(1,sz,rnd##_##avgfunc); return; \
        case 2:OP_N(2,sz,rnd##_##avgfunc); return; \
        case 3:OP_N(3,sz,rnd##_##avgfunc); return; \
        } \
}

#define OP put

DEFFUNC(put,   rnd,o,8,OP_C,avg2)
DEFFUNC(put,   rnd,x,8,OP_X,avg2)
DEFFUNC(put,no_rnd,x,8,OP_X,avg2)
DEFFUNC(put,   rnd,y,8,OP_Y,avg2)
DEFFUNC(put,no_rnd,y,8,OP_Y,avg2)
DEFFUNC(put,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,   rnd,o,16,OP_C,avg2)
DEFFUNC(put,   rnd,x,16,OP_X,avg2)
DEFFUNC(put,no_rnd,x,16,OP_X,avg2)
DEFFUNC(put,   rnd,y,16,OP_Y,avg2)
DEFFUNC(put,no_rnd,y,16,OP_Y,avg2)
DEFFUNC(put,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)

#undef OP
#define OP avg

DEFFUNC(avg,   rnd,o,8,OP_C,avg2)
DEFFUNC(avg,   rnd,x,8,OP_X,avg2)
DEFFUNC(avg,no_rnd,x,8,OP_X,avg2)
DEFFUNC(avg,   rnd,y,8,OP_Y,avg2)
DEFFUNC(avg,no_rnd,y,8,OP_Y,avg2)
DEFFUNC(avg,   rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,   rnd,o,16,OP_C,avg2)
DEFFUNC(avg,   rnd,x,16,OP_X,avg2)
DEFFUNC(avg,no_rnd,x,16,OP_X,avg2)
DEFFUNC(avg,   rnd,y,16,OP_Y,avg2)
DEFFUNC(avg,no_rnd,y,16,OP_Y,avg2)
DEFFUNC(avg,   rnd,xy,16,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)

#undef OP

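/* With no interpolation there is nothing to round, so the no_rnd "o"
   variants are simple aliases of the rounded ones. */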
#define        put_no_rnd_pixels8_o        put_rnd_pixels8_o
#define        put_no_rnd_pixels16_o        put_rnd_pixels16_o
#define        avg_no_rnd_pixels8_o        avg_rnd_pixels8_o
#define        avg_no_rnd_pixels16_o        avg_rnd_pixels16_o

#define        put_pixels8_c        put_rnd_pixels8_o
#define        put_pixels16_c        put_rnd_pixels16_o
#define        avg_pixels8_c        avg_rnd_pixels8_o
#define        avg_pixels16_c        avg_rnd_pixels16_o
#define        put_no_rnd_pixels8_c        put_rnd_pixels8_o
#define        put_no_rnd_pixels16_c        put_rnd_pixels16_o
#define        avg_no_rnd_pixels8_c        avg_rnd_pixels8_o
#define        avg_no_rnd_pixels16_c        avg_rnd_pixels16_o

#define        QPEL

#ifdef QPEL

#include "qpel.c"

#endif

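/*
 * Hook the aligned-access functions into the DSPContext tables:
 * index [0] holds the 16-pixel-wide functions, [1] the 8-pixel-wide
 * ones, and the four columns are o / x / y / xy (no offset, horizontal,
 * vertical and diagonal half-pel).
 */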
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
        c->put_pixels_tab[0][0] = put_rnd_pixels16_o;
        c->put_pixels_tab[0][1] = put_rnd_pixels16_x;
        c->put_pixels_tab[0][2] = put_rnd_pixels16_y;
        c->put_pixels_tab[0][3] = put_rnd_pixels16_xy;
        c->put_pixels_tab[1][0] = put_rnd_pixels8_o;
        c->put_pixels_tab[1][1] = put_rnd_pixels8_x;
        c->put_pixels_tab[1][2] = put_rnd_pixels8_y;
        c->put_pixels_tab[1][3] = put_rnd_pixels8_xy;

        c->put_no_rnd_pixels_tab[0][0] = put_no_rnd_pixels16_o;
        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x;
        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy;
        c->put_no_rnd_pixels_tab[1][0] = put_no_rnd_pixels8_o;
        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x;
        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy;

        c->avg_pixels_tab[0][0] = avg_rnd_pixels16_o;
        c->avg_pixels_tab[0][1] = avg_rnd_pixels16_x;
        c->avg_pixels_tab[0][2] = avg_rnd_pixels16_y;
        c->avg_pixels_tab[0][3] = avg_rnd_pixels16_xy;
        c->avg_pixels_tab[1][0] = avg_rnd_pixels8_o;
        c->avg_pixels_tab[1][1] = avg_rnd_pixels8_x;
        c->avg_pixels_tab[1][2] = avg_rnd_pixels8_y;
        c->avg_pixels_tab[1][3] = avg_rnd_pixels8_xy;

        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_o;
        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x;
        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y;
        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy;
        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o;
        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x;
        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y;
        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy;

#ifdef QPEL

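/* dspfunc() fills one row of a qpel table: the 16 quarter-pel positions
   mc00..mc33 for a NUM-pixel-wide block, using the functions that the
   qpel.c included above provides. */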
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

    dspfunc(put_h264_qpel, 0, 16);
    dspfunc(put_h264_qpel, 1, 8);
    dspfunc(put_h264_qpel, 2, 4);
    dspfunc(avg_h264_qpel, 0, 16);
    dspfunc(avg_h264_qpel, 1, 8);
    dspfunc(avg_h264_qpel, 2, 4);

#undef dspfunc
    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;

    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;

#endif
}