Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ b5f83deb

History | View | Annotate | Download (157 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 406792e7 Diego Biurrun
 * Copyright (c) 2000, 2001 Fabrice Bellard
4 8f2ab833 Michael Niedermayer
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 de6d9b64 Fabrice Bellard
 *
6 7b94177e Diego Biurrun
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
 *
8 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11 ff4ec49e Fabrice Bellard
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
14 de6d9b64 Fabrice Bellard
 *
15 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
16 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19 de6d9b64 Fabrice Bellard
 *
20 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
21 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
22 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 de6d9b64 Fabrice Bellard
 */
24 115329f1 Diego Biurrun
25 983e3246 Michael Niedermayer
/**
26 ba87f080 Diego Biurrun
 * @file
27 983e3246 Michael Niedermayer
 * DSP utils
28
 */
29 115329f1 Diego Biurrun
30 9686abb8 Stefano Sabatini
#include "libavcore/imgutils.h"
31 de6d9b64 Fabrice Bellard
#include "avcodec.h"
32
#include "dsputil.h"
33 b0368839 Michael Niedermayer
#include "simple_idct.h"
34 65e4c8c9 Michael Niedermayer
#include "faandct.h"
35 6f08c541 Michael Niedermayer
#include "faanidct.h"
36 199436b9 Aurelien Jacobs
#include "mathops.h"
37 af818f7a Diego Biurrun
#include "mpegvideo.h"
38
#include "config.h"
39 3da11804 Måns Rullgård
#include "ac3dec.h"
40
#include "vorbis.h"
41
#include "png.h"
42 5596c60c Michael Niedermayer
43 55fde95e Måns Rullgård
/* Clipping table: ff_cropTbl + MAX_NEG_CROP maps any value in
   [-MAX_NEG_CROP, 255 + MAX_NEG_CROP] to [0, 255]; filled at init time. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square table: ff_squareTbl + 256 gives x*x for x in [-256, 255];
   filled at init time, used by the SSE/pix_norm functions below. */
uint32_t ff_squareTbl[512] = {0, };
45 de6d9b64 Fabrice Bellard
46 917f55cc Loren Merritt
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
// (~0UL/255 yields 0x0101...01 for the native unsigned long width)
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
49 469bd7b1 Loren Merritt
50 0c1a9eda Zdenek Kabelac
/* Classic zigzag scan order for progressive (frame-coded) 8x8 blocks. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
60
61 10acc479 Roman Shaposhnik
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
73
74 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
76 2f349de2 Michael Niedermayer
77 0c1a9eda Zdenek Kabelac
/* Alternate (horizontal-first) scan order, used for interlaced material. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
87
88 0c1a9eda Zdenek Kabelac
/* Alternate (vertical-first) scan order, used for interlaced material. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
98
99 b0368839 Michael Niedermayer
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
110
111 0e956ba2 Alexander Strange
/* Row permutation applied by the SSE2 IDCT (even rows first, then odd). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
112
113 4c79b95c Aurelien Jacobs
/**
 * Initialize a ScanTable from a raw scan order and an IDCT permutation.
 * Fills st->permutated (scan order with the permutation applied) and
 * st->raster_end (per scan position, the highest permuted index so far).
 */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int idx, max;

    st->scantable = src_scantable;

    /* apply the IDCT input permutation to every scan position */
    for (idx = 0; idx < 64; idx++) {
        int pos = src_scantable[idx];
        st->permutated[idx] = permutation[pos];
#if ARCH_PPC
        st->inverse[pos] = idx;
#endif
    }

    /* running maximum of the permuted indices */
    max = -1;
    for (idx = 0; idx < 64; idx++) {
        if (st->permutated[idx] > max)
            max = st->permutated[idx];
        st->raster_end[idx] = max;
    }
}
136
137 0c1a9eda Zdenek Kabelac
/**
 * Sum all 256 samples of a 16x16 block.
 * @param pix       top-left sample of the block
 * @param line_size byte stride between rows
 * @return sum of the samples
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int row, col;
    int total = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
158
159 0c1a9eda Zdenek Kabelac
static int pix_norm1_c(uint8_t * pix, int line_size)
160 3aa102be Michael Niedermayer
{
161
    int s, i, j;
162 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
163 3aa102be Michael Niedermayer
164
    s = 0;
165
    for (i = 0; i < 16; i++) {
166 bb270c08 Diego Biurrun
        for (j = 0; j < 16; j += 8) {
167 2a006cd3 Felix von Leitner
#if 0
168 bb270c08 Diego Biurrun
            s += sq[pix[0]];
169
            s += sq[pix[1]];
170
            s += sq[pix[2]];
171
            s += sq[pix[3]];
172
            s += sq[pix[4]];
173
            s += sq[pix[5]];
174
            s += sq[pix[6]];
175
            s += sq[pix[7]];
176 2a006cd3 Felix von Leitner
#else
177
#if LONG_MAX > 2147483647
178 bb270c08 Diego Biurrun
            register uint64_t x=*(uint64_t*)pix;
179
            s += sq[x&0xff];
180
            s += sq[(x>>8)&0xff];
181
            s += sq[(x>>16)&0xff];
182
            s += sq[(x>>24)&0xff];
183 2a006cd3 Felix von Leitner
            s += sq[(x>>32)&0xff];
184
            s += sq[(x>>40)&0xff];
185
            s += sq[(x>>48)&0xff];
186
            s += sq[(x>>56)&0xff];
187
#else
188 bb270c08 Diego Biurrun
            register uint32_t x=*(uint32_t*)pix;
189
            s += sq[x&0xff];
190
            s += sq[(x>>8)&0xff];
191
            s += sq[(x>>16)&0xff];
192
            s += sq[(x>>24)&0xff];
193 2a006cd3 Felix von Leitner
            x=*(uint32_t*)(pix+4);
194
            s += sq[x&0xff];
195
            s += sq[(x>>8)&0xff];
196
            s += sq[(x>>16)&0xff];
197
            s += sq[(x>>24)&0xff];
198
#endif
199
#endif
200 bb270c08 Diego Biurrun
            pix += 8;
201
        }
202
        pix += line_size - 16;
203 3aa102be Michael Niedermayer
    }
204
    return s;
205
}
206
207 96711ecf Michael Niedermayer
/**
 * Byte-swap a buffer of w 32-bit words from src into dst.
 * Main loop handles eight words per iteration; a tail loop finishes
 * the remainder.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for (i = 0; i + 8 <= w; i += 8) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_bswap32(src[i + k]);
    }
    for (; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}
224 3aa102be Michael Niedermayer
225 26efc54e Michael Niedermayer
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
226
{
227
    int s, i;
228 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
229 26efc54e Michael Niedermayer
230
    s = 0;
231
    for (i = 0; i < h; i++) {
232
        s += sq[pix1[0] - pix2[0]];
233
        s += sq[pix1[1] - pix2[1]];
234
        s += sq[pix1[2] - pix2[2]];
235
        s += sq[pix1[3] - pix2[3]];
236
        pix1 += line_size;
237
        pix2 += line_size;
238
    }
239
    return s;
240
}
241
242 bb198e19 Michael Niedermayer
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
243 1457ab52 Michael Niedermayer
{
244
    int s, i;
245 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
246 1457ab52 Michael Niedermayer
247
    s = 0;
248 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
249 1457ab52 Michael Niedermayer
        s += sq[pix1[0] - pix2[0]];
250
        s += sq[pix1[1] - pix2[1]];
251
        s += sq[pix1[2] - pix2[2]];
252
        s += sq[pix1[3] - pix2[3]];
253
        s += sq[pix1[4] - pix2[4]];
254
        s += sq[pix1[5] - pix2[5]];
255
        s += sq[pix1[6] - pix2[6]];
256
        s += sq[pix1[7] - pix2[7]];
257
        pix1 += line_size;
258
        pix2 += line_size;
259
    }
260
    return s;
261
}
262
263 bb198e19 Michael Niedermayer
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
264 9c76bd48 Brian Foley
{
265 6b026927 Falk Hüffner
    int s, i;
266 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
267 9c76bd48 Brian Foley
268
    s = 0;
269 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
270 6b026927 Falk Hüffner
        s += sq[pix1[ 0] - pix2[ 0]];
271
        s += sq[pix1[ 1] - pix2[ 1]];
272
        s += sq[pix1[ 2] - pix2[ 2]];
273
        s += sq[pix1[ 3] - pix2[ 3]];
274
        s += sq[pix1[ 4] - pix2[ 4]];
275
        s += sq[pix1[ 5] - pix2[ 5]];
276
        s += sq[pix1[ 6] - pix2[ 6]];
277
        s += sq[pix1[ 7] - pix2[ 7]];
278
        s += sq[pix1[ 8] - pix2[ 8]];
279
        s += sq[pix1[ 9] - pix2[ 9]];
280
        s += sq[pix1[10] - pix2[10]];
281
        s += sq[pix1[11] - pix2[11]];
282
        s += sq[pix1[12] - pix2[12]];
283
        s += sq[pix1[13] - pix2[13]];
284
        s += sq[pix1[14] - pix2[14]];
285
        s += sq[pix1[15] - pix2[15]];
286 2a006cd3 Felix von Leitner
287 6b026927 Falk Hüffner
        pix1 += line_size;
288
        pix2 += line_size;
289 9c76bd48 Brian Foley
    }
290
    return s;
291
}
292
293 5a6a9e78 Aurelien Jacobs
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row, *bottom;
    int i;

    bottom = buf + (height - 1) * wrap;

    /* replicate the first and last image rows into the top/bottom margins */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(bottom + i * wrap, bottom, width);
    }

    /* replicate the first and last columns into the left/right margins */
    for (row = buf, i = 0; i < height; i++, row += wrap) {
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
    }

    /* fill the four corner areas from the image's corner samples */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w,        buf[0],            w); /* top left */
        memset(buf - i * wrap + width,    buf[width - 1],    w); /* top right */
        memset(bottom + i * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + i * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
321
322 288a44fb Aurelien Jacobs
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* Clamp src_y so the block overlaps the picture by at least one row,
       compensating src so (src_x, src_y) still addresses the same sample. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    /* Same clamping in the horizontal direction. */
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* Sub-rectangle of the block that lies inside the source picture. */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* Left/right replication runs over every row, including the rows that
       were just synthesized above, so the corners are filled too. */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
392
393 0c1a9eda Zdenek Kabelac
/**
 * Copy an 8x8 block of pixels into a DCT coefficient block
 * (widening from uint8_t to DCTELEM).
 */
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
411
412 0c1a9eda Zdenek Kabelac
/**
 * Store the element-wise difference s1 - s2 of two 8x8 pixel blocks
 * into a DCT coefficient block.
 */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
431
432
433 0c1a9eda Zdenek Kabelac
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
434 bb270c08 Diego Biurrun
                                 int line_size)
435 de6d9b64 Fabrice Bellard
{
436
    int i;
437 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
438 115329f1 Diego Biurrun
439 de6d9b64 Fabrice Bellard
    /* read the pixels */
440
    for(i=0;i<8;i++) {
441 c13e1abd Falk Hüffner
        pixels[0] = cm[block[0]];
442
        pixels[1] = cm[block[1]];
443
        pixels[2] = cm[block[2]];
444
        pixels[3] = cm[block[3]];
445
        pixels[4] = cm[block[4]];
446
        pixels[5] = cm[block[5]];
447
        pixels[6] = cm[block[6]];
448
        pixels[7] = cm[block[7]];
449
450
        pixels += line_size;
451
        block += 8;
452 de6d9b64 Fabrice Bellard
    }
453
}
454
455 178fcca8 Michael Niedermayer
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
456 bb270c08 Diego Biurrun
                                 int line_size)
457 178fcca8 Michael Niedermayer
{
458
    int i;
459 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
460 115329f1 Diego Biurrun
461 178fcca8 Michael Niedermayer
    /* read the pixels */
462
    for(i=0;i<4;i++) {
463
        pixels[0] = cm[block[0]];
464
        pixels[1] = cm[block[1]];
465
        pixels[2] = cm[block[2]];
466
        pixels[3] = cm[block[3]];
467
468
        pixels += line_size;
469
        block += 8;
470
    }
471
}
472
473 9ca358b9 Michael Niedermayer
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
474 bb270c08 Diego Biurrun
                                 int line_size)
475 9ca358b9 Michael Niedermayer
{
476
    int i;
477 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
478 115329f1 Diego Biurrun
479 9ca358b9 Michael Niedermayer
    /* read the pixels */
480
    for(i=0;i<2;i++) {
481
        pixels[0] = cm[block[0]];
482
        pixels[1] = cm[block[1]];
483
484
        pixels += line_size;
485
        block += 8;
486
    }
487
}
488
489 115329f1 Diego Biurrun
/**
 * Write an 8x8 block of signed DCT coefficients into pixels, biasing by
 * +128 and clamping the result to [0, 255].
 */
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            DCTELEM v = block[row * 8 + col];

            if (v < -128)
                pixels[col] = 0;
            else if (v > 127)
                pixels[col] = 255;
            else
                pixels[col] = (uint8_t)(v + 128);
        }
        pixels += line_size;
    }
}
509
510 342c7dfd Kostya Shishkov
/**
 * Write an 8x8 DCT coefficient block into pixels with a plain truncating
 * store (no clamping).
 */
static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                    int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = block[col];
        pixels += line_size;
        block += 8;
    }
}
530
531 0c1a9eda Zdenek Kabelac
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
532 c13e1abd Falk Hüffner
                          int line_size)
533 de6d9b64 Fabrice Bellard
{
534
    int i;
535 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
536 115329f1 Diego Biurrun
537 de6d9b64 Fabrice Bellard
    /* read the pixels */
538
    for(i=0;i<8;i++) {
539 c13e1abd Falk Hüffner
        pixels[0] = cm[pixels[0] + block[0]];
540
        pixels[1] = cm[pixels[1] + block[1]];
541
        pixels[2] = cm[pixels[2] + block[2]];
542
        pixels[3] = cm[pixels[3] + block[3]];
543
        pixels[4] = cm[pixels[4] + block[4]];
544
        pixels[5] = cm[pixels[5] + block[5]];
545
        pixels[6] = cm[pixels[6] + block[6]];
546
        pixels[7] = cm[pixels[7] + block[7]];
547
        pixels += line_size;
548
        block += 8;
549 de6d9b64 Fabrice Bellard
    }
550
}
551 178fcca8 Michael Niedermayer
552
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
553
                          int line_size)
554
{
555
    int i;
556 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
557 115329f1 Diego Biurrun
558 178fcca8 Michael Niedermayer
    /* read the pixels */
559
    for(i=0;i<4;i++) {
560
        pixels[0] = cm[pixels[0] + block[0]];
561
        pixels[1] = cm[pixels[1] + block[1]];
562
        pixels[2] = cm[pixels[2] + block[2]];
563
        pixels[3] = cm[pixels[3] + block[3]];
564
        pixels += line_size;
565
        block += 8;
566
    }
567
}
568 9ca358b9 Michael Niedermayer
569
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
570
                          int line_size)
571
{
572
    int i;
573 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
574 115329f1 Diego Biurrun
575 9ca358b9 Michael Niedermayer
    /* read the pixels */
576
    for(i=0;i<2;i++) {
577
        pixels[0] = cm[pixels[0] + block[0]];
578
        pixels[1] = cm[pixels[1] + block[1]];
579
        pixels += line_size;
580
        block += 8;
581
    }
582
}
583 36940eca Loren Merritt
584
/**
 * Add an 8x8 DCT coefficient block to the pixels in place, without
 * clamping (truncating store).
 */
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 8;
    }
}
600
601
/**
 * Add a 4x4 DCT coefficient block to the pixels in place, without
 * clamping. NOTE: unlike the 4-wide clamped variants, the coefficient
 * rows here are packed 4 elements wide (block advances by 4 per row).
 */
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 4;
    }
}
613
614 1edbfe19 Loren Merritt
/**
 * Sum of absolute values of all 64 coefficients of a DCT block.
 */
static int sum_abs_dctelem_c(DCTELEM *block)
{
    int idx;
    int total = 0;

    for (idx = 0; idx < 64; idx++)
        total += FFABS(block[idx]);
    return total;
}
621
622 342c7dfd Kostya Shishkov
/**
 * Paint h rows of 16 bytes each with a constant value.
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
631
632
/**
 * Paint h rows of 8 bytes each with a constant value.
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
641
642
/**
 * 2x nearest-neighbour upscale of an 8x8 block into a 16x16 area:
 * each source byte becomes a 2x2 square of identical bytes.
 * The *0x0101 doubles a byte into a 16-bit pair, written once per
 * destination row pair.
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int x, y;
    uint16_t *even = (uint16_t *) dst;
    uint16_t *odd  = (uint16_t *)(dst + linesize);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            even[x] = odd[x] = src[x] * 0x0101;
        src  += 8;
        even += linesize;  /* linesize uint16_t elements == 2*linesize bytes */
        odd  += linesize;
    }
}
657
658 59fe111e Michael Niedermayer
#if 0
659

660
#define PIXOP2(OPNAME, OP) \
661 b3184779 Michael Niedermayer
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
662 59fe111e Michael Niedermayer
{\
663
    int i;\
664
    for(i=0; i<h; i++){\
665 905694d9 Roman Shaposhnik
        OP(*((uint64_t*)block), AV_RN64(pixels));\
666 59fe111e Michael Niedermayer
        pixels+=line_size;\
667
        block +=line_size;\
668
    }\
669
}\
670
\
671 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
672 59fe111e Michael Niedermayer
{\
673
    int i;\
674
    for(i=0; i<h; i++){\
675 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels  );\
676
        const uint64_t b= AV_RN64(pixels+1);\
677 59fe111e Michael Niedermayer
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
678
        pixels+=line_size;\
679
        block +=line_size;\
680
    }\
681
}\
682
\
683 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
684 59fe111e Michael Niedermayer
{\
685
    int i;\
686
    for(i=0; i<h; i++){\
687 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels  );\
688
        const uint64_t b= AV_RN64(pixels+1);\
689 59fe111e Michael Niedermayer
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
690
        pixels+=line_size;\
691
        block +=line_size;\
692
    }\
693
}\
694
\
695 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
696 59fe111e Michael Niedermayer
{\
697
    int i;\
698
    for(i=0; i<h; i++){\
699 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels          );\
700
        const uint64_t b= AV_RN64(pixels+line_size);\
701 59fe111e Michael Niedermayer
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
702
        pixels+=line_size;\
703
        block +=line_size;\
704
    }\
705
}\
706
\
707 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
708 59fe111e Michael Niedermayer
{\
709
    int i;\
710
    for(i=0; i<h; i++){\
711 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels          );\
712
        const uint64_t b= AV_RN64(pixels+line_size);\
713 59fe111e Michael Niedermayer
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
714
        pixels+=line_size;\
715
        block +=line_size;\
716
    }\
717
}\
718
\
719 45553457 Zdenek Kabelac
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
720 59fe111e Michael Niedermayer
{\
721
        int i;\
722 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels  );\
723
        const uint64_t b= AV_RN64(pixels+1);\
724 59fe111e Michael Niedermayer
        uint64_t l0=  (a&0x0303030303030303ULL)\
725
                    + (b&0x0303030303030303ULL)\
726
                    + 0x0202020202020202ULL;\
727
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
728
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
729
        uint64_t l1,h1;\
730
\
731
        pixels+=line_size;\
732
        for(i=0; i<h; i+=2){\
733 905694d9 Roman Shaposhnik
            uint64_t a= AV_RN64(pixels  );\
734
            uint64_t b= AV_RN64(pixels+1);\
735 59fe111e Michael Niedermayer
            l1=  (a&0x0303030303030303ULL)\
736
               + (b&0x0303030303030303ULL);\
737
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
738
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
739
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
740
            pixels+=line_size;\
741
            block +=line_size;\
742 905694d9 Roman Shaposhnik
            a= AV_RN64(pixels  );\
743
            b= AV_RN64(pixels+1);\
744 59fe111e Michael Niedermayer
            l0=  (a&0x0303030303030303ULL)\
745
               + (b&0x0303030303030303ULL)\
746
               + 0x0202020202020202ULL;\
747
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
748
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
749
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
750
            pixels+=line_size;\
751
            block +=line_size;\
752
        }\
753
}\
754
\
755 45553457 Zdenek Kabelac
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
756 59fe111e Michael Niedermayer
{\
757
        int i;\
758 905694d9 Roman Shaposhnik
        const uint64_t a= AV_RN64(pixels  );\
759
        const uint64_t b= AV_RN64(pixels+1);\
760 59fe111e Michael Niedermayer
        uint64_t l0=  (a&0x0303030303030303ULL)\
761
                    + (b&0x0303030303030303ULL)\
762
                    + 0x0101010101010101ULL;\
763
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
764
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
765
        uint64_t l1,h1;\
766
\
767
        pixels+=line_size;\
768
        for(i=0; i<h; i+=2){\
769 905694d9 Roman Shaposhnik
            uint64_t a= AV_RN64(pixels  );\
770
            uint64_t b= AV_RN64(pixels+1);\
771 59fe111e Michael Niedermayer
            l1=  (a&0x0303030303030303ULL)\
772
               + (b&0x0303030303030303ULL);\
773
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
774
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
775
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
776
            pixels+=line_size;\
777
            block +=line_size;\
778 905694d9 Roman Shaposhnik
            a= AV_RN64(pixels  );\
779
            b= AV_RN64(pixels+1);\
780 59fe111e Michael Niedermayer
            l0=  (a&0x0303030303030303ULL)\
781
               + (b&0x0303030303030303ULL)\
782
               + 0x0101010101010101ULL;\
783
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
784
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
785
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
786
            pixels+=line_size;\
787
            block +=line_size;\
788
        }\
789
}\
790
\
791 45553457 Zdenek Kabelac
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
792
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
793
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
794
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
795
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
796
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
797
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
798 59fe111e Michael Niedermayer

799
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
800
#else // 64 bit variant
801
802
/*
 * PIXOP2(OPNAME, OP) instantiates the whole family of 2/4/8/16-pixel-wide
 * copy/average primitives for one output operation OP: op_put stores the
 * computed word, op_avg folds it into the destination with a rounded
 * average.  This is the 32-bit variant: each row is handled as one or two
 * 32-bit words loaded with AV_RN32 (AV_RN16 for the 2-wide case), which are
 * unaligned-safe reads.
 *
 * The *_l2 helpers average two source rows and the *_l4 helpers four; the
 * x2/y2/xy2 wrappers implement half-pel interpolation by feeding the same
 * source shifted by one pixel and/or one line.  The l4/xy2 code uses
 * byte-parallel (SWAR) arithmetic: the low 2 bits of every byte are summed
 * in l0/l1 (with the rounding constant folded in) while the high 6 bits are
 * pre-shifted down and summed in h0/h1, so four bytes are averaged per
 * 32-bit operation without carries leaking between bytes.
 *
 * The "no_rnd" variants use the smaller rounding constant 0x01010101
 * instead of 0x02020202 (truncating average) — presumably for the codecs'
 * no-rounding prediction modes; confirm against the callers.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    /* a plain copy involves no rounding, so no_rnd == rnd here */\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    /* two passes of 4 columns each; the j loop steps to the right half */\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)
1167 b3184779 Michael Niedermayer
1168 d8085ea7 Michael Niedermayer
/* 32-bit rounded average used by the avg_* flavour of PIXOP2. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
/* Plain store used by the put_* flavour. */
#define op_put(a, b) a = b

PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* A straight copy involves no rounding, so the no-rnd 8/16-wide copies are
 * plain aliases of the normal put routines. */
#define put_no_rnd_pixels8_c  put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c

/* Scalar rounded averages of two and four pixel values. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1182
1183 c0a0170c Michael Niedermayer
/* Thin adapter: no-rounding average of two 16-wide sources that all share
 * one stride, forwarding to the generic three-stride helper. */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int stride, int h)
{
    put_no_rnd_pixels16_l2(dst, src1, src2, stride, stride, stride, h);
}
1186
1187
/* Thin adapter: no-rounding average of two 8-wide sources that all share
 * one stride, forwarding to the generic three-stride helper. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int stride, int h)
{
    put_no_rnd_pixels8_l2(dst, src1, src2, stride, stride, stride, h);
}
1190 073b013d Michael Niedermayer
1191 0c1a9eda Zdenek Kabelac
/*
 * One-point GMC: bilinear interpolation of an 8-pixel-wide, h-row block at
 * the 1/16-pel fractional offset (x16, y16).  The four corner weights sum
 * to 256, so the result is normalized with ">> 8" after adding 'rounder'.
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int y;

    for (y = 0; y < h; y++) {
        int x;
        for (x = 0; x < 8; x++) {
            dst[x] = (A * src[x]          + B * src[x + 1]
                    + C * src[x + stride] + D * src[x + stride + 1]
                    + rounder) >> 8;
        }
        dst += stride;
        src += stride;
    }
}
1213
1214 703c8195 Loren Merritt
/**
 * Global motion compensation, C reference version; fills an 8-pixel-wide,
 * h-row block of dst.
 *
 * (ox,oy) is the sub-pel source position of the first output pixel and
 * (dxx,dyx) / (dxy,dyy) the per-column / per-row increments of that
 * position.  From the code below, positions carry (16 + shift) fractional
 * bits: "vx>>16" drops the lowest 16, the next 'shift' bits (s = 1<<shift)
 * are the bilinear fraction, the rest the integer pixel.  r is the bilinear
 * rounding constant; width/height are the valid source dimensions used for
 * edge clamping.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* turn the exclusive limits into the last valid integer coordinate */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            /* fraction comes from the low 'shift' bits *before* the final
             * integer shift */
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare rejects both negative and too-large
             * coordinates in a single test; the in-range branches read the
             * x+1 / y+1 neighbours, hence the "-1" limits above */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: full bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* clamp vertically: interpolate along x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* clamp horizontally: interpolate along y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both clamped: nearest edge pixel, no interpolation */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1271 669ac79c Michael Niedermayer
1272
/* Full-pel case: dispatch on block width to the plain copy routines.
 * Widths other than 2/4/8/16 are silently ignored, as in the switch form. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1280
1281
/* Third-pel MC, horizontal weights 2:1 on (x, x+1);
 * 683/2048 ~= 1/3 implements the division. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + 1] + 1)) >> 11;
    }
}
1291
1292
/* Third-pel MC, horizontal weights 1:2 on (x, x+1); 683/2048 ~= 1/3. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + 1] + 1)) >> 11;
    }
}
1302 115329f1 Diego Biurrun
1303 669ac79c Michael Niedermayer
/* Third-pel MC, vertical weights 2:1 on (y, y+1); 683/2048 ~= 1/3. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (2 * s[col] + s[col + stride] + 1)) >> 11;
    }
}
1313 115329f1 Diego Biurrun
1314 669ac79c Michael Niedermayer
/* Third-pel MC, 2-D: weights 4,3,3,2 on (x,y),(x+1,y),(x,y+1),(x+1,y+1);
 * 2731/32768 ~= 1/12 implements the normalization. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (4 * s[col]          + 3 * s[col + 1]
                            + 3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15;
    }
}
1324
1325
/* Third-pel MC, 2-D: weights 3,2,4,3 on (x,y),(x+1,y),(x,y+1),(x+1,y+1);
 * 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col]          + 2 * s[col + 1]
                            + 4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
1335
1336
/* Third-pel MC, vertical weights 1:2 on (y, y+1); 683/2048 ~= 1/3. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (683 * (s[col] + 2 * s[col + stride] + 1)) >> 11;
    }
}
1346
1347
/* Third-pel MC, 2-D: weights 3,4,2,3 on (x,y),(x+1,y),(x,y+1),(x+1,y+1);
 * 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (3 * s[col]          + 4 * s[col + 1]
                            + 2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15;
    }
}
1357
1358
/* Third-pel MC, 2-D: weights 2,3,3,4 on (x,y),(x+1,y),(x,y+1),(x+1,y+1);
 * 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (2731 * (2 * s[col]          + 3 * s[col + 1]
                            + 3 * s[col + stride] + 4 * s[col + stride + 1] + 6)) >> 15;
    }
}
1368 da3b9756 Mike Melanson
1369
/* Full-pel case: dispatch on block width to the plain averaging routines.
 * Widths other than 2/4/8/16 are silently ignored, as in the switch form. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1377
1378
/* Third-pel interpolation (horizontal weights 2:1), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (2 * s[col] + s[col + 1] + 1)) >> 11) + 1) >> 1;
    }
}
1388
1389
/* Third-pel interpolation (horizontal weights 1:2), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (s[col] + 2 * s[col + 1] + 1)) >> 11) + 1) >> 1;
    }
}
1399 115329f1 Diego Biurrun
1400 da3b9756 Mike Melanson
/* Third-pel interpolation (vertical weights 2:1), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (2 * s[col] + s[col + stride] + 1)) >> 11) + 1) >> 1;
    }
}
1410 115329f1 Diego Biurrun
1411 da3b9756 Mike Melanson
/* 2-D third-pel interpolation (weights 4,3,3,2 / 12), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (4 * s[col]          + 3 * s[col + 1]
                                       + 3 * s[col + stride] + 2 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
1421
1422
/* 2-D third-pel interpolation (weights 3,2,4,3 / 12), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (3 * s[col]          + 2 * s[col + 1]
                                       + 4 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
1432
1433
/* Third-pel interpolation (vertical weights 1:2), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((683 * (s[col] + 2 * s[col + stride] + 1)) >> 11) + 1) >> 1;
    }
}
1443
1444
/* 2-D third-pel interpolation (weights 3,4,2,3 / 12), then rounded average
 * with the existing destination pixel. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row * stride;
        uint8_t       *d = dst + row * stride;
        for (col = 0; col < width; col++)
            d[col] = (d[col] + ((2731 * (3 * s[col]          + 4 * s[col + 1]
                                       + 2 * s[col + stride] + 3 * s[col + stride + 1] + 6)) >> 15) + 1) >> 1;
    }
}
1454
1455
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Thirdpel (2/3,2/3)-phase motion compensation with rounding average:
     * 2x2 bilinear filter with weights 2/3/3/4 (scaled by 2731/2^15
     * ~= 1/12, bias 6), round-averaged into the existing dst pixel. */
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int filt = 2*src[col]        + 3*src[col+1]
                           + 3*src[col+stride] + 4*src[col+stride+1];
            dst[col] = (dst[col] + ((2731*(filt + 6)) >> 15) + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
1465 669ac79c Michael Niedermayer
#if 0
1466
#define TPEL_WIDTH(width)\
1467
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1468
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
1469
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1470
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
1471
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1472
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
1473
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1474
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
1475
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1476
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
1477
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1478
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
1479
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1480
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
1481
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1482
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
1483
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1484
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1485
#endif
1486
1487 0da71265 Michael Niedermayer
/* Generate H.264 chroma motion-compensation functions for block widths
 * 2, 4 and 8.  x and y are eighth-sample fractional offsets (0..7); the
 * bilinear tap weights A..D sum to 64 and the OP macro folds the
 * weighted sum into dst (plain put or rounded average, each applying
 * the +32 >> 6 normalization).  When D == 0 the 2-D filter degenerates
 * to a 1-D filter: E = B+C is the single off-center weight and "step"
 * selects vertical (stride, when C != 0) or horizontal (1) direction. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* put: normalize only; avg: normalize then round-average with dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1595
1596 c374691b David Conrad
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* VC-1 8-wide chroma MC, "no rounding" variant: bilinear filter over
     * the 2x2 neighbourhood with eighth-sample weights (A+B+C+D == 64),
     * normalized with the reduced bias 32-4 = 28 before the >>6. */
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            const int t = A*src[j]        + B*src[j+1]
                        + C*src[j+stride] + D*src[j+stride+1];
            dst[j] = (t + 32 - 4) >> 6;
        }
        dst += stride;
        src += stride;
    }
}
1619
1620 8013da73 David Conrad
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* VC-1 8-wide chroma MC, "no rounding" averaging variant: bilinear
     * filter with eighth-sample weights (A+B+C+D == 64), bias 32-4 = 28
     * before the >>6, then avg2()-combined with the existing dst pixel. */
    const int A = (8-x)*(8-y);
    const int B = (  x)*(8-y);
    const int C = (8-x)*(  y);
    const int D = (  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            const int t = A*src[j]        + B*src[j+1]
                        + C*src[j+stride] + D*src[j+stride+1];
            dst[j] = avg2(dst[j], (t + 32 - 4) >> 6);
        }
        dst += stride;
        src += stride;
    }
}
1643
1644 b3184779 Michael Niedermayer
#define QPEL_MC(r, OPNAME, RND, OP) \
1645 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1646 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1647 b3184779 Michael Niedermayer
    int i;\
1648
    for(i=0; i<h; i++)\
1649
    {\
1650
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1651
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1652
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1653
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1654
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1655
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1656
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1657
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1658
        dst+=dstStride;\
1659
        src+=srcStride;\
1660
    }\
1661 44eb4951 Michael Niedermayer
}\
1662
\
1663 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1664 db794953 Michael Niedermayer
    const int w=8;\
1665 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1666 b3184779 Michael Niedermayer
    int i;\
1667
    for(i=0; i<w; i++)\
1668
    {\
1669
        const int src0= src[0*srcStride];\
1670
        const int src1= src[1*srcStride];\
1671
        const int src2= src[2*srcStride];\
1672
        const int src3= src[3*srcStride];\
1673
        const int src4= src[4*srcStride];\
1674
        const int src5= src[5*srcStride];\
1675
        const int src6= src[6*srcStride];\
1676
        const int src7= src[7*srcStride];\
1677
        const int src8= src[8*srcStride];\
1678
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1679
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1680
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1681
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1682
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1683
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1684
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1685
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1686
        dst++;\
1687
        src++;\
1688
    }\
1689
}\
1690
\
1691 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1692 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1693 b3184779 Michael Niedermayer
    int i;\
1694 826f429a Michael Niedermayer
    \
1695 b3184779 Michael Niedermayer
    for(i=0; i<h; i++)\
1696
    {\
1697
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1698
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1699
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1700
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1701
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1702
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1703
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1704
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1705
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1706
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1707
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1708
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1709
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1710
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1711
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1712
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1713
        dst+=dstStride;\
1714
        src+=srcStride;\
1715
    }\
1716
}\
1717
\
1718 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1719 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1720 b3184779 Michael Niedermayer
    int i;\
1721 826f429a Michael Niedermayer
    const int w=16;\
1722 b3184779 Michael Niedermayer
    for(i=0; i<w; i++)\
1723
    {\
1724
        const int src0= src[0*srcStride];\
1725
        const int src1= src[1*srcStride];\
1726
        const int src2= src[2*srcStride];\
1727
        const int src3= src[3*srcStride];\
1728
        const int src4= src[4*srcStride];\
1729
        const int src5= src[5*srcStride];\
1730
        const int src6= src[6*srcStride];\
1731
        const int src7= src[7*srcStride];\
1732
        const int src8= src[8*srcStride];\
1733
        const int src9= src[9*srcStride];\
1734
        const int src10= src[10*srcStride];\
1735
        const int src11= src[11*srcStride];\
1736
        const int src12= src[12*srcStride];\
1737
        const int src13= src[13*srcStride];\
1738
        const int src14= src[14*srcStride];\
1739
        const int src15= src[15*srcStride];\
1740
        const int src16= src[16*srcStride];\
1741
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1742
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1743
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1744
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1745
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1746
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1747
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1748
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1749
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1750
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1751
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1752
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1753
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1754
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1755
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1756
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1757
        dst++;\
1758
        src++;\
1759
    }\
1760
}\
1761
\
1762 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1763
    uint8_t half[64];\
1764 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1765
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1766 44eb4951 Michael Niedermayer
}\
1767
\
1768 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1769 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1770 44eb4951 Michael Niedermayer
}\
1771
\
1772 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1773
    uint8_t half[64];\
1774 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1775
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1776 44eb4951 Michael Niedermayer
}\
1777
\
1778 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1779
    uint8_t full[16*9];\
1780
    uint8_t half[64];\
1781 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1782 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1783 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1784 44eb4951 Michael Niedermayer
}\
1785
\
1786 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1787
    uint8_t full[16*9];\
1788 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1789 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1790 44eb4951 Michael Niedermayer
}\
1791
\
1792 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1793
    uint8_t full[16*9];\
1794
    uint8_t half[64];\
1795 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1796 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1797 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1798 44eb4951 Michael Niedermayer
}\
1799 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1800
    uint8_t full[16*9];\
1801
    uint8_t halfH[72];\
1802
    uint8_t halfV[64];\
1803
    uint8_t halfHV[64];\
1804 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1805
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1806 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1807
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1808 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1809 44eb4951 Michael Niedermayer
}\
1810 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1811
    uint8_t full[16*9];\
1812
    uint8_t halfH[72];\
1813
    uint8_t halfHV[64];\
1814 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1815
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1816
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1817
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1818
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1819
}\
1820 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1821
    uint8_t full[16*9];\
1822
    uint8_t halfH[72];\
1823
    uint8_t halfV[64];\
1824
    uint8_t halfHV[64];\
1825 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1826
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1827 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1828
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1829 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1830 44eb4951 Michael Niedermayer
}\
1831 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1832
    uint8_t full[16*9];\
1833
    uint8_t halfH[72];\
1834
    uint8_t halfHV[64];\
1835 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1836
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1837
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1838
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1839
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1840
}\
1841 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1842
    uint8_t full[16*9];\
1843
    uint8_t halfH[72];\
1844
    uint8_t halfV[64];\
1845
    uint8_t halfHV[64];\
1846 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1847
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1848 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1849
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1850 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1851 44eb4951 Michael Niedermayer
}\
1852 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1853
    uint8_t full[16*9];\
1854
    uint8_t halfH[72];\
1855
    uint8_t halfHV[64];\
1856 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1857
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1858
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1859
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1860
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1861
}\
1862 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1863
    uint8_t full[16*9];\
1864
    uint8_t halfH[72];\
1865
    uint8_t halfV[64];\
1866
    uint8_t halfHV[64];\
1867 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1868
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1869 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1870
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1871 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1872 44eb4951 Michael Niedermayer
}\
1873 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1874
    uint8_t full[16*9];\
1875
    uint8_t halfH[72];\
1876
    uint8_t halfHV[64];\
1877 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1878
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1879
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1880
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1881
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1882
}\
1883 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1884
    uint8_t halfH[72];\
1885
    uint8_t halfHV[64];\
1886 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1887 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1888 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1889 44eb4951 Michael Niedermayer
}\
1890 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1891
    uint8_t halfH[72];\
1892
    uint8_t halfHV[64];\
1893 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1894 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1895 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1896 44eb4951 Michael Niedermayer
}\
1897 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1898
    uint8_t full[16*9];\
1899
    uint8_t halfH[72];\
1900
    uint8_t halfV[64];\
1901
    uint8_t halfHV[64];\
1902 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1903
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1904 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1905
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1906 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1907 44eb4951 Michael Niedermayer
}\
1908 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1909
    uint8_t full[16*9];\
1910
    uint8_t halfH[72];\
1911 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1912
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1913
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1914
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1915
}\
1916 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1917
    uint8_t full[16*9];\
1918
    uint8_t halfH[72];\
1919
    uint8_t halfV[64];\
1920
    uint8_t halfHV[64];\
1921 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1922
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1923 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1924
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1925 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1926 44eb4951 Michael Niedermayer
}\
1927 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1928
    uint8_t full[16*9];\
1929
    uint8_t halfH[72];\
1930 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1931
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1932
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1933
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1934
}\
1935 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1936
    uint8_t halfH[72];\
1937 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1938 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1939 b3184779 Michael Niedermayer
}\
1940
\
1941 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1942
    uint8_t half[256];\
1943 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1944
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1945
}\
1946
\
1947 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1948 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1949 44eb4951 Michael Niedermayer
}\
1950 b3184779 Michael Niedermayer
\
1951 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1952
    uint8_t half[256];\
1953 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1954
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1955
}\
1956
\
1957 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1958
    uint8_t full[24*17];\
1959
    uint8_t half[256];\
1960 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1961 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1962 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1963
}\
1964
\
1965 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1966
    uint8_t full[24*17];\
1967 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1968 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1969 b3184779 Michael Niedermayer
}\
1970
\
1971 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1972
    uint8_t full[24*17];\
1973
    uint8_t half[256];\
1974 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1975 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1976 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1977
}\
1978 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1979
    uint8_t full[24*17];\
1980
    uint8_t halfH[272];\
1981
    uint8_t halfV[256];\
1982
    uint8_t halfHV[256];\
1983 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1984
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1985 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1986
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1987 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1988
}\
1989 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1990
    uint8_t full[24*17];\
1991
    uint8_t halfH[272];\
1992
    uint8_t halfHV[256];\
1993 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1994
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1995
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1996
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1997
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1998
}\
1999 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2000
    uint8_t full[24*17];\
2001
    uint8_t halfH[272];\
2002
    uint8_t halfV[256];\
2003
    uint8_t halfHV[256];\
2004 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2005
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2006 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2007
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2008 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2009
}\
2010 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2011
    uint8_t full[24*17];\
2012
    uint8_t halfH[272];\
2013
    uint8_t halfHV[256];\
2014 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2015
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2016
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2017
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2018
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2019
}\
2020 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2021
    uint8_t full[24*17];\
2022
    uint8_t halfH[272];\
2023
    uint8_t halfV[256];\
2024
    uint8_t halfHV[256];\
2025 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2026
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2027 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2028
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2029 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2030
}\
2031 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2032
    uint8_t full[24*17];\
2033
    uint8_t halfH[272];\
2034
    uint8_t halfHV[256];\
2035 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2036
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2037
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2038
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2039
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2040
}\
2041 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2042
    uint8_t full[24*17];\
2043
    uint8_t halfH[272];\
2044
    uint8_t halfV[256];\
2045
    uint8_t halfHV[256];\
2046 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2047
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
2048 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2049
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2050 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2051
}\
2052 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2053
    uint8_t full[24*17];\
2054
    uint8_t halfH[272];\
2055
    uint8_t halfHV[256];\
2056 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2057
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2058
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2059
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2060
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2061
}\
2062 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2063
    uint8_t halfH[272];\
2064
    uint8_t halfHV[256];\
2065 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2066 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2067 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2068
}\
2069 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2070
    uint8_t halfH[272];\
2071
    uint8_t halfHV[256];\
2072 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2073 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2074 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2075
}\
2076 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2077
    uint8_t full[24*17];\
2078
    uint8_t halfH[272];\
2079
    uint8_t halfV[256];\
2080
    uint8_t halfHV[256];\
2081 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2082
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2083 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2084
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2085 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2086
}\
2087 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2088
    uint8_t full[24*17];\
2089
    uint8_t halfH[272];\
2090 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2091
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2092
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2093
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2094
}\
2095 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2096
    uint8_t full[24*17];\
2097
    uint8_t halfH[272];\
2098
    uint8_t halfV[256];\
2099
    uint8_t halfHV[256];\
2100 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2101
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2102 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2103
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2104 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2105
}\
2106 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2107
    uint8_t full[24*17];\
2108
    uint8_t halfH[272];\
2109 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2110
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2111
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2112
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2113
}\
2114 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2115
    uint8_t halfH[272];\
2116 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2117 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2118 45553457 Zdenek Kabelac
}
2119 44eb4951 Michael Niedermayer
2120 b3184779 Michael Niedermayer
/* Rounding/store primitives for the MPEG-4 qpel filters generated above.
 * b is a filter sum scaled by 32; cm[] (ff_cropTbl) clamps to 0..255.
 * "no_rnd" variants bias by 15 instead of 16 (round-down), as required
 * for the no-rounding prediction mode. op_avg additionally averages with
 * the existing destination pixel (with +1 rounding). */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full qpel MC function set for each store operation. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
2133 44eb4951 Michael Niedermayer
2134 3d1b1caa Måns Rullgård
/* The (0,0) qpel position needs no filtering: alias it to the plain
 * block copy / average helpers so the function tables stay uniform. */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
2140
2141 0da71265 Michael Niedermayer
#if 1
/*
 * H.264 6-tap (1,-5,20,20,-5,1) half-pel interpolation kernels.
 *
 * H264_LOWPASS instantiates, for a given OPNAME prefix (put_/avg_) and
 * store macros OP (single-pass result, rounded with +16 >> 5) and OP2
 * (two-pass hv result, rounded with +512 >> 10), the horizontal (_h_),
 * vertical (_v_) and combined (_hv_) filters for 2, 4, 8 and 16 wide
 * blocks.  The _hv_ variants filter horizontally into a 16-bit tmp
 * buffer first, then vertically from tmp; intermediate values are kept
 * unclipped, matching the spec.  cm (ff_cropTbl + MAX_NEG_CROP) is the
 * clip-to-0..255 table used inside OP/OP2.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    /* pass 1: horizontal filter into tmp, including 5 extra rows of margin */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    /* pass 2: vertical filter over the 16-bit intermediates */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
/* The 16-wide filters are composed of four 8-wide calls (2x2 tiling). */\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

2406
/*
 * H264_MC(OPNAME, SIZE): instantiate the 16 quarter-pel motion
 * compensation functions _mcXY_c for an SIZExSIZE block, where X and Y
 * are the horizontal/vertical quarter-pel offsets (0..3).  mc00 is a
 * plain copy/average; the other positions run the h/v/hv lowpass
 * kernels above and, where needed, average two intermediate
 * predictions with the pixels_l2() helpers.  Intermediates always go
 * through "put_" filters; OPNAME only affects the final store.
 */
#define H264_MC(OPNAME, SIZE) \
static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
/* Vertical positions copy SIZE+5 source rows (2 above, 3 below) into \
 * "full" first, so the 6-tap filter has its margin available. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
/* Diagonal quarter positions: average a horizontal and a vertical half. */\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

2543
/* Store macros for the H.264 kernels: op_* round a single-pass sum
 * (+16 >> 5), op2_* round a two-pass hv sum (+512 >> 10).  The avg
 * variants round-average with the existing destination pixel. */
#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

/* Instantiate the kernels and the per-size MC function sets.
 * Note: the 2x2 size only gets the "put_" variants. */
H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif
2564
2565 3d1b1caa Måns Rullgård
/* Full-pel (0,0) H.264 positions are plain copies/averages. */
#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
2569
2570 1457ab52 Michael Niedermayer
/**
 * WMV2 horizontal half-pel lowpass filter over an 8-pixel-wide block.
 * For each of h rows computes
 *   dst[x] = clip((9*(src[x]+src[x+1]) - (src[x-1]+src[x+2]) + 8) >> 4)
 * for x in [0,8); reads src[-1]..src[9] on each row.  cm (ff_cropTbl)
 * clips the result to 0..255.
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
    int row, x;

    for (row = 0; row < h; row++) {
        for (x = 0; x < 8; x++)
            dst[x] = cm[(9*(src[x] + src[x+1]) - (src[x-1] + src[x+2]) + 8) >> 4];
        dst += dstStride;
        src += srcStride;
    }
}
2587
2588 3d1b1caa Måns Rullgård
/* Copy an 8x8 block: exported fixed-size wrapper around put_pixels8_c. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
2591 3d1b1caa Måns Rullgård
/** Average an 8x8 pixel block from src into dst (fixed-size wrapper). */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
2594 3d1b1caa Måns Rullgård
/** Copy a 16x16 pixel block from src to dst (fixed-size wrapper). */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
2597 3d1b1caa Måns Rullgård
/** Average a 16x16 pixel block from src into dst (fixed-size wrapper). */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
2600 64db55ae Kostya Shishkov
2601 b250f9c6 Aurelien Jacobs
#if CONFIG_RV40_DECODER
/* RV40 (3,3) quarter-pel positions map onto the plain xy2 half-pel
 * averaging helpers, so these are thin fixed-size wrappers. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
2615
2616 1457ab52 Michael Niedermayer
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2617 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2618 1457ab52 Michael Niedermayer
    int i;
2619
2620
    for(i=0; i<w; i++){
2621
        const int src_1= src[ -srcStride];
2622
        const int src0 = src[0          ];
2623
        const int src1 = src[  srcStride];
2624
        const int src2 = src[2*srcStride];
2625
        const int src3 = src[3*srcStride];
2626
        const int src4 = src[4*srcStride];
2627
        const int src5 = src[5*srcStride];
2628
        const int src6 = src[6*srcStride];
2629
        const int src7 = src[7*srcStride];
2630
        const int src8 = src[8*srcStride];
2631
        const int src9 = src[9*srcStride];
2632
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2633
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
2634
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
2635
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
2636
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
2637
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
2638
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
2639
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
2640
        src++;
2641
        dst++;
2642
    }
2643
}
2644
2645
/**
 * WMV2 quarter-pel (1,0) position: average of the integer-pel source and
 * the horizontally half-pel filtered block.
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64]; /* 8x8 temporary for the filtered block */
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
}
2650
2651
/** WMV2 half-pel (2,0) position: horizontal lowpass straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
2654
2655