Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dsputil.c @ af818f7a

History | View | Annotate | Download (173 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 406792e7 Diego Biurrun
 * Copyright (c) 2000, 2001 Fabrice Bellard
4 8f2ab833 Michael Niedermayer
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 de6d9b64 Fabrice Bellard
 *
6 7b94177e Diego Biurrun
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
 *
8 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11 ff4ec49e Fabrice Bellard
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
14 de6d9b64 Fabrice Bellard
 *
15 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
16 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19 de6d9b64 Fabrice Bellard
 *
20 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
21 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
22 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 de6d9b64 Fabrice Bellard
 */
24 115329f1 Diego Biurrun
25 983e3246 Michael Niedermayer
/**
26 bad5537e Diego Biurrun
 * @file libavcodec/dsputil.c
27 983e3246 Michael Niedermayer
 * DSP utils
28
 */
29 115329f1 Diego Biurrun
30 de6d9b64 Fabrice Bellard
#include "avcodec.h"
31
#include "dsputil.h"
32 b0368839 Michael Niedermayer
#include "simple_idct.h"
33 65e4c8c9 Michael Niedermayer
#include "faandct.h"
34 6f08c541 Michael Niedermayer
#include "faanidct.h"
35 199436b9 Aurelien Jacobs
#include "mathops.h"
36 eb75a698 Aurelien Jacobs
#include "h263.h"
37 059715a4 Robert Edele
#include "snow.h"
38 af818f7a Diego Biurrun
#include "mpegvideo.h"
39
#include "config.h"
40 5596c60c Michael Niedermayer
41 88730be6 Måns Rullgård
/* snow.c */
42
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
43
44 2dac4acf Loren Merritt
/* vorbis.c */
45
void vorbis_inverse_coupling(float *mag, float *ang, int blocksize);
46
47 ac2e5564 Loren Merritt
/* ac3dec.c */
48
void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
49
50 fde82ca7 Justin Ruggles
/* lpc.c */
51
void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
52 6810b93a Loren Merritt
53 4a9ca0a2 Loren Merritt
/* pngdec.c */
54
void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
55
56 28245435 Peter Ross
/* eaidct.c */
57
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
58
/* Clamping LUT: index with (value + MAX_NEG_CROP); covers under/overflow on
 * both sides.  Zero-initialized here — presumably filled at init time
 * elsewhere in this file (not visible in this chunk); TODO confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Square LUT: (ff_squareTbl + 256)[x] == x*x for -255..255 once filled.
 * Zero-initialized here; filled at init — confirm against the init code. */
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
65 469bd7b1 Loren Merritt
/* Standard (progressive) zigzag scan order over an 8x8 block, in raster
 * index form: entry n is the raster position of the n-th scanned coefficient. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
76
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
89
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
/* 16-byte aligned; presumably filled at init time elsewhere — confirm. */
DECLARE_ALIGNED_16(uint16_t, inv_zigzag_direct16[64]);
/* Alternate (horizontally-biased) scan order for an 8x8 block. */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
103
/* Alternate (vertically-biased) scan order for an 8x8 block. */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
114
/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
 * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
/* i.e. ff_inverse[b] ~= ceil(2^32 / b); entry 0 is unused (division by 0). */
const uint32_t ff_inverse[257]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
  16777216
};
152
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
164
/* Row permutation applied for the SSE2 IDCT input ordering. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
166
167 4c79b95c Aurelien Jacobs
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
168
    int i;
169
    int end;
170
171
    st->scantable= src_scantable;
172
173
    for(i=0; i<64; i++){
174
        int j;
175
        j = src_scantable[i];
176
        st->permutated[i] = permutation[j];
177 b250f9c6 Aurelien Jacobs
#if ARCH_PPC
178 4c79b95c Aurelien Jacobs
        st->inverse[j] = i;
179
#endif
180
    }
181
182
    end=-1;
183
    for(i=0; i<64; i++){
184
        int j;
185
        j = st->permutated[i];
186
        if(j>end) end=j;
187
        st->raster_end[i]= end;
188
    }
189
}
190
/* Sum of the 256 pixel values of a 16x16 block. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;   /* next row */
    }
    return total;
}
212
213 0c1a9eda Zdenek Kabelac
static int pix_norm1_c(uint8_t * pix, int line_size)
214 3aa102be Michael Niedermayer
{
215
    int s, i, j;
216 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
217 3aa102be Michael Niedermayer
218
    s = 0;
219
    for (i = 0; i < 16; i++) {
220 bb270c08 Diego Biurrun
        for (j = 0; j < 16; j += 8) {
221 2a006cd3 Felix von Leitner
#if 0
222 bb270c08 Diego Biurrun
            s += sq[pix[0]];
223
            s += sq[pix[1]];
224
            s += sq[pix[2]];
225
            s += sq[pix[3]];
226
            s += sq[pix[4]];
227
            s += sq[pix[5]];
228
            s += sq[pix[6]];
229
            s += sq[pix[7]];
230 2a006cd3 Felix von Leitner
#else
231
#if LONG_MAX > 2147483647
232 bb270c08 Diego Biurrun
            register uint64_t x=*(uint64_t*)pix;
233
            s += sq[x&0xff];
234
            s += sq[(x>>8)&0xff];
235
            s += sq[(x>>16)&0xff];
236
            s += sq[(x>>24)&0xff];
237 2a006cd3 Felix von Leitner
            s += sq[(x>>32)&0xff];
238
            s += sq[(x>>40)&0xff];
239
            s += sq[(x>>48)&0xff];
240
            s += sq[(x>>56)&0xff];
241
#else
242 bb270c08 Diego Biurrun
            register uint32_t x=*(uint32_t*)pix;
243
            s += sq[x&0xff];
244
            s += sq[(x>>8)&0xff];
245
            s += sq[(x>>16)&0xff];
246
            s += sq[(x>>24)&0xff];
247 2a006cd3 Felix von Leitner
            x=*(uint32_t*)(pix+4);
248
            s += sq[x&0xff];
249
            s += sq[(x>>8)&0xff];
250
            s += sq[(x>>16)&0xff];
251
            s += sq[(x>>24)&0xff];
252
#endif
253
#endif
254 bb270c08 Diego Biurrun
            pix += 8;
255
        }
256
        pix += line_size - 16;
257 3aa102be Michael Niedermayer
    }
258
    return s;
259
}
260
/* Byte-swap w 32-bit words from src into dst (may alias). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int n = 0;

    /* main loop, unrolled by 8 words */
    while (n + 8 <= w) {
        dst[n + 0] = bswap_32(src[n + 0]);
        dst[n + 1] = bswap_32(src[n + 1]);
        dst[n + 2] = bswap_32(src[n + 2]);
        dst[n + 3] = bswap_32(src[n + 3]);
        dst[n + 4] = bswap_32(src[n + 4]);
        dst[n + 5] = bswap_32(src[n + 5]);
        dst[n + 6] = bswap_32(src[n + 6]);
        dst[n + 7] = bswap_32(src[n + 7]);
        n += 8;
    }
    /* remaining 0..7 words */
    while (n < w) {
        dst[n] = bswap_32(src[n]);
        n++;
    }
}
278 3aa102be Michael Niedermayer
279 26efc54e Michael Niedermayer
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
280
{
281
    int s, i;
282 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
283 26efc54e Michael Niedermayer
284
    s = 0;
285
    for (i = 0; i < h; i++) {
286
        s += sq[pix1[0] - pix2[0]];
287
        s += sq[pix1[1] - pix2[1]];
288
        s += sq[pix1[2] - pix2[2]];
289
        s += sq[pix1[3] - pix2[3]];
290
        pix1 += line_size;
291
        pix2 += line_size;
292
    }
293
    return s;
294
}
295
296 bb198e19 Michael Niedermayer
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
297 1457ab52 Michael Niedermayer
{
298
    int s, i;
299 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
300 1457ab52 Michael Niedermayer
301
    s = 0;
302 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
303 1457ab52 Michael Niedermayer
        s += sq[pix1[0] - pix2[0]];
304
        s += sq[pix1[1] - pix2[1]];
305
        s += sq[pix1[2] - pix2[2]];
306
        s += sq[pix1[3] - pix2[3]];
307
        s += sq[pix1[4] - pix2[4]];
308
        s += sq[pix1[5] - pix2[5]];
309
        s += sq[pix1[6] - pix2[6]];
310
        s += sq[pix1[7] - pix2[7]];
311
        pix1 += line_size;
312
        pix2 += line_size;
313
    }
314
    return s;
315
}
316
317 bb198e19 Michael Niedermayer
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
318 9c76bd48 Brian Foley
{
319 6b026927 Falk Hüffner
    int s, i;
320 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
321 9c76bd48 Brian Foley
322
    s = 0;
323 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
324 6b026927 Falk Hüffner
        s += sq[pix1[ 0] - pix2[ 0]];
325
        s += sq[pix1[ 1] - pix2[ 1]];
326
        s += sq[pix1[ 2] - pix2[ 2]];
327
        s += sq[pix1[ 3] - pix2[ 3]];
328
        s += sq[pix1[ 4] - pix2[ 4]];
329
        s += sq[pix1[ 5] - pix2[ 5]];
330
        s += sq[pix1[ 6] - pix2[ 6]];
331
        s += sq[pix1[ 7] - pix2[ 7]];
332
        s += sq[pix1[ 8] - pix2[ 8]];
333
        s += sq[pix1[ 9] - pix2[ 9]];
334
        s += sq[pix1[10] - pix2[10]];
335
        s += sq[pix1[11] - pix2[11]];
336
        s += sq[pix1[12] - pix2[12]];
337
        s += sq[pix1[13] - pix2[13]];
338
        s += sq[pix1[14] - pix2[14]];
339
        s += sq[pix1[15] - pix2[15]];
340 2a006cd3 Felix von Leitner
341 6b026927 Falk Hüffner
        pix1 += line_size;
342
        pix2 += line_size;
343 9c76bd48 Brian Foley
    }
344
    return s;
345
}
346
347 26efc54e Michael Niedermayer
348 b250f9c6 Aurelien Jacobs
#if CONFIG_SNOW_ENCODER //dwt is in snow.c
/* Wavelet-domain distortion metric used by the snow encoder: transforms the
 * pixel difference with ff_spatial_dwt() and returns a weighted sum of
 * absolute subband coefficients.
 * @param w    block width, 8 / 16 / 32 (must equal h)
 * @param type 0 selects the 9/7 weights, 1 the 5/3 weights */
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
    int s, i, j;
    const int dec_count= w==8 ? 3 : 4;   /* decomposition levels: 3 for 8x8, else 4 */
    int tmp[32*32];                      /* scratch, stride fixed at 32 */
    int level, ori;
    /* per-subband weights, indexed [type][dec_count-3][level][orientation] */
    static const int scale[2][2][4][4]={
      {
        {
            // 9/7 8x8 dec=3
            {268, 239, 239, 213},
            {  0, 224, 224, 152},
            {  0, 135, 135, 110},
        },{
            // 9/7 16x16 or 32x32 dec=4
            {344, 310, 310, 280},
            {  0, 320, 320, 228},
            {  0, 175, 175, 136},
            {  0, 129, 129, 102},
        }
      },{
        {
            // 5/3 8x8 dec=3
            {275, 245, 245, 218},
            {  0, 230, 230, 156},
            {  0, 138, 138, 113},
        },{
            // 5/3 16x16 or 32x32 dec=4
            {352, 317, 317, 286},
            {  0, 328, 328, 233},
            {  0, 180, 180, 140},
            {  0, 132, 132, 105},
        }
      }
    };

    /* difference of the two blocks, scaled by 16 for transform precision */
    for (i = 0; i < h; i++) {
        for (j = 0; j < w; j+=4) {
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
        }
        pix1 += line_size;
        pix2 += line_size;
    }

    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);

    s=0;
    assert(w==h);
    /* accumulate |coefficient| * weight over every subband; at level 0 only
     * the LL band (ori 0) exists in addition to the three detail bands */
    for(level=0; level<dec_count; level++){
        for(ori= level ? 1 : 0; ori<4; ori++){
            int size= w>>(dec_count-level);
            int sx= (ori&1) ? size : 0;
            int stride= 32<<(dec_count-level);
            int sy= (ori&2) ? stride>>1 : 0;

            for(i=0; i<size; i++){
                for(j=0; j<size; j++){
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
                    s += FFABS(v);
                }
            }
        }
    }
    assert(s>=0);
    return s>>9;   /* undo weight scaling */
}
417
/* Wavelet distortion metric wrappers: w53_* use the 5/3 weights (type 1),
 * w97_* the 9/7 weights (type 0), at block widths 8, 16 and 32. */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

/* non-static: the 32-wide variants are referenced outside this file */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
441 3a6fc8fa Diego Pettenò
#endif
442 871371a7 Loren Merritt
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom: replicate the first/last row into the border */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right: replicate the first/last pixel of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
471
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* If the block lies entirely outside the picture, clamp src/src_y so
     * that at least one source row/column overlaps; the replication below
     * then fills the rest. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* portion of the block that is actually covered by the source */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    /* left/right replication runs over the full block height, so it must
     * come after the top/bottom fill above */
    for(y=0; y<block_h; y++){
       //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
542
543 0c1a9eda Zdenek Kabelac
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
544 de6d9b64 Fabrice Bellard
{
545
    int i;
546
547
    /* read the pixels */
548
    for(i=0;i<8;i++) {
549 c13e1abd Falk Hüffner
        block[0] = pixels[0];
550
        block[1] = pixels[1];
551
        block[2] = pixels[2];
552
        block[3] = pixels[3];
553
        block[4] = pixels[4];
554
        block[5] = pixels[5];
555
        block[6] = pixels[6];
556
        block[7] = pixels[7];
557
        pixels += line_size;
558
        block += 8;
559 de6d9b64 Fabrice Bellard
    }
560
}
561
562 0c1a9eda Zdenek Kabelac
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
563 bb270c08 Diego Biurrun
                          const uint8_t *s2, int stride){
564 9dbcbd92 Michael Niedermayer
    int i;
565
566
    /* read the pixels */
567
    for(i=0;i<8;i++) {
568 c13e1abd Falk Hüffner
        block[0] = s1[0] - s2[0];
569
        block[1] = s1[1] - s2[1];
570
        block[2] = s1[2] - s2[2];
571
        block[3] = s1[3] - s2[3];
572
        block[4] = s1[4] - s2[4];
573
        block[5] = s1[5] - s2[5];
574
        block[6] = s1[6] - s2[6];
575
        block[7] = s1[7] - s2[7];
576 9dbcbd92 Michael Niedermayer
        s1 += stride;
577
        s2 += stride;
578 c13e1abd Falk Hüffner
        block += 8;
579 9dbcbd92 Michael Niedermayer
    }
580
}
581
582
583 0c1a9eda Zdenek Kabelac
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
584 bb270c08 Diego Biurrun
                                 int line_size)
585 de6d9b64 Fabrice Bellard
{
586
    int i;
587 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
588 115329f1 Diego Biurrun
589 de6d9b64 Fabrice Bellard
    /* read the pixels */
590
    for(i=0;i<8;i++) {
591 c13e1abd Falk Hüffner
        pixels[0] = cm[block[0]];
592
        pixels[1] = cm[block[1]];
593
        pixels[2] = cm[block[2]];
594
        pixels[3] = cm[block[3]];
595
        pixels[4] = cm[block[4]];
596
        pixels[5] = cm[block[5]];
597
        pixels[6] = cm[block[6]];
598
        pixels[7] = cm[block[7]];
599
600
        pixels += line_size;
601
        block += 8;
602 de6d9b64 Fabrice Bellard
    }
603
}
604
605 178fcca8 Michael Niedermayer
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
606 bb270c08 Diego Biurrun
                                 int line_size)
607 178fcca8 Michael Niedermayer
{
608
    int i;
609 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
610 115329f1 Diego Biurrun
611 178fcca8 Michael Niedermayer
    /* read the pixels */
612
    for(i=0;i<4;i++) {
613
        pixels[0] = cm[block[0]];
614
        pixels[1] = cm[block[1]];
615
        pixels[2] = cm[block[2]];
616
        pixels[3] = cm[block[3]];
617
618
        pixels += line_size;
619
        block += 8;
620
    }
621
}
622
623 9ca358b9 Michael Niedermayer
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
624 bb270c08 Diego Biurrun
                                 int line_size)
625 9ca358b9 Michael Niedermayer
{
626
    int i;
627 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
628 115329f1 Diego Biurrun
629 9ca358b9 Michael Niedermayer
    /* read the pixels */
630
    for(i=0;i<2;i++) {
631
        pixels[0] = cm[block[0]];
632
        pixels[1] = cm[block[1]];
633
634
        pixels += line_size;
635
        block += 8;
636
    }
637
}
638
639 115329f1 Diego Biurrun
static void put_signed_pixels_clamped_c(const DCTELEM *block,
640 f9ed9d85 Mike Melanson
                                        uint8_t *restrict pixels,
641
                                        int line_size)
642
{
643
    int i, j;
644
645
    for (i = 0; i < 8; i++) {
646
        for (j = 0; j < 8; j++) {
647
            if (*block < -128)
648
                *pixels = 0;
649
            else if (*block > 127)
650
                *pixels = 255;
651
            else
652
                *pixels = (uint8_t)(*block + 128);
653
            block++;
654
            pixels++;
655
        }
656
        pixels += (line_size - 8);
657
    }
658
}
659
660 0c1a9eda Zdenek Kabelac
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
661 c13e1abd Falk Hüffner
                          int line_size)
662 de6d9b64 Fabrice Bellard
{
663
    int i;
664 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
665 115329f1 Diego Biurrun
666 de6d9b64 Fabrice Bellard
    /* read the pixels */
667
    for(i=0;i<8;i++) {
668 c13e1abd Falk Hüffner
        pixels[0] = cm[pixels[0] + block[0]];
669
        pixels[1] = cm[pixels[1] + block[1]];
670
        pixels[2] = cm[pixels[2] + block[2]];
671
        pixels[3] = cm[pixels[3] + block[3]];
672
        pixels[4] = cm[pixels[4] + block[4]];
673
        pixels[5] = cm[pixels[5] + block[5]];
674
        pixels[6] = cm[pixels[6] + block[6]];
675
        pixels[7] = cm[pixels[7] + block[7]];
676
        pixels += line_size;
677
        block += 8;
678 de6d9b64 Fabrice Bellard
    }
679
}
680 178fcca8 Michael Niedermayer
681
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
682
                          int line_size)
683
{
684
    int i;
685 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
686 115329f1 Diego Biurrun
687 178fcca8 Michael Niedermayer
    /* read the pixels */
688
    for(i=0;i<4;i++) {
689
        pixels[0] = cm[pixels[0] + block[0]];
690
        pixels[1] = cm[pixels[1] + block[1]];
691
        pixels[2] = cm[pixels[2] + block[2]];
692
        pixels[3] = cm[pixels[3] + block[3]];
693
        pixels += line_size;
694
        block += 8;
695
    }
696
}
697 9ca358b9 Michael Niedermayer
698
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
699
                          int line_size)
700
{
701
    int i;
702 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
703 115329f1 Diego Biurrun
704 9ca358b9 Michael Niedermayer
    /* read the pixels */
705
    for(i=0;i<2;i++) {
706
        pixels[0] = cm[pixels[0] + block[0]];
707
        pixels[1] = cm[pixels[1] + block[1]];
708
        pixels += line_size;
709
        block += 8;
710
    }
711
}
712 36940eca Loren Merritt
713
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
714
{
715
    int i;
716
    for(i=0;i<8;i++) {
717
        pixels[0] += block[0];
718
        pixels[1] += block[1];
719
        pixels[2] += block[2];
720
        pixels[3] += block[3];
721
        pixels[4] += block[4];
722
        pixels[5] += block[5];
723
        pixels[6] += block[6];
724
        pixels[7] += block[7];
725
        pixels += line_size;
726
        block += 8;
727
    }
728
}
729
730
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
731
{
732
    int i;
733
    for(i=0;i<4;i++) {
734
        pixels[0] += block[0];
735
        pixels[1] += block[1];
736
        pixels[2] += block[2];
737
        pixels[3] += block[3];
738
        pixels += line_size;
739
        block += 4;
740
    }
741
}
742
743 1edbfe19 Loren Merritt
static int sum_abs_dctelem_c(DCTELEM *block)
744
{
745
    int sum=0, i;
746
    for(i=0; i<64; i++)
747
        sum+= FFABS(block[i]);
748
    return sum;
749
}
750
751 59fe111e Michael Niedermayer
/* NOTE(review): dead code — this whole branch is excluded by "#if 0".
 * It is a 64-bit-register variant of the PIXOP2 pixel copy/average
 * kernels; the active 32-bit variant follows the #else below.  Kept
 * byte-identical since it is compiled out.  Note that its
 * CALL_2X_PIXELS lines reference OPNAME ## _pixels_c while the function
 * defined here is named OPNAME ## _pixels, so this branch would not
 * compile as-is if ever re-enabled. */
#if 0

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant
894
895
/*
 * PIXOP2(OPNAME, OP) generates the full family of pixel copy/average
 * primitives for one output operation:
 *   OPNAME ## _pixelsN_c          : N-wide copy/op of a single source
 *   ..._x2_c / _y2_c / _xy2_c     : half-pel interpolation to the right,
 *                                   downwards, and diagonally
 *   ..._pixelsN_l2 / _l4          : average of 2 resp. 4 sources
 *   ..._no_rnd_*                  : round-down variants
 * OP is a macro of the form OP(dst, val): op_put stores val into dst,
 * op_avg averages it into dst (see the #define lines after the macro).
 * 32-bit words are processed four bytes at a time: the xy2 kernels
 * accumulate the low 2 bits (l0/l1) and the high 6 bits (h0/h1) of each
 * byte separately so no carries cross byte boundaries.
 */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
/* two-source averages (l2): dst = avg(src1, src2), 8/4/2 bytes wide */\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
/* four-source rounding average (l4), SWAR low/high bit split per byte */\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
/* half-pel diagonal (xy2): average of the 2x2 neighbourhood */\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

/* op_avg averages into the destination via rnd_avg32() (round-half-up) */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
/* op_put simply stores the computed value */
#define op_put(a, b) a = b

/* Instantiate both families: put_* and avg_* */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
1269
1270 de6d9b64 Fabrice Bellard
/* Scalar rounding averages used by the subpel helpers below:
 * avg2 = round((a+b)/2), avg4 = round((a+b+c+d)/4).
 * NOTE(review): the macro arguments are not individually parenthesized
 * in the expansion, so these are only safe for simple operands —
 * confirm call sites before reusing. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1272
1273 c0a0170c Michael Niedermayer
/* Adapter: two-source no-rounding average, 16 pixels wide, with one
 * shared stride for dst and both sources (the PIXOP2-generated
 * put_no_rnd_pixels16_l2 takes three separate strides). */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1276
1277
/* Adapter: two-source no-rounding average, 8 pixels wide, with one
 * shared stride for dst and both sources. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
1280 073b013d Michael Niedermayer
1281 0c1a9eda Zdenek Kabelac
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* 1/16-pel bilinear interpolation over an 8-pixel-wide strip.
     * (x16, y16) are the fractional offsets in sixteenths; the four
     * corner weights A..D sum to 256, so >>8 renormalizes the result. */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col] +
                        B * src[col + 1] +
                        C * src[stride + col] +
                        D * src[stride + col + 1] + rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
1303
1304 703c8195 Loren Merritt
/**
 * Global motion compensation, C reference version (presumably the MPEG-4
 * GMC path — confirm against callers).  Renders an 8-pixel-wide, h-high
 * strip of dst by sampling src along an affine trajectory.
 *
 * (ox,oy) is the start position and dxx/dxy/dyx/dyy the per-pixel and
 * per-line increments, in 16.16 fixed point (see vx>>16 below); the low
 * "shift" bits of the integer part form the sub-pel fraction.  r is the
 * bilinear rounding constant; width/height give the valid source size.
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* from here on, width/height are the largest coordinates for which
       both src_x and src_x+1 (resp. src_y, src_y+1) stay inside */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* the (unsigned) compares test 0 <= src_x < width in one go */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear blend of the 2x2 neighbourhood,
                       renormalized by >>(shift*2) since both weight pairs
                       sum to s */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* y outside: clamp the row, interpolate in x only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* x outside: clamp the column, interpolate in y only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* both outside: nearest clamped source pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1361 669ac79c Michael Niedermayer
1362
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Full-pel copy: dispatch to the block-width-specific copy routine.
     * Widths other than 2/4/8/16 are silently ignored, as before. */
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1370
1371
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal third-pel interpolation (x offset 1/3 — presumably for
     * third-pel MC, confirm against callers): dst ~ (2*a + b)/3, computed
     * as (683*(2*a + b + 1)) >> 11 since 683/2048 approximates 1/3. */
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(2*s[col] + s[col+1] + 1)) >> 11;
    }
}
1381
1382
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal third-pel interpolation (x offset 2/3):
     * dst ~ (a + 2*b)/3, via the 683/2048 ~ 1/3 multiply-shift. */
    int row, col;
    for (row = 0; row < height; row++) {
        const uint8_t *s = src + row*stride;
        uint8_t       *d = dst + row*stride;
        for (col = 0; col < width; col++)
            d[col] = (683*(s[col] + 2*s[col+1] + 1)) >> 11;
    }
}
1392 115329f1 Diego Biurrun
1393 669ac79c Michael Niedermayer
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical third-pel interpolation: dst = round((2*a + b)/3) with
     * a = src[x], b = src[x+stride]; 683 = round(2^11/3). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(2*s[x] + s[x+stride] + 1)) >> 11;
    }
}
1403 115329f1 Diego Biurrun
1404 669ac79c Michael Niedermayer
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 4/3/3/2 over the 2x2
     * neighbourhood (sum 12); 2731 = round(2^15/12). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15;
    }
}
1414
1415
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 3/2/4/3 over the 2x2
     * neighbourhood (sum 12); 2731 = round(2^15/12). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
1425
1426
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical two-third-pel interpolation: dst = round((a + 2*b)/3)
     * with a = src[x], b = src[x+stride]; 683 = round(2^11/3). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (683*(s[x] + 2*s[x+stride] + 1)) >> 11;
    }
}
1436
1437
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 3/4/2/3 over the 2x2
     * neighbourhood (sum 12); 2731 = round(2^15/12). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15;
    }
}
1447
1448
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 2/3/3/4 over the 2x2
     * neighbourhood (sum 12); 2731 = round(2^15/12). */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15;
    }
}
1458 da3b9756 Mike Melanson
1459
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Full-pel averaging case: dispatch to the averaging block copy of
     * the matching width. Widths other than 2/4/8/16 are not handled. */
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1467
1468
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal third-pel interpolation (683 = round(2^11/3)), then
     * rounded average with the existing dst contents. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(2*s[x] + s[x+1] + 1)) >> 11) + 1) >> 1;
    }
}
1478
1479
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Horizontal two-third-pel interpolation (683 = round(2^11/3)),
     * then rounded average with the existing dst contents. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(s[x] + 2*s[x+1] + 1)) >> 11) + 1) >> 1;
    }
}
1489 115329f1 Diego Biurrun
1490 da3b9756 Mike Melanson
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical third-pel interpolation (683 = round(2^11/3)), then
     * rounded average with the existing dst contents. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(2*s[x] + s[x+stride] + 1)) >> 11) + 1) >> 1;
    }
}
1500 115329f1 Diego Biurrun
1501 da3b9756 Mike Melanson
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 4/3/3/2 (2731 =
     * round(2^15/12)), then rounded average with the existing dst. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(4*s[x] + 3*s[x+1] + 3*s[x+stride] + 2*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
1511
1512
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 3/2/4/3 (2731 =
     * round(2^15/12)), then rounded average with the existing dst. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(3*s[x] + 2*s[x+1] + 4*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
1522
1523
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Vertical two-third-pel interpolation (683 = round(2^11/3)), then
     * rounded average with the existing dst contents. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((683*(s[x] + 2*s[x+stride] + 1)) >> 11) + 1) >> 1;
    }
}
1533
1534
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 3/4/2/3 (2731 =
     * round(2^15/12)), then rounded average with the existing dst. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(3*s[x] + 4*s[x+1] + 2*s[x+stride] + 3*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
1544
1545
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    /* Diagonal third-pel interpolation, weights 2/3/3/4 (2731 =
     * round(2^15/12)), then rounded average with the existing dst. */
    int y, x;
    for (y = 0; y < height; y++) {
        const uint8_t *s = src + y*stride;
        uint8_t       *d = dst + y*stride;
        for (x = 0; x < width; x++)
            d[x] = (d[x] + ((2731*(2*s[x] + 3*s[x+1] + 3*s[x+stride] + 4*s[x+stride+1] + 6)) >> 15) + 1) >> 1;
    }
}
1555 669ac79c Michael Niedermayer
#if 0
1556
#define TPEL_WIDTH(width)\
1557
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1558
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
1559
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1560
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
1561
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1562
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
1563
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1564
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
1565
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1566
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
1567
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1568
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
1569
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1570
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
1571
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1572
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
1573
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1574
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1575
#endif
1576
1577 0da71265 Michael Niedermayer
/* Generates the 2/4/8-pixel-wide H.264 chroma motion compensation routines.
 * Each function computes a bilinear interpolation over the 2x2 source
 * neighbourhood with weights A=(8-x)(8-y), B=x(8-y), C=(8-x)y, D=xy
 * (weight sum 64); x and y are the 1/8-pel fractional offsets, 0..7.
 * When D==0 the 2-D filter degenerates to a 1-D one and the fast path uses
 * E=B+C with step = stride (vertical-only, C!=0) or 1 (horizontal-only).
 * OP is the store operation (see op_put/op_avg below), which also applies
 * the +32 rounding and >>6 normalization. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
1677
1678
/* Store operations for H264_CHROMA_MC: the weighted sum b (weight total 64)
 * is rounded with +32 and normalized by >>6; op_avg additionally takes a
 * rounded average with the value already in dst. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1685
1686 c374691b David Conrad
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* VC-1 8-wide chroma MC: bilinear interpolation with 1/8-pel weights
     * A..D (sum 64), using the "no rounding" bias 32-4 = 28 before >>6. */
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
1709
1710 8013da73 David Conrad
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    /* VC-1 8-wide chroma MC, averaging variant: bilinear interpolation
     * with 1/8-pel weights A..D (sum 64) and "no rounding" bias 32-4,
     * then averaged into dst via avg2(). */
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = avg2(dst[j], ((A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
1733
1734 b3184779 Michael Niedermayer
#define QPEL_MC(r, OPNAME, RND, OP) \
1735 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1736 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1737 b3184779 Michael Niedermayer
    int i;\
1738
    for(i=0; i<h; i++)\
1739
    {\
1740
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1741
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1742
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1743
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1744
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1745
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1746
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1747
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1748
        dst+=dstStride;\
1749
        src+=srcStride;\
1750
    }\
1751 44eb4951 Michael Niedermayer
}\
1752
\
1753 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1754 db794953 Michael Niedermayer
    const int w=8;\
1755 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1756 b3184779 Michael Niedermayer
    int i;\
1757
    for(i=0; i<w; i++)\
1758
    {\
1759
        const int src0= src[0*srcStride];\
1760
        const int src1= src[1*srcStride];\
1761
        const int src2= src[2*srcStride];\
1762
        const int src3= src[3*srcStride];\
1763
        const int src4= src[4*srcStride];\
1764
        const int src5= src[5*srcStride];\
1765
        const int src6= src[6*srcStride];\
1766
        const int src7= src[7*srcStride];\
1767
        const int src8= src[8*srcStride];\
1768
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1769
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1770
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1771
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1772
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1773
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1774
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1775
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1776
        dst++;\
1777
        src++;\
1778
    }\
1779
}\
1780
\
1781 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1782 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1783 b3184779 Michael Niedermayer
    int i;\
1784 826f429a Michael Niedermayer
    \
1785 b3184779 Michael Niedermayer
    for(i=0; i<h; i++)\
1786
    {\
1787
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1788
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1789
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1790
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1791
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1792
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1793
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1794
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1795
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1796
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1797
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1798
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1799
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1800
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1801
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1802
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1803
        dst+=dstStride;\
1804
        src+=srcStride;\
1805
    }\
1806
}\
1807
\
1808 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1809 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1810 b3184779 Michael Niedermayer
    int i;\
1811 826f429a Michael Niedermayer
    const int w=16;\
1812 b3184779 Michael Niedermayer
    for(i=0; i<w; i++)\
1813
    {\
1814
        const int src0= src[0*srcStride];\
1815
        const int src1= src[1*srcStride];\
1816
        const int src2= src[2*srcStride];\
1817
        const int src3= src[3*srcStride];\
1818
        const int src4= src[4*srcStride];\
1819
        const int src5= src[5*srcStride];\
1820
        const int src6= src[6*srcStride];\
1821
        const int src7= src[7*srcStride];\
1822
        const int src8= src[8*srcStride];\
1823
        const int src9= src[9*srcStride];\
1824
        const int src10= src[10*srcStride];\
1825
        const int src11= src[11*srcStride];\
1826
        const int src12= src[12*srcStride];\
1827
        const int src13= src[13*srcStride];\
1828
        const int src14= src[14*srcStride];\
1829
        const int src15= src[15*srcStride];\
1830
        const int src16= src[16*srcStride];\
1831
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1832
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1833
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1834
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1835
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1836
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1837
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1838
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1839
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1840
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1841
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1842
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1843
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1844
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1845
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1846
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1847
        dst++;\
1848
        src++;\
1849
    }\
1850
}\
1851
\
1852 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1853 45553457 Zdenek Kabelac
    OPNAME ## pixels8_c(dst, src, stride, 8);\
1854 b3184779 Michael Niedermayer
}\
1855
\
1856 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1857
    uint8_t half[64];\
1858 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1859
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1860 44eb4951 Michael Niedermayer
}\
1861
\
1862 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1863 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1864 44eb4951 Michael Niedermayer
}\
1865
\
1866 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1867
    uint8_t half[64];\
1868 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1869
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1870 44eb4951 Michael Niedermayer
}\
1871
\
1872 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1873
    uint8_t full[16*9];\
1874
    uint8_t half[64];\
1875 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1876 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1877 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1878 44eb4951 Michael Niedermayer
}\
1879
\
1880 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1881
    uint8_t full[16*9];\
1882 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1883 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1884 44eb4951 Michael Niedermayer
}\
1885
\
1886 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1887
    uint8_t full[16*9];\
1888
    uint8_t half[64];\
1889 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1890 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1891 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1892 44eb4951 Michael Niedermayer
}\
1893 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1894
    uint8_t full[16*9];\
1895
    uint8_t halfH[72];\
1896
    uint8_t halfV[64];\
1897
    uint8_t halfHV[64];\
1898 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1899
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1900 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1901
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1902 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1903 44eb4951 Michael Niedermayer
}\
1904 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1905
    uint8_t full[16*9];\
1906
    uint8_t halfH[72];\
1907
    uint8_t halfHV[64];\
1908 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1909
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1910
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1911
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1912
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1913
}\
1914 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1915
    uint8_t full[16*9];\
1916
    uint8_t halfH[72];\
1917
    uint8_t halfV[64];\
1918
    uint8_t halfHV[64];\
1919 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1920
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1921 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1922
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1923 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1924 44eb4951 Michael Niedermayer
}\
1925 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1926
    uint8_t full[16*9];\
1927
    uint8_t halfH[72];\
1928
    uint8_t halfHV[64];\
1929 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1930
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1931
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1932
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1933
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1934
}\
1935 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1936
    uint8_t full[16*9];\
1937
    uint8_t halfH[72];\
1938
    uint8_t halfV[64];\
1939
    uint8_t halfHV[64];\
1940 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1941
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1942 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1943
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1944 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1945 44eb4951 Michael Niedermayer
}\
1946 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1947
    uint8_t full[16*9];\
1948
    uint8_t halfH[72];\
1949
    uint8_t halfHV[64];\
1950 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1951
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1952
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1953
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1954
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1955
}\
1956 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1957
    uint8_t full[16*9];\
1958
    uint8_t halfH[72];\
1959
    uint8_t halfV[64];\
1960
    uint8_t halfHV[64];\
1961 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1962
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1963 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1964
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1965 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1966 44eb4951 Michael Niedermayer
}\
1967 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1968
    uint8_t full[16*9];\
1969
    uint8_t halfH[72];\
1970
    uint8_t halfHV[64];\
1971 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1972
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1973
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1974
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1975
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1976
}\
1977 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1978
    uint8_t halfH[72];\
1979
    uint8_t halfHV[64];\
1980 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1981 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1982 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1983 44eb4951 Michael Niedermayer
}\
1984 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1985
    uint8_t halfH[72];\
1986
    uint8_t halfHV[64];\
1987 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1988 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1989 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1990 44eb4951 Michael Niedermayer
}\
1991 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1992
    uint8_t full[16*9];\
1993
    uint8_t halfH[72];\
1994
    uint8_t halfV[64];\
1995
    uint8_t halfHV[64];\
1996 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1997
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1998 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1999
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2000 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2001 44eb4951 Michael Niedermayer
}\
2002 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2003
    uint8_t full[16*9];\
2004
    uint8_t halfH[72];\
2005 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2006
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2007
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2008
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2009
}\
2010 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2011
    uint8_t full[16*9];\
2012
    uint8_t halfH[72];\
2013
    uint8_t halfV[64];\
2014
    uint8_t halfHV[64];\
2015 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2016
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2017 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2018
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2019 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2020 44eb4951 Michael Niedermayer
}\
2021 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2022
    uint8_t full[16*9];\
2023
    uint8_t halfH[72];\
2024 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2025
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2026
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2027
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2028
}\
2029 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2030
    uint8_t halfH[72];\
2031 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2032 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2033 b3184779 Michael Niedermayer
}\
2034 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2035 45553457 Zdenek Kabelac
    OPNAME ## pixels16_c(dst, src, stride, 16);\
2036 b3184779 Michael Niedermayer
}\
2037
\
2038 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2039
    uint8_t half[256];\
2040 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2041
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2042
}\
2043
\
2044 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2045 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
2046 44eb4951 Michael Niedermayer
}\
2047 b3184779 Michael Niedermayer
\
2048 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2049
    uint8_t half[256];\
2050 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2051
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2052
}\
2053
\
2054 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2055
    uint8_t full[24*17];\
2056
    uint8_t half[256];\
2057 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2058 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2059 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2060
}\
2061
\
2062 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2063
    uint8_t full[24*17];\
2064 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2065 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
2066 b3184779 Michael Niedermayer
}\
2067
\
2068 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2069
    uint8_t full[24*17];\
2070
    uint8_t half[256];\
2071 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2072 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2073 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2074
}\
2075 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2076
    uint8_t full[24*17];\
2077
    uint8_t halfH[272];\
2078
    uint8_t halfV[256];\
2079
    uint8_t halfHV[256];\
2080 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2081
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2082 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2083
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2084 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2085
}\
2086 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2087
    uint8_t full[24*17];\
2088
    uint8_t halfH[272];\
2089
    uint8_t halfHV[256];\
2090 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2091
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2092
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2093
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2094
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2095
}\
2096 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2097
    uint8_t full[24*17];\
2098
    uint8_t halfH[272];\
2099
    uint8_t halfV[256];\
2100
    uint8_t halfHV[256];\
2101 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2102
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2103 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2104
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2105 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2106
}\
2107 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2108
    uint8_t full[24*17];\
2109
    uint8_t halfH[272];\
2110
    uint8_t halfHV[256];\
2111 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2112
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2113
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2114
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2115
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2116
}\
2117 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2118
    uint8_t full[24*17];\
2119
    uint8_t halfH[272];\
2120
    uint8_t halfV[256];\
2121
    uint8_t halfHV[256];\
2122 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2123
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2124 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2125
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2126 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2127
}\
2128 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2129
    uint8_t full[24*17];\
2130
    uint8_t halfH[272];\
2131
    uint8_t halfHV[256];\
2132 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2133
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2134
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2135
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2136
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2137
}\
2138 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2139
    uint8_t full[24*17];\
2140
    uint8_t halfH[272];\
2141
    uint8_t halfV[256];\
2142
    uint8_t halfHV[256];\
2143 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2144
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
2145 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2146
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2147 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2148
}\
2149 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2150
    uint8_t full[24*17];\
2151
    uint8_t halfH[272];\
2152
    uint8_t halfHV[256];\
2153 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2154
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2155
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2156
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2157
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2158
}\
2159 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2160
    uint8_t halfH[272];\
2161
    uint8_t halfHV[256];\
2162 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2163 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2164 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2165
}\
2166 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2167
    uint8_t halfH[272];\
2168
    uint8_t halfHV[256];\
2169 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2170 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2171 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2172
}\
2173 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2174
    uint8_t full[24*17];\
2175
    uint8_t halfH[272];\
2176
    uint8_t halfV[256];\
2177
    uint8_t halfHV[256];\
2178 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2179
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2180 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2181
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2182 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2183
}\
2184 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2185
    uint8_t full[24*17];\
2186
    uint8_t halfH[272];\
2187 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2188
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2189
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2190
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2191
}\
2192 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2193
    uint8_t full[24*17];\
2194
    uint8_t halfH[272];\
2195
    uint8_t halfV[256];\
2196
    uint8_t halfHV[256];\
2197 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2198
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2199 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2200
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2201 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2202
}\
2203 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2204
    uint8_t full[24*17];\
2205
    uint8_t halfH[272];\
2206 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2207
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2208
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2209
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2210
}\
2211 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2212
    uint8_t halfH[272];\
2213 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2214 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2215 45553457 Zdenek Kabelac
}
2216 44eb4951 Michael Niedermayer
2217 b3184779 Michael Niedermayer
/* Pixel-combining operations plugged into QPEL_MC as its OP argument.
 * The qpel lowpass filters produce sums scaled by 32 (the filter taps
 * 1,-5,20,20,-5,1 visible above sum to 32), so (b) is shifted down by 5
 * and clipped through the cm[] table (ff_cropTbl + MAX_NEG_CROP).
 *   +16 before >>5  -> round to nearest
 *   +15 before >>5  -> "no_rnd" variant (rounds down; presumably driven
 *                      by the codec's rounding-control bit - the callers
 *                      are outside this file, so confirm there)
 * op_avg additionally averages the result with the existing dst pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the quarter-pel MC function families declared by QPEL_MC:
 * put_*, put_no_rnd_* and avg_* for both 8x8 and 16x16 blocks.
 * The avg_no_rnd variant is intentionally left disabled. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
2230 44eb4951 Michael Niedermayer
2231 0da71265 Michael Niedermayer
#if 1
2232
/* H264_LOWPASS(OPNAME, OP, OP2)
 * Expands to the 6-tap (1,-5,20,20,-5,1) half-sample interpolation
 * primitives for block widths 2, 4, 8 and 16:
 *   *_h_lowpass  - horizontal filter, one output row per input row
 *   *_v_lowpass  - vertical filter, one output column per input column
 *   *_hv_lowpass - horizontal pass into the caller-supplied int16_t
 *                  tmp[] buffer at full precision, then a vertical pass
 *                  over tmp[] combined with OP2
 * OP and OP2 are statement macros of the form OP(dst_lvalue, value):
 * OP normalizes a single-pass sum (tap weights sum to 32), OP2 a
 * two-pass sum, and both clip through cm[] (ff_cropTbl + MAX_NEG_CROP)
 * before storing/averaging into dst.
 * The hv variants read 2 rows/columns on each side of the block, hence
 * the "src -= 2*srcStride" prologue and the h+5 intermediate rows.
 * The 16-wide functions are composed of four 8-wide calls (2x2 tiling).
 * NOTE: only /\*...*\/ comments may appear inside this macro; a //
 * comment would swallow the line-continuation backslash. */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride; /* the 6-tap filter needs 2 rows above the block */\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2); /* rewind so tmp points at the block's row 0 */\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\
2495
2496
#define H264_MC(OPNAME, SIZE) \
2497
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2498
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
2499
}\
2500
\
2501
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2502
    uint8_t half[SIZE*SIZE];\
2503
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2504
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
2505
}\
2506
\
2507
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2508
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
2509
}\
2510
\
2511
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2512
    uint8_t half[SIZE*SIZE];\
2513
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2514
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
2515
}\
2516
\
2517
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2518
    uint8_t full[SIZE*(SIZE+5)];\
2519
    uint8_t * const full_mid= full + SIZE*2;\
2520
    uint8_t half[SIZE*SIZE];\
2521
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2522
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2523
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
2524
}\
2525
\
2526
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2527
    uint8_t full[SIZE*(SIZE+5)];\
2528
    uint8_t * const full_mid= full + SIZE*2;\
2529
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2530
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
2531
}\
2532
\
2533
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2534
    uint8_t full[SIZE*(SIZE+5)];\
2535
    uint8_t * const full_mid= full + SIZE*2;\
2536
    uint8_t half[SIZE*SIZE];\
2537
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2538
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2539
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
2540
}\
2541
\
2542
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2543
    uint8_t full[SIZE*(SIZE+5)];\
2544
    uint8_t * const full_mid= full + SIZE*2;\
2545
    uint8_t halfH[SIZE*SIZE];\
2546
    uint8_t halfV[SIZE*SIZE];\
2547
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2548
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2549
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2550
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2551
}\
2552
\
2553
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2554
    uint8_t full[SIZE*(SIZE+5)];\
2555
    uint8_t * const full_mid= full + SIZE*2;\
2556
    uint8_t halfH[SIZE*SIZE];\
2557
    uint8_t halfV[SIZE*SIZE];\
2558
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2559
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
2560
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2561
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2562
}\
2563
\
2564
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2565
    uint8_t full[SIZE*(SIZE+5)];\
2566
    uint8_t * const full_mid= full + SIZE*2;\
2567
    uint8_t halfH[SIZE*SIZE];\
2568
    uint8_t halfV[SIZE*SIZE];\
2569
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2570
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2571
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2572
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2573
}\
2574
\
2575
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2576
    uint8_t full[SIZE*(SIZE+5)];\
2577
    uint8_t * const full_mid= full + SIZE*2;\
2578
    uint8_t halfH[SIZE*SIZE];\
2579
    uint8_t halfV[SIZE*SIZE];\
2580
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2581
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
2582
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2583
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2584
}\
2585
\
2586
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2587
    int16_t tmp[SIZE*(SIZE+5)];\
2588
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
2589
}\
2590
\
2591
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2592
    int16_t tmp[SIZE*(SIZE+5)];\
2593
    uint8_t halfH[SIZE*SIZE];\
2594
    uint8_t halfHV[SIZE*SIZE];\
2595
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2596
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2597
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2598
}\
2599
\
2600
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2601
    int16_t tmp[SIZE*(SIZE+5)];\
2602
    uint8_t halfH[SIZE*SIZE];\
2603
    uint8_t halfHV[SIZE*SIZE];\
2604
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2605
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2606
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2607
}\
2608
\
2609
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2610
    uint8_t full[SIZE*(SIZE+5)];\
2611
    uint8_t * const full_mid= full + SIZE*2;\
2612
    int16_t tmp[SIZE*(SIZE+5)];\
2613
    uint8_t halfV[SIZE*SIZE];\
2614
    uint8_t halfHV[SIZE*SIZE];\
2615
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2616
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2617
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2618
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
2619
}\
2620
\
2621
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2622
    uint8_t full[SIZE*(SIZE+5)];\
2623
    uint8_t * const full_mid= full + SIZE*2;\
2624
    int16_t tmp[SIZE*(SIZE+5)];\
2625
    uint8_t halfV[SIZE*SIZE];\