/* ffmpeg / libavcodec / dsputil.c @ revision 3da11804 */

1 de6d9b64 Fabrice Bellard
/*
2
 * DSP utils
3 406792e7 Diego Biurrun
 * Copyright (c) 2000, 2001 Fabrice Bellard
4 8f2ab833 Michael Niedermayer
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 de6d9b64 Fabrice Bellard
 *
6 7b94177e Diego Biurrun
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
 *
8 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11 ff4ec49e Fabrice Bellard
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
14 de6d9b64 Fabrice Bellard
 *
15 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
16 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19 de6d9b64 Fabrice Bellard
 *
20 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
21 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
22 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 de6d9b64 Fabrice Bellard
 */
24 115329f1 Diego Biurrun
25 983e3246 Michael Niedermayer
/**
26 bad5537e Diego Biurrun
 * @file libavcodec/dsputil.c
27 983e3246 Michael Niedermayer
 * DSP utils
28
 */
29 115329f1 Diego Biurrun
30 de6d9b64 Fabrice Bellard
#include "avcodec.h"
31
#include "dsputil.h"
32 b0368839 Michael Niedermayer
#include "simple_idct.h"
33 65e4c8c9 Michael Niedermayer
#include "faandct.h"
34 6f08c541 Michael Niedermayer
#include "faanidct.h"
35 199436b9 Aurelien Jacobs
#include "mathops.h"
36 059715a4 Robert Edele
#include "snow.h"
37 af818f7a Diego Biurrun
#include "mpegvideo.h"
38
#include "config.h"
39 3da11804 Måns Rullgård
#include "lpc.h"
40
#include "ac3dec.h"
41
#include "vorbis.h"
42
#include "png.h"
43 5596c60c Michael Niedermayer
44 88730be6 Måns Rullgård
/* snow.c */
45
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
46
47 28245435 Peter Ross
/* eaidct.c */
48
void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
49
50 342c7dfd Kostya Shishkov
/* binkidct.c */
51
void ff_bink_idct_c    (DCTELEM *block);
52
void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
53
void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
54
55 55fde95e Måns Rullgård
/* Clipping table: index with MAX_NEG_CROP + x to clamp x into [0,255].
 * Zero-initialized here; presumably filled by the DSP init code — confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Table of squares, indexed as ff_squareTbl[256 + x]; zero-initialized here
 * and presumably filled at init — confirm against dsputil_init. */
uint32_t ff_squareTbl[512] = {0, };

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
61 469bd7b1 Loren Merritt
62 0c1a9eda Zdenek Kabelac
/* Classic zig-zag scan order: entry i gives the raster index (row*8+col)
 * of the i-th coefficient in scan order. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
85
86 2f349de2 Michael Niedermayer
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
88 2f349de2 Michael Niedermayer
89 0c1a9eda Zdenek Kabelac
/* Alternate horizontal scan order (raster indices in scan order). */
const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate vertical scan order (raster indices in scan order). */
const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
110
111 1a918c08 Loren Merritt
/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
 * for a>16909558, is an overestimate by less than 1 part in 1<<24
 * (fixed-point reciprocal table used to replace integer division) */
const uint32_t ff_inverse[257]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
  16777216
};
148
149 b0368839 Michael Niedermayer
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Row permutation applied for the SSE2 IDCT path. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
162
163 4c79b95c Aurelien Jacobs
/* Initialize a ScanTable: store the raw scan order, apply the given
 * coefficient permutation, and precompute, for every scan position, the
 * highest permuted index seen so far (raster_end). */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i, max_seen;

    st->scantable = src_scantable;

    for (i = 0; i < 64; i++) {
        const int idx = src_scantable[i];
        st->permutated[i] = permutation[idx];
#if ARCH_PPC
        st->inverse[idx] = i;   /* AltiVec path also needs the inverse mapping */
#endif
    }

    /* running maximum of the permuted scan */
    max_seen = -1;
    for (i = 0; i < 64; i++) {
        if (st->permutated[i] > max_seen)
            max_seen = st->permutated[i];
        st->raster_end[i] = max_seen;
    }
}
186
187 0c1a9eda Zdenek Kabelac
/* Sum all 256 samples of a 16x16 block; line_size is the row stride. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}
208
209 0c1a9eda Zdenek Kabelac
/**
 * Sum of squares of all samples of a 16x16 block (energy / "norm1").
 * NOTE(review): the wide uint64_t/uint32_t loads below type-pun the pixel
 * pointer — undefined behavior under strict aliasing and unaligned-access
 * rules; presumably callers guarantee suitable alignment — confirm.
 */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;   /* offset so negative indices are valid; contents presumably squares — confirm init code */

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* byte-by-byte reference version (disabled) */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            /* 64-bit native arithmetic: one 8-byte load per 8 pixels */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* 32-bit native arithmetic: two 4-byte loads per 8 pixels */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;   /* advance to the next row of the 16-wide block */
    }
    return s;
}
256
257 96711ecf Michael Niedermayer
/* Byte-swap w 32-bit words from src into dst (dst may equal src). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for (i = 0; i < w; i++)
        dst[i] = bswap_32(src[i]);
}
274 3aa102be Michael Niedermayer
275 26efc54e Michael Niedermayer
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
276
{
277
    int s, i;
278 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
279 26efc54e Michael Niedermayer
280
    s = 0;
281
    for (i = 0; i < h; i++) {
282
        s += sq[pix1[0] - pix2[0]];
283
        s += sq[pix1[1] - pix2[1]];
284
        s += sq[pix1[2] - pix2[2]];
285
        s += sq[pix1[3] - pix2[3]];
286
        pix1 += line_size;
287
        pix2 += line_size;
288
    }
289
    return s;
290
}
291
292 bb198e19 Michael Niedermayer
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
293 1457ab52 Michael Niedermayer
{
294
    int s, i;
295 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
296 1457ab52 Michael Niedermayer
297
    s = 0;
298 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
299 1457ab52 Michael Niedermayer
        s += sq[pix1[0] - pix2[0]];
300
        s += sq[pix1[1] - pix2[1]];
301
        s += sq[pix1[2] - pix2[2]];
302
        s += sq[pix1[3] - pix2[3]];
303
        s += sq[pix1[4] - pix2[4]];
304
        s += sq[pix1[5] - pix2[5]];
305
        s += sq[pix1[6] - pix2[6]];
306
        s += sq[pix1[7] - pix2[7]];
307
        pix1 += line_size;
308
        pix2 += line_size;
309
    }
310
    return s;
311
}
312
313 bb198e19 Michael Niedermayer
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
314 9c76bd48 Brian Foley
{
315 6b026927 Falk Hüffner
    int s, i;
316 1d503957 Måns Rullgård
    uint32_t *sq = ff_squareTbl + 256;
317 9c76bd48 Brian Foley
318
    s = 0;
319 bb198e19 Michael Niedermayer
    for (i = 0; i < h; i++) {
320 6b026927 Falk Hüffner
        s += sq[pix1[ 0] - pix2[ 0]];
321
        s += sq[pix1[ 1] - pix2[ 1]];
322
        s += sq[pix1[ 2] - pix2[ 2]];
323
        s += sq[pix1[ 3] - pix2[ 3]];
324
        s += sq[pix1[ 4] - pix2[ 4]];
325
        s += sq[pix1[ 5] - pix2[ 5]];
326
        s += sq[pix1[ 6] - pix2[ 6]];
327
        s += sq[pix1[ 7] - pix2[ 7]];
328
        s += sq[pix1[ 8] - pix2[ 8]];
329
        s += sq[pix1[ 9] - pix2[ 9]];
330
        s += sq[pix1[10] - pix2[10]];
331
        s += sq[pix1[11] - pix2[11]];
332
        s += sq[pix1[12] - pix2[12]];
333
        s += sq[pix1[13] - pix2[13]];
334
        s += sq[pix1[14] - pix2[14]];
335
        s += sq[pix1[15] - pix2[15]];
336 2a006cd3 Felix von Leitner
337 6b026927 Falk Hüffner
        pix1 += line_size;
338
        pix2 += line_size;
339 9c76bd48 Brian Foley
    }
340
    return s;
341
}
342
343 26efc54e Michael Niedermayer
344 b250f9c6 Aurelien Jacobs
#if CONFIG_SNOW_ENCODER //dwt is in snow.c
345 3a6fc8fa Diego Pettenò
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
346 26efc54e Michael Niedermayer
    int s, i, j;
347
    const int dec_count= w==8 ? 3 : 4;
348 871371a7 Loren Merritt
    int tmp[32*32];
349 26efc54e Michael Niedermayer
    int level, ori;
350 115329f1 Diego Biurrun
    static const int scale[2][2][4][4]={
351 26efc54e Michael Niedermayer
      {
352
        {
353 871371a7 Loren Merritt
            // 9/7 8x8 dec=3
354 26efc54e Michael Niedermayer
            {268, 239, 239, 213},
355
            {  0, 224, 224, 152},
356
            {  0, 135, 135, 110},
357
        },{
358 871371a7 Loren Merritt
            // 9/7 16x16 or 32x32 dec=4
359 26efc54e Michael Niedermayer
            {344, 310, 310, 280},
360
            {  0, 320, 320, 228},
361
            {  0, 175, 175, 136},
362
            {  0, 129, 129, 102},
363
        }
364
      },{
365 871371a7 Loren Merritt
        {
366
            // 5/3 8x8 dec=3
367 26efc54e Michael Niedermayer
            {275, 245, 245, 218},
368
            {  0, 230, 230, 156},
369
            {  0, 138, 138, 113},
370
        },{
371 871371a7 Loren Merritt
            // 5/3 16x16 or 32x32 dec=4
372 26efc54e Michael Niedermayer
            {352, 317, 317, 286},
373
            {  0, 328, 328, 233},
374
            {  0, 180, 180, 140},
375
            {  0, 132, 132, 105},
376
        }
377
      }
378
    };
379
380
    for (i = 0; i < h; i++) {
381
        for (j = 0; j < w; j+=4) {
382 871371a7 Loren Merritt
            tmp[32*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
383
            tmp[32*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
384
            tmp[32*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
385
            tmp[32*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
386 26efc54e Michael Niedermayer
        }
387
        pix1 += line_size;
388
        pix2 += line_size;
389
    }
390 8b975b7c Michael Niedermayer
391 871371a7 Loren Merritt
    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
392 26efc54e Michael Niedermayer
393
    s=0;
394 871371a7 Loren Merritt
    assert(w==h);
395 26efc54e Michael Niedermayer
    for(level=0; level<dec_count; level++){
396
        for(ori= level ? 1 : 0; ori<4; ori++){
397 871371a7 Loren Merritt
            int size= w>>(dec_count-level);
398
            int sx= (ori&1) ? size : 0;
399
            int stride= 32<<(dec_count-level);
400 26efc54e Michael Niedermayer
            int sy= (ori&2) ? stride>>1 : 0;
401 115329f1 Diego Biurrun
402 26efc54e Michael Niedermayer
            for(i=0; i<size; i++){
403
                for(j=0; j<size; j++){
404
                    int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
405 c26abfa5 Diego Biurrun
                    s += FFABS(v);
406 26efc54e Michael Niedermayer
                }
407
            }
408
        }
409
    }
410 115329f1 Diego Biurrun
    assert(s>=0);
411 871371a7 Loren Merritt
    return s>>9;
412 26efc54e Michael Niedermayer
}
413
414
/* Wrappers fixing the wavelet type and block width for w_c():
 * "w53" passes type 1 (5/3 tables), "w97" passes type 0 (9/7 tables);
 * the numeric suffix is the block width. */
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 1);
}

static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size,  8, h, 0);
}

static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 1);
}

static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 16, h, 0);
}

/* 32x32 variants have external linkage (used outside this file). */
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 1);
}

int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
    return w_c(v, pix1, pix2, line_size, 32, h, 0);
}
437 3a6fc8fa Diego Pettenò
#endif
438 871371a7 Loren Merritt
439 5a6a9e78 Aurelien Jacobs
/* Draw the edges of width 'w' around an image of size width x height:
 * replicate the border rows/columns/corners into the surrounding padding.
 * buf points at the top-left pixel, wrap is the line stride in bytes. */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *last_line = buf + (height - 1) * wrap;
    int i;

    /* replicate the first/last row into the w rows above/below */
    for (i = 1; i <= w; i++) {
        memcpy(buf - i * wrap, buf, width);
        memcpy(last_line + i * wrap, last_line, width);
    }

    /* replicate the first/last column into the w columns left/right */
    for (i = 0; i < height; i++) {
        uint8_t *row = buf + i * wrap;
        memset(row - w, row[0], w);
        memset(row + width, row[width - 1], w);
    }

    /* fill the four corner areas with the nearest corner pixel */
    for (i = 1; i <= w; i++) {
        memset(buf - i * wrap - w, buf[0], w);                         /* top left */
        memset(buf - i * wrap + width, buf[width - 1], w);             /* top right */
        memset(last_line + i * wrap - w, last_line[0], w);             /* bottom left */
        memset(last_line + i * wrap + width, last_line[width - 1], w); /* bottom right */
    }
}
467
468 288a44fb Aurelien Jacobs
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer, pointing at the (src_x, src_y) position of the block
 *            (may lie outside the picture; clamped below)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* clamp blocks lying entirely outside the picture onto the nearest
       edge row/column so at least one real sample is reachable */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* portion of the block that overlaps the picture */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top: replicate the first valid row upwards
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom: replicate the last valid row downwards
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
       //left: replicate the first valid column
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

       //right: replicate the last valid column
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
538
539 0c1a9eda Zdenek Kabelac
/* Load an 8x8 block of pixels into a contiguous DCT coefficient block. */
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = pixels[col];
        pixels += line_size;
        block += 8;
    }
}
557
558 0c1a9eda Zdenek Kabelac
/* Store the 8x8 difference s1 - s2 into a contiguous DCT block. */
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
577
578
579 0c1a9eda Zdenek Kabelac
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
580 bb270c08 Diego Biurrun
                                 int line_size)
581 de6d9b64 Fabrice Bellard
{
582
    int i;
583 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
584 115329f1 Diego Biurrun
585 de6d9b64 Fabrice Bellard
    /* read the pixels */
586
    for(i=0;i<8;i++) {
587 c13e1abd Falk Hüffner
        pixels[0] = cm[block[0]];
588
        pixels[1] = cm[block[1]];
589
        pixels[2] = cm[block[2]];
590
        pixels[3] = cm[block[3]];
591
        pixels[4] = cm[block[4]];
592
        pixels[5] = cm[block[5]];
593
        pixels[6] = cm[block[6]];
594
        pixels[7] = cm[block[7]];
595
596
        pixels += line_size;
597
        block += 8;
598 de6d9b64 Fabrice Bellard
    }
599
}
600
601 178fcca8 Michael Niedermayer
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
602 bb270c08 Diego Biurrun
                                 int line_size)
603 178fcca8 Michael Niedermayer
{
604
    int i;
605 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
606 115329f1 Diego Biurrun
607 178fcca8 Michael Niedermayer
    /* read the pixels */
608
    for(i=0;i<4;i++) {
609
        pixels[0] = cm[block[0]];
610
        pixels[1] = cm[block[1]];
611
        pixels[2] = cm[block[2]];
612
        pixels[3] = cm[block[3]];
613
614
        pixels += line_size;
615
        block += 8;
616
    }
617
}
618
619 9ca358b9 Michael Niedermayer
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
620 bb270c08 Diego Biurrun
                                 int line_size)
621 9ca358b9 Michael Niedermayer
{
622
    int i;
623 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
624 115329f1 Diego Biurrun
625 9ca358b9 Michael Niedermayer
    /* read the pixels */
626
    for(i=0;i<2;i++) {
627
        pixels[0] = cm[block[0]];
628
        pixels[1] = cm[block[1]];
629
630
        pixels += line_size;
631
        block += 8;
632
    }
633
}
634
635 115329f1 Diego Biurrun
/* Write an 8x8 block of signed residuals as pixels: offset by 128 and
 * saturate to [0,255]. */
static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            int v = block[i*8 + j] + 128;
            if (v < 0)
                v = 0;
            else if (v > 255)
                v = 255;
            pixels[j] = (uint8_t)v;
        }
        pixels += line_size;
    }
}
655
656 342c7dfd Kostya Shishkov
/* Write an 8x8 DCT block to pixels without any clamping (values are
 * truncated to uint8_t by the assignment). */
static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                    int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = block[col];
        pixels += line_size;
        block += 8;
    }
}
676
677 0c1a9eda Zdenek Kabelac
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
678 c13e1abd Falk Hüffner
                          int line_size)
679 de6d9b64 Fabrice Bellard
{
680
    int i;
681 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
682 115329f1 Diego Biurrun
683 de6d9b64 Fabrice Bellard
    /* read the pixels */
684
    for(i=0;i<8;i++) {
685 c13e1abd Falk Hüffner
        pixels[0] = cm[pixels[0] + block[0]];
686
        pixels[1] = cm[pixels[1] + block[1]];
687
        pixels[2] = cm[pixels[2] + block[2]];
688
        pixels[3] = cm[pixels[3] + block[3]];
689
        pixels[4] = cm[pixels[4] + block[4]];
690
        pixels[5] = cm[pixels[5] + block[5]];
691
        pixels[6] = cm[pixels[6] + block[6]];
692
        pixels[7] = cm[pixels[7] + block[7]];
693
        pixels += line_size;
694
        block += 8;
695 de6d9b64 Fabrice Bellard
    }
696
}
697 178fcca8 Michael Niedermayer
698
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
699
                          int line_size)
700
{
701
    int i;
702 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
703 115329f1 Diego Biurrun
704 178fcca8 Michael Niedermayer
    /* read the pixels */
705
    for(i=0;i<4;i++) {
706
        pixels[0] = cm[pixels[0] + block[0]];
707
        pixels[1] = cm[pixels[1] + block[1]];
708
        pixels[2] = cm[pixels[2] + block[2]];
709
        pixels[3] = cm[pixels[3] + block[3]];
710
        pixels += line_size;
711
        block += 8;
712
    }
713
}
714 9ca358b9 Michael Niedermayer
715
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
716
                          int line_size)
717
{
718
    int i;
719 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
720 115329f1 Diego Biurrun
721 9ca358b9 Michael Niedermayer
    /* read the pixels */
722
    for(i=0;i<2;i++) {
723
        pixels[0] = cm[pixels[0] + block[0]];
724
        pixels[1] = cm[pixels[1] + block[1]];
725
        pixels += line_size;
726
        block += 8;
727
    }
728
}
729 36940eca Loren Merritt
730
/* Add an 8x8 block onto pixels without clamping (wraps modulo 256). */
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 8;
    }
}
746
747
/* Add a 4x4 block onto pixels without clamping.
 * Note: unlike the 8x8 variant, the block here is packed with stride 4. */
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 4;
    }
}
759
760 1edbfe19 Loren Merritt
/* Sum of absolute values of all 64 coefficients of a DCT block. */
static int sum_abs_dctelem_c(DCTELEM *block)
{
    int total = 0, i;

    for (i = 0; i < 64; i++)
        total += block[i] < 0 ? -block[i] : block[i];
    return total;
}
767
768 342c7dfd Kostya Shishkov
/* Fill h rows of 16 bytes each with the given value; rows are
 * line_size bytes apart. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
777
778
/* Fill h rows of 8 bytes each with the given value; rows are
 * line_size bytes apart. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
787
788
/**
 * Upscale an 8x8 block to 16x16 by pixel doubling: each source pixel is
 * replicated into a 2x2 square of the destination.
 *
 * Fix: the uint8_t* -> uint16_t* conversions were implicit (a constraint
 * violation in C); explicit casts are added. dst2 is offset by linesize
 * BYTES (one row) before the cast, preserving the original address
 * arithmetic.
 *
 * @param src      8x8 source block, 8-byte aligned
 * @param dst      16x16 destination, 8-byte aligned
 * @param linesize destination line size in bytes
 */
static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;
    uint16_t *dst1 = (uint16_t *) dst;
    uint16_t *dst2 = (uint16_t *)(dst + linesize);

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            /* 0x0101 replicates the byte into both halves of the word,
             * doubling the pixel horizontally; dst1/dst2 write the same
             * word to two consecutive rows, doubling it vertically. */
            dst1[i] = dst2[i] = src[i] * 0x0101;
        }
        src  += 8;
        /* linesize uint16_t units == 2*linesize bytes == two rows */
        dst1 += linesize;
        dst2 += linesize;
    }
}
803
804 59fe111e Michael Niedermayer
/* PIXOP2(OPNAME, OP) expands to the complete family of half-pel pixel
 * copy/average primitives: plain, x2 (horizontal half-pel), y2 (vertical
 * half-pel) and xy2 (diagonal half-pel), each in a rounding and a
 * non-rounding ("no_rnd") flavour, for several block widths.
 * OP is the store operation: op_put overwrites the destination, op_avg
 * merges the new value with what is already there.
 *
 * Two implementations exist: a 64-bit-at-a-time version, disabled below
 * and kept for reference only (note it has bit-rotted: it defines
 * OPNAME ## _pixels but the CALL_2X_PIXELS lines reference
 * OPNAME ## _pixels_c), and the active 32-bit-at-a-time version in the
 * #else branch. */
#if 0

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* per-byte rounded average on a 64-bit word:
 * (a|b) - (((a^b) & ~LSBs) >> 1) == (a+b+1)>>1 in every byte lane */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant

/* Active implementation: processes 4 (or 2) pixels per operation using
 * 32/16-bit loads via AV_RN32/AV_RN16.  The *_l2 helpers average two
 * sources and the *_l4 helpers average four.  The xy2 bit tricks split
 * every byte into a low 2-bit part (l0/l1, which also carries the
 * rounding constant: 0x02.. for rounding, 0x01.. for no_rnd) and a high
 * 6-bit part (h0/h1), so four bytes are averaged in parallel without
 * carries crossing byte boundaries. */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

/* op_avg merges the new value with the existing destination using
 * rnd_avg32() (defined elsewhere in dsputil). */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif
/* op_put simply stores the new value. */
#define op_put(a, b) a = b

/* Instantiate the whole primitive family for both store operations. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put
1322
1323 de6d9b64 Fabrice Bellard
/* Rounded (round-half-up) averages of 2 and 4 values.
 * NOTE(review): the arguments are not parenthesized in the expansion —
 * fine for simple operands, fragile for compound expressions. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1325
1326 c0a0170c Michael Niedermayer
/* Store the non-rounding average of two 16-pixel-wide sources into dst,
 * using one common stride for all three buffers; thin wrapper around
 * the PIXOP2-generated put_no_rnd_pixels16_l2(). */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
1329
1330
/* Store the non-rounding average of two 8-pixel-wide sources into dst,
 * using one common stride for all three buffers; thin wrapper around
 * the PIXOP2-generated put_no_rnd_pixels8_l2(). */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
1333 073b013d Michael Niedermayer
1334 0c1a9eda Zdenek Kabelac
/**
 * Bilinear interpolation of an 8-pixel-wide, h-row block at a
 * 1/16-pel fractional position.
 * @param x16,y16  fractional position, 0..15, in 1/16-pel units
 * @param rounder  rounding constant added before the final >>8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
1356
1357 703c8195 Loren Merritt
/**
 * Global motion compensation, C reference implementation.
 *
 * Produces an 8-pixel-wide, h-row block.  The per-pixel source position
 * (vx,vy) is a 16.16 fixed-point vector: each row starts at (ox,oy),
 * the vector advances by (dxx,dyx) per pixel, and the row start
 * advances by (dxy,dyy) per row.  The low `shift` bits of the integer
 * part select the sub-pel fraction used for bilinear interpolation;
 * positions outside the valid width x height area are clamped to the
 * nearest edge sample.
 *
 * @param stride line size of both src and dst
 * @param h      number of output rows (output width is fixed at 8)
 * @param shift  sub-pel precision; s = 1<<shift fractional steps
 * @param r      rounding constant added before the final >>(shift*2)
 * @param width,height valid source dimensions, used for edge clamping
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;   /* from here on width/height are the last valid coordinates */
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);   /* sub-pel fraction in units of 1/s */
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare also rejects negative coordinates */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside the source: 2-D bilinear interpolation */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    /* clamped vertically: interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* clamped horizontally: interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    /* clamped in both directions: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
1414 669ac79c Michael Niedermayer
1415
/* Thirdpel MC, fullpel position (0,0): plain copy, dispatched to the
 * fixed-width copy routine matching `width`. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        put_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        put_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        put_pixels16_c(dst, src, stride, height);
}
1423
1424
/* Thirdpel MC, horizontal 1/3 phase: out = round((2*a + b)/3) of the two
 * horizontal neighbours, using the fixed-point factor 683/2048 ~= 1/3. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
1434
1435
/* Thirdpel MC, horizontal 2/3 phase: out = round((a + 2*b)/3) of the two
 * horizontal neighbours (683/2048 ~= 1/3 fixed point). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
1445 115329f1 Diego Biurrun
1446 669ac79c Michael Niedermayer
/* Thirdpel MC, vertical 1/3 phase: out = round((2*a + b)/3) of the two
 * vertical neighbours (683/2048 ~= 1/3 fixed point). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
1456 115329f1 Diego Biurrun
1457 669ac79c Michael Niedermayer
/* Thirdpel MC, (1/3, 1/3) phase: bilinear blend of the 2x2 neighbourhood
 * with weights 4/3/3/2, normalised by 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (4 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
1467
1468
/* Thirdpel MC, (1/3, 2/3) phase: bilinear blend of the 2x2 neighbourhood
 * with weights 3/2/4/3, normalised by 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 2 * src[col + 1]
                              + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
1478
1479
/* Thirdpel MC, vertical 2/3 phase: out = round((a + 2*b)/3) of the two
 * vertical neighbours (683/2048 ~= 1/3 fixed point). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
        dst += stride;
        src += stride;
    }
}
1489
1490
/* Thirdpel MC, (2/3, 1/3) phase: bilinear blend of the 2x2 neighbourhood
 * with weights 3/4/2/3, normalised by 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (3 * src[col] + 4 * src[col + 1]
                              + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
1500
1501
/* Thirdpel MC, (2/3, 2/3) phase: bilinear blend of the 2x2 neighbourhood
 * with weights 2/3/3/4, normalised by 2731/32768 ~= 1/12. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++)
            dst[col] = (2731 * (2 * src[col] + 3 * src[col + 1]
                              + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
        dst += stride;
        src += stride;
    }
}
1511 da3b9756 Mike Melanson
1512
/* Thirdpel MC, fullpel position (0,0), averaging flavour: round-to-nearest
 * average with the destination via the fixed-width routine for `width`. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_c (dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_c (dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_c (dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_c(dst, src, stride, height);
}
1520
1521
/* Thirdpel MC, horizontal 1/3 phase, averaging flavour: interpolate as in
 * the put variant, then round-to-nearest average with the destination. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1531
1532
/* Thirdpel MC, horizontal 2/3 phase, averaging flavour. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + 1] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1542 115329f1 Diego Biurrun
1543 da3b9756 Mike Melanson
/* Thirdpel MC, vertical 1/3 phase, averaging flavour. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (683 * (2 * src[col] + src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1553 115329f1 Diego Biurrun
1554 da3b9756 Mike Melanson
/* Thirdpel MC, (1/3, 1/3) phase, averaging flavour: 2x2 bilinear blend
 * (weights 4/3/3/2, 2731/32768 ~= 1/12), then average with destination. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (2731 * (4 * src[col] + 3 * src[col + 1]
                                 + 3 * src[col + stride] + 2 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1564
1565
/* Thirdpel MC, (1/3, 2/3) phase, averaging flavour (weights 3/2/4/3). */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 2 * src[col + 1]
                                 + 4 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1575
1576
/* Thirdpel MC, vertical 2/3 phase, averaging flavour. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (683 * (src[col] + 2 * src[col + stride] + 1)) >> 11;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1586
1587
/* Thirdpel MC, (2/3, 1/3) phase, averaging flavour (weights 3/4/2/3). */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (2731 * (3 * src[col] + 4 * src[col + 1]
                                 + 2 * src[col + stride] + 3 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1597
1598
/* Thirdpel MC, (2/3, 2/3) phase, averaging flavour (weights 2/3/3/4). */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int t = (2731 * (2 * src[col] + 3 * src[col + 1]
                                 + 3 * src[col + stride] + 4 * src[col + stride + 1] + 6)) >> 15;
            dst[col] = (dst[col] + t + 1) >> 1;
        }
        dst += stride;
        src += stride;
    }
}
1608 669ac79c Michael Niedermayer
#if 0
1609
#define TPEL_WIDTH(width)\
1610
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1611
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
1612
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1613
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
1614
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1615
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
1616
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1617
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
1618
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1619
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
1620
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1621
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
1622
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1623
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
1624
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1625
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
1626
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1627
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1628
#endif
1629
1630 0da71265 Michael Niedermayer
/*
 * Generator for the C H.264 chroma motion-compensation routines in widths
 * 2, 4 and 8.  Each output pixel is the 1/8-pel bilinear blend
 * A*tl + B*tr + C*bl + D*br of the 2x2 source neighbourhood, where the
 * weights A..D (summing to 64) are derived from the fractional position
 * (x, y) with 0 <= x,y < 8.  OP descales the weighted sum and either
 * stores it (put) or rounds it against the existing destination (avg);
 * see op_put/op_avg below.  When D == 0 the blend degenerates to a 2-tap
 * filter, handled by a cheaper loop.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){ /* x!=0 && y!=0: full 2-D bilinear, 4 taps */\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{ /* 1-D case: 2 taps, step picks vertical (C!=0) or horizontal */\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}
1730
1731
/* Descale ops for H264_CHROMA_MC: the weighted sum b carries a factor of
 * 64 (A+B+C+D == 64), so add 32 and shift right by 6 to round; op_avg
 * additionally rounds-to-nearest against the existing destination pixel. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1738
1739 c374691b David Conrad
/* VC-1 8-wide chroma MC, "no rounding" flavour: bilinear blend of the 2x2
 * neighbourhood with weights A..D (summing to 64), descaled with the
 * reduced rounding constant 32 - 4 = 28 mandated by the no-rnd mode. */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1]
                      + C * src[stride + col] + D * src[stride + col + 1]
                      + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
1762
1763 8013da73 David Conrad
/* VC-1 8-wide chroma MC, "no rounding" averaging flavour: same bilinear
 * blend as the put variant (rounding constant 32 - 4 = 28), then combined
 * with the existing destination via avg2(). */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A = (8 - x) * (8 - y);
    const int B = (    x) * (8 - y);
    const int C = (8 - x) * (    y);
    const int D = (    x) * (    y);
    int row, col;

    assert(x<8 && y<8 && x>=0 && y>=0);

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = avg2(dst[col], ((A * src[col]          + B * src[col + 1]
                                      + C * src[stride + col] + D * src[stride + col + 1]
                                      + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
1786
1787 b3184779 Michael Niedermayer
#define QPEL_MC(r, OPNAME, RND, OP) \
1788 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1789 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1790 b3184779 Michael Niedermayer
    int i;\
1791
    for(i=0; i<h; i++)\
1792
    {\
1793
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1794
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1795
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1796
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1797
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1798
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1799
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1800
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1801
        dst+=dstStride;\
1802
        src+=srcStride;\
1803
    }\
1804 44eb4951 Michael Niedermayer
}\
1805
\
1806 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1807 db794953 Michael Niedermayer
    const int w=8;\
1808 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1809 b3184779 Michael Niedermayer
    int i;\
1810
    for(i=0; i<w; i++)\
1811
    {\
1812
        const int src0= src[0*srcStride];\
1813
        const int src1= src[1*srcStride];\
1814
        const int src2= src[2*srcStride];\
1815
        const int src3= src[3*srcStride];\
1816
        const int src4= src[4*srcStride];\
1817
        const int src5= src[5*srcStride];\
1818
        const int src6= src[6*srcStride];\
1819
        const int src7= src[7*srcStride];\
1820
        const int src8= src[8*srcStride];\
1821
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1822
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1823
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1824
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1825
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1826
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1827
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1828
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1829
        dst++;\
1830
        src++;\
1831
    }\
1832
}\
1833
\
1834 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1835 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1836 b3184779 Michael Niedermayer
    int i;\
1837 826f429a Michael Niedermayer
    \
1838 b3184779 Michael Niedermayer
    for(i=0; i<h; i++)\
1839
    {\
1840
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1841
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1842
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1843
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1844
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1845
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1846
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1847
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1848
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1849
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1850
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1851
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1852
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1853
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1854
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1855
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1856
        dst+=dstStride;\
1857
        src+=srcStride;\
1858
    }\
1859
}\
1860
\
1861 0c1a9eda Zdenek Kabelac
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1862 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1863 b3184779 Michael Niedermayer
    int i;\
1864 826f429a Michael Niedermayer
    const int w=16;\
1865 b3184779 Michael Niedermayer
    for(i=0; i<w; i++)\
1866
    {\
1867
        const int src0= src[0*srcStride];\
1868
        const int src1= src[1*srcStride];\
1869
        const int src2= src[2*srcStride];\
1870
        const int src3= src[3*srcStride];\
1871
        const int src4= src[4*srcStride];\
1872
        const int src5= src[5*srcStride];\
1873
        const int src6= src[6*srcStride];\
1874
        const int src7= src[7*srcStride];\
1875
        const int src8= src[8*srcStride];\
1876
        const int src9= src[9*srcStride];\
1877
        const int src10= src[10*srcStride];\
1878
        const int src11= src[11*srcStride];\
1879
        const int src12= src[12*srcStride];\
1880
        const int src13= src[13*srcStride];\
1881
        const int src14= src[14*srcStride];\
1882
        const int src15= src[15*srcStride];\
1883
        const int src16= src[16*srcStride];\
1884
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1885
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1886
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1887
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1888
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1889
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1890
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1891
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1892
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1893
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1894
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1895
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1896
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1897
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1898
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1899
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1900
        dst++;\
1901
        src++;\
1902
    }\
1903
}\
1904
\
1905 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1906 45553457 Zdenek Kabelac
    OPNAME ## pixels8_c(dst, src, stride, 8);\
1907 b3184779 Michael Niedermayer
}\
1908
\
1909 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1910
    uint8_t half[64];\
1911 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1912
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1913 44eb4951 Michael Niedermayer
}\
1914
\
1915 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1916 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1917 44eb4951 Michael Niedermayer
}\
1918
\
1919 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1920
    uint8_t half[64];\
1921 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1922
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1923 44eb4951 Michael Niedermayer
}\
1924
\
1925 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1926
    uint8_t full[16*9];\
1927
    uint8_t half[64];\
1928 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1929 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1930 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1931 44eb4951 Michael Niedermayer
}\
1932
\
1933 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1934
    uint8_t full[16*9];\
1935 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1936 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1937 44eb4951 Michael Niedermayer
}\
1938
\
1939 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1940
    uint8_t full[16*9];\
1941
    uint8_t half[64];\
1942 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1943 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1944 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1945 44eb4951 Michael Niedermayer
}\
1946 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1947
    uint8_t full[16*9];\
1948
    uint8_t halfH[72];\
1949
    uint8_t halfV[64];\
1950
    uint8_t halfHV[64];\
1951 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1952
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1953 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1954
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1955 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1956 44eb4951 Michael Niedermayer
}\
1957 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1958
    uint8_t full[16*9];\
1959
    uint8_t halfH[72];\
1960
    uint8_t halfHV[64];\
1961 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1962
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1963
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1964
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1965
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1966
}\
1967 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1968
    uint8_t full[16*9];\
1969
    uint8_t halfH[72];\
1970
    uint8_t halfV[64];\
1971
    uint8_t halfHV[64];\
1972 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1973
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1974 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1975
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1976 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1977 44eb4951 Michael Niedermayer
}\
1978 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1979
    uint8_t full[16*9];\
1980
    uint8_t halfH[72];\
1981
    uint8_t halfHV[64];\
1982 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1983
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1984
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1985
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1986
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1987
}\
1988 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1989
    uint8_t full[16*9];\
1990
    uint8_t halfH[72];\
1991
    uint8_t halfV[64];\
1992
    uint8_t halfHV[64];\
1993 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
1994
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1995 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1996
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1997 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1998 44eb4951 Michael Niedermayer
}\
1999 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2000
    uint8_t full[16*9];\
2001
    uint8_t halfH[72];\
2002
    uint8_t halfHV[64];\
2003 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2004
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2005
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2006
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2007
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2008
}\
2009 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2010
    uint8_t full[16*9];\
2011
    uint8_t halfH[72];\
2012
    uint8_t halfV[64];\
2013
    uint8_t halfHV[64];\
2014 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2015
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
2016 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2017
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2018 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
2019 44eb4951 Michael Niedermayer
}\
2020 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2021
    uint8_t full[16*9];\
2022
    uint8_t halfH[72];\
2023
    uint8_t halfHV[64];\
2024 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2025
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2026
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2027
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2028
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2029
}\
2030 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2031
    uint8_t halfH[72];\
2032
    uint8_t halfHV[64];\
2033 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2034 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2035 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
2036 44eb4951 Michael Niedermayer
}\
2037 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2038
    uint8_t halfH[72];\
2039
    uint8_t halfHV[64];\
2040 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2041 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2042 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
2043 44eb4951 Michael Niedermayer
}\
2044 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2045
    uint8_t full[16*9];\
2046
    uint8_t halfH[72];\
2047
    uint8_t halfV[64];\
2048
    uint8_t halfHV[64];\
2049 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2050
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2051 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
2052
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2053 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2054 44eb4951 Michael Niedermayer
}\
2055 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2056
    uint8_t full[16*9];\
2057
    uint8_t halfH[72];\
2058 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2059
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2060
    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
2061
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2062
}\
2063 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2064
    uint8_t full[16*9];\
2065
    uint8_t halfH[72];\
2066
    uint8_t halfV[64];\
2067
    uint8_t halfHV[64];\
2068 b3184779 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2069
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2070 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
2071
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
2072 b3184779 Michael Niedermayer
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
2073 44eb4951 Michael Niedermayer
}\
2074 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2075
    uint8_t full[16*9];\
2076
    uint8_t halfH[72];\
2077 db794953 Michael Niedermayer
    copy_block9(full, src, 16, stride, 9);\
2078
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
2079
    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
2080
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2081
}\
2082 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2083
    uint8_t halfH[72];\
2084 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
2085 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
2086 b3184779 Michael Niedermayer
}\
2087 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2088 45553457 Zdenek Kabelac
    OPNAME ## pixels16_c(dst, src, stride, 16);\
2089 b3184779 Michael Niedermayer
}\
2090
\
2091 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2092
    uint8_t half[256];\
2093 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2094
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
2095
}\
2096
\
2097 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2098 b3184779 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
2099 44eb4951 Michael Niedermayer
}\
2100 b3184779 Michael Niedermayer
\
2101 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2102
    uint8_t half[256];\
2103 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
2104
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
2105
}\
2106
\
2107 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2108
    uint8_t full[24*17];\
2109
    uint8_t half[256];\
2110 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2111 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2112 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
2113
}\
2114
\
2115 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2116
    uint8_t full[24*17];\
2117 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2118 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
2119 b3184779 Michael Niedermayer
}\
2120
\
2121 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2122
    uint8_t full[24*17];\
2123
    uint8_t half[256];\
2124 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2125 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
2126 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
2127
}\
2128 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
2129
    uint8_t full[24*17];\
2130
    uint8_t halfH[272];\
2131
    uint8_t halfV[256];\
2132
    uint8_t halfHV[256];\
2133 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2134
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2135 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2136
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2137 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2138
}\
2139 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2140
    uint8_t full[24*17];\
2141
    uint8_t halfH[272];\
2142
    uint8_t halfHV[256];\
2143 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2144
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2145
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2146
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2147
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2148
}\
2149 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
2150
    uint8_t full[24*17];\
2151
    uint8_t halfH[272];\
2152
    uint8_t halfV[256];\
2153
    uint8_t halfHV[256];\
2154 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2155
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2156 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2157
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2158 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2159
}\
2160 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2161
    uint8_t full[24*17];\
2162
    uint8_t halfH[272];\
2163
    uint8_t halfHV[256];\
2164 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2165
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2166
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2167
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2168
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2169
}\
2170 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
2171
    uint8_t full[24*17];\
2172
    uint8_t halfH[272];\
2173
    uint8_t halfV[256];\
2174
    uint8_t halfHV[256];\
2175 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2176
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2177 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2178
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2179 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2180
}\
2181 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2182
    uint8_t full[24*17];\
2183
    uint8_t halfH[272];\
2184
    uint8_t halfHV[256];\
2185 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2186
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2187
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2188
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2189
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2190
}\
2191 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
2192
    uint8_t full[24*17];\
2193
    uint8_t halfH[272];\
2194
    uint8_t halfV[256];\
2195
    uint8_t halfHV[256];\
2196 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2197
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
2198 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2199
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2200 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
2201
}\
2202 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2203
    uint8_t full[24*17];\
2204
    uint8_t halfH[272];\
2205
    uint8_t halfHV[256];\
2206 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2207
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2208
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2209
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2210
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2211
}\
2212 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2213
    uint8_t halfH[272];\
2214
    uint8_t halfHV[256];\
2215 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2216 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2217 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
2218
}\
2219 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2220
    uint8_t halfH[272];\
2221
    uint8_t halfHV[256];\
2222 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2223 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2224 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
2225
}\
2226 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
2227
    uint8_t full[24*17];\
2228
    uint8_t halfH[272];\
2229
    uint8_t halfV[256];\
2230
    uint8_t halfHV[256];\
2231 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2232
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2233 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2234
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2235 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2236
}\
2237 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2238
    uint8_t full[24*17];\
2239
    uint8_t halfH[272];\
2240 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2241
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2242
    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2243
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2244
}\
2245 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2246
    uint8_t full[24*17];\
2247
    uint8_t halfH[272];\
2248
    uint8_t halfV[256];\
2249
    uint8_t halfHV[256];\
2250 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2251
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2252 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2253
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2254 b3184779 Michael Niedermayer
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2255
}\
2256 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2257
    uint8_t full[24*17];\
2258
    uint8_t halfH[272];\
2259 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
2260
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2261
    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2262
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2263
}\
2264 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2265
    uint8_t halfH[272];\
2266 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2267 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2268 45553457 Zdenek Kabelac
}
2269 44eb4951 Michael Niedermayer
2270 b3184779 Michael Niedermayer
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2271
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2272
#define op_put(a, b) a = cm[((b) + 16)>>5]
2273
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2274
2275
QPEL_MC(0, put_       , _       , op_put)
2276
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2277
QPEL_MC(0, avg_       , _       , op_avg)
2278
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
2279
#undef op_avg
2280
#undef op_avg_no_rnd
2281
#undef op_put
2282
#undef op_put_no_rnd
2283 44eb4951 Michael Niedermayer
2284 0da71265 Michael Niedermayer
#if 1
2285
#define H264_LOWPASS(OPNAME, OP, OP2) \
2286 bb5705b9 Måns Rullgård
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2287 80e44bc3 Michael Niedermayer
    const int h=2;\
2288 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2289 80e44bc3 Michael Niedermayer
    int i;\
2290
    for(i=0; i<h; i++)\
2291
    {\
2292
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2293
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2294
        dst+=dstStride;\
2295
        src+=srcStride;\
2296
    }\
2297
}\
2298
\
2299 bb5705b9 Måns Rullgård
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2300 80e44bc3 Michael Niedermayer
    const int w=2;\
2301 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2302 80e44bc3 Michael Niedermayer
    int i;\
2303
    for(i=0; i<w; i++)\
2304
    {\
2305
        const int srcB= src[-2*srcStride];\
2306
        const int srcA= src[-1*srcStride];\
2307
        const int src0= src[0 *srcStride];\
2308
        const int src1= src[1 *srcStride];\
2309
        const int src2= src[2 *srcStride];\
2310
        const int src3= src[3 *srcStride];\
2311
        const int src4= src[4 *srcStride];\
2312
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2313
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2314
        dst++;\
2315
        src++;\
2316
    }\
2317
}\
2318
\
2319 bb5705b9 Måns Rullgård
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2320 80e44bc3 Michael Niedermayer
    const int h=2;\
2321
    const int w=2;\
2322 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2323 80e44bc3 Michael Niedermayer
    int i;\
2324
    src -= 2*srcStride;\
2325
    for(i=0; i<h+5; i++)\
2326
    {\
2327
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2328
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2329
        tmp+=tmpStride;\
2330
        src+=srcStride;\
2331
    }\
2332
    tmp -= tmpStride*(h+5-2);\
2333
    for(i=0; i<w; i++)\
2334
    {\
2335
        const int tmpB= tmp[-2*tmpStride];\
2336
        const int tmpA= tmp[-1*tmpStride];\
2337
        const int tmp0= tmp[0 *tmpStride];\
2338
        const int tmp1= tmp[1 *tmpStride];\
2339
        const int tmp2= tmp[2 *tmpStride];\
2340
        const int tmp3= tmp[3 *tmpStride];\
2341
        const int tmp4= tmp[4 *tmpStride];\
2342
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2343
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2344
        dst++;\
2345
        tmp++;\
2346
    }\
2347
}\
2348 0da71265 Michael Niedermayer
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2349
    const int h=4;\
2350 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2351 0da71265 Michael Niedermayer
    int i;\
2352
    for(i=0; i<h; i++)\
2353
    {\
2354
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2355
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2356
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2357
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2358
        dst+=dstStride;\
2359
        src+=srcStride;\
2360
    }\
2361
}\
2362
\
2363
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2364
    const int w=4;\
2365 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2366 0da71265 Michael Niedermayer
    int i;\
2367
    for(i=0; i<w; i++)\
2368
    {\
2369
        const int srcB= src[-2*srcStride];\
2370
        const int srcA= src[-1*srcStride];\
2371
        const int src0= src[0 *srcStride];\
2372
        const int src1= src[1 *srcStride];\
2373
        const int src2= src[2 *srcStride];\
2374
        const int src3= src[3 *srcStride];\
2375
        const int src4= src[4 *srcStride];\
2376
        const int src5= src[5 *srcStride];\
2377
        const int src6= src[6 *srcStride];\
2378
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2379
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2380
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2381
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2382
        dst++;\
2383
        src++;\
2384
    }\
2385
}\
2386
\
2387
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2388
    const int h=4;\
2389
    const int w=4;\
2390 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2391 0da71265 Michael Niedermayer
    int i;\
2392
    src -= 2*srcStride;\
2393
    for(i=0; i<h+5; i++)\
2394
    {\
2395
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2396
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2397
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2398
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2399
        tmp+=tmpStride;\
2400
        src+=srcStride;\
2401
    }\
2402
    tmp -= tmpStride*(h+5-2);\
2403
    for(i=0; i<w; i++)\
2404
    {\
2405
        const int tmpB= tmp[-2*tmpStride];\
2406
        const int tmpA= tmp[-1*tmpStride];\
2407
        const int tmp0= tmp[0 *tmpStride];\
2408
        const int tmp1= tmp[1 *tmpStride];\
2409
        const int tmp2= tmp[2 *tmpStride];\
2410
        const int tmp3= tmp[3 *tmpStride];\
2411
        const int tmp4= tmp[4 *tmpStride];\
2412
        const int tmp5= tmp[5 *tmpStride];\
2413
        const int tmp6= tmp[6 *tmpStride];\
2414
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2415
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2416
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2417
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2418
        dst++;\
2419
        tmp++;\
2420
    }\
2421
}\
2422
\
2423
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2424
    const int h=8;\
2425 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2426 0da71265 Michael Niedermayer
    int i;\
2427
    for(i=0; i<h; i++)\
2428
    {\
2429
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2430
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2431
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2432
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2433
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2434
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2435
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2436
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2437
        dst+=dstStride;\
2438
        src+=srcStride;\
2439
    }\
2440
}\
2441
\
2442
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2443
    const int w=8;\
2444 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2445 0da71265 Michael Niedermayer
    int i;\
2446
    for(i=0; i<w; i++)\
2447
    {\
2448
        const int srcB= src[-2*srcStride];\
2449
        const int srcA= src[-1*srcStride];\
2450
        const int src0= src[0 *srcStride];\
2451
        const int src1= src[1 *srcStride];\
2452
        const int src2= src[2 *srcStride];\
2453
        const int src3= src[3 *srcStride];\
2454
        const int src4= src[4 *srcStride];\
2455
        const int src5= src[5 *srcStride];\
2456
        const int src6= src[6 *srcStride];\
2457
        const int src7= src[7 *srcStride];\
2458
        const int src8= src[8 *srcStride];\
2459
        const int src9= src[9 *srcStride];\
2460
        const int src10=src[10*srcStride];\
2461
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2462
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2463
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2464
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2465
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2466
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2467
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2468
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2469
        dst++;\
2470
        src++;\
2471
    }\
2472
}\
2473
\
2474
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2475
    const int h=8;\
2476
    const int w=8;\
2477 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
2478 0da71265 Michael Niedermayer
    int i;\
2479
    src -= 2*srcStride;\
2480
    for(i=0; i<h+5; i++)\
2481
    {\
2482
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2483
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2484
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2485
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2486
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2487
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2488
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2489
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2490
        tmp+=tmpStride;\
2491
        src+=srcStride;\
2492
    }\
2493
    tmp -= tmpStride*(h+5-2);\
2494
    for(i=0; i<w; i++)\
2495
    {\
2496
        const int tmpB= tmp[-2*tmpStride];\
2497
        const int tmpA= tmp[-1*tmpStride];\
2498
        const int tmp0= tmp[0 *tmpStride];\
2499
        const int tmp1= tmp[1 *tmpStride];\
2500
        const int tmp2= tmp[2 *tmpStride];\
2501
        const int tmp3= tmp[3 *tmpStride];\
2502
        const int tmp4= tmp[4 *tmpStride];\
2503
        const int tmp5= tmp[5 *tmpStride];\
2504
        const int tmp6= tmp[6 *tmpStride];\
2505
        const int tmp7= tmp[7 *tmpStride];\
2506
        const int tmp8= tmp[8 *tmpStride];\
2507
        const int tmp9= tmp[9 *tmpStride];\
2508
        const int tmp10=tmp[10*tmpStride];\
2509
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2510
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2511
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2512
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2513
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2514
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2515
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2516
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2517
        dst++;\
2518
        tmp++;\
2519
    }\
2520
}\
2521
\
2522
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2523
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
2524
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2525
    src += 8*srcStride;\
2526
    dst += 8*dstStride;\
2527
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
2528
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2529
}\
2530
\
2531
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2532
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
2533
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2534
    src += 8*srcStride;\
2535
    dst += 8*dstStride;\
2536
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
2537
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2538
}\
2539
\
2540
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2541
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
2542
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2543
    src += 8*srcStride;\
2544
    dst += 8*dstStride;\
2545
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
2546
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2547
}\
2548
2549
#define H264_MC(OPNAME, SIZE) \
2550
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2551
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
2552
}\
2553
\
2554
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2555
    uint8_t half[SIZE*SIZE];\
2556
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2557
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
2558
}\
2559
\
2560
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2561
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
2562
}\
2563
\
2564
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2565
    uint8_t half[SIZE*SIZE];\
2566
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2567
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
2568
}\
2569
\
2570
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2571
    uint8_t full[SIZE*(SIZE+5)];\
2572
    uint8_t * const full_mid= full + SIZE*2;\
2573
    uint8_t half[SIZE*SIZE];\
2574
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2575
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2576
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
2577
}\
2578
\
2579
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2580
    uint8_t full[SIZE*(SIZE+5)];\
2581
    uint8_t * const full_mid= full + SIZE*2;\
2582
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2583
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
2584
}\
2585
\
2586
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2587
    uint8_t full[SIZE*(SIZE+5)];\
2588
    uint8_t * const full_mid= full + SIZE*2;\
2589
    uint8_t half[SIZE*SIZE];\
2590
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2591
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2592
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
2593
}\
2594
\
2595
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2596
    uint8_t full[SIZE*(SIZE+5)];\
2597
    uint8_t * const full_mid= full + SIZE*2;\
2598
    uint8_t halfH[SIZE*SIZE];\
2599
    uint8_t halfV[SIZE*SIZE];\
2600
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2601
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2602
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2603
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2604
}\
2605
\
2606
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2607
    uint8_t full[SIZE*(SIZE+5)];\
2608
    uint8_t * const full_mid= full + SIZE*2;\
2609
    uint8_t halfH[SIZE*SIZE];\
2610
    uint8_t halfV[SIZE*SIZE];\
2611
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2612
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
2613
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2614
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2615
}\
2616
\
2617
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2618
    uint8_t full[SIZE*(SIZE+5)];\
2619
    uint8_t * const full_mid= full + SIZE*2;\
2620
    uint8_t halfH[SIZE*SIZE];\
2621
    uint8_t halfV[SIZE*SIZE];\
2622
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2623
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
2624
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2625
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2626
}\
2627
\
2628
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2629
    uint8_t full[SIZE*(SIZE+5)];\
2630
    uint8_t * const full_mid= full + SIZE*2;\
2631
    uint8_t halfH[SIZE*SIZE];\
2632
    uint8_t halfV[SIZE*SIZE];\
2633
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2634
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
2635
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2636
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2637
}\
2638
\