/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DSP utils
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
#include "ac3dec.h"
#include "vorbis.h"
#include "png.h"

uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };

#define BIT_DEPTH 9
#include "dsputil_internal.h"
#undef BIT_DEPTH

#define BIT_DEPTH 10
#include "dsputil_internal.h"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_internal.h"

// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)

const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];

const uint8_t ff_alternate_horizontal_scan[64] = {
    0,  1,   2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

const uint8_t ff_alternate_vertical_scan[64] = {
    0,  8,  16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = permutation[j];
#if ARCH_PPC
        st->inverse[j] = i;
#endif
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}

static int pix_sum_c(uint8_t * pix, int line_size)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= av_bswap32(src[i+0]);
        dst[i+1]= av_bswap32(src[i+1]);
        dst[i+2]= av_bswap32(src[i+2]);
        dst[i+3]= av_bswap32(src[i+3]);
        dst[i+4]= av_bswap32(src[i+4]);
        dst[i+5]= av_bswap32(src[i+5]);
        dst[i+6]= av_bswap32(src[i+6]);
        dst[i+7]= av_bswap32(src[i+7]);
    }
    for(;i<w; i++){
        dst[i+0]= av_bswap32(src[i+0]);
    }
}

static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    while (len--)
        *dst++ = av_bswap16(*src++);
}

static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];

        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = pixels[0];
        block[1] = pixels[1];
        block[2] = pixels[2];
        block[3] = pixels[3];
        block[4] = pixels[4];
        block[5] = pixels[5];
        block[6] = pixels[6];
        block[7] = pixels[7];
        pixels += line_size;
        block += 8;
    }
}

static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}

void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];
        pixels[4] = cm[block[4]];
        pixels[5] = cm[block[5]];
        pixels[6] = cm[block[6]];
        pixels[7] = cm[block[7]];

        pixels += line_size;
        block += 8;
    }
}

static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];
        pixels[2] = cm[block[2]];
        pixels[3] = cm[block[3]];

        pixels += line_size;
        block += 8;
    }
}

static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[block[0]];
        pixels[1] = cm[block[1]];

        pixels += line_size;
        block += 8;
    }
}

void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
                                    uint8_t *restrict pixels,
                                    int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t)(*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}

static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                                    int line_size)
{
    int i;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = block[0];
        pixels[1] = block[1];
        pixels[2] = block[2];
        pixels[3] = block[3];
        pixels[4] = block[4];
        pixels[5] = block[5];
        pixels[6] = block[6];
        pixels[7] = block[7];

        pixels += line_size;
        block += 8;
    }
}

void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                             int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<8;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels[4] = cm[pixels[4] + block[4]];
        pixels[5] = cm[pixels[5] + block[5]];
        pixels[6] = cm[pixels[6] + block[6]];
        pixels[7] = cm[pixels[7] + block[7]];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<4;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels[2] = cm[pixels[2] + block[2]];
        pixels[3] = cm[pixels[3] + block[3]];
        pixels += line_size;
        block += 8;
    }
}

static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
                          int line_size)
{
    int i;
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    /* read the pixels */
    for(i=0;i<2;i++) {
        pixels[0] = cm[pixels[0] + block[0]];
        pixels[1] = cm[pixels[1] + block[1]];
        pixels += line_size;
        block += 8;
    }
}

static int sum_abs_dctelem_c(DCTELEM *block)
{
    int sum=0, i;
    for(i=0; i<64; i++)
        sum+= FFABS(block[i]);
    return sum;
}

static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 16);
        block += line_size;
    }
}

static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int i;

    for (i = 0; i < h; i++) {
        memset(block, value, 8);
        block += line_size;
    }
}

static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
{
    int i, j;
    uint16_t *dst1 = (uint16_t *) dst;
    uint16_t *dst2 = (uint16_t *)(dst + linesize);

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            dst1[i] = dst2[i] = src[i] * 0x0101;
        }
        src  += 8;
        dst1 += linesize;
        dst2 += linesize;
    }
}

#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)

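/* Note (added comment): gmc1_c below is an 8-pixel-wide bilinear interpolation
 * with 1/16-pel fractional offsets (x16, y16). The four weights A..D sum to
 * 16*16 = 256, so adding 'rounder' and shifting right by 8 renormalizes the
 * result back to the pixel range. */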
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A=(16-x16)*(16-y16);
    const int B=(   x16)*(16-y16);
    const int C=(16-x16)*(   y16);
    const int D=(   x16)*(   y16);
    int i;

    for(i=0; i<h; i++)
    {
        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
        dst+= stride;
        src+= stride;
    }
}

void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*(s-frac_y)
                                        + (  src[index+stride  ]*(s-frac_x)
                                           + src[index+stride+1]*   frac_x )*   frac_y
                                        + r)>>(shift*2);
                }else{
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                          + src[index       +1]*   frac_x )*s
                                        + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                           + src[index+stride  ]*   frac_y )*s
                                        + r)>>(shift*2);
                }else{
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]=    src[index         ];
                }
            }

            vx+= dxx;
            vy+= dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}

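/* Note (added comment): the "tpel" (thirdpel) functions below, used e.g. by the
 * SVQ3 decoder, approximate division in fixed point: 683 ~= 2^11/3 and
 * 2731 ~= 2^15/12, so (683*x)>>11 ~= x/3 and (2731*x)>>15 ~= x/12. */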
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: put_pixels2_8_c (dst, src, stride, height); break;
    case 4: put_pixels4_8_c (dst, src, stride, height); break;
    case 8: put_pixels8_8_c (dst, src, stride, height); break;
    case 16:put_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch(width){
    case 2: avg_pixels2_8_c (dst, src, stride, height); break;
    case 4: avg_pixels4_8_c (dst, src, stride, height); break;
    case 8: avg_pixels8_8_c (dst, src, stride, height); break;
    case 16:avg_pixels16_8_c(dst, src, stride, height); break;
    }
}

static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}

static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int i,j;
    for (i=0; i < height; i++) {
      for (j=0; j < width; j++) {
        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
      }
      src += stride;
      dst += stride;
    }
}
#if 0
#define TPEL_WIDTH(width)\
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
#endif

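/* Note (added comment): the MPEG-4 quarter-pel helpers generated by QPEL_MC
 * use the 8-tap half-pel lowpass (-1, 3, -6, 20, 20, -6, 3, -1), whose taps
 * sum to 32. Rounding and the final normalization are left to the OP() macro
 * supplied when QPEL_MC is instantiated; taps falling outside the block are
 * mirrored back inside it. */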
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    \
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    const int w=16;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
\
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
1236
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1237 8dbe5856 Oskar Arvidsson
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1238 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1239 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1240 db794953 Michael Niedermayer
}\
1241 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1242
    uint8_t full[24*17];\
1243
    uint8_t halfH[272];\
1244
    uint8_t halfV[256];\
1245
    uint8_t halfHV[256];\
1246 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1247
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1248 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1249
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1250 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1251 b3184779 Michael Niedermayer
}\
1252 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1253
    uint8_t full[24*17];\
1254
    uint8_t halfH[272];\
1255
    uint8_t halfHV[256];\
1256 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1257
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1258 8dbe5856 Oskar Arvidsson
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1259 db794953 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1260 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1261 db794953 Michael Niedermayer
}\
1262 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1263
    uint8_t halfH[272];\
1264
    uint8_t halfHV[256];\
1265 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1266 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1267 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1268 b3184779 Michael Niedermayer
}\
1269 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1270
    uint8_t halfH[272];\
1271
    uint8_t halfHV[256];\
1272 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1273 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1274 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1275 b3184779 Michael Niedermayer
}\
1276 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1277
    uint8_t full[24*17];\
1278
    uint8_t halfH[272];\
1279
    uint8_t halfV[256];\
1280
    uint8_t halfHV[256];\
1281 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1282
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1283 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1284
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1285 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1286 b3184779 Michael Niedermayer
}\
1287 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1288
    uint8_t full[24*17];\
1289
    uint8_t halfH[272];\
1290 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1291
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1292 8dbe5856 Oskar Arvidsson
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1293 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1294
}\
1295 0c1a9eda Zdenek Kabelac
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1296
    uint8_t full[24*17];\
1297
    uint8_t halfH[272];\
1298
    uint8_t halfV[256];\
1299
    uint8_t halfHV[256];\
1300 b3184779 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1301
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1302 826f429a Michael Niedermayer
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1303
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1304 8dbe5856 Oskar Arvidsson
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1305 b3184779 Michael Niedermayer
}\
1306 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1307
    uint8_t full[24*17];\
1308
    uint8_t halfH[272];\
1309 db794953 Michael Niedermayer
    copy_block17(full, src, 24, stride, 17);\
1310
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1311 8dbe5856 Oskar Arvidsson
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1312 db794953 Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1313
}\
1314 0c1a9eda Zdenek Kabelac
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1315
    uint8_t halfH[272];\
1316 b3184779 Michael Niedermayer
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1317 826f429a Michael Niedermayer
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1318 45553457 Zdenek Kabelac
}
1319 44eb4951 Michael Niedermayer
1320 b3184779 Michael Niedermayer
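/* Rounding/clamping primitives for the QPEL_MC() instantiations below.
 * The qpel lowpass filters produce sums scaled by 32, so "+16 >> 5"
 * rounds to nearest while the no_rnd variants use "+15 >> 5" and round
 * ties down (e.g. a filter sum of 48 yields 2 with rounding but 1
 * without); cm[] is the crop table clamping the result to 0..255, and
 * the op_avg variants additionally average with the value already in
 * dst. */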
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1321
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1322
#define op_put(a, b) a = cm[((b) + 16)>>5]
1323
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1324
1325
QPEL_MC(0, put_       , _       , op_put)
1326
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1327
QPEL_MC(0, avg_       , _       , op_avg)
1328
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
1329
#undef op_avg
1330
#undef op_avg_no_rnd
1331
#undef op_put
1332
#undef op_put_no_rnd
1333 44eb4951 Michael Niedermayer
1334 3d1b1caa Måns Rullgård
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
1335
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
1336
#define put_qpel16_mc00_c ff_put_pixels16x16_c
1337
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
1338
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
1339 8dbe5856 Oskar Arvidsson
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_8_c
1340 3d1b1caa Måns Rullgård
1341 1457ab52 Michael Niedermayer
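/* WMV2 "mspel" half-pel interpolation: a 4-tap (-1, 9, 9, -1)/16 filter,
 * rounded with "+8 >> 4" and clamped through the crop table; the _h and
 * _v versions below filter horizontally resp. vertically. */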
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1342 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1343 1457ab52 Michael Niedermayer
    int i;
1344
1345
    for(i=0; i<h; i++){
1346
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1347
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1348
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1349
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1350
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1351
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1352
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1353
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1354
        dst+=dstStride;
1355 115329f1 Diego Biurrun
        src+=srcStride;
1356 1457ab52 Michael Niedermayer
    }
1357
}
1358
1359 b250f9c6 Aurelien Jacobs
#if CONFIG_RV40_DECODER
1360 2d8a0815 Kostya Shishkov
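/* RV40: the (3,3) quarter-pel position appears to reduce to the plain
 * diagonal half-pel average, hence these thin wrappers around the
 * pixels*_xy2_8_c functions. */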
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1361 8dbe5856 Oskar Arvidsson
    put_pixels16_xy2_8_c(dst, src, stride, 16);
1362 2d8a0815 Kostya Shishkov
}
1363
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1364 8dbe5856 Oskar Arvidsson
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
1365 2d8a0815 Kostya Shishkov
}
1366
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1367 8dbe5856 Oskar Arvidsson
    put_pixels8_xy2_8_c(dst, src, stride, 8);
1368 2d8a0815 Kostya Shishkov
}
1369
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
1370 8dbe5856 Oskar Arvidsson
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
1371 2d8a0815 Kostya Shishkov
}
1372
#endif /* CONFIG_RV40_DECODER */
1373
1374 1457ab52 Michael Niedermayer
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1375 55fde95e Måns Rullgård
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1376 1457ab52 Michael Niedermayer
    int i;
1377
1378
    for(i=0; i<w; i++){
1379
        const int src_1= src[ -srcStride];
1380
        const int src0 = src[0          ];
1381
        const int src1 = src[  srcStride];
1382
        const int src2 = src[2*srcStride];
1383
        const int src3 = src[3*srcStride];
1384
        const int src4 = src[4*srcStride];
1385
        const int src5 = src[5*srcStride];
1386
        const int src6 = src[6*srcStride];
1387
        const int src7 = src[7*srcStride];
1388
        const int src8 = src[8*srcStride];
1389
        const int src9 = src[9*srcStride];
1390
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1391
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
1392
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
1393
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
1394
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
1395
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
1396
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
1397
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
1398
        src++;
1399
        dst++;
1400
    }
1401
}
1402
1403
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
1404
    uint8_t half[64];
1405
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1406 8dbe5856 Oskar Arvidsson
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1407 1457ab52 Michael Niedermayer
}
1408
1409
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
1410
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
1411
}
1412
1413
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
1414
    uint8_t half[64];
1415
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1416 8dbe5856 Oskar Arvidsson
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1417 1457ab52 Michael Niedermayer
}
1418
1419
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
1420
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
1421
}
1422
1423
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
1424
    uint8_t halfH[88];
1425
    uint8_t halfV[64];
1426
    uint8_t halfHV[64];
1427
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1428
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1429
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1430 8dbe5856 Oskar Arvidsson
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1431 1457ab52 Michael Niedermayer
}
1432
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
1433
    uint8_t halfH[88];
1434
    uint8_t halfV[64];
1435
    uint8_t halfHV[64];
1436
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1437
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1438
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1439 8dbe5856 Oskar Arvidsson
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1440 1457ab52 Michael Niedermayer
}
1441
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
1442
    uint8_t halfH[88];
1443
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1444
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
1445
}
1446
1447 332f9ac4 Michael Niedermayer
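/* H.263 deblocking (loop) filter, C reference version.  For each line
 * crossing the block edge, d is computed from the four samples p0..p3
 * around the edge, mapped through a strength-dependent ramp to the
 * correction d1 applied (with clipping) to p1/p2, and a smaller clipped
 * correction d2 is applied to p0/p3.  The _v variant filters across a
 * horizontal edge, the _h variant across a vertical one. */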
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1448 4052cbf1 Diego Biurrun
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1449 332f9ac4 Michael Niedermayer
    int x;
1450
    const int strength= ff_h263_loop_filter_strength[qscale];
1451 115329f1 Diego Biurrun
1452 332f9ac4 Michael Niedermayer
    for(x=0; x<8; x++){
1453
        int d1, d2, ad1;
1454
        int p0= src[x-2*stride];
1455
        int p1= src[x-1*stride];
1456
        int p2= src[x+0*stride];
1457
        int p3= src[x+1*stride];
1458
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1459
1460
        if     (d<-2*strength) d1= 0;
1461
        else if(d<-  strength) d1=-2*strength - d;
1462
        else if(d<   strength) d1= d;
1463
        else if(d< 2*strength) d1= 2*strength - d;
1464
        else                   d1= 0;
1465 115329f1 Diego Biurrun
1466 332f9ac4 Michael Niedermayer
        p1 += d1;
1467
        p2 -= d1;
1468
        if(p1&256) p1= ~(p1>>31);
1469
        if(p2&256) p2= ~(p2>>31);
1470 115329f1 Diego Biurrun
1471 332f9ac4 Michael Niedermayer
        src[x-1*stride] = p1;
1472
        src[x+0*stride] = p2;
1473
1474 c26abfa5 Diego Biurrun
        ad1= FFABS(d1)>>1;
1475 115329f1 Diego Biurrun
1476 f66e4f5f Reimar Döffinger
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1477 115329f1 Diego Biurrun
1478 332f9ac4 Michael Niedermayer
        src[x-2*stride] = p0 - d2;
1479
        src[x+  stride] = p3 + d2;
1480
    }
1481 73f51a4d Aurelien Jacobs
    }
1482 332f9ac4 Michael Niedermayer
}
1483
1484
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1485 4052cbf1 Diego Biurrun
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1486 332f9ac4 Michael Niedermayer
    int y;
1487
    const int strength= ff_h263_loop_filter_strength[qscale];
1488 115329f1 Diego Biurrun
1489 332f9ac4 Michael Niedermayer
    for(y=0; y<8; y++){
1490
        int d1, d2, ad1;
1491
        int p0= src[y*stride-2];
1492
        int p1= src[y*stride-1];
1493
        int p2= src[y*stride+0];
1494
        int p3= src[y*stride+1];
1495
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1496
1497
        if     (d<-2*strength) d1= 0;
1498
        else if(d<-  strength) d1=-2*strength - d;
1499
        else if(d<   strength) d1= d;
1500
        else if(d< 2*strength) d1= 2*strength - d;
1501
        else                   d1= 0;
1502 115329f1 Diego Biurrun
1503 332f9ac4 Michael Niedermayer
        p1 += d1;
1504
        p2 -= d1;
1505
        if(p1&256) p1= ~(p1>>31);
1506
        if(p2&256) p2= ~(p2>>31);
1507 115329f1 Diego Biurrun
1508 332f9ac4 Michael Niedermayer
        src[y*stride-1] = p1;
1509
        src[y*stride+0] = p2;
1510
1511 c26abfa5 Diego Biurrun
        ad1= FFABS(d1)>>1;
1512 115329f1 Diego Biurrun
1513 f66e4f5f Reimar Döffinger
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1514 115329f1 Diego Biurrun
1515 332f9ac4 Michael Niedermayer
        src[y*stride-2] = p0 - d2;
1516
        src[y*stride+1] = p3 + d2;
1517
    }
1518 73f51a4d Aurelien Jacobs
    }
1519 332f9ac4 Michael Niedermayer
}
1520 1457ab52 Michael Niedermayer
1521 fdbbf2e0 Michael Niedermayer
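/* H.261 in-loop filter: a separable [1 2 1] low-pass inside the 8x8
 * block.  The first pass filters vertically into temp[] (top and bottom
 * rows are passed through, scaled by 4); the second pass filters
 * horizontally and applies the rounding: (x+8)>>4 for interior samples,
 * (x+2)>>2 for the left/right border columns. */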
static void h261_loop_filter_c(uint8_t *src, int stride){
1522
    int x,y,xy,yz;
1523
    int temp[64];
1524
1525
    for(x=0; x<8; x++){
1526
        temp[x      ] = 4*src[x           ];
1527
        temp[x + 7*8] = 4*src[x + 7*stride];
1528
    }
1529
    for(y=1; y<7; y++){
1530
        for(x=0; x<8; x++){
1531
            xy = y * stride + x;
1532
            yz = y * 8 + x;
1533
            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
1534 c6148de2 Michael Niedermayer
        }
1535
    }
1536 115329f1 Diego Biurrun
1537 fdbbf2e0 Michael Niedermayer
    for(y=0; y<8; y++){
1538
        src[  y*stride] = (temp[  y*8] + 2)>>2;
1539
        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
1540
        for(x=1; x<7; x++){
1541
            xy = y * stride + x;
1542
            yz = y * 8 + x;
1543
            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
1544 c6148de2 Michael Niedermayer
        }
1545
    }
1546
}
1547
1548 bb198e19 Michael Niedermayer
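/* SAD (sum of absolute differences) comparison functions for motion
 * estimation.  pix_abs16/pix_abs8 compare against the reference block
 * directly; the _x2, _y2 and _xy2 variants compare against the
 * horizontally, vertically resp. diagonally half-pel averaged reference
 * (via avg2/avg4). */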
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1549 de6d9b64 Fabrice Bellard
{
1550
    int s, i;
1551
1552
    s = 0;
1553 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1554 de6d9b64 Fabrice Bellard
        s += abs(pix1[0] - pix2[0]);
1555
        s += abs(pix1[1] - pix2[1]);
1556
        s += abs(pix1[2] - pix2[2]);
1557
        s += abs(pix1[3] - pix2[3]);
1558
        s += abs(pix1[4] - pix2[4]);
1559
        s += abs(pix1[5] - pix2[5]);
1560
        s += abs(pix1[6] - pix2[6]);
1561
        s += abs(pix1[7] - pix2[7]);
1562
        s += abs(pix1[8] - pix2[8]);
1563
        s += abs(pix1[9] - pix2[9]);
1564
        s += abs(pix1[10] - pix2[10]);
1565
        s += abs(pix1[11] - pix2[11]);
1566
        s += abs(pix1[12] - pix2[12]);
1567
        s += abs(pix1[13] - pix2[13]);
1568
        s += abs(pix1[14] - pix2[14]);
1569
        s += abs(pix1[15] - pix2[15]);
1570
        pix1 += line_size;
1571
        pix2 += line_size;
1572
    }
1573
    return s;
1574
}
1575
1576 bb198e19 Michael Niedermayer
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1577 de6d9b64 Fabrice Bellard
{
1578
    int s, i;
1579
1580
    s = 0;
1581 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1582 de6d9b64 Fabrice Bellard
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1583
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1584
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1585
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1586
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1587
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1588
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1589
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1590
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
1591
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
1592
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
1593
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
1594
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
1595
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
1596
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
1597
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
1598
        pix1 += line_size;
1599
        pix2 += line_size;
1600
    }
1601
    return s;
1602
}
1603
1604 bb198e19 Michael Niedermayer
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1605 de6d9b64 Fabrice Bellard
{
1606
    int s, i;
1607 0c1a9eda Zdenek Kabelac
    uint8_t *pix3 = pix2 + line_size;
1608 de6d9b64 Fabrice Bellard
1609
    s = 0;
1610 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1611 de6d9b64 Fabrice Bellard
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1612
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1613
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1614
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1615
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1616
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1617
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1618
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1619
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
1620
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
1621
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
1622
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
1623
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
1624
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
1625
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
1626
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
1627
        pix1 += line_size;
1628
        pix2 += line_size;
1629
        pix3 += line_size;
1630
    }
1631
    return s;
1632
}
1633
1634 bb198e19 Michael Niedermayer
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1635 de6d9b64 Fabrice Bellard
{
1636
    int s, i;
1637 0c1a9eda Zdenek Kabelac
    uint8_t *pix3 = pix2 + line_size;
1638 de6d9b64 Fabrice Bellard
1639
    s = 0;
1640 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1641 de6d9b64 Fabrice Bellard
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1642
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1643
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1644
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1645
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1646
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1647
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1648
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1649
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1650
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1651
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1652
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1653
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1654
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1655
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1656
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1657
        pix1 += line_size;
1658
        pix2 += line_size;
1659
        pix3 += line_size;
1660
    }
1661
    return s;
1662
}
1663
1664 bb198e19 Michael Niedermayer
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1665 ba6802de Michael Niedermayer
{
1666
    int s, i;
1667
1668
    s = 0;
1669 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1670 ba6802de Michael Niedermayer
        s += abs(pix1[0] - pix2[0]);
1671
        s += abs(pix1[1] - pix2[1]);
1672
        s += abs(pix1[2] - pix2[2]);
1673
        s += abs(pix1[3] - pix2[3]);
1674
        s += abs(pix1[4] - pix2[4]);
1675
        s += abs(pix1[5] - pix2[5]);
1676
        s += abs(pix1[6] - pix2[6]);
1677
        s += abs(pix1[7] - pix2[7]);
1678
        pix1 += line_size;
1679
        pix2 += line_size;
1680
    }
1681
    return s;
1682
}
1683
1684 bb198e19 Michael Niedermayer
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1685 ba6802de Michael Niedermayer
{
1686
    int s, i;
1687
1688
    s = 0;
1689 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1690 ba6802de Michael Niedermayer
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
1691
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
1692
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
1693
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
1694
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
1695
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
1696
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
1697
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
1698
        pix1 += line_size;
1699
        pix2 += line_size;
1700
    }
1701
    return s;
1702
}
1703
1704 bb198e19 Michael Niedermayer
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1705 ba6802de Michael Niedermayer
{
1706
    int s, i;
1707 0c1a9eda Zdenek Kabelac
    uint8_t *pix3 = pix2 + line_size;
1708 ba6802de Michael Niedermayer
1709
    s = 0;
1710 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1711 ba6802de Michael Niedermayer
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
1712
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
1713
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
1714
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
1715
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
1716
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
1717
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
1718
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
1719
        pix1 += line_size;
1720
        pix2 += line_size;
1721
        pix3 += line_size;
1722
    }
1723
    return s;
1724
}
1725
1726 bb198e19 Michael Niedermayer
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
1727 ba6802de Michael Niedermayer
{
1728
    int s, i;
1729 0c1a9eda Zdenek Kabelac
    uint8_t *pix3 = pix2 + line_size;
1730 ba6802de Michael Niedermayer
1731
    s = 0;
1732 bb198e19 Michael Niedermayer
    for(i=0;i<h;i++) {
1733 ba6802de Michael Niedermayer
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1734
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1735
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1736
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1737
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1738
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1739
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1740
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1741
        pix1 += line_size;
1742
        pix2 += line_size;
1743
        pix3 += line_size;
1744
    }
1745
    return s;
1746
}
1747
1748 bf4e3bd2 Måns Rullgård
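/* Noise-preserving SSE: plain SSE (score1) plus a penalty (score2) for
 * the change in local 2x2 gradients between s1 and s2, weighted by
 * avctx->nsse_weight (or 8 when no context is available).  The intent is
 * to favour reconstructions that keep the original amount of texture
 * rather than smoothing it away. */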
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1749
    MpegEncContext *c = v;
1750 e6a2ac34 Michael Niedermayer
    int score1=0;
1751
    int score2=0;
1752
    int x,y;
1753 d4c5d2ad Michael Niedermayer
1754 e6a2ac34 Michael Niedermayer
    for(y=0; y<h; y++){
1755
        for(x=0; x<16; x++){
1756
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1757
        }
1758
        if(y+1<h){
1759
            for(x=0; x<15; x++){
1760 c26abfa5 Diego Biurrun
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1761 e6a2ac34 Michael Niedermayer
                             - s1[x+1] + s1[x+1+stride])
1762 c26abfa5 Diego Biurrun
                        -FFABS(  s2[x  ] - s2[x  +stride]
1763 e6a2ac34 Michael Niedermayer
                             - s2[x+1] + s2[x+1+stride]);
1764
            }
1765
        }
1766
        s1+= stride;
1767
        s2+= stride;
1768
    }
1769 d4c5d2ad Michael Niedermayer
1770 c26abfa5 Diego Biurrun
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1771
    else  return score1 + FFABS(score2)*8;
1772 e6a2ac34 Michael Niedermayer
}
1773
1774 bf4e3bd2 Måns Rullgård
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1775
    MpegEncContext *c = v;
1776 e6a2ac34 Michael Niedermayer
    int score1=0;
1777
    int score2=0;
1778
    int x,y;
1779 115329f1 Diego Biurrun
1780 e6a2ac34 Michael Niedermayer
    for(y=0; y<h; y++){
1781
        for(x=0; x<8; x++){
1782
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1783
        }
1784
        if(y+1<h){
1785
            for(x=0; x<7; x++){
1786 c26abfa5 Diego Biurrun
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1787 e6a2ac34 Michael Niedermayer
                             - s1[x+1] + s1[x+1+stride])
1788 c26abfa5 Diego Biurrun
                        -FFABS(  s2[x  ] - s2[x  +stride]
1789 e6a2ac34 Michael Niedermayer
                             - s2[x+1] + s2[x+1+stride]);
1790
            }
1791
        }
1792
        s1+= stride;
1793
        s2+= stride;
1794
    }
1795 115329f1 Diego Biurrun
1796 c26abfa5 Diego Biurrun
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1797
    else  return score1 + FFABS(score2)*8;
1798 e6a2ac34 Michael Niedermayer
}
1799
1800 364a1797 Michael Niedermayer
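/* try_8x8basis() / add_8x8basis(): score resp. apply the addition of a
 * scaled basis function to an 8x8 residual.  try_8x8basis() returns a
 * weighted squared error so a candidate coefficient change can be scored
 * before it is committed; these are used by the encoder's quantization
 * refinement. */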
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1801
    int i;
1802
    unsigned int sum=0;
1803
1804
    for(i=0; i<8*8; i++){
1805
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1806
        int w= weight[i];
1807
        b>>= RECON_SHIFT;
1808
        assert(-512<b && b<512);
1809
1810
        sum += (w*b)*(w*b)>>4;
1811
    }
1812
    return sum>>2;
1813
}
1814
1815
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1816
    int i;
1817
1818
    for(i=0; i<8*8; i++){
1819
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1820 115329f1 Diego Biurrun
    }
1821 364a1797 Michael Niedermayer
}
1822
1823 a9badb51 Michael Niedermayer
/**
1824
 * Permute an 8x8 block.
1825 2a5700de Michael Niedermayer
 * @param block the block which will be permuted according to the given permutation vector
1826 a9badb51 Michael Niedermayer
 * @param permutation the permutation vector
1827
 * @param last the last non-zero coefficient in scantable order, used to speed up the permutation
1828 115329f1 Diego Biurrun
 * @param scantable the scantable in use; this is only needed to speed up the permutation, the block is not
1829 2a5700de Michael Niedermayer
 *                  (inverse) permuted to scantable order!
1830 a9badb51 Michael Niedermayer
 */
1831 0c1a9eda Zdenek Kabelac
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
1832 d962f6fd Arpi
{
1833 7801d21d Michael Niedermayer
    int i;
1834 477ab036 Michael Niedermayer
    DCTELEM temp[64];
1835 115329f1 Diego Biurrun
1836 7801d21d Michael Niedermayer
    if(last<=0) return;
1837 90b5b51e Diego Biurrun
    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
1838 d962f6fd Arpi
1839 7801d21d Michael Niedermayer
    for(i=0; i<=last; i++){
1840
        const int j= scantable[i];
1841
        temp[j]= block[j];
1842
        block[j]=0;
1843
    }
1844 115329f1 Diego Biurrun
1845 7801d21d Michael Niedermayer
    for(i=0; i<=last; i++){
1846
        const int j= scantable[i];
1847
        const int perm_j= permutation[j];
1848
        block[perm_j]= temp[j];
1849
    }
1850 d962f6fd Arpi
}
1851 e0eac44e Fabrice Bellard
1852 622348f9 Michael Niedermayer
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
1853
    return 0;
1854
}
1855
1856
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
1857
    int i;
1858 115329f1 Diego Biurrun
1859 3899eb2f Roman Shaposhnik
    memset(cmp, 0, sizeof(void*)*6);
1860 115329f1 Diego Biurrun
1861 3899eb2f Roman Shaposhnik
    for(i=0; i<6; i++){
1862 622348f9 Michael Niedermayer
        switch(type&0xFF){
1863
        case FF_CMP_SAD:
1864
            cmp[i]= c->sad[i];
1865
            break;
1866
        case FF_CMP_SATD:
1867
            cmp[i]= c->hadamard8_diff[i];
1868
            break;
1869
        case FF_CMP_SSE:
1870
            cmp[i]= c->sse[i];
1871
            break;
1872
        case FF_CMP_DCT:
1873
            cmp[i]= c->dct_sad[i];
1874
            break;
1875 27c61ac5 Michael Niedermayer
        case FF_CMP_DCT264:
1876
            cmp[i]= c->dct264_sad[i];
1877
            break;
1878 0fd6aea1 Michael Niedermayer
        case FF_CMP_DCTMAX:
1879
            cmp[i]= c->dct_max[i];
1880
            break;
1881 622348f9 Michael Niedermayer
        case FF_CMP_PSNR:
1882
            cmp[i]= c->quant_psnr[i];
1883
            break;
1884
        case FF_CMP_BIT:
1885
            cmp[i]= c->bit[i];
1886
            break;
1887
        case FF_CMP_RD:
1888
            cmp[i]= c->rd[i];
1889
            break;
1890
        case FF_CMP_VSAD:
1891
            cmp[i]= c->vsad[i];
1892
            break;
1893
        case FF_CMP_VSSE:
1894
            cmp[i]= c->vsse[i];
1895
            break;
1896
        case FF_CMP_ZERO:
1897
            cmp[i]= zero_cmp;
1898
            break;
1899 e6a2ac34 Michael Niedermayer
        case FF_CMP_NSSE:
1900
            cmp[i]= c->nsse[i];
1901
            break;
1902 05aec7bb Måns Rullgård
#if CONFIG_DWT
1903 26efc54e Michael Niedermayer
        case FF_CMP_W53:
1904
            cmp[i]= c->w53[i];
1905
            break;
1906
        case FF_CMP_W97:
1907
            cmp[i]= c->w97[i];
1908
            break;
1909 3a6fc8fa Diego Pettenò
#endif
1910 622348f9 Michael Niedermayer
        default:
1911
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
1912
        }
1913
    }
1914
}
1915
1916 11f18faf Michael Niedermayer
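/* Byte-wise add/diff over whole machine words (SWAR): masking with
 * pb_7f/pb_80 keeps the per-byte sums from carrying into the
 * neighbouring byte, and the XOR term restores the most significant bit
 * of each byte.  A plain scalar loop handles the tail that does not fill
 * a long. */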
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1917 469bd7b1 Loren Merritt
    long i;
1918
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1919
        long a = *(long*)(src+i);
1920
        long b = *(long*)(dst+i);
1921
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1922 11f18faf Michael Niedermayer
    }
1923
    for(; i<w; i++)
1924
        dst[i+0] += src[i+0];
1925
}
1926
1927 4a9ca0a2 Loren Merritt
static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1928 469bd7b1 Loren Merritt
    long i;
1929 4a9ca0a2 Loren Merritt
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1930
        long a = *(long*)(src1+i);
1931
        long b = *(long*)(src2+i);
1932 469bd7b1 Loren Merritt
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1933 4a9ca0a2 Loren Merritt
    }
1934
    for(; i<w; i++)
1935
        dst[i] = src1[i]+src2[i];
1936
}
1937
1938 11f18faf Michael Niedermayer
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1939 469bd7b1 Loren Merritt
    long i;
1940 b250f9c6 Aurelien Jacobs
#if !HAVE_FAST_UNALIGNED
1941 469bd7b1 Loren Merritt
    if((long)src2 & (sizeof(long)-1)){
1942 31304587 Loren Merritt
        for(i=0; i+7<w; i+=8){
1943
            dst[i+0] = src1[i+0]-src2[i+0];
1944
            dst[i+1] = src1[i+1]-src2[i+1];
1945
            dst[i+2] = src1[i+2]-src2[i+2];
1946
            dst[i+3] = src1[i+3]-src2[i+3];
1947
            dst[i+4] = src1[i+4]-src2[i+4];
1948
            dst[i+5] = src1[i+5]-src2[i+5];
1949
            dst[i+6] = src1[i+6]-src2[i+6];
1950
            dst[i+7] = src1[i+7]-src2[i+7];
1951
        }
1952 469bd7b1 Loren Merritt
    }else
1953
#endif
1954
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
1955
        long a = *(long*)(src1+i);
1956
        long b = *(long*)(src2+i);
1957
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1958
    }
1959 11f18faf Michael Niedermayer
    for(; i<w; i++)
1960
        dst[i+0] = src1[i+0]-src2[i+0];
1961
}
1962
1963 e17ccf60 Loren Merritt
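/* HuffYUV median prediction: each sample is predicted as
 * mid_pred(left, top, left + top - topleft); e.g. with left=10, top=20,
 * topleft=25 the candidates are 10, 20 and 5, so the prediction is 10.
 * The add_ variant reconstructs samples from the residual, the sub_
 * variant produces the residual for encoding. */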
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
1964 3daa434a Loren Merritt
    int i;
1965
    uint8_t l, lt;
1966
1967
    l= *left;
1968
    lt= *left_top;
1969
1970
    for(i=0; i<w; i++){
1971
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1972
        lt= src1[i];
1973
        dst[i]= l;
1974
    }
1975
1976
    *left= l;
1977
    *left_top= lt;
1978
}
1979
1980 e17ccf60 Loren Merritt
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
1981 84705403 Michael Niedermayer
    int i;
1982
    uint8_t l, lt;
1983
1984
    l= *left;
1985
    lt= *left_top;
1986
1987
    for(i=0; i<w; i++){
1988
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1989
        lt= src1[i];
1990
        l= src2[i];
1991
        dst[i]= l - pred;
1992 115329f1 Diego Biurrun
    }
1993 84705403 Michael Niedermayer
1994
    *left= l;
1995
    *left_top= lt;
1996
}
1997
1998 2d4bbdec Alexander Strange
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
1999 73c6f598 Nathan Caldwell
    int i;
2000
2001
    for(i=0; i<w-1; i++){
2002
        acc+= src[i];
2003
        dst[i]= acc;
2004
        i++;
2005
        acc+= src[i];
2006
        dst[i]= acc;
2007
    }
2008
2009
    for(; i<w; i++){
2010
        acc+= src[i];
2011
        dst[i]= acc;
2012
    }
2013
2014
    return acc;
2015
}
2016
2017
#if HAVE_BIGENDIAN
2018
#define B 3
2019
#define G 2
2020
#define R 1
2021 f267d3ac Alexander Strange
#define A 0
2022 73c6f598 Nathan Caldwell
#else
2023
#define B 0
2024
#define G 1
2025
#define R 2
2026 f267d3ac Alexander Strange
#define A 3
2027 73c6f598 Nathan Caldwell
#endif
2028 f267d3ac Alexander Strange
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
2029 73c6f598 Nathan Caldwell
    int i;
2030 f267d3ac Alexander Strange
    int r,g,b,a;
2031 73c6f598 Nathan Caldwell
    r= *red;
2032
    g= *green;
2033
    b= *blue;
2034 f267d3ac Alexander Strange
    a= *alpha;
2035 73c6f598 Nathan Caldwell
2036
    for(i=0; i<w; i++){
2037
        b+= src[4*i+B];
2038
        g+= src[4*i+G];
2039
        r+= src[4*i+R];
2040 f267d3ac Alexander Strange
        a+= src[4*i+A];
2041 73c6f598 Nathan Caldwell
2042
        dst[4*i+B]= b;
2043
        dst[4*i+G]= g;
2044
        dst[4*i+R]= r;
2045 f267d3ac Alexander Strange
        dst[4*i+A]= a;
2046 73c6f598 Nathan Caldwell
    }
2047
2048
    *red= r;
2049
    *green= g;
2050
    *blue= b;
2051 f267d3ac Alexander Strange
    *alpha= a;
2052 73c6f598 Nathan Caldwell
}
2053
#undef B
2054
#undef G
2055
#undef R
2056 f267d3ac Alexander Strange
#undef A
2057 73c6f598 Nathan Caldwell
2058 1457ab52 Michael Niedermayer
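/* SATD via an 8x8 Hadamard transform: the BUTTERFLY macros implement the
 * transform stages, hadamard8_diff8x8_c transforms the difference between
 * src and dst and sums the absolute transform coefficients, and
 * hadamard8_intra8x8_c does the same on the source block alone, minus the
 * DC term (see the "-mean" correction at the end). */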
#define BUTTERFLY2(o1,o2,i1,i2) \
2059
o1= (i1)+(i2);\
2060
o2= (i1)-(i2);
2061
2062
#define BUTTERFLY1(x,y) \
2063
{\
2064
    int a,b;\
2065
    a= x;\
2066
    b= y;\
2067
    x= a+b;\
2068
    y= a-b;\
2069
}
2070
2071 c26abfa5 Diego Biurrun
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
2072 1457ab52 Michael Niedermayer
2073 bb198e19 Michael Niedermayer
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
2074 1457ab52 Michael Niedermayer
    int i;
2075
    int temp[64];
2076
    int sum=0;
2077 115329f1 Diego Biurrun
2078 bb198e19 Michael Niedermayer
    assert(h==8);
2079 1457ab52 Michael Niedermayer
2080
    for(i=0; i<8; i++){
2081
        //FIXME try pointer walks
2082
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2083
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2084
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2085
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2086 115329f1 Diego Biurrun
2087 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2088
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2089
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2090
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2091 115329f1 Diego Biurrun
2092 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2093
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2094
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2095
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2096
    }
2097
2098
    for(i=0; i<8; i++){
2099
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2100
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2101
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2102
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2103 115329f1 Diego Biurrun
2104 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2105
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2106
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2107
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2108
2109 115329f1 Diego Biurrun
        sum +=
2110 1457ab52 Michael Niedermayer
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2111
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2112
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2113
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2114
    }
2115
#if 0
2116
static int maxi=0;
2117
if(sum>maxi){
2118
    maxi=sum;
2119
    printf("MAX:%d\n", maxi);
2120
}
2121
#endif
2122
    return sum;
2123
}
2124
2125 622348f9 Michael Niedermayer
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
2126 1457ab52 Michael Niedermayer
    int i;
2127
    int temp[64];
2128
    int sum=0;
2129 115329f1 Diego Biurrun
2130 622348f9 Michael Niedermayer
    assert(h==8);
2131 115329f1 Diego Biurrun
2132 1457ab52 Michael Niedermayer
    for(i=0; i<8; i++){
2133
        //FIXME try pointer walks
2134 622348f9 Michael Niedermayer
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2135
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2136
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2137
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2138 115329f1 Diego Biurrun
2139 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
2140
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
2141
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
2142
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
2143 115329f1 Diego Biurrun
2144 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
2145
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
2146
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
2147
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
2148
    }
2149
2150
    for(i=0; i<8; i++){
2151
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
2152
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
2153
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
2154
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
2155 115329f1 Diego Biurrun
2156 1457ab52 Michael Niedermayer
        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
2157
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
2158
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
2159
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
2160 115329f1 Diego Biurrun
2161
        sum +=
2162 1457ab52 Michael Niedermayer
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
2163
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
2164
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
2165
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
2166
    }
2167 115329f1 Diego Biurrun
2168 c26abfa5 Diego Biurrun
    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
2169 115329f1 Diego Biurrun
2170 1457ab52 Michael Niedermayer
    return sum;
2171
}
2172
2173 bb198e19 Michael Niedermayer
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2174 1457ab52 Michael Niedermayer
    MpegEncContext * const s= (MpegEncContext *)c;
2175 40d11227 Måns Rullgård
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2176 115329f1 Diego Biurrun
2177 bb198e19 Michael Niedermayer
    assert(h==8);
2178 1457ab52 Michael Niedermayer
2179
    s->dsp.diff_pixels(temp, src1, src2, stride);
2180 b0368839 Michael Niedermayer
    s->dsp.fdct(temp);
2181 1edbfe19 Loren Merritt
    return s->dsp.sum_abs_dctelem(temp);
2182 1457ab52 Michael Niedermayer
}
2183
2184 b250f9c6 Aurelien Jacobs
#if CONFIG_GPL
2185 27c61ac5 Michael Niedermayer
#define DCT8_1D {\
2186
    const int s07 = SRC(0) + SRC(7);\
2187
    const int s16 = SRC(1) + SRC(6);\
2188
    const int s25 = SRC(2) + SRC(5);\
2189
    const int s34 = SRC(3) + SRC(4);\
2190
    const int a0 = s07 + s34;\
2191
    const int a1 = s16 + s25;\
2192
    const int a2 = s07 - s34;\
2193
    const int a3 = s16 - s25;\
2194
    const int d07 = SRC(0) - SRC(7);\
2195
    const int d16 = SRC(1) - SRC(6);\
2196
    const int d25 = SRC(2) - SRC(5);\
2197
    const int d34 = SRC(3) - SRC(4);\
2198
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
2199
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
2200
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
2201
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
2202
    DST(0,  a0 + a1     ) ;\
2203
    DST(1,  a4 + (a7>>2)) ;\
2204
    DST(2,  a2 + (a3>>1)) ;\
2205
    DST(3,  a5 + (a6>>2)) ;\
2206
    DST(4,  a0 - a1     ) ;\
2207
    DST(5,  a6 - (a5>>2)) ;\
2208
    DST(6, (a2>>1) - a3 ) ;\
2209
    DST(7, (a4>>2) - a7 ) ;\
2210
}
2211
2212
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2213
    MpegEncContext * const s= (MpegEncContext *)c;
2214 8d15910a Måns Rullgård
    DCTELEM dct[8][8];
2215 27c61ac5 Michael Niedermayer
    int i;
2216
    int sum=0;
2217
2218 8d15910a Måns Rullgård
    s->dsp.diff_pixels(dct[0], src1, src2, stride);
2219 27c61ac5 Michael Niedermayer
2220
#define SRC(x) dct[i][x]
2221
#define DST(x,v) dct[i][x]= v
2222
    for( i = 0; i < 8; i++ )
2223
        DCT8_1D
2224
#undef SRC
2225
#undef DST
2226
2227
#define SRC(x) dct[x][i]
2228 c26abfa5 Diego Biurrun
#define DST(x,v) sum += FFABS(v)
2229 27c61ac5 Michael Niedermayer
    for( i = 0; i < 8; i++ )
2230
        DCT8_1D
2231
#undef SRC
2232
#undef DST
2233
    return sum;
2234
}
2235
#endif
2236
2237 0fd6aea1 Michael Niedermayer
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2238
    MpegEncContext * const s= (MpegEncContext *)c;
2239 40d11227 Måns Rullgård
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2240 0fd6aea1 Michael Niedermayer
    int sum=0, i;
2241 115329f1 Diego Biurrun
2242 0fd6aea1 Michael Niedermayer
    assert(h==8);
2243
2244
    s->dsp.diff_pixels(temp, src1, src2, stride);
2245
    s->dsp.fdct(temp);
2246
2247
    for(i=0; i<64; i++)
2248 c26abfa5 Diego Biurrun
        sum= FFMAX(sum, FFABS(temp[i]));
2249 115329f1 Diego Biurrun
2250 0fd6aea1 Michael Niedermayer
    return sum;
2251
}
2252
2253 bb198e19 Michael Niedermayer
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2254 1457ab52 Michael Niedermayer
    MpegEncContext * const s= (MpegEncContext *)c;
2255 40d11227 Måns Rullgård
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
2256 2480c390 Måns Rullgård
    DCTELEM * const bak = temp+64;
2257 1457ab52 Michael Niedermayer
    int sum=0, i;
2258
2259 bb198e19 Michael Niedermayer
    assert(h==8);
2260 1457ab52 Michael Niedermayer
    s->mb_intra=0;
2261 115329f1 Diego Biurrun
2262 1457ab52 Michael Niedermayer
    s->dsp.diff_pixels(temp, src1, src2, stride);
2263 115329f1 Diego Biurrun
2264 1457ab52 Michael Niedermayer
    memcpy(bak, temp, 64*sizeof(DCTELEM));
2265 115329f1 Diego Biurrun
2266 67725183 Michael Niedermayer
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2267 d50635cd Michael Niedermayer
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
2268 59e6f60a Aurelien Jacobs
    ff_simple_idct(temp); //FIXME
2269 115329f1 Diego Biurrun
2270 1457ab52 Michael Niedermayer
    for(i=0; i<64; i++)
2271
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2272 115329f1 Diego Biurrun
2273 1457ab52 Michael Niedermayer
    return sum;
2274
}
2275
2276 bb198e19 Michael Niedermayer
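/* Rate-distortion comparison: the 8x8 difference is quantized, the bits
 * needed for the resulting run/level pairs are estimated from the VLC
 * length tables, the block is dequantized and added back, and the return
 * value combines the SSE distortion with a qscale-weighted bit cost.
 * bit8x8_c below returns the bit estimate alone. */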
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2277 3a87ac94 Michael Niedermayer
    MpegEncContext * const s= (MpegEncContext *)c;
2278 0c1a9eda Zdenek Kabelac
    const uint8_t *scantable= s->intra_scantable.permutated;
2279 40d11227 Måns Rullgård
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2280
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2281
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2282 e6dba5df Ramiro Polla
    int i, last, run, bits, level, distortion, start_i;
2283 3a87ac94 Michael Niedermayer
    const int esc_length= s->ac_esc_length;
2284
    uint8_t * length;
2285
    uint8_t * last_length;
2286 115329f1 Diego Biurrun
2287 bb198e19 Michael Niedermayer
    assert(h==8);
2288
2289 90d43b52 Måns Rullgård
    copy_block8(lsrc1, src1, 8, stride, 8);
2290
    copy_block8(lsrc2, src2, 8, stride, 8);
2291 3a87ac94 Michael Niedermayer
2292 90d43b52 Måns Rullgård
    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2293 67725183 Michael Niedermayer
2294
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2295
2296
    bits=0;
2297 115329f1 Diego Biurrun
2298 3a87ac94 Michael Niedermayer
    if (s->mb_intra) {
2299 115329f1 Diego Biurrun
        start_i = 1;
2300 3a87ac94 Michael Niedermayer
        length     = s->intra_ac_vlc_length;
2301
        last_length= s->intra_ac_vlc_last_length;
2302 67725183 Michael Niedermayer
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2303 3a87ac94 Michael Niedermayer
    } else {
2304
        start_i = 0;
2305
        length     = s->inter_ac_vlc_length;
2306
        last_length= s->inter_ac_vlc_last_length;
2307
    }
2308 115329f1 Diego Biurrun
2309 67725183 Michael Niedermayer
    if(last>=start_i){
2310 3a87ac94 Michael Niedermayer
        run=0;
2311
        for(i=start_i; i<last; i++){
2312
            int j= scantable[i];
2313
            level= temp[j];
2314 115329f1 Diego Biurrun
2315 3a87ac94 Michael Niedermayer
            if(level){
2316
                level+=64;
2317
                if((level&(~127)) == 0){
2318
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2319
                }else
2320
                    bits+= esc_length;
2321
                run=0;
2322
            }else
2323
                run++;
2324
        }
2325
        i= scantable[last];
2326 115329f1 Diego Biurrun
2327 3a87ac94 Michael Niedermayer
        level= temp[i] + 64;
2328 1d0eab1d Michael Niedermayer
2329
        assert(level - 64);
2330 115329f1 Diego Biurrun
2331 3a87ac94 Michael Niedermayer
        if((level&(~127)) == 0){
2332
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2333
        }else
2334
            bits+= esc_length;
2335 115329f1 Diego Biurrun
2336 67725183 Michael Niedermayer
    }
2337
2338
    if(last>=0){
2339 d50635cd Michael Niedermayer
        if(s->mb_intra)
2340
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
2341
        else
2342
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
2343 3a87ac94 Michael Niedermayer
    }
2344 115329f1 Diego Biurrun
2345 90d43b52 Måns Rullgård
    s->dsp.idct_add(lsrc2, 8, temp);
2346 115329f1 Diego Biurrun
2347 90d43b52 Måns Rullgård
    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2348 3a87ac94 Michael Niedermayer
2349 e6dba5df Ramiro Polla
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2350 3a87ac94 Michael Niedermayer
}
2351
2352 bb198e19 Michael Niedermayer
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2353 3a87ac94 Michael Niedermayer
    MpegEncContext * const s= (MpegEncContext *)c;
2354 0c1a9eda Zdenek Kabelac
    const uint8_t *scantable= s->intra_scantable.permutated;
2355 40d11227 Måns Rullgård
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
2356 3a87ac94 Michael Niedermayer
    int i, last, run, bits, level, start_i;
2357
    const int esc_length= s->ac_esc_length;
2358
    uint8_t * length;
2359
    uint8_t * last_length;
2360 bb198e19 Michael Niedermayer
2361
    assert(h==8);
2362 115329f1 Diego Biurrun
2363 67725183 Michael Niedermayer
    s->dsp.diff_pixels(temp, src1, src2, stride);
2364 3a87ac94 Michael Niedermayer
2365 67725183 Michael Niedermayer
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2366
2367
    bits=0;
2368 115329f1 Diego Biurrun
2369 3a87ac94 Michael Niedermayer
    if (s->mb_intra) {
2370 115329f1 Diego Biurrun
        start_i = 1;
2371 3a87ac94 Michael Niedermayer
        length     = s->intra_ac_vlc_length;
2372
        last_length= s->intra_ac_vlc_last_length;
2373 67725183 Michael Niedermayer
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2374 3a87ac94 Michael Niedermayer
    } else {
2375
        start_i = 0;
2376
        length     = s->inter_ac_vlc_length;
2377
        last_length= s->inter_ac_vlc_last_length;
2378
    }
2379 115329f1 Diego Biurrun
2380 67725183 Michael Niedermayer
    if(last>=start_i){
2381 3a87ac94 Michael Niedermayer
        run=0;
2382
        for(i=start_i; i<last; i++){
2383
            int j= scantable[i];
2384
            level= temp[j];
2385 115329f1 Diego Biurrun
2386 3a87ac94 Michael Niedermayer
            if(level){
2387
                level+=64;
2388
                if((level&(~127)) == 0){
2389
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2390
                }else
2391
                    bits+= esc_length;
2392
                run=0;
2393
            }else
2394
                run++;
2395
        }
2396
        i= scantable[last];
2397 115329f1 Diego Biurrun
2398 67725183 Michael Niedermayer
        level= temp[i] + 64;
2399 115329f1 Diego Biurrun
2400 67725183 Michael Niedermayer
        assert(level - 64);
2401 115329f1 Diego Biurrun
2402 3a87ac94 Michael Niedermayer
        if((level&(~127)) == 0){
2403
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2404
        }else
2405
            bits+= esc_length;
2406
    }
2407
2408
    return bits;
2409
}
2410
2411 7fb7f636 Roman Shaposhnik
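/* Vertical activity metrics: the vsad/vsse _intra variants sum the
 * absolute resp. squared differences between vertically adjacent samples
 * of a single block, while vsad16_c measures how much that vertical
 * gradient differs between the two blocks being compared (the vsse
 * versions use squared differences instead). */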
#define VSAD_INTRA(size) \
2412
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2413
    int score=0;                                                                                            \
2414
    int x,y;                                                                                                \
2415
                                                                                                            \
2416
    for(y=1; y<h; y++){                                                                                     \
2417
        for(x=0; x<size; x+=4){                                                                             \
2418
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
2419
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
2420
        }                                                                                                   \
2421
        s+= stride;                                                                                         \
2422
    }                                                                                                       \
2423
                                                                                                            \
2424
    return score;                                                                                           \
2425
}
2426
VSAD_INTRA(8)
2427
VSAD_INTRA(16)
2428 622348f9 Michael Niedermayer
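/**
 * Vertical SAD of the residual: sums
 * |(s1[x,y]-s2[x,y]) - (s1[x,y+1]-s2[x,y+1])| over a 16-pixel-wide block.
 */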
2429
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2430
    int score=0;
2431
    int x,y;
2432 115329f1 Diego Biurrun
2433 622348f9 Michael Niedermayer
    for(y=1; y<h; y++){
2434
        for(x=0; x<16; x++){
2435 c26abfa5 Diego Biurrun
            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2436 622348f9 Michael Niedermayer
        }
2437
        s1+= stride;
2438
        s2+= stride;
2439
    }
2440 115329f1 Diego Biurrun
2441 622348f9 Michael Niedermayer
    return score;
2442
}
2443
2444
#define SQ(a) ((a)*(a))
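/**
 * VSSE_INTRA(n) is the squared-error counterpart of VSAD_INTRA(n): the sum
 * of squared differences between vertically adjacent pixels of one block.
 */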
2445 7fb7f636 Roman Shaposhnik
#define VSSE_INTRA(size) \
2446
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2447
    int score=0;                                                                                            \
2448
    int x,y;                                                                                                \
2449
                                                                                                            \
2450
    for(y=1; y<h; y++){                                                                                     \
2451
        for(x=0; x<size; x+=4){                                                                               \
2452
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
2453
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
2454
        }                                                                                                   \
2455
        s+= stride;                                                                                         \
2456
    }                                                                                                       \
2457
                                                                                                            \
2458
    return score;                                                                                           \
2459
}
2460
VSSE_INTRA(8)
2461
VSSE_INTRA(16)
2462 622348f9 Michael Niedermayer
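/* Squared-error counterpart of vsad16_c(): sum of squared vertical
   differences of the residual s1-s2. */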
2463
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
2464
    int score=0;
2465
    int x,y;
2466 115329f1 Diego Biurrun
2467 622348f9 Michael Niedermayer
    for(y=1; y<h; y++){
2468
        for(x=0; x<16; x++){
2469
            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
2470
        }
2471
        s1+= stride;
2472
        s2+= stride;
2473
    }
2474 115329f1 Diego Biurrun
2475 622348f9 Michael Niedermayer
    return score;
2476
}
2477
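/* Sum of squared differences between an int8_t and an int16_t vector,
   e.g. for comparing quantized codebook entries against a residual. */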
2478 a00177a9 Måns Rullgård
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
2479
                               int size){
2480 59006372 Loren Merritt
    int score=0;
2481
    int i;
2482
    for(i=0; i<size; i++)
2483
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2484
    return score;
2485
}
2486
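/* WRAPPER8_16_SQ (defined earlier in this file) builds each 16x16 comparison
   function from its 8x8 version by summing the score over the four 8x8
   sub-blocks of the macroblock. */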
2487 9fbd14ac Diego Biurrun
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
2488
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
2489
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
2490 b250f9c6 Aurelien Jacobs
#if CONFIG_GPL
2491 9fbd14ac Diego Biurrun
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
2492 60900991 Mike Melanson
#endif
2493 9fbd14ac Diego Biurrun
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
2494
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
2495
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
2496
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
2497 1457ab52 Michael Niedermayer
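/* dst[i] = src0[i] * src1[i] -- element-wise multiplication of two float vectors. */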
2498 6eabb0d3 Justin Ruggles
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
2499 eb4825b5 Loren Merritt
    int i;
2500
    for(i=0; i<len; i++)
2501 6eabb0d3 Justin Ruggles
        dst[i] = src0[i] * src1[i];
2502 eb4825b5 Loren Merritt
}
2503
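/* dst[i] = src0[i] * src1[len-1-i] -- multiply with src1 read in reverse order. */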
2504
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
2505
    int i;
2506
    src1 += len-1;
2507
    for(i=0; i<len; i++)
2508
        dst[i] = src0[i] * src1[-i];
2509
}
2510
2511 952e8721 Måns Rullgård
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
2512 eb4825b5 Loren Merritt
    int i;
2513
    for(i=0; i<len; i++)
2514 952e8721 Måns Rullgård
        dst[i] = src0[i] * src1[i] + src2[i];
2515 eb4825b5 Loren Merritt
}
2516
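/**
 * Window and overlap two len-sample inputs into a 2*len-sample output,
 * the core operation of MDCT overlap-add in the float audio decoders.
 * In terms of the pointers as passed in, for 0 <= i < len:
 *   dst[i]         = src0[i]*win[2*len-1-i] - src1[len-1-i]*win[i]
 *   dst[2*len-1-i] = src0[i]*win[i]         + src1[len-1-i]*win[2*len-1-i]
 */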
2517 80ba1ddb Justin Ruggles
static void vector_fmul_window_c(float *dst, const float *src0,
2518
                                 const float *src1, const float *win, int len)
2519
{
2520 b9fa3208 Loren Merritt
    int i,j;
2521
    dst += len;
2522
    win += len;
2523
    src0+= len;
2524
    for(i=-len, j=len-1; i<0; i++, j--) {
2525
        float s0 = src0[i];
2526
        float s1 = src1[j];
2527
        float wi = win[i];
2528
        float wj = win[j];
2529 80ba1ddb Justin Ruggles
        dst[i] = s0*wj - s1*wi;
2530
        dst[j] = s0*wi + s1*wj;
2531 b9fa3208 Loren Merritt
    }
2532 f27e1d64 Loren Merritt
}
2533
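/* dst[i] = src[i] * mul -- scale a float vector by a scalar. */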
2534 53b57211 Måns Rullgård
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
2535
                                 int len)
2536
{
2537
    int i;
2538
    for (i = 0; i < len; i++)
2539
        dst[i] = src[i] * mul;
2540
}
2541
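/**
 * The following four functions scale their output by short per-group "sv"
 * vectors (presumably "sub-vectors"): for every 2 (or 4) consecutive output
 * samples the next sv[] pointer supplies the per-sample factors, and the
 * whole result is additionally multiplied by mul. The sv_fmul_scalar_*
 * variants omit the src input and scale the sv entries directly.
 */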
2542
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
2543
                                      const float **sv, float mul, int len)
2544
{
2545
    int i;
2546
    for (i = 0; i < len; i += 2, sv++) {
2547
        dst[i  ] = src[i  ] * sv[0][0] * mul;
2548
        dst[i+1] = src[i+1] * sv[0][1] * mul;
2549
    }
2550
}
2551
2552
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
2553
                                      const float **sv, float mul, int len)
2554
{
2555
    int i;
2556
    for (i = 0; i < len; i += 4, sv++) {
2557
        dst[i  ] = src[i  ] * sv[0][0] * mul;
2558
        dst[i+1] = src[i+1] * sv[0][1] * mul;
2559
        dst[i+2] = src[i+2] * sv[0][2] * mul;
2560
        dst[i+3] = src[i+3] * sv[0][3] * mul;
2561
    }
2562
}
2563
2564
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
2565
                               int len)
2566
{
2567
    int i;
2568
    for (i = 0; i < len; i += 2, sv++) {
2569
        dst[i  ] = sv[0][0] * mul;
2570
        dst[i+1] = sv[0][1] * mul;
2571
    }
2572
}
2573
2574
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
2575
                               int len)
2576
{
2577
    int i;
2578
    for (i = 0; i < len; i += 4, sv++) {
2579
        dst[i  ] = sv[0][0] * mul;
2580
        dst[i+1] = sv[0][1] * mul;
2581
        dst[i+2] = sv[0][2] * mul;
2582
        dst[i+3] = sv[0][3] * mul;
2583
    }
2584
}
2585
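/* In-place butterfly: v1[i] becomes v1[i]+v2[i] and v2[i] becomes the old
   v1[i]-v2[i], i.e. each pair is replaced by its sum and difference. */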
2586
static void butterflies_float_c(float *restrict v1, float *restrict v2,
2587
                                int len)
2588
{
2589
    int i;
2590
    for (i = 0; i < len; i++) {
2591
        float t = v1[i] - v2[i];
2592
        v1[i] += v2[i];
2593
        v2[i] = t;
2594
    }
2595
}
2596
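/* Dot product of two float vectors. */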
2597
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
2598
{
2599
    float p = 0.0f;
2600
    int i;
2601
2602
    for (i = 0; i < len; i++)
2603
        p += v1[i] * v2[i];
2604
2605
    return p;
2606
}
2607
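/**
 * clipf_c_one() clips one float, given as its raw IEEE-754 bit pattern, to
 * [*min, *max] using only integer compares; vector_clipf_c_opposite_sign()
 * below applies it to a whole vector. The trick requires *min < 0 < *max
 * (hence the name): negative floats compare as larger unsigned integers, so
 * a > mini means the value lies below *min, and XORing the sign bit orders
 * the positive values against maxisign = bits(*max) ^ 0x80000000.
 */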
2608 0a68cd87 Vitor Sessak
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
2609
                   uint32_t maxi, uint32_t maxisign)
2610
{
2611
2612
    if(a > mini) return mini;
2613 187a5379 Alex Converse
    else if((a^(1U<<31)) > maxisign) return maxi;
2614 0a68cd87 Vitor Sessak
    else return a;
2615
}
2616
2617 50e23ae9 Vitor Sessak
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
2618 0a68cd87 Vitor Sessak
    int i;
2619
    uint32_t mini = *(uint32_t*)min;
2620
    uint32_t maxi = *(uint32_t*)max;
2621 187a5379 Alex Converse
    uint32_t maxisign = maxi ^ (1U<<31);
2622 0a68cd87 Vitor Sessak
    uint32_t *dsti = (uint32_t*)dst;
2623 50e23ae9