Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp8dsp.c @ cd29c2b5

History | View | Annotate | Download (12.1 KB)

1
/**
2
 * VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23

    
24
#include "dsputil.h"
25
#include "vp8dsp.h"
26

    
27
// TODO: Maybe add dequant
28
static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16])
29
{
30
    int i, t0, t1, t2, t3;
31

    
32
    for (i = 0; i < 4; i++) {
33
        t0 = dc[0*4+i] + dc[3*4+i];
34
        t1 = dc[1*4+i] + dc[2*4+i];
35
        t2 = dc[1*4+i] - dc[2*4+i];
36
        t3 = dc[0*4+i] - dc[3*4+i];
37

    
38
        dc[0*4+i] = t0 + t1;
39
        dc[1*4+i] = t3 + t2;
40
        dc[2*4+i] = t0 - t1;
41
        dc[3*4+i] = t3 - t2;
42
    }
43

    
44
    for (i = 0; i < 4; i++) {
45
        t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
46
        t1 = dc[i*4+1] + dc[i*4+2];
47
        t2 = dc[i*4+1] - dc[i*4+2];
48
        t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
49

    
50
        *block[i][0] = (t0 + t1) >> 3;
51
        *block[i][1] = (t3 + t2) >> 3;
52
        *block[i][2] = (t0 - t1) >> 3;
53
        *block[i][3] = (t3 - t2) >> 3;
54
    }
55
}
56

    
57

    
58
#define MUL_20091(a) ((((a)*20091) >> 16) + (a))
59
#define MUL_35468(a)  (((a)*35468) >> 16)
60

    
61
static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride)
62
{
63
    int i, t0, t1, t2, t3;
64
    DCTELEM tmp[16];
65

    
66
    for (i = 0; i < 4; i++) {
67
        t0 = block[0*4+i] + block[2*4+i];
68
        t1 = block[0*4+i] - block[2*4+i];
69
        t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]);
70
        t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]);
71

    
72
        tmp[i*4+0] = t0 + t3;
73
        tmp[i*4+1] = t1 + t2;
74
        tmp[i*4+2] = t1 - t2;
75
        tmp[i*4+3] = t0 - t3;
76
    }
77

    
78
    for (i = 0; i < 4; i++) {
79
        t0 = tmp[0*4+i] + tmp[2*4+i];
80
        t1 = tmp[0*4+i] - tmp[2*4+i];
81
        t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
82
        t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
83

    
84
        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
85
        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
86
        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
87
        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
88
        dst += stride;
89
    }
90
}
91

    
92
/**
 * DC-only variant of the inverse transform: add the rounded value
 * (block[0] + 4) >> 3 to every pixel of the 4x4 area at dst, clipping each
 * sum to 0..255. Only block[0] is read.
 *
 * @param dst    top-left pixel of the destination area
 * @param block  coefficient block; not modified
 * @param stride distance in bytes between successive rows of dst
 */
static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride)
{
    const int dc = (block[0] + 4) >> 3;
    int row, col;

    for (row = 0; row < 4; row++, dst += stride)
        for (col = 0; col < 4; col++)
            dst[col] = av_clip_uint8(dst[col] + dc);
}
104

    
105

    
106
/*
 * Declares the eight samples straddling an edge as int locals p3..p0 (the
 * four before the edge) and q0..q3 (the four from the edge onwards).
 *
 * Relies on a pointer `p` (addressing q0) and an int `stride` (distance
 * between neighbouring samples) being in scope at the point of use, which
 * keeps the helper functions down to a pointer and a stride. Every local is
 * tagged av_unused because most users only need a subset. The expansion
 * already ends in ';', so it is used without a trailing semicolon.
 */
#define LOAD_PIXELS                      \
    int av_unused p3 = p[-4 * stride];   \
    int av_unused p2 = p[-3 * stride];   \
    int av_unused p1 = p[-2 * stride];   \
    int av_unused p0 = p[-1 * stride];   \
    int av_unused q0 = p[ 0 * stride];   \
    int av_unused q1 = p[ 1 * stride];   \
    int av_unused q2 = p[ 2 * stride];   \
    int av_unused q3 = p[ 3 * stride];
116

    
117
static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap)
118
{
119
    LOAD_PIXELS
120
    int a, f1, f2;
121

    
122
    a = 3*(q0 - p0);
123

    
124
    if (is4tap)
125
        a += av_clip_int8(p1 - q1);
126

    
127
    a = av_clip_int8(a);
128

    
129
    // We deviate from the spec here with c(a+3) >> 3
130
    // since that's what libvpx does.
131
    f1 = FFMIN(a+4, 127) >> 3;
132
    f2 = FFMIN(a+3, 127) >> 3;
133

    
134
    // Despite what the spec says, we do need to clamp here to
135
    // be bitexact with libvpx.
136
    p[-1*stride] = av_clip_uint8(p0 + f2);
137
    p[ 0*stride] = av_clip_uint8(q0 - f1);
138

    
139
    // only used for _inner on blocks without high edge variance
140
    if (!is4tap) {
141
        a = (f1+1)>>1;
142
        p[-2*stride] = av_clip_uint8(p1 + a);
143
        p[ 1*stride] = av_clip_uint8(q1 - a);
144
    }
145
}
146

    
147
static av_always_inline int simple_limit(uint8_t *p, int stride, int flim)
148
{
149
    LOAD_PIXELS
150
    return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
151
}
152

    
153
/**
154
 * E - limit at the macroblock edge
155
 * I - limit for interior difference
156
 */
157
static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I)
158
{
159
    LOAD_PIXELS
160
    return simple_limit(p, stride, 2*E+I)
161
        && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
162
        && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
163
}
164

    
165
// high edge variance
166
static av_always_inline int hev(uint8_t *p, int stride, int thresh)
167
{
168
    LOAD_PIXELS
169
    return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
170
}
171

    
172
static av_always_inline void filter_mbedge(uint8_t *p, int stride)
173
{
174
    int a0, a1, a2, w;
175

    
176
    LOAD_PIXELS
177

    
178
    w = av_clip_int8(p1-q1);
179
    w = av_clip_int8(w + 3*(q0-p0));
180

    
181
    a0 = (27*w + 63) >> 7;
182
    a1 = (18*w + 63) >> 7;
183
    a2 = ( 9*w + 63) >> 7;
184

    
185
    p[-3*stride] = av_clip_uint8(p2 + a2);
186
    p[-2*stride] = av_clip_uint8(p1 + a1);
187
    p[-1*stride] = av_clip_uint8(p0 + a0);
188
    p[ 0*stride] = av_clip_uint8(q0 - a0);
189
    p[ 1*stride] = av_clip_uint8(q1 - a1);
190
    p[ 2*stride] = av_clip_uint8(q2 - a2);
191
}
192

    
193
/*
 * Generates two loop-filter entry points per instantiation:
 *
 *   vp8_<dir>_loop_filter<size>_c        positions passing normal_limit()
 *                                        get filter_common(.., 1) when hev()
 *                                        is set, filter_mbedge() otherwise
 *   vp8_<dir>_loop_filter<size>_inner_c  positions passing normal_limit()
 *                                        get filter_common() with the hev()
 *                                        result as its is4tap argument
 *
 * size    - number of positions processed along the edge
 * stridea - step from one edge position to the next
 * strideb - step between samples across the edge
 *
 * Both stride arguments are expanded inside the generated functions, so
 * they may refer to the `stride` parameter.
 */
#define LOOP_FILTER(dir, size, stridea, strideb) \
static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *px = dst + n*stridea;\
\
        if (!normal_limit(px, strideb, flim_E, flim_I))\
            continue;\
        if (hev(px, strideb, hev_thresh))\
            filter_common(px, strideb, 1);\
        else\
            filter_mbedge(px, strideb);\
    }\
}\
\
static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int n;\
\
    for (n = 0; n < size; n++) {\
        uint8_t *px = dst + n*stridea;\
\
        if (normal_limit(px, strideb, flim_E, flim_I))\
            filter_common(px, strideb, hev(px, strideb, hev_thresh));\
    }\
}

/* "v": positions are adjacent bytes, samples across the edge are `stride`
 * apart. "h": the other way round. */
LOOP_FILTER(v, 16, 1, stride)
LOOP_FILTER(h, 16, stride, 1)
LOOP_FILTER(v,  8, 1, stride)
LOOP_FILTER(h,  8, stride, 1)
224

    
225
/**
 * Simple loop filter over 16 adjacent byte positions starting at dst, with
 * the samples across the edge `stride` bytes apart. Positions that pass
 * simple_limit() are filtered with filter_common() in its is4tap mode.
 *
 * @param dst    first sample after the edge at position 0
 * @param stride distance in bytes between samples across the edge
 * @param flim   filter limit handed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int col = 0;

    while (col < 16) {
        uint8_t *px = dst + col;

        if (simple_limit(px, stride, flim))
            filter_common(px, stride, 1);
        col++;
    }
}
233

    
234
/**
 * Simple loop filter over 16 positions spaced `stride` bytes apart starting
 * at dst, with the samples across the edge in adjacent bytes. Positions
 * that pass simple_limit() are filtered with filter_common() in its is4tap
 * mode.
 *
 * @param dst    first sample after the edge at position 0
 * @param stride distance in bytes from one position to the next
 * @param flim   filter limit handed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim)
{
    int row = 0;

    while (row < 16) {
        uint8_t *px = dst + row * stride;

        if (simple_limit(px, 1, flim))
            filter_common(px, 1, 1);
        row++;
    }
}
242

    
243
/*
 * Sub-pixel interpolation taps, one row per fractional position; the
 * motion-compensation functions index it with mx-1 or my-1.
 *
 * Only magnitudes are stored: FILTER_6TAP/FILTER_4TAP subtract taps 1 and 4
 * and add the others, so every row nets out to 128, matching the >> 7
 * normalisation of those macros. Rows whose taps 0 and 5 are zero need only
 * the four inner taps.
 */
static const uint8_t subpel_filters[7][6] = {
    /* tap:  0    1    2    3    4    5 */
    {        0,   6, 123,  12,   1,   0 },
    {        2,  11, 108,  36,   8,   1 },
    {        0,   9,  93,  50,   6,   0 },
    {        3,  16,  77,  77,  16,   3 },
    {        0,   6,  50,  93,   9,   0 },
    {        1,   8,  36, 108,  11,   2 },
    {        0,   1,  12, 123,   6,   0 },
};
252

    
253
/*
 * Generates put_vp8_pixels<WIDTH>_c(): a plain copy of h rows of WIDTH
 * bytes from src to dst, advancing by srcstride / dststride per row.
 *
 * The trailing x and y parameters are not used by the copy. The row counter
 * is called `i` because a local named `y` would redeclare the parameter in
 * the function's outermost scope, which C does not allow.
 */
#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int x, int y) { \
    int i; \
    for (i = 0; i < h; i++, dst += dststride, src += srcstride) { \
        memcpy(dst, src, WIDTH); \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
264

    
265
/*
 * Sub-pixel interpolation kernels. Both evaluate to the filtered sample at
 * index `x`, rounded (+64), normalised (>> 7) and clipped to 0..255.
 *
 *   src    - sample array being filtered
 *   F      - one row of six taps; taps 1 and 4 are subtracted, the rest added
 *   stride - distance between successive filter inputs
 *
 * `x` is deliberately NOT a parameter: it is picked up from the scope where
 * the macro is expanded. Every parameter is parenthesised so that
 * expression arguments (for example a stride of `a + b`) expand correctly.
 * Arguments are evaluated several times, so they must have no side effects.
 */
#define FILTER_6TAP(src, F, stride) \
    av_clip_uint8(((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + (F)[0]*(src)[x-2*(stride)] + \
                   (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + (F)[5]*(src)[x+3*(stride)] + 64) >> 7)

/* Same as FILTER_6TAP but ignoring the outermost taps F[0] and F[5]. */
#define FILTER_4TAP(src, F, stride) \
    av_clip_uint8(((F)[2]*(src)[x+0*(stride)] - (F)[1]*(src)[x-1*(stride)] + \
                   (F)[3]*(src)[x+1*(stride)] - (F)[4]*(src)[x+2*(stride)] + 64) >> 7)
272

    
273
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): horizontal-only sub-pixel
 * interpolation of an h-row, SIZE-column block.
 *
 * The taps come from subpel_filters[mx-1] and FILTER is applied with a
 * sample distance of 1; `my` is not used. The inner loop counter must be
 * called `x` because the FILTER_* macros read it from the enclosing scope.
 */
#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *const taps = subpel_filters[mx-1]; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, 1); \
}
286
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): vertical-only sub-pixel
 * interpolation of an h-row, SIZE-column block.
 *
 * The taps come from subpel_filters[my-1] and FILTER is applied with a
 * sample distance of srcstride; `mx` is not used. The inner loop counter
 * must be called `x` because the FILTER_* macros read it from the enclosing
 * scope.
 */
#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    const uint8_t *const taps = subpel_filters[my-1]; \
    int x, row; \
\
    for (row = 0; row < h; row++, dst += dststride, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER(src, taps, srcstride); \
}
299
/*
 * Generates put_vp8_epel<SIZE>_<FILTERNAME>_c(): two-dimensional sub-pixel
 * interpolation done as two separable passes.
 *
 * Pass 1 filters h+5 source rows horizontally (FILTERX, taps from
 * subpel_filters[mx-1]), starting two rows above the block, into a
 * temporary buffer SIZE bytes wide. Pass 2 filters that buffer vertically
 * (FILTERY, taps from subpel_filters[my-1]), starting at its third row, into
 * dst.
 *
 * The temporary buffer has room for 2*SIZE+5 rows, so h must not exceed
 * 2*SIZE. The inner loop counter must be called `x` because the FILTER_*
 * macros read it from the enclosing scope.
 */
#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \
static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, int dststride, uint8_t *src, int srcstride, int h, int mx, int my) \
{ \
    uint8_t tmp_array[(2*SIZE+5)*SIZE]; \
    const uint8_t *taps = subpel_filters[mx-1]; \
    uint8_t *tmp; \
    int x, row; \
\
    src -= 2*srcstride; \
    for (row = 0, tmp = tmp_array; row < h+5; row++, tmp += SIZE, src += srcstride) \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = FILTERX(src, taps, 1); \
\
    taps = subpel_filters[my-1]; \
    for (row = 0, tmp = tmp_array + 2*SIZE; row < h; row++, tmp += SIZE, dst += dststride) \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTERY(tmp, taps, SIZE); \
}
325

    
326
/* Horizontal only: 4-tap, then 6-tap, for block widths 16, 8 and 4. */
VP8_EPEL_H(16, FILTER_4TAP, h4)
VP8_EPEL_H( 8, FILTER_4TAP, h4)
VP8_EPEL_H( 4, FILTER_4TAP, h4)
VP8_EPEL_H(16, FILTER_6TAP, h6)
VP8_EPEL_H( 8, FILTER_6TAP, h6)
VP8_EPEL_H( 4, FILTER_6TAP, h6)

/* Vertical only: 4-tap, then 6-tap. */
VP8_EPEL_V(16, FILTER_4TAP, v4)
VP8_EPEL_V( 8, FILTER_4TAP, v4)
VP8_EPEL_V( 4, FILTER_4TAP, v4)
VP8_EPEL_V(16, FILTER_6TAP, v6)
VP8_EPEL_V( 8, FILTER_6TAP, v6)
VP8_EPEL_V( 4, FILTER_6TAP, v6)

/* Both directions: every horizontal/vertical tap-count combination. */
VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4)
VP8_EPEL_HV( 8, FILTER_4TAP, FILTER_4TAP, h4v4)
VP8_EPEL_HV( 4, FILTER_4TAP, FILTER_4TAP, h4v4)
VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6)
VP8_EPEL_HV( 8, FILTER_4TAP, FILTER_6TAP, h4v6)
VP8_EPEL_HV( 4, FILTER_4TAP, FILTER_6TAP, h4v6)
VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4)
VP8_EPEL_HV( 8, FILTER_6TAP, FILTER_4TAP, h6v4)
VP8_EPEL_HV( 4, FILTER_6TAP, FILTER_4TAP, h6v4)
VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6)
VP8_EPEL_HV( 8, FILTER_6TAP, FILTER_6TAP, h6v6)
VP8_EPEL_HV( 4, FILTER_6TAP, FILTER_6TAP, h6v6)
350

    
351
/*
 * Fills dsp->put_vp8_epel_pixels_tab[IDX] with the nine C motion
 * compensation functions for block width SIZE. Expects a pointer named
 * `dsp` in scope.
 *
 * Going by the function names, the second index selects the vertical filter
 * and the third the horizontal one, with 0 = none, 1 = 4-tap, 2 = 6-tap.
 *
 * Wrapped in do { } while (0) so that the whole group behaves as a single
 * statement (e.g. under an unbraced `if`); terminate the invocation with a
 * semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
    do { \
        dsp->put_vp8_epel_pixels_tab[(IDX)][0][0] = put_vp8_pixels ## SIZE ## _c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[(IDX)][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
361

    
362
/**
 * Populate a VP8DSPContext with the C implementations defined in this file.
 *
 * @param dsp context whose function pointers are overwritten
 */
av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
{
    /* inverse transforms */
    dsp->vp8_luma_dc_wht           = vp8_luma_dc_wht_c;
    dsp->vp8_idct_add              = vp8_idct_add_c;
    dsp->vp8_idct_dc_add           = vp8_idct_dc_add_c;

    /* normal loop filter */
    dsp->vp8_v_loop_filter16       = vp8_v_loop_filter16_c;
    dsp->vp8_h_loop_filter16       = vp8_h_loop_filter16_c;
    dsp->vp8_v_loop_filter8        = vp8_v_loop_filter8_c;
    dsp->vp8_h_loop_filter8        = vp8_h_loop_filter8_c;

    /* normal loop filter, inner variants */
    dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c;
    dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c;
    dsp->vp8_v_loop_filter8_inner  = vp8_v_loop_filter8_inner_c;
    dsp->vp8_h_loop_filter8_inner  = vp8_h_loop_filter8_inner_c;

    /* simple loop filter */
    dsp->vp8_v_loop_filter_simple  = vp8_v_loop_filter_simple_c;
    dsp->vp8_h_loop_filter_simple  = vp8_h_loop_filter_simple_c;

    /* motion compensation tables, one per block width */
    VP8_MC_FUNC(0, 16);
    VP8_MC_FUNC(1, 8);
    VP8_MC_FUNC(2, 4);
}