Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp8.c @ 4ae3ee4a

History | View | Annotate | Download (65.6 KB)

1
/**
2
 * VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Jason Garrett-Glaser
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

    
25
#include "libavcore/imgutils.h"
26
#include "avcodec.h"
27
#include "vp56.h"
28
#include "vp8data.h"
29
#include "vp8dsp.h"
30
#include "h264pred.h"
31
#include "rectangle.h"
32

    
33
#if ARCH_ARM
34
#   include "arm/vp8.h"
35
#endif
36

    
37
/**
 * Precomputed per-macroblock loop filter parameters.
 */
typedef struct {
    uint8_t filter_level;   ///< loop filter strength for this macroblock
    uint8_t inner_limit;    ///< limit for edges inside the macroblock
    uint8_t inner_filter;   ///< presumably nonzero if inner (non-MB-edge) edges are filtered — confirm in the filter code
} VP8FilterStrength;
42

    
43
/**
 * Per-macroblock decode state shared between mode parsing, motion
 * compensation and the loop filter.
 */
typedef struct {
    uint8_t skip;           ///< mb_skip_coeff: no residual coded for this MB
    // todo: make it possible to check for at least (i4x4 or split_mv)
    // in one op. are others needed?
    uint8_t mode;           ///< intra prediction mode or inter MV mode
    uint8_t ref_frame;      ///< VP56_FRAME_* this MB predicts from
    uint8_t partitioning;   ///< VP8_SPLITMVMODE_* used when mode is split MV
    VP56mv mv;              ///< MV for the whole macroblock
    VP56mv bmv[16];         ///< per-subblock MVs (used by split MV / i4x4 neighbors)
} VP8Macroblock;
53

    
54
/**
 * Decoder context: all state persisting across frames plus per-frame
 * scratch buffers sized from the current dimensions.
 */
typedef struct {
    AVCodecContext *avctx;
    DSPContext dsp;
    VP8DSPContext vp8dsp;
    H264PredContext hpc;            ///< VP8 reuses H.264's intra predictors
    vp8_mc_func put_pixels_tab[3][3][3];
    AVFrame frames[4];              ///< frame storage pool
    AVFrame *framep[4];             ///< indexed by VP56_FRAME_* role
    uint8_t *edge_emu_buffer;       ///< scratch for MC that reads past picture edges
    VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
    int profile;

    int mb_width;   /* number of horizontal MB */
    int mb_height;  /* number of vertical MB */
    int linesize;
    int uvlinesize;

    int keyframe;
    int invisible;      ///< frame is decoded but not displayed
    int update_last;    ///< update VP56_FRAME_PREVIOUS with the current one
    int update_golden;  ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
    int update_altref;
    int deblock_filter;

    /**
     * If this flag is not set, all the probability updates
     * are discarded after this frame is decoded.
     */
    int update_probabilities;

    /**
     * All coefficients are contained in separate arith coding contexts.
     * There can be 1, 2, 4, or 8 of these after the header context.
     */
    int num_coeff_partitions;
    VP56RangeCoder coeff_partition[8];

    VP8Macroblock *macroblocks;         ///< points into macroblocks_base (offset by 1)
    VP8Macroblock *macroblocks_base;
    VP8FilterStrength *filter_strength; ///< one entry per MB column of the current row

    uint8_t *intra4x4_pred_mode_top;    ///< 4 subblock modes per MB column, row above
    uint8_t intra4x4_pred_mode_left[4]; ///< subblock modes of the MB to the left
    uint8_t *segmentation_map;          ///< one segment id per macroblock

    /**
     * Cache of the top row needed for intra prediction
     * 16 for luma, 8 for each chroma plane
     */
    uint8_t (*top_border)[16+8+8];

    /**
     * For coeff decode, we need to know whether the above block had non-zero
     * coefficients. This means for each macroblock, we need data for 4 luma
     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
     * per macroblock. We keep the last row in top_nnz.
     */
    uint8_t (*top_nnz)[9];
    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];

    /**
     * This is the index plus one of the last non-zero coeff
     * for each of the blocks in the current macroblock.
     * So, 0 -> no coeffs
     *     1 -> dc-only (special transform)
     *     2+-> full transform
     */
    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
    uint8_t intra4x4_pred_mode_mb[16];  ///< subblock modes of the current MB

    int chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
    int segment;             ///< segment of the current macroblock

    int mbskip_enabled;
    int sign_bias[4]; ///< one state [0, 1] per ref frame type
    int ref_count[3]; ///< number of MBs referencing each inter reference

    /**
     * Base parameters for segmentation, i.e. per-macroblock parameters.
     * These must be kept unchanged even if segmentation is not used for
     * a frame, since the values persist between interframes.
     */
    struct {
        int enabled;
        int absolute_vals;          ///< per-segment values are absolute, not deltas
        int update_map;
        int8_t base_quant[4];
        int8_t filter_level[4];     ///< base loop filter level
    } segmentation;

    /**
     * Macroblocks can have one of 4 different quants in a frame when
     * segmentation is enabled.
     * If segmentation is disabled, only the first segment's values are used.
     */
    struct {
        // [0] - DC qmul  [1] - AC qmul
        int16_t luma_qmul[2];
        int16_t luma_dc_qmul[2];    ///< luma dc-only block quant
        int16_t chroma_qmul[2];
    } qmat[4];

    struct {
        int simple;     ///< use the simple (luma-only) loop filter
        int level;
        int sharpness;
    } filter;

    struct {
        int enabled;    ///< whether each mb can have a different strength based on mode/ref

        /**
         * filter strength adjustment for the following macroblock modes:
         * [0-3] - i16x16 (always zero)
         * [4]   - i4x4
         * [5]   - zero mv
         * [6]   - inter modes except for zero or split mv
         * [7]   - split mv
         *  i16x16 modes never have any adjustment
         */
        int8_t mode[VP8_MVMODE_SPLIT+1];

        /**
         * filter strength adjustment for macroblocks that reference:
         * [0] - intra / VP56_FRAME_CURRENT
         * [1] - VP56_FRAME_PREVIOUS
         * [2] - VP56_FRAME_GOLDEN
         * [3] - altref / VP56_FRAME_GOLDEN2
         */
        int8_t ref[4];
    } lf_delta;

    /**
     * These are all of the updatable probabilities for binary decisions.
     * They are only implicitly reset on keyframes, making it quite likely
     * for an interframe to desync if a prior frame's header was corrupt
     * or missing outright!
     */
    struct {
        uint8_t segmentid[3];
        uint8_t mbskip;
        uint8_t intra;
        uint8_t last;
        uint8_t golden;
        uint8_t pred16x16[4];
        uint8_t pred8x8c[3];
        /* Padded to allow overreads */
        uint8_t token[4][17][3][NUM_DCT_TOKENS-1];
        uint8_t mvc[2][19];
    } prob[2];  ///< [0] = current frame, [1] = saved copy when updates are transient
} VP8Context;
207

    
208
/**
 * Release every reference frame and free all dimension-dependent scratch
 * buffers.  Used both as the codec's flush callback and before the buffers
 * are reallocated for new dimensions.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int frame_idx;

    /* Hand every frame buffer still held by the decoder back to the caller. */
    for (frame_idx = 0; frame_idx < 4; frame_idx++) {
        if (s->frames[frame_idx].data[0])
            avctx->release_buffer(avctx, &s->frames[frame_idx]);
    }
    memset(s->framep, 0, sizeof(s->framep));

    /* av_freep() frees and NULLs each pointer, so a later flush is a no-op. */
    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    s->macroblocks = NULL;
}
228

    
229
/**
 * (Re)allocate all dimension-dependent buffers for a new coded size.
 * Frees the old state first via vp8_decode_flush().
 *
 * @return 0 on success, AVERROR_INVALIDDATA for bad dimensions,
 *         AVERROR(ENOMEM) if any allocation fails
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    if (av_image_check_size(width, height, 0, s->avctx))
        return AVERROR_INVALIDDATA;

    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    // macroblock counts, rounding partial MBs up
    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    // one MB row plus left/top-left edge entries; +1 for the offset below
    s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
    s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    // skip the first entry so mb[-1] (the left neighbor) is always valid
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}
256

    
257
/**
 * Parse the segmentation part of the frame header: optional per-segment
 * quantizer/filter values and optional new segment-map probabilities.
 * Bitstream reads must stay in exactly this order.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        // quantizer value (or delta, if !absolute_vals) per segment
        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        // loop filter level (or delta) per segment
        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    // probabilities for decoding the per-MB segment id; 255 when not coded
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
277

    
278
/**
 * Read the loop-filter delta adjustments: one signed delta per reference
 * frame type, then one per prediction-mode class (see lf_delta docs).
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
289

    
290
/**
 * Initialize the 1, 2, 4 or 8 DCT coefficient partitions.
 * The byte sizes of all partitions except the last are stored as 24-bit LE
 * values directly after the header; the last partition takes whatever
 * bytes remain.
 *
 * @return 0 on success, -1 if the declared sizes exceed the available data
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    // skip past the partition-size table
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    // the final partition extends to the end of the packet
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
315

    
316
/**
 * Read the quantizer indices from the header and fill in the dequant
 * multipliers for all four segments.  The bitstream codes one base luma AC
 * index plus signed deltas for the other coefficient classes; reads must
 * stay in exactly this order.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;  // segment value is relative to the base index
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        // clamps required by the spec for the Y2 AC and chroma DC multipliers
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
347

    
348
/**
349
 * Determine which buffers golden and altref should be updated with after this frame.
350
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
351
 *
352
 * Intra frames update all 3 references
353
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
354
 * If the update (golden|altref) flag is set, it's updated with the current frame
355
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
356
 * If the flag is not set, the number read means:
357
 *      0: no update
358
 *      1: VP56_FRAME_PREVIOUS
359
 *      2: update golden with altref, or update altref with golden
360
 */
361
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
362
{
363
    VP56RangeCoder *c = &s->c;
364

    
365
    if (update)
366
        return VP56_FRAME_CURRENT;
367

    
368
    switch (vp8_rac_get_uint(c, 2)) {
369
    case 1:
370
        return VP56_FRAME_PREVIOUS;
371
    case 2:
372
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
373
    }
374
    return VP56_FRAME_NONE;
375
}
376

    
377
/**
 * Read the golden/altref refresh flags for an inter frame and resolve
 * which buffer each reference is updated from (see ref_to_update()).
 * The two flags must be read before ref_to_update() consumes the copy codes.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
387

    
388
/**
 * Parse the uncompressed and compressed parts of the VP8 frame header:
 * frame type/size, segmentation, loop filter, partitions, quantizers,
 * reference updates and all probability updates.
 *
 * @param buf      packet data, starting at the 3-byte frame tag
 * @param buf_size number of bytes available in buf
 * @return 0 on success, a negative AVERROR on invalid data
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    // 3-byte frame tag: keyframe flag, profile, show_frame, header size
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        // keyframes carry a start code plus 14-bit dimensions and scale bits
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        // keyframes implicitly refresh all references and reset probabilities
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        // Fixed operator precedence: the original read
        //   if ((ret = update_dimensions(...) < 0))
        // which assigned the *comparison result* to ret, so a failure
        // returned 1 instead of the negative error code.
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // 13.4: token (coefficient) probability updates; each updated value is
    // fanned out to every band index that maps to the same probability slot
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
524

    
525
static av_always_inline
526
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y)
527
{
528
#define MARGIN (16 << 2)
529
    dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
530
                     ((s->mb_width  - 1 - mb_x) << 6) + MARGIN);
531
    dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN),
532
                     ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
533
}
534

    
535
/**
 * Motion vector coding, 17.1.
 *
 * Decode one signed MV component (quarter-pel units).
 * @param p probability table for this component: p[0] selects long vs.
 *          short coding, p[1] is the sign, the rest code the magnitude
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        // "long" magnitude: bits 0..2 first, then bits 9 down to 4
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // bit 3 is implicit unless the value fits entirely in the low 4 bits
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree: magnitudes 0..7, tree probabilities start at p[2]
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    // sign bit is only coded for non-zero magnitudes
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
565

    
566
static av_always_inline
567
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
568
{
569
    if (left == top)
570
        return vp8_submv_prob[4-!!left];
571
    if (!top)
572
        return vp8_submv_prob[2];
573
    return vp8_submv_prob[1-!!left];
574
}
575

    
576
/**
577
 * Split motion vector prediction, 16.4.
578
 * @returns the number of motion vectors parsed (2, 4 or 16)
579
 */
580
static av_always_inline
581
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
582
{
583
    int part_idx;
584
    int n, num;
585
    VP8Macroblock *top_mb  = &mb[2];
586
    VP8Macroblock *left_mb = &mb[-1];
587
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
588
                  *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
589
                  *mbsplits_cur, *firstidx;
590
    VP56mv *top_mv  = top_mb->bmv;
591
    VP56mv *left_mv = left_mb->bmv;
592
    VP56mv *cur_mv  = mb->bmv;
593

    
594
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
595
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
596
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
597
        } else {
598
            part_idx = VP8_SPLITMVMODE_8x8;
599
        }
600
    } else {
601
        part_idx = VP8_SPLITMVMODE_4x4;
602
    }
603

    
604
    num = vp8_mbsplit_count[part_idx];
605
    mbsplits_cur = vp8_mbsplits[part_idx],
606
    firstidx = vp8_mbfirstidx[part_idx];
607
    mb->partitioning = part_idx;
608

    
609
    for (n = 0; n < num; n++) {
610
        int k = firstidx[n];
611
        uint32_t left, above;
612
        const uint8_t *submv_prob;
613

    
614
        if (!(k & 3))
615
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
616
        else
617
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
618
        if (k <= 3)
619
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
620
        else
621
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
622

    
623
        submv_prob = get_submv_prob(left, above);
624

    
625
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
626
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
627
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
628
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
629
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
630
                } else {
631
                    AV_ZERO32(&mb->bmv[n]);
632
                }
633
            } else {
634
                AV_WN32A(&mb->bmv[n], above);
635
            }
636
        } else {
637
            AV_WN32A(&mb->bmv[n], left);
638
        }
639
    }
640

    
641
    return num;
642
}
643

    
644
/**
 * Inter MB motion vector decoding, 16.2/16.3: build the near/nearest MV
 * candidates from the top, left and top-left neighbors, then decode the
 * MV mode (zero / nearest / near / new / split) and the MV itself.
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };         // weighted occurrence count per candidate
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                /* distinct MVs get their own slot; duplicates add weight */\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])], mb_x, mb_y);
                // reuse cnt[CNT_SPLITMV] as the split-mode context (how many
                // neighbors used split MV, top/left weighted double)
                cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    // new MV: coded as a delta from the best predictor
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR], mb_x, mb_y);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST], mb_x, mb_y);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
733

    
734
/**
 * Decode the 16 4x4 intra prediction sub-modes of an i4x4 macroblock.
 * On keyframes the mode probabilities are conditioned on the above and
 * left sub-modes; on inter frames a single fixed probability set is used
 * and no top/left context is maintained.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
                           int mb_x, int keyframe)
{
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
    if (keyframe) {
        int x, y;
        uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                // decoded mode becomes the context for the next row/column
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
758

    
759
/**
 * Decode the per-macroblock header: segment id, skip flag, prediction
 * mode, reference frame, and (for inter MBs) the motion vectors.
 *
 * @param segment in/out: the MB's segment id; only overwritten when the
 *                segment map is being updated this frame
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        // keyframes use fixed intra mode probabilities
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            // replicate the 16x16 mode into the 4x4 context rows for neighbors
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
807

    
808
#ifndef decode_block_coeffs_internal
/**
 * Decode the DCT coefficient tokens of one 4x4 block (continues after the
 * caller has already ruled out an immediate EOB for the first token).
 *
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probability row for the first token, pre-selected by the
 *                   caller from the surrounding all-zero-block context
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
                                        uint8_t probs[8][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    // Jump past the EOB test on entry: decode_block_coeffs() has already
    // consumed and rejected the EOB probability for the first token.
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
            return i;

skip_eob:
        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
            if (++i == 16)
                return i; // invalid input; blocks should end with EOB
            // after a zero, the next token uses context 0 (no EOB possible)
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1]; // context 1: previous coeff was one
        } else {
            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*: value ranges encoded as a category plus extra bits
                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(c, token_prob[8]);
                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat); // category base value
                    coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2]; // context 2: previous coeff was > 1
        }
        // sign bit, then dequantize: qmul[0] for the DC coeff, qmul[1] for AC
        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    return i;
}
#endif
873

    
874
static av_always_inline
875
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
876
                        uint8_t probs[8][3][NUM_DCT_TOKENS-1],
877
                        int i, int zero_nhood, int16_t qmul[2])
878
{
879
    uint8_t *token_prob = probs[i][zero_nhood];
880
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
881
        return 0;
882
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
883
}
884

    
885
/**
 * Decode all DCT coefficients of one macroblock and update the left/top
 * non-zero contexts used for the next blocks.
 *
 * @param t_nnz per-column "top block had coeffs" context (index 8 = luma DC)
 * @param l_nnz per-row "left block had coeffs" context (index 8 = luma DC)
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    // modes other than i4x4/split code luma DC in a separate WHT block
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        // luma AC blocks then start at coeff 1 and use token plane 0
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
945

    
946
static av_always_inline
947
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
948
                      int linesize, int uvlinesize, int simple)
949
{
950
    AV_COPY128(top_border, src_y + 15*linesize);
951
    if (!simple) {
952
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
953
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
954
    }
955
}
956

    
957
/**
 * Exchange (or copy) the row above the current macroblock with the saved
 * top-border buffer. Called with xchg=1 before intra prediction and xchg=0
 * after, so prediction sees the stored (pre-filter) neighbour pixels.
 *
 * @param top_border 32-byte border entry for this macroblock column
 * @param simple nonzero for the simple loop filter (luma-only chroma handling)
 * @param xchg 1 to swap border and picture rows, 0 to copy picture -> border
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    // step up to the row directly above the macroblock
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    // the right half and the top-right neighbour are always swapped:
    // they belong to macroblocks that have not been filtered yet
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}
987

    
988
static av_always_inline
989
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
990
{
991
    if (!mb_x) {
992
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
993
    } else {
994
        return mb_y ? mode : LEFT_DC_PRED8x8;
995
    }
996
}
997

    
998
static av_always_inline
999
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
1000
{
1001
    if (!mb_x) {
1002
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
1003
    } else {
1004
        return mb_y ? mode : HOR_PRED8x8;
1005
    }
1006
}
1007

    
1008
static av_always_inline
1009
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
1010
{
1011
    if (mode == DC_PRED8x8) {
1012
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1013
    } else {
1014
        return mode;
1015
    }
1016
}
1017

    
1018
static av_always_inline
1019
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
1020
{
1021
    switch (mode) {
1022
    case DC_PRED8x8:
1023
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1024
    case VERT_PRED8x8:
1025
        return !mb_y ? DC_127_PRED8x8 : mode;
1026
    case HOR_PRED8x8:
1027
        return !mb_x ? DC_129_PRED8x8 : mode;
1028
    case PLANE_PRED8x8 /*TM*/:
1029
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
1030
    }
1031
    return mode;
1032
}
1033

    
1034
static av_always_inline
1035
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
1036
{
1037
    if (!mb_x) {
1038
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
1039
    } else {
1040
        return mb_y ? mode : HOR_VP8_PRED;
1041
    }
1042
}
1043

    
1044
static av_always_inline
1045
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
1046
{
1047
    switch (mode) {
1048
    case VERT_PRED:
1049
        if (!mb_x && mb_y) {
1050
            *copy_buf = 1;
1051
            return mode;
1052
        }
1053
        /* fall-through */
1054
    case DIAG_DOWN_LEFT_PRED:
1055
    case VERT_LEFT_PRED:
1056
        return !mb_y ? DC_127_PRED : mode;
1057
    case HOR_PRED:
1058
        if (!mb_y) {
1059
            *copy_buf = 1;
1060
            return mode;
1061
        }
1062
        /* fall-through */
1063
    case HOR_UP_PRED:
1064
        return !mb_x ? DC_129_PRED : mode;
1065
    case TM_VP8_PRED:
1066
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
1067
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1068
    case DIAG_DOWN_RIGHT_PRED:
1069
    case VERT_RIGHT_PRED:
1070
    case HOR_DOWN_PRED:
1071
        if (!mb_y || !mb_x)
1072
            *copy_buf = 1;
1073
        return mode;
1074
    }
1075
    return mode;
1076
}
1077

    
1078
/**
 * Perform intra prediction for one macroblock (16x16 luma or 16 4x4 luma
 * blocks, plus 8x8 chroma), applying the IDCT for i4x4 blocks as it goes.
 *
 * @param dst Y/Cb/Cr plane pointers at the macroblock position
 */
static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz, tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        // whole-macroblock 16x16 prediction
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // 8-byte-pitch scratch block: row 0 holds the top edge,
                // column 3 of each row holds the left edge
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // predict into the scratch buffer with hand-built edges
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            // no top row: fixed 127 edge
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U; // missing top-left pixel
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            // no left column: fixed 129 edge
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    // copy the predicted 4x4 block back into the picture
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // add the residual right away so later blocks predict from
                // fully reconstructed neighbours
                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    // chroma: a single 8x8 prediction per plane
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // restore the rows swapped out before prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1198

    
1199
/* Per-subpel-phase lookup tables (indexed by the low 3 bits of the scaled
 * motion vector component) used by the MC functions below for filter
 * selection and edge-emulation bounds. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1205

    
1206
/**
 * Luma MC function.
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param src reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {
        // luma MVs are in quarter-pel; <<1 gives the eighth-pel phase
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else
        // zero MV: plain copy, no subpel filter needed
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
}
1250

    
1251
/**
 * Chroma MC function: same contract as vp8_mc_luma(), but operates on both
 * chroma planes at once (they share the motion vector and geometry).
 *
 * @param dst1,dst2 target buffers for Cb/Cr block data at block position
 * @param src1,src2 Cb/Cr reference picture buffers at origin (0, 0)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
                   uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {
        // chroma MVs are in eighth-pel; the low 3 bits are the subpel phase
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // both planes share edge_emu_buffer, so the first plane must be
            // fully filtered before the buffer is refilled for the second
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        // zero MV: plain copy for both planes
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1289

    
1290
/**
 * Motion-compensate one rectangular partition of a macroblock: luma first,
 * then both chroma planes at half resolution with a derived chroma MV.
 *
 * @param bx_off,by_off partition offset within the macroblock, in luma pixels
 * @param block_w,block_h partition size in luma pixels
 * @param mv luma motion vector for this partition
 */
static av_always_inline
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
                ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        // profile 3 disables chroma subpel filtering: round MV to full pel
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    // chroma planes are half-size in both dimensions
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
                  ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
1320

    
1321
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        // estimated position: current MV applied 8 luma pixels to the right
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        // (mb_x&3)*4 / (mb_x&7) stagger the prefetch rows across columns
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        // src[2]-src[1] as stride prefetches both chroma planes in one call
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1337

    
1338
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        // single MV for the whole macroblock
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y: one MV per 4x4 block */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref->data[0], &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V: each chroma 4x4 covers four luma blocks; average their MVs */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                // round-to-nearest divide by 4, biased away from zero for
                // negative sums via the sign bit (x >> (INT_BIT-1))
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    // profile 3: full-pel chroma MVs only
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4,
                              ref->data[1], ref->data[2], &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1422

    
1423
/**
 * Add the inverse-transformed residual of one macroblock to the prediction.
 * Reads non_zero_count_cache 4 blocks at a time as a packed uint32 so whole
 * rows of DC-only (or empty) blocks can take fast paths.
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    // i4x4 luma residuals were already added during intra_predict()
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            // one byte of nnz per block, 4 blocks per row
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block has AC coefficients: per-block IDCT
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break; // remaining blocks in the row are empty
                    }
                } else {
                    // every block is DC-only (or empty): batched DC add
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                // all four chroma blocks DC-only: batched DC add
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
    }
}
1473

    
1474
/**
 * Compute the loop-filter strength parameters for one macroblock
 * (base level per segment, ref/mode deltas, sharpness-adjusted inner limit)
 * and store them in *f.
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1).
 * Fixed macro hygiene: arguments parenthesized (the old '~max' would
 * mis-expand for a non-trivial max expression) and the stray trailing
 * semicolon removed from the expansion. */
#define POW2CLIP(x,max) (((x) & ~(max)) ? (-(x))>>31 & (max) : (x))
    filter_level = POW2CLIP(filter_level, 63);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    // the inner limit must never be zero, or the filter has no effect
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    // inner edges are filtered unless the MB is skipped and has no
    // subblock structure (i4x4 / split-MV MBs always have inner edges)
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1505

    
1506
/**
 * Run the normal loop filter on one macroblock: macroblock edges (left/top)
 * with the stronger limit, then the three inner edges in each direction.
 *
 * @param f precomputed per-MB strength from filter_level_for_mb()
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold, indexed by [keyframe][filter_level]
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    // level 0 disables the filter for this macroblock
    if (!filter_level)
        return;

     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4; // macroblock edges use a stronger limit

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (not on the leftmost column)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // vertical inner edges at x = 4, 8, 12 (luma) and x = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (not on the top row)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // horizontal inner edges at y = 4, 8, 12 (luma) and y = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1575

    
1576
/**
 * Apply the VP8 "simple" loop filter to one luma macroblock.
 * Filters the left/top macroblock edges (when not on the frame border)
 * and, when inner_filter is set, the three interior 4-pixel block edges
 * in each direction.
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int level  = f->filter_level;
    int stride = s->linesize;
    int blim, mblim, off;

    /* a zero filter level disables deblocking for this macroblock */
    if (!level)
        return;

    blim  = 2*level + f->inner_limit; /* limit for interior block edges     */
    mblim = blim + 4;                 /* stronger limit for macroblock edge */

    /* vertical (left) edges; the frame's left column has no left neighbour */
    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, mblim);
    if (f->inner_filter)
        for (off = 4; off <= 12; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, stride, blim);

    /* horizontal (top) edges; the frame's top row has no upper neighbour */
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, mblim);
    if (f->inner_filter)
        for (off = 4; off <= 12; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off*stride, stride, blim);
}

/**
 * Run the normal (non-simple) loop filter over one row of macroblocks,
 * saving each macroblock's bottom border for intra prediction of the
 * row below before it gets filtered.
 */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8FilterStrength *strength = s->filter_strength;
    AVFrame *frame = s->framep[VP56_FRAME_CURRENT];
    uint8_t *y_ptr = frame->data[0] + 16*mb_y*s->linesize;
    uint8_t *u_ptr = frame->data[1] +  8*mb_y*s->uvlinesize;
    uint8_t *v_ptr = frame->data[2] +  8*mb_y*s->uvlinesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        uint8_t *dst[3] = { y_ptr, u_ptr, v_ptr };

        /* stash the unfiltered bottom edge first, then filter in place */
        backup_mb_border(s->top_border[mb_x+1], y_ptr, u_ptr, v_ptr, s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, strength++, mb_x, mb_y);

        y_ptr += 16;
        u_ptr += 8;
        v_ptr += 8;
    }
}

/**
 * Run the "simple" loop filter over one row of macroblocks.
 * The simple filter touches only the luma plane, so no chroma pointers
 * are maintained here.
 */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *y_ptr = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, y_ptr += 16) {
        /* stash the unfiltered bottom edge first, then filter in place */
        backup_mb_border(s->top_border[mb_x+1], y_ptr, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, y_ptr, strength++, mb_x, mb_y);
    }
}

/**
 * Decode one VP8 frame from @a avpkt.
 *
 * Parses the frame header, picks a spare AVFrame for the current frame,
 * decodes every macroblock row (mode/coefficients, prediction, IDCT,
 * loop filtering), then rotates the last/golden/altref reference frame
 * pointers according to the header's update flags.
 *
 * Returns avpkt->size on success or a negative error code; on success
 * *data_size is sizeof(AVFrame) when a displayable frame was produced.
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    /* the frame is "referenced" if any reference slot will be updated from it */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    /* find a free AVFrame: any of the 4 that is not currently a reference */
    for (i = 0; i < 4; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (curframe->data[0])
        avctx->release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = avctx->get_buffer(avctx, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    /* scratch row used by MC when a motion vector points outside the frame */
    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* coefficient partitions are assigned to rows round-robin */
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        int mb_xy = mb_y*s->mb_width;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        memset(s->left_nnz, 0, sizeof(s->left_nnz));
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)  // 16 luma rows, 8 per chroma plane
                    dst[i][y*curframe->linesize[i]-1] = 129;
            if (mb_y == 1) // top left edge is also 129
                s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* Prefetch the current frame, 4 MBs ahead */
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

            if (mb->mode <= MODE_I4x4)
                intra_predict(s, dst, mb, mb_x, mb_y);
            else
                inter_predict(s, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            if (!mb->skip) {
                idct_mb(s, dst, mb);
            } else {
                /* skipped MB: clear the non-zero-coefficient context instead */
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset DC block predictors if they would exist if the mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            /* filter strength is computed per MB now, applied per row below */
            if (s->deblock_filter)
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
        }
        if (s->deblock_filter) {
            if (s->filter.simple)
                filter_mb_row_simple(s, mb_y);
            else
                filter_mb_row(s, mb_y);
        }
    }

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    // check if golden and altref are swapped
    if (s->update_altref == VP56_FRAME_GOLDEN &&
        s->update_golden == VP56_FRAME_GOLDEN2)
        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
    else {
        if (s->update_altref != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];

        if (s->update_golden != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    }

    if (s->update_last) // move cur->prev
        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];

    // release no longer referenced frames
    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            avctx->release_buffer(avctx, &s->frames[i]);

    /* "invisible" frames update references but are never displayed */
    if (!s->invisible) {
        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}

/**
 * One-time decoder initialization: fix the output pixel format and set
 * up the DSP, intra-prediction and VP8-specific function tables.
 * Always succeeds.
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    /* VP8 output is always 4:2:0 planar */
    avctx->pix_fmt = PIX_FMT_YUV420P;
    s->avctx       = avctx;

    /* function pointer tables (C or platform-optimized versions) */
    dsputil_init(&s->dsp, avctx);
    ff_vp8dsp_init(&s->vp8dsp);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);

    return 0;
}

/**
 * Decoder teardown; all buffer/reference cleanup is shared with the
 * flush path, so simply delegate to it.
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);

    return 0;
}

AVCodec ff_vp8_decoder = {
1841
    "vp8",
1842
    AVMEDIA_TYPE_VIDEO,
1843
    CODEC_ID_VP8,
1844
    sizeof(VP8Context),
1845
    vp8_decode_init,
1846
    NULL,
1847
    vp8_decode_free,
1848
    vp8_decode_frame,
1849
    CODEC_CAP_DR1,
1850
    .flush = vp8_decode_flush,
1851
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1852
};