Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp8.c @ 1550f45a

History | View | Annotate | Download (63 KB)

1
/**
2
 * VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Jason Garrett-Glaser
7
 *
8
 * This file is part of Libav.
9
 *
10
 * Libav is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * Libav is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with Libav; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

    
25
#include "libavutil/imgutils.h"
26
#include "avcodec.h"
27
#include "vp8.h"
28
#include "vp8data.h"
29
#include "rectangle.h"
30
#include "thread.h"
31

    
32
#if ARCH_ARM
33
#   include "arm/vp8.h"
34
#endif
35

    
36
/**
 * Release all reference frames and free the per-context scratch buffers.
 * Frame-thread copies must not release the frames themselves, since they
 * are shared with the originating context.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!avctx->is_copy)
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
                ff_thread_release_buffer(avctx, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    // free the per-row/per-mb scratch arrays allocated in update_dimensions()
    av_freep(&s->macroblocks_base);
    av_freep(&s->segmentation_map);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);
    av_freep(&s->edge_emu_buffer);

    s->macroblocks = NULL;
}
58

    
59
/**
 * (Re)allocate the per-frame decoding buffers for the given dimensions,
 * flushing and resizing the codec context first if the size changed.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for a bad size or
 *         AVERROR(ENOMEM) on allocation failure
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;

    if (width != avctx->width || height != avctx->height) {
        if (av_image_check_size(width, height, 0, avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush(avctx);
        avcodec_set_dimensions(avctx, width, height);
    }

    s->mb_width  = (avctx->coded_width  + 15) / 16;
    s->mb_height = (avctx->coded_height + 15) / 16;

    // macroblocks_base is over-allocated with edge entries; s->macroblocks
    // below points one entry past the start
    s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                           sizeof(*s->macroblocks));
    s->filter_strength        = av_mallocz(s->mb_width * sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    s->top_nnz                = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border             = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->segmentation_map       = av_mallocz(s->mb_width * s->mb_height);

    // partial allocations are kept in the context and freed by the next flush
    if (!s->macroblocks_base || !s->filter_strength ||
        !s->intra4x4_pred_mode_top || !s->top_nnz ||
        !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
89

    
90
/**
 * Parse the segmentation header: per-segment quantizer and loop-filter
 * values plus the segment-id tree probabilities.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    // segment feature data update
    if (vp8_rac_get(c)) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }

    if (s->segmentation.update_map) {
        // 255 marks a probability that was not coded in the bitstream
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
    }
}
110

    
111
/**
 * Read the loop-filter delta values: one per reference frame, then one per
 * macroblock mode (MODE_I4x4 through VP8_MVMODE_SPLIT).
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6);

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
122

    
123
/**
 * Set up the token partitions: read the partition count from the range
 * coder, then initialize one range decoder per partition using the 24-bit
 * little-endian size table at the start of buf.
 *
 * @return 0 on success, -1 if the declared sizes exceed the available data
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, num_sized;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    // the last partition carries no explicit size: it gets the remainder
    num_sized = s->num_coeff_partitions - 1;
    buf      += 3 * num_sized;
    buf_size -= 3 * num_sized;
    if (buf_size < 0)
        return -1;

    for (i = 0; i < num_sized; i++) {
        int size = AV_RL24(sizes + 3 * i);

        if (size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
148

    
149
/**
 * Read the frame-level quantizer index and its per-plane deltas, then fill
 * the dequantization factors for all four segments.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    // base AC index followed by the five signed deltas, in bitstream order
    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        int base_qi = yac_qi;

        if (s->segmentation.enabled) {
            // per-segment value is either absolute or relative to yac_qi
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        }

        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta,  0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi,              0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        // extra limits applied to the derived factors
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
180

    
181
/**
182
 * Determine which buffers golden and altref should be updated with after this frame.
183
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
184
 *
185
 * Intra frames update all 3 references
186
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
187
 * If the update (golden|altref) flag is set, it's updated with the current frame
188
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
189
 * If the flag is not set, the number read means:
190
 *      0: no update
191
 *      1: VP56_FRAME_PREVIOUS
192
 *      2: update golden with altref, or update altref with golden
193
 */
194
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
195
{
196
    VP56RangeCoder *c = &s->c;
197

    
198
    if (update)
199
        return VP56_FRAME_CURRENT;
200

    
201
    switch (vp8_rac_get_uint(c, 2)) {
202
    case 1:
203
        return VP56_FRAME_PREVIOUS;
204
    case 2:
205
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
206
    }
207
    return VP56_FRAME_NONE;
208
}
209

    
210
/**
 * Read the golden and altref update flags and record which buffer each
 * reference should be refreshed from after this frame.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int update_golden, update_altref;

    // both flags must be read before either ref_to_update() call,
    // since ref_to_update() may itself consume bits
    update_golden = vp8_rac_get(c);
    update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
220

    
221
/**
 * Parse the VP8 uncompressed data chunk and the first-partition header,
 * setting up all per-frame decoder state (dimensions, segmentation, filter,
 * partitions, quantizers, reference updates and probability tables).
 *
 * @param buf      start of the frame data
 * @param buf_size number of bytes available in buf
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    // 3-byte uncompressed chunk: frame type, profile, show flag, header size
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        // keyframes carry a start code plus 14-bit width/height with scale bits
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        // keyframes reset all references and probability tables to defaults
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        // bug fix: the parenthesis was misplaced as
        // (ret = update_dimensions(...) < 0), which stored the comparison
        // result in ret and returned +1 instead of the AVERROR code
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // token probability updates; each coded prob is fanned out to all
    // coefficient positions belonging to the same band
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
357

    
358
/**
 * Clamp a motion vector to the per-macroblock limits held in the context.
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
}
363

    
364
/**
 * Motion vector coding, 17.1.
 *
 * Read one signed MV component (x or y) from the bitstream.
 * @param p probability table for this component; p[0] selects large vs
 *          small magnitude coding, p[1] is the sign probability
 * @return the decoded, signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        // large magnitude: bits 0-2 are read low-to-high, then bits 9-4
        // high-to-low, each with its own probability from p[9..]
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // bit 3: set unconditionally when no higher bit is set,
        // otherwise coded explicitly with p[12]
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree: walk the tree for magnitudes 0-7
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    // the sign bit is only coded for nonzero magnitudes
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
394

    
395
static av_always_inline
396
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
397
{
398
    if (left == top)
399
        return vp8_submv_prob[4-!!left];
400
    if (!top)
401
        return vp8_submv_prob[2];
402
    return vp8_submv_prob[1-!!left];
403
}
404

    
405
/**
406
 * Split motion vector prediction, 16.4.
407
 * @returns the number of motion vectors parsed (2, 4 or 16)
408
 */
409
static av_always_inline
410
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
411
{
412
    int part_idx;
413
    int n, num;
414
    VP8Macroblock *top_mb  = &mb[2];
415
    VP8Macroblock *left_mb = &mb[-1];
416
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
417
                  *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
418
                  *mbsplits_cur, *firstidx;
419
    VP56mv *top_mv  = top_mb->bmv;
420
    VP56mv *left_mv = left_mb->bmv;
421
    VP56mv *cur_mv  = mb->bmv;
422

    
423
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
424
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
425
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
426
        } else {
427
            part_idx = VP8_SPLITMVMODE_8x8;
428
        }
429
    } else {
430
        part_idx = VP8_SPLITMVMODE_4x4;
431
    }
432

    
433
    num = vp8_mbsplit_count[part_idx];
434
    mbsplits_cur = vp8_mbsplits[part_idx],
435
    firstidx = vp8_mbfirstidx[part_idx];
436
    mb->partitioning = part_idx;
437

    
438
    for (n = 0; n < num; n++) {
439
        int k = firstidx[n];
440
        uint32_t left, above;
441
        const uint8_t *submv_prob;
442

    
443
        if (!(k & 3))
444
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
445
        else
446
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
447
        if (k <= 3)
448
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
449
        else
450
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
451

    
452
        submv_prob = get_submv_prob(left, above);
453

    
454
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
455
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
456
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
457
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
458
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
459
                } else {
460
                    AV_ZERO32(&mb->bmv[n]);
461
                }
462
            } else {
463
                AV_WN32A(&mb->bmv[n], above);
464
            }
465
        } else {
466
            AV_WN32A(&mb->bmv[n], left);
467
        }
468
    }
469

    
470
    return num;
471
}
472

    
473
/**
 * Decode the MV mode and motion vector(s) for one inter macroblock.
 * Builds nearest/near candidates from the top, left and top-left
 * neighbours, then reads the mode (zero / nearest / near / new / split)
 * using counts of matching neighbour MVs as coding context.
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };     // weighted counts per candidate, used as context
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    // the top-left neighbour (n == 2) carries half the weight of the others
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                // cnt[CNT_SPLITMV] is reused here as the split-mode context,
                // built from which neighbours used split coding
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    // mb->mv holds the last sub-MV for prediction of the next MB
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    // new MV: deltas on top of the clamped best candidate
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
561

    
562
static av_always_inline
563
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
564
                           int mb_x, int keyframe)
565
{
566
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
567
    if (keyframe) {
568
        int x, y;
569
        uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
570
        uint8_t* const left = s->intra4x4_pred_mode_left;
571
        for (y = 0; y < 4; y++) {
572
            for (x = 0; x < 4; x++) {
573
                const uint8_t *ctx;
574
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
575
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
576
                left[y] = top[x] = *intra4x4;
577
                intra4x4++;
578
            }
579
        }
580
    } else {
581
        int i;
582
        for (i = 0; i < 16; i++)
583
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
584
    }
585
}
586

    
587
/**
 * Decode the per-macroblock header: segment id, skip flag, prediction
 * mode(s) and, for inter macroblocks, reference frame and motion vectors.
 *
 * @param segment in/out segment id; kept when the map is not being updated
 * @param ref     if non-NULL, segment id to reuse instead of *segment when
 *                the map is not updated (presumably the previous frame's
 *                map entry — TODO confirm against the caller)
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else
        *segment = ref ? *ref : *segment;
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            // whole-block intra mode: fill the 4x4 context rows with the
            // equivalent 4x4 mode replicated into all four bytes
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
637

    
638
#ifndef decode_block_coeffs_internal
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probability entry for coeff i, already selected by the
 *                   caller from the zero-neighbourhood context
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    // the caller already consumed the first EOB decision for index i
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
            return i;

skip_eob:
        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
            if (++i == 16)
                return i; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    // two bits select the category, then the residual bits
                    // of the magnitude follow with per-category probabilities
                    int a = vp56_rac_get_prob(c, token_prob[8]);
                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        // sign bit, then dequantize: qmul[0] for the DC coeff (i == 0),
        // qmul[1] for all AC coeffs
        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    return i;
}
#endif
703

    
704
static av_always_inline
705
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
706
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
707
                        int i, int zero_nhood, int16_t qmul[2])
708
{
709
    uint8_t *token_prob = probs[i][zero_nhood];
710
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
711
        return 0;
712
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
713
}
714

    
715
/**
 * Decode all DCT coefficients of one macroblock: the separate luma DC block
 * (for modes that code one), the 16 luma 4x4 blocks and the 2x4 chroma 4x4
 * blocks, while maintaining the top/left non-zero context arrays.
 *
 * @param t_nnz top non-zero context: [0..3] luma, [4..7] chroma, [8] luma DC
 * @param l_nnz left non-zero context, same layout as t_nnz
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    // modes other than I4x4/split code the luma DC coeffs in a separate
    // WHT-transformed block; the luma blocks then start at coeff 1
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            // single-coeff fast path vs. full inverse WHT
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
775

    
776
static av_always_inline
777
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
778
                      int linesize, int uvlinesize, int simple)
779
{
780
    AV_COPY128(top_border, src_y + 15*linesize);
781
    if (!simple) {
782
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
783
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
784
    }
785
}
786

    
787
/**
 * Swap (or copy, depending on xchg) the row of pixels above the current
 * macroblock between the frame planes and the top_border scratch buffer,
 * including the 8 pixels to the left for top-left prediction.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    // step up one row so src_* point at the pixels above the macroblock
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    // luma: the segments passed xchg=1 are always swapped
    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}
817

    
818
static av_always_inline
819
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
820
{
821
    if (!mb_x) {
822
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
823
    } else {
824
        return mb_y ? mode : LEFT_DC_PRED8x8;
825
    }
826
}
827

    
828
static av_always_inline
829
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
830
{
831
    if (!mb_x) {
832
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
833
    } else {
834
        return mb_y ? mode : HOR_PRED8x8;
835
    }
836
}
837

    
838
static av_always_inline
839
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
840
{
841
    if (mode == DC_PRED8x8) {
842
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
843
    } else {
844
        return mode;
845
    }
846
}
847

    
848
static av_always_inline
849
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
850
{
851
    switch (mode) {
852
    case DC_PRED8x8:
853
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
854
    case VERT_PRED8x8:
855
        return !mb_y ? DC_127_PRED8x8 : mode;
856
    case HOR_PRED8x8:
857
        return !mb_x ? DC_129_PRED8x8 : mode;
858
    case PLANE_PRED8x8 /*TM*/:
859
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
860
    }
861
    return mode;
862
}
863

    
864
static av_always_inline
865
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
866
{
867
    if (!mb_x) {
868
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
869
    } else {
870
        return mb_y ? mode : HOR_VP8_PRED;
871
    }
872
}
873

    
874
static av_always_inline
875
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
876
{
877
    switch (mode) {
878
    case VERT_PRED:
879
        if (!mb_x && mb_y) {
880
            *copy_buf = 1;
881
            return mode;
882
        }
883
        /* fall-through */
884
    case DIAG_DOWN_LEFT_PRED:
885
    case VERT_LEFT_PRED:
886
        return !mb_y ? DC_127_PRED : mode;
887
    case HOR_PRED:
888
        if (!mb_y) {
889
            *copy_buf = 1;
890
            return mode;
891
        }
892
        /* fall-through */
893
    case HOR_UP_PRED:
894
        return !mb_x ? DC_129_PRED : mode;
895
    case TM_VP8_PRED:
896
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
897
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
898
    case DIAG_DOWN_RIGHT_PRED:
899
    case VERT_RIGHT_PRED:
900
    case HOR_DOWN_PRED:
901
        if (!mb_y || !mb_x)
902
            *copy_buf = 1;
903
        return mode;
904
    }
905
    return mode;
906
}
907

    
908
/**
 * Run intra prediction for one macroblock: 16x16 whole-MB luma prediction,
 * or per-4x4 prediction with the IDCT+add applied immediately after each
 * subblock; then 8x8 chroma prediction for both planes.
 *
 * @param dst dst[0..2] point at this MB's position in the Y/Cb/Cr planes
 */
static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz, tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        // whole-MB 16x16 luma prediction
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // scratch buffer with 8-byte stride: row 0 holds the
                // top-left (index 3) and top (indices 4..7) border pixels,
                // the 4x4 block lives at offset 12, and the left border
                // occupies column 3 of each row (indices 11/19/27/35)
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // predict into the padded scratch buffer instead of
                        // the frame; borders filled with 127/129 as required
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    // copy the predicted 4x4 block back into the frame
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // add the residual right away so subsequent subblocks
                // predict from reconstructed pixels
                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    // chroma 8x8 prediction, same mode for both planes
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // restore the border pixels swapped out above
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1028

    
1029
/* Subpel MC lookup tables, indexed by the 3-bit fractional MV component. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1035

    
1036
/**
 * Generic MC function.
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture; its luma plane is read at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->data[0];

    if (AV_RN32A(mv)) {

        // luma MVs are quarter-pel (>>2); doubling the low bits maps them
        // onto the 8-phase subpel filter tables (even phases only)
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        // wait until the reference frame has decoded the rows we will read
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // source (plus filter margin) extends past the frame:
            // build a padded copy in the edge buffer first
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        // zero MV: plain copy of the co-located block
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1085

    
1086
/**
 * Chroma MC: same scheme as vp8_mc_luma() (see its parameter docs), but
 * reads both the Cb and Cr planes with a single motion vector.
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
                   const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

    if (AV_RN32A(mv)) {
        // chroma MVs are eighth-pel: low 3 bits select the filter phase
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        // wait until the reference frame has decoded the rows we will read
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // out-of-frame source: pad each plane through the edge buffer
            // (the buffer is reused, so filter Cb before building Cr)
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        // zero MV: plain copy of both co-located chroma blocks
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1128

    
1129
static av_always_inline
1130
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1131
                 AVFrame *ref_frame, int x_off, int y_off,
1132
                 int bx_off, int by_off,
1133
                 int block_w, int block_h,
1134
                 int width, int height, VP56mv *mv)
1135
{
1136
    VP56mv uvmv = *mv;
1137

    
1138
    /* Y */
1139
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1140
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1141
                block_w, block_h, width, height, s->linesize,
1142
                s->put_pixels_tab[block_w == 8]);
1143

    
1144
    /* U/V */
1145
    if (s->profile == 3) {
1146
        uvmv.x &= ~7;
1147
        uvmv.y &= ~7;
1148
    }
1149
    x_off   >>= 1; y_off   >>= 1;
1150
    bx_off  >>= 1; by_off  >>= 1;
1151
    width   >>= 1; height  >>= 1;
1152
    block_w >>= 1; block_h >>= 1;
1153
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1154
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1155
                  &uvmv, x_off + bx_off, y_off + by_off,
1156
                  block_w, block_h, width, height, s->uvlinesize,
1157
                  s->put_pixels_tab[1 + (block_w == 4)]);
1158
}
1159

    
1160
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        // full-pel MV of this MB, biased ahead of the current position
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        // (mb_x&3)*4 / (mb_x&7) stagger the prefetched rows across MBs
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        // Cb and Cr prefetched together: src[2]-src[1] is the plane gap
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1179

    
1180
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the macroblock partitioning mode: whole-MB, 4x4 split, or
 * one of the 16x8/8x16/8x8 splits, invoking the luma/chroma MC helpers for
 * each partition.
 */
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y: one MC call per 4x4 subblock, each with its own MV */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V: each chroma 4x4 covers a 2x2 group of luma subblocks;
         * its MV is the rounded average of those four luma MVs */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                // divide the sum by 4; the sign term keeps the rounding
                // symmetric for negative sums
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    // profile 3: fractional chroma MV bits are cleared
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1263

    
1264
/**
 * Add the inverse-transformed residual to the predicted macroblock.
 * Reads the per-block nonzero counts from non_zero_count_cache; each row of
 * four counts is loaded as one little-endian 32-bit word (one count per
 * byte) so whole rows can be tested and fast paths chosen at once.
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    // luma: for MODE_I4x4 the IDCT was already applied in intra_predict()
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block has >1 coefficient: per-block IDCT,
                    // consuming one count byte per iteration
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    // every count is 0 or 1: DC-only fast path for the row
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    // chroma: rows 4 (Cb) and 5 (Cr) of the nnz cache, 2x2 blocks each
    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;  // remaining counts all zero
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                // DC-only fast path for all four chroma blocks
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
1315

    
1316
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    /* Derive the loop-filter strength for one macroblock: base level from
     * segmentation (or the frame header), plus per-reference and per-mode
     * loop-filter deltas, then the sharpness-adjusted interior limit. */
    int level, ilimit;

    if (s->segmentation.enabled) {
        level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            level += s->filter.level;   /* segment value is a delta */
    } else {
        level = s->filter.level;
    }

    if (s->lf_delta.enabled)
        level += s->lf_delta.ref[mb->ref_frame] + s->lf_delta.mode[mb->mode];

    level = av_clip_uintp2(level, 6);   /* clamp to 0..63 */

    ilimit = level;
    if (s->filter.sharpness) {
        ilimit >>= (s->filter.sharpness + 3) >> 2;
        ilimit = FFMIN(ilimit, 9 - s->filter.sharpness);
    }
    ilimit = FFMAX(ilimit, 1);

    f->filter_level = level;
    f->inner_limit  = ilimit;
    /* inner (subblock) edges are filtered unless this is a skipped MB
     * predicted as a single whole-MB unit */
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1345

    
1346
/**
 * Apply the normal loop filter to one macroblock: the left and top MB
 * edges, then the three inner vertical/horizontal subblock edges, for both
 * luma and chroma.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold, indexed by [keyframe][filter_level]
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    // macroblock edges use a slightly looser limit than inner edges
     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (only if a left neighbour exists)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (only if a top neighbour exists)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1415

    
1416
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    /* Simple loop filter for one macroblock: luma only, no high-edge-
     * variance threshold. Filters the left/top MB edges plus the inner
     * subblock edges at offsets 4, 8 and 12. */
    int off;
    int bedge_lim, mbedge_lim;
    int filter_level = f->filter_level;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + f->inner_limit;
    mbedge_lim = bedge_lim + 4;     /* MB edges get a looser limit */

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter)
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, linesize, bedge_lim);

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter)
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off * linesize, linesize, bedge_lim);
}
1446

    
1447
static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
{
    /* Run the normal loop filter across one macroblock row, saving each
     * MB's bottom border first for use by the next row's intra prediction. */
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *y_dst  = curframe->data[0] + 16*mb_y*s->linesize;
    uint8_t *cb_dst = curframe->data[1] +  8*mb_y*s->uvlinesize;
    uint8_t *cr_dst = curframe->data[2] +  8*mb_y*s->uvlinesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, y_dst += 16, cb_dst += 8, cr_dst += 8) {
        uint8_t *dst[3] = { y_dst, cb_dst, cr_dst };
        backup_mb_border(s->top_border[mb_x+1], y_dst, cb_dst, cr_dst,
                         s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, strength++, mb_x, mb_y);
    }
}
1465

    
1466
static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
{
    /* Simple-filter counterpart of filter_mb_row(): luma only, so the
     * border backup skips chroma as well. */
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *y_dst = curframe->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, y_dst += 16) {
        backup_mb_border(s->top_border[mb_x+1], y_dst, NULL, NULL,
                         s->linesize, 0, 1);
        filter_mb_simple(s, y_dst, strength++, mb_x, mb_y);
    }
}
1478

    
1479
/**
 * Decode one VP8 frame from a packet.
 *
 * Flow: parse the frame header, pick/allocate a frame buffer, set up the
 * reference-frame pointers for the *next* frame (so frame-threaded decoding
 * can proceed), then decode and optionally deblock the frame row by row,
 * reporting per-row progress for consumer threads.
 *
 * @param avctx     codec context
 * @param data      output AVFrame (written only for visible frames)
 * @param data_size set to sizeof(AVFrame) when a frame is output, else untouched
 * @param avpkt     input packet containing one VP8 frame
 * @return number of bytes consumed (avpkt->size) on success, negative AVERROR
 *         on failure
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    /* A frame is "referenced" if any of the last/golden/altref slots will
     * point at it after this frame; unreferenced frames can be discarded
     * at a lower skip threshold. */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            ff_thread_release_buffer(avctx, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    /* With 4 frames at most in use (prev/last/golden/altref), one of the
     * 5 pool entries must always be free; hitting this is a logic error. */
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        ff_thread_release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    /* Piggyback the segmentation map on ref_index[0] so a future thread's
     * frame can read the previous frame's segment ids (see decode_mb_mode
     * call below). */
    curframe->ref_index[0] = s->segmentation_map;
    if ((ret = ff_thread_get_buffer(avctx, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    /* Publish next_framep[] — the reference set as it will look AFTER this
     * frame — before ff_thread_finish_setup() so the next decode thread can
     * start. s->framep[] itself is updated from next_framep at skip_decode. */
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    /* NOTE(review): av_malloc result is not checked; a failed allocation
     * here would be dereferenced later in the MC edge-emulation path. */
    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

/* Motion-vector clamp margin: 16 pixels in 1/4-pel units (16 << 2). */
#define MARGIN (16 << 2)
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* Coefficient partitions are assigned to rows round-robin;
         * num_coeff_partitions is a power of two, so the mask works. */
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        /* Two-row ring buffer of macroblock structs, walked backwards. */
        VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        int mb_xy = mb_y*s->mb_width;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        memset(s->left_nnz, 0, sizeof(s->left_nnz));
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)
                    dst[i][y*curframe->linesize[i]-1] = 129;
            if (mb_y == 1) // top left edge is also 129
                s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
        /* Reading the previous frame's segmentation map requires that row
         * of prev_frame to be decoded already (frame threading). */
        if (prev_frame && s->segmentation.enabled && s->segmentation.update_map)
            ff_thread_await_progress(prev_frame, mb_y, 0);

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* Prefetch the current frame, 4 MBs ahead */
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
                           prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

            if (mb->mode <= MODE_I4x4)
                intra_predict(s, dst, mb, mb_x, mb_y);
            else
                inter_predict(s, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            if (!mb->skip) {
                idct_mb(s, dst, mb);
            } else {
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset DC block predictors if they would exist if the mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            /* Filter strength is recorded now but the row is filtered only
             * after it is fully decoded (see filter_mb_row* below). */
            if (s->deblock_filter)
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
            /* Slide the MV clamp window one macroblock (64 = 16px in 1/4-pel). */
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        if (s->deblock_filter) {
            if (s->filter.simple)
                filter_mb_row_simple(s, curframe, mb_y);
            else
                filter_mb_row(s, curframe, mb_y);
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        /* Let consumer threads know this row (decoded + filtered) is ready. */
        ff_thread_report_progress(curframe, mb_y, 0);
    }

    ff_thread_report_progress(curframe, INT_MAX, 0);
skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    /* Commit the reference-frame rotation prepared above. */
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}
/**
 * One-time decoder initialization: link the contexts, fix the pixel
 * format, and set up the DSP function tables.
 *
 * @return 0 (cannot fail)
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *vp8 = avctx->priv_data;

    avctx->pix_fmt = PIX_FMT_YUV420P;
    vp8->avctx     = avctx;

    /* Generic DSP helpers, VP8-specific DSP, and the shared H.264-style
     * intra prediction functions (selected via CODEC_ID_VP8, 8-bit). */
    dsputil_init(&vp8->dsp, avctx);
    ff_vp8dsp_init(&vp8->vp8dsp);
    ff_h264_pred_init(&vp8->hpc, CODEC_ID_VP8, 8);

    return 0;
}
/**
 * Close the decoder. All per-stream state (frame buffers and scratch
 * arrays) is released by the flush routine, so teardown just delegates.
 *
 * @return 0 (cannot fail)
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);
    return 0;
}
/**
 * Initialize a per-thread copy of the decoder context (frame threading).
 * Only the back-pointer needs fixing here; the decoding state itself is
 * carried over by vp8_decode_update_thread_context().
 *
 * @return 0 (cannot fail)
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *vp8 = avctx->priv_data;

    vp8->avctx = avctx;
    return 0;
}
/**
 * Translate a frame pointer from the source thread's frame pool into the
 * corresponding slot of this thread's pool (same index), or NULL.
 * Expects VP8Context pointers named `s` (dest) and `s_src` (source) to be
 * in scope at the expansion site.
 *
 * Fix: parenthesize the argument and the whole expansion so the macro is
 * safe in any expression context (CERT PRE01-C / PRE02-C); the previous
 * bare ternary could bind incorrectly when the expansion is combined with
 * surrounding operators.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/**
 * Copy the inter-frame decoder state the next frame depends on from the
 * source thread's context into dst (frame threading).
 *
 * @return 0 (cannot fail)
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    /* Entropy and filter state carried across frames: the probability set
     * the next frame will read, segmentation, and loop-filter deltas. */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* Copy the frame pool, then retarget the four reference pointers
     * (current/previous/golden/altref) into this thread's own pool. */
    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}
AVCodec ff_vp8_decoder = {
1742
    "vp8",
1743
    AVMEDIA_TYPE_VIDEO,
1744
    CODEC_ID_VP8,
1745
    sizeof(VP8Context),
1746
    vp8_decode_init,
1747
    NULL,
1748
    vp8_decode_free,
1749
    vp8_decode_frame,
1750
    CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1751
    .flush = vp8_decode_flush,
1752
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1753
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1754
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
1755
};