Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp8.c @ 8dbe5856

History | View | Annotate | Download (60.6 KB)

1
/**
2
 * VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Jason Garrett-Glaser
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

    
25
#include "libavutil/imgutils.h"
26
#include "avcodec.h"
27
#include "vp8.h"
28
#include "vp8data.h"
29
#include "rectangle.h"
30

    
31
#if ARCH_ARM
32
#   include "arm/vp8.h"
33
#endif
34

    
35
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int frame;

    /* Hand any frames we still hold back to the codec, then forget them. */
    for (frame = 0; frame < 4; frame++) {
        if (s->frames[frame].data[0])
            avctx->release_buffer(avctx, &s->frames[frame]);
    }
    memset(s->framep, 0, sizeof(s->framep));

    /* Release every per-dimension scratch buffer; update_dimensions()
     * reallocates all of them on the next frame. */
    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    /* macroblocks aliases into macroblocks_base and is now stale. */
    s->macroblocks        = NULL;
}
55

    
56
static int update_dimensions(VP8Context *s, int width, int height)
{
    // Validate the new dimensions before touching any state.
    if (av_image_check_size(width, height, 0, s->avctx))
        return AVERROR_INVALIDDATA;

    // Drops all held frames and frees every per-dimension buffer.
    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    // Macroblock counts, rounded up to whole 16x16 blocks.
    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    // macroblocks_base gets extra entries beyond one row; together with the
    // "+1" offset below this gives neighbour lookups valid storage.
    // NOTE(review): exact layout inferred from the +1 offset — confirm
    // against the MB addressing in the decode loop.
    s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
    s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);

    // On partial allocation failure the survivors are freed by the next
    // vp8_decode_flush() call.
    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}
83

    
84
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int idx;

    /* The segmentation-map update flag is coded first (spec 9.3). */
    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { /* segment feature data present */
        s->segmentation.absolute_vals = vp8_rac_get(c);

        /* Per-segment quantizer deltas (7 bit), then loop-filter
         * deltas (6 bit), in coded order. */
        for (idx = 0; idx < 4; idx++)
            s->segmentation.base_quant[idx]   = vp8_rac_get_sint(c, 7);

        for (idx = 0; idx < 4; idx++)
            s->segmentation.filter_level[idx] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map) {
        /* Tree probabilities for the per-MB segment id. */
        for (idx = 0; idx < 3; idx++)
            s->prob->segmentid[idx] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
    }
}
104

    
105
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int n;

    /* Loop-filter adjustments per reference frame... */
    for (n = 0; n < 4; n++) {
        s->lf_delta.ref[n]  = vp8_rac_get_sint(c, 6);
    }

    /* ...then per prediction mode, in coded order. */
    for (n = MODE_I4x4; n <= VP8_MVMODE_SPLIT; n++) {
        s->lf_delta.mode[n] = vp8_rac_get_sint(c, 6);
    }
}
116

    
117
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_tab = buf;   /* 3-byte little-endian size entries */
    int p;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* Skip past the size table; the last partition carries no explicit
     * size and takes whatever data remains. */
    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (p = 0; p < s->num_coeff_partitions - 1; p++) {
        int part_size = AV_RL24(size_tab + 3 * p);
        if (buf_size - part_size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[p], buf, part_size);
        buf      += part_size;
        buf_size -= part_size;
    }
    /* Final partition: everything left in the buffer. */
    ff_vp56_init_range_decoder(&s->coeff_partition[p], buf, buf_size);

    return 0;
}
142

    
143
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg, base_qi;

    /* Frame-level base quantizer index plus five plane deltas, in the
     * order the bitstream codes them. */
    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        /* Per-segment index is absolute or relative to the frame base. */
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else {
            base_qi = yac_qi;
        }

        s->qmat[seg].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[seg].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[seg].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[seg].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[seg].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[seg].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        /* Floor for the second-order luma DC multiplier, ceiling for the
         * chroma DC multiplier. */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}
174

    
175
/**
176
 * Determine which buffers golden and altref should be updated with after this frame.
177
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
178
 *
179
 * Intra frames update all 3 references
180
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
181
 * If the update (golden|altref) flag is set, it's updated with the current frame
182
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
183
 * If the flag is not set, the number read means:
184
 *      0: no update
185
 *      1: VP56_FRAME_PREVIOUS
186
 *      2: update golden with altref, or update altref with golden
187
 */
188
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;
    int copy_mode;

    /* An explicit update flag always means "use the current frame". */
    if (update)
        return VP56_FRAME_CURRENT;

    /* Otherwise a 2-bit code selects the copy source. */
    copy_mode = vp8_rac_get_uint(c, 2);
    if (copy_mode == 1)
        return VP56_FRAME_PREVIOUS;
    if (copy_mode == 2)
        return ref == VP56_FRAME_GOLDEN ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    return VP56_FRAME_NONE;
}
203

    
204
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int copy_golden, copy_altref;

    /* Both flags are read before either copy code; the coded order
     * matters, so keep the reads separate from the calls below. */
    copy_golden = vp8_rac_get(c);
    copy_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, copy_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, copy_altref, VP56_FRAME_GOLDEN2);
}
214

    
215
/**
 * Parse the VP8 uncompressed data chunk and first-partition header (spec 9).
 *
 * On success the range coder s->c is positioned at the start of the
 * per-macroblock data and the coefficient partitions are initialized.
 *
 * @param buf      start of the frame data (uncompressed chunk first)
 * @param buf_size number of bytes available at buf
 * @return 0 on success, a negative AVERROR code on invalid data
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* 3-byte frame tag: keyframe flag, profile, show_frame, and the size
     * of the first (header) partition. */
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        /* Keyframes carry a start code, 14-bit dimensions and 2-bit
         * upscaling factors (spec 9.1). */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* Keyframes reset all probabilities to defaults and update every
         * reference frame. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        /* Assign first, THEN compare: the previous form
         * "(ret = update_dimensions(...) < 0)" stored the boolean result of
         * the comparison in ret, so a failed reallocation returned 1 instead
         * of a negative error code and was ignored by callers. */
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* Token probability updates (spec 13.4): each flagged probability is
     * fanned out to every coefficient position in its band. */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
351

    
352
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    /* Clamp a motion vector into the valid range the context tracks for
     * the current macroblock position. */
    int clamped_x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    int clamped_y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
    dst->x = clamped_x;
    dst->y = clamped_y;
}
357

    
358
/**
359
 * Motion vector coding, 17.1.
360
 */
361
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        // "large" magnitude: coded as individual bits
        int i;

        // low three bits, least-significant first
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // upper bits, most-significant first — this read order is part of
        // the bitstream format, do not merge the two loops
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        // bit 3 is implicit (1) unless no higher bit is set, in which case
        // it is coded explicitly
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    // sign bit only exists for nonzero magnitudes
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
388

    
389
static av_always_inline
390
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
391
{
392
    if (left == top)
393
        return vp8_submv_prob[4-!!left];
394
    if (!top)
395
        return vp8_submv_prob[2];
396
    return vp8_submv_prob[1-!!left];
397
}
398

    
399
/**
400
 * Split motion vector prediction, 16.4.
401
 * @returns the number of motion vectors parsed (2, 4 or 16)
402
 */
403
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
{
    int part_idx;
    int n, num;
    // mb[2] / mb[-1] address the MB above / to the left.
    // NOTE(review): offsets assumed to match the mb_edge[] layout used in
    // decode_mvs — confirm against the caller's MB array stride.
    VP8Macroblock *top_mb  = &mb[2];
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv  = top_mb->bmv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    // Read the partitioning mode from its small probability tree.
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    // One MV per partition, predicted from the left/above 4x4 neighbours.
    for (n = 0; n < num; n++) {
        int k = firstidx[n];  // first 4x4 block of this partition
        uint32_t left, above;
        const uint8_t *submv_prob;

        // left neighbour: from the left MB for column 0, else this MB
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        // above neighbour: from the top MB for row 0, else this MB
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    // explicit residual on top of the MB-level MV (y, then x)
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    // zero sub-MV
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                // copy the above neighbour
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            // copy the left neighbour
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
466

    
467
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    // Neighbour MBs addressed relative to mb (top, left, top-left).
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);

    /* Process MB on top, left and top-left */
    // Tally each inter-coded neighbour's MV into near_mv[]/cnt[].  MVs from
    // a reference with the opposite sign bias are negated (SWAR negation of
    // both packed 16-bit components at once).  Top and left neighbours
    // count double; the top-left counts once.
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                // cnt[CNT_SPLITMV] is re-used as the split-mode context:
                // how many neighbours are split (top/left weigh double).
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    // the MB-level MV becomes the last sub-MV decoded
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    // explicit residual on top of the best MV (y, then x)
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                // use the "near" MV
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            // use the "nearest" MV
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
555

    
556
static av_always_inline
557
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
558
                           int mb_x, int keyframe)
559
{
560
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
561
    if (keyframe) {
562
        int x, y;
563
        uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
564
        uint8_t* const left = s->intra4x4_pred_mode_left;
565
        for (y = 0; y < 4; y++) {
566
            for (x = 0; x < 4; x++) {
567
                const uint8_t *ctx;
568
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
569
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
570
                left[y] = top[x] = *intra4x4;
571
                intra4x4++;
572
            }
573
        }
574
    } else {
575
        int i;
576
        for (i = 0; i < 16; i++)
577
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
578
    }
579
}
580

    
581
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
{
    VP56RangeCoder *c = &s->c;

    // Segment id is only coded when the map is being updated; otherwise the
    // value passed in *segment is kept.
    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        // Keyframe MBs are always intra and use the fixed keyframe trees.
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            // Whole-MB prediction: replicate the implied 4x4 mode into the
            // top/left contexts used by neighbouring I4x4 MBs.
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        // Reset MV-related fields so neighbour prediction in later MBs
        // sees consistent state for this intra MB.
        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
629

    
630
#ifndef decode_block_coeffs_internal
631
/**
632
 * @param c arithmetic bitstream reader context
633
 * @param block destination for block coefficients
634
 * @param probs probabilities to use when reading trees from the bitstream
635
 * @param i initial coeff index, 0 unless a separate DC block is coded
636
 * @param zero_nhood the initial prediction context for number of surrounding
637
 *                   all-zero blocks (only left/top, so 0-2)
638
 * @param qmul array holding the dc/ac dequant factor at position 0/1
639
 * @return 0 if no coeffs were decoded
640
 *         otherwise, the index of the last coeff decoded plus one
641
 */
642
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    // The caller has already consumed the first EOB check, so jump straight
    // to the DCT_0 test for coefficient i.
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
            return i;

skip_eob:
        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
            if (++i == 16)
                return i; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];   // context 0: previous token was zero
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1]; // context 1: previous token was +-1
        } else {
            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(c, token_prob[8]);
                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
                    int cat = (a<<1) + b;       // category index 0..3 -> CAT3..CAT6
                    coeff  = 3 + (8<<cat);      // category base value
                    coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];  // context 2: previous token was >1
        }
        // Sign bit, then dequantize (DC factor at index 0, AC otherwise)
        // and store in zigzag order.
        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    return i;
}
694
#endif
695

    
696
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    // Fast path: consume the first EOB probability here so the common
    // all-zero block never enters the full decode loop.
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
706

    
707
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    // Modes other than I4x4/SPLIT code a separate second-order luma DC
    // block (Y2); when present, the luma blocks start at coefficient 1 and
    // use the "without DC" token context.
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            // a single nonzero coefficient allows a cheaper DC-only WHT
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
767

    
768
static av_always_inline
769
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
770
                      int linesize, int uvlinesize, int simple)
771
{
772
    AV_COPY128(top_border, src_y + 15*linesize);
773
    if (!simple) {
774
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
775
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
776
    }
777
}
778

    
779
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    // Swap (xchg=1) or copy the saved top-border pixels with/into the row
    // above the current macroblock, so prediction sees pre-filter data.
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    // top-right pixels only exist when not in the last MB column
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}
809

    
810
static av_always_inline
811
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
812
{
813
    if (!mb_x) {
814
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
815
    } else {
816
        return mb_y ? mode : LEFT_DC_PRED8x8;
817
    }
818
}
819

    
820
static av_always_inline
821
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
822
{
823
    if (!mb_x) {
824
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
825
    } else {
826
        return mb_y ? mode : HOR_PRED8x8;
827
    }
828
}
829

    
830
static av_always_inline
831
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
832
{
833
    if (mode == DC_PRED8x8) {
834
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
835
    } else {
836
        return mode;
837
    }
838
}
839

    
840
static av_always_inline
841
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
842
{
843
    switch (mode) {
844
    case DC_PRED8x8:
845
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
846
    case VERT_PRED8x8:
847
        return !mb_y ? DC_127_PRED8x8 : mode;
848
    case HOR_PRED8x8:
849
        return !mb_x ? DC_129_PRED8x8 : mode;
850
    case PLANE_PRED8x8 /*TM*/:
851
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
852
    }
853
    return mode;
854
}
855

    
856
static av_always_inline
857
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
858
{
859
    if (!mb_x) {
860
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
861
    } else {
862
        return mb_y ? mode : HOR_VP8_PRED;
863
    }
864
}
865

    
866
static av_always_inline
867
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
868
{
869
    switch (mode) {
870
    case VERT_PRED:
871
        if (!mb_x && mb_y) {
872
            *copy_buf = 1;
873
            return mode;
874
        }
875
        /* fall-through */
876
    case DIAG_DOWN_LEFT_PRED:
877
    case VERT_LEFT_PRED:
878
        return !mb_y ? DC_127_PRED : mode;
879
    case HOR_PRED:
880
        if (!mb_y) {
881
            *copy_buf = 1;
882
            return mode;
883
        }
884
        /* fall-through */
885
    case HOR_UP_PRED:
886
        return !mb_x ? DC_129_PRED : mode;
887
    case TM_VP8_PRED:
888
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
889
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
890
    case DIAG_DOWN_RIGHT_PRED:
891
    case VERT_RIGHT_PRED:
892
    case HOR_DOWN_PRED:
893
        if (!mb_y || !mb_x)
894
            *copy_buf = 1;
895
        return mode;
896
    }
897
    return mode;
898
}
899

    
900
/**
 * Perform intra prediction (and, for 4x4 luma blocks, the interleaved
 * IDCT+add) for one macroblock.
 *
 * With CODEC_FLAG_EMU_EDGE set the frame has no allocated border, so
 * prediction modes are remapped at the frame edges and some 4x4 modes run
 * on a small aligned stack copy of the neighborhood (copy_dst) instead of
 * the frame buffer itself.
 */
static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz, tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        // whole-macroblock (16x16) luma prediction
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        // per-4x4-block luma prediction with interleaved IDCT
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101;      // splat last pixel into 4 bytes
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // copy_dst: 8-byte-stride scratch; row -1 at [4..7],
                // top-left at [3], left column at [11,19,27,35]
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;         // no top row: fixed 127s
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // build the neighborhood in copy_dst and predict there
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            // frame top edge: synthesize row -1 as 127s
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            // frame left edge: synthesize left column as 129s
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    // copy the predicted 4x4 block back into the frame
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // add the residual right away so later blocks predict from
                // fully reconstructed neighbors
                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    // chroma prediction (both planes use the same mode)
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // restore the swapped border rows (xchg=0)
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1020

    
1021
// Sub-pel MC parameters, indexed by the 1/8-pel phase (0..7) of the MV:
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1027

    
1028
/**
 * Generic MC function.
 *
 * @param s VP8 decoding context
 * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes
 * @param dst target buffer for block data at block position
 * @param src reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {    // any nonzero MV component -> maybe sub-pel

        // luma MVs are in 1/4-pel units; <<1 gives the 1/8-pel phase used
        // to index subpel_idx and the MC function table
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;   // integer-pel part of the MV
        y_off += mv->y >> 2;

        // edge emulation
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // the filter taps would read outside the plane: run MC on an
            // edge-extended copy instead
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else
        // zero MV: plain block copy
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
}
1072

    
1073
/**
 * Chroma MC: applies the same motion vector to both chroma planes.
 * See vp8_mc_luma() for parameter documentation; src1/dst1 are one chroma
 * plane, src2/dst2 the other.  Chroma MVs are already in 1/8-pel units.
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
                   uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];   // 1/8-pel phase
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;   // integer-pel part of the MV
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // both planes go through the same (shared) edge buffer, so the
            // first plane's MC must finish before the second emulation
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        // zero MV: plain block copies
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1111

    
1112
static av_always_inline
1113
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
1114
                 AVFrame *ref_frame, int x_off, int y_off,
1115
                 int bx_off, int by_off,
1116
                 int block_w, int block_h,
1117
                 int width, int height, VP56mv *mv)
1118
{
1119
    VP56mv uvmv = *mv;
1120

    
1121
    /* Y */
1122
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1123
                ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
1124
                block_w, block_h, width, height, s->linesize,
1125
                s->put_pixels_tab[block_w == 8]);
1126

    
1127
    /* U/V */
1128
    if (s->profile == 3) {
1129
        uvmv.x &= ~7;
1130
        uvmv.y &= ~7;
1131
    }
1132
    x_off   >>= 1; y_off   >>= 1;
1133
    bx_off  >>= 1; by_off  >>= 1;
1134
    width   >>= 1; height  >>= 1;
1135
    block_w >>= 1; block_h >>= 1;
1136
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1137
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
1138
                  ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
1139
                  block_w, block_h, width, height, s->uvlinesize,
1140
                  s->put_pixels_tab[1 + (block_w == 4)]);
1141
}
1142

    
1143
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;   // MV is 1/4-pel; integer part only
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        // NOTE(review): the (mb_x&3)*4 / (mb_x&7) terms appear to stagger
        // the prefetched rows across consecutive macroblocks, and +64
        // reaches ahead of the current position — presumably tuned for the
        // "4 macroblocks ahead" estimate above; confirm against the h264
        // prefetch_motion this was modeled on.
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        // chroma planes are prefetched together: src[2]-src[1] is the
        // Cb->Cr plane distance used as the stride
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1159

    
1160
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the macroblock's partitioning: a single MV for the whole
 * MB, 2 or 4 rectangular partitions, or 16 individual 4x4 block MVs (in
 * which case each chroma MV is the rounded average of 4 luma MVs).
 */
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        // one MV for the entire 16x16 macroblock
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref->data[0], &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                // each chroma block covers 4 luma blocks: sum their MVs
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                // round-to-nearest average; the sign-bit term keeps the
                // rounding symmetric for negative sums
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    // profile 3 restricts chroma MVs to full-pel
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4,
                              ref->data[1], ref->data[2], &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        // two horizontal 16x8 halves
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        // two vertical 8x16 halves
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        // four 8x8 quadrants
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1244

    
1245
/**
 * Add the dequantized residuals to the predicted macroblock.
 *
 * non_zero_count_cache packs one byte per 4x4 block; a row of 4 blocks is
 * read as a little-endian uint32.  nnz byte == 1 means only the DC
 * coefficient is nonzero (cheaper DC-only add), > 1 means a full IDCT.
 * Luma residuals for MODE_I4x4 were already added in intra_predict().
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block needs a full IDCT: walk the bytes
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;   // no more coded blocks in this row
                    }
                } else {
                    // every coded block in this row is DC-only
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                // mixed DC-only / full blocks: handle each 4x4 individually
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;   // rest of plane is zero
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                // all coded blocks in this plane are DC-only
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}
1296

    
1297
/**
 * Compute the loop-filter strength for one macroblock.
 *
 * Combines the (optionally segment-specific) base filter level with the
 * per-reference-frame and per-mode loop-filter deltas, clips the result to
 * [0, 63], then derives the interior (inner-edge) limit from the sharpness
 * setting.  The results are stored in *f for filter_mb()/filter_mb_simple().
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;   // segment value is a delta
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1).
 * If x is out of range, (-(x))>>31 is 0 for x < 0 and all-ones for x > max
 * (relies on arithmetic right shift of negative ints, which FFmpeg assumes
 * throughout), so ANDing with max yields 0 or max respectively.
 * Arguments fully parenthesized; no trailing semicolon (the call site
 * supplies its own). */
#define POW2CLIP(x,max) (((x) & ~(max)) ? (-(x))>>31 & (max) : (x))
    filter_level = POW2CLIP(filter_level, 63);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    // inner block edges are filtered unless the MB is skipped and predicted
    // as a single whole-MB unit
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1328

    
1329
/**
 * Apply the normal (full) loop filter to one macroblock: the left/top
 * macroblock edges with the stronger mbedge limit, then the three inner
 * block edges in each direction when f->inner_filter is set.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold, indexed by [keyframe][filter_level]
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;   // filtering disabled for this macroblock

     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;   // MB edges get a wider limit

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (not for the first column)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // vertical inner edges at x = 4, 8, 12 (luma) and x = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (not for the first row)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // horizontal inner edges at y = 4, 8, 12 (luma) and y = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1398

    
1399
/**
 * Apply the simple (luma-only) loop filter to one macroblock: the left/top
 * macroblock edges with the stronger limit, then the three inner block
 * edges in each direction when f->inner_filter is set.
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int i;
    int level  = f->filter_level;
    int stride = s->linesize;
    int bedge_lim, mbedge_lim;

    if (!level)
        return;   // filtering disabled for this macroblock

    bedge_lim  = 2*level + f->inner_limit;
    mbedge_lim = bedge_lim + 4;   // MB edges get a wider limit

    // left macroblock edge (not for the first column)
    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, mbedge_lim);
    // vertical inner edges at x = 4, 8, 12
    if (f->inner_filter)
        for (i = 4; i <= 12; i += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + i, stride, bedge_lim);

    // top macroblock edge (not for the first row)
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, mbedge_lim);
    // horizontal inner edges at y = 4, 8, 12
    if (f->inner_filter)
        for (i = 4; i <= 12; i += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + i*stride, stride, bedge_lim);
}
1429

    
1430
/**
 * Run the normal loop filter over one row of macroblocks, backing up each
 * macroblock's bottom edge (all planes) before it is filtered.
 */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    AVFrame *cur = s->framep[VP56_FRAME_CURRENT];
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *dst[3];
    int mb_x;

    dst[0] = cur->data[0] + 16*mb_y*s->linesize;
    dst[1] = cur->data[1] +  8*mb_y*s->uvlinesize;
    dst[2] = cur->data[2] +  8*mb_y*s->uvlinesize;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, &strength[mb_x], mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}
1448

    
1449
/**
 * Run the simple (luma-only) loop filter over one row of macroblocks,
 * backing up each macroblock's bottom luma edge before it is filtered.
 */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *y_dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, y_dst += 16) {
        backup_mb_border(s->top_border[mb_x+1], y_dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, y_dst, &strength[mb_x], mb_x, mb_y);
    }
}
1461

    
1462
/**
 * Decode one VP8 frame contained in avpkt.
 *
 * On success, returns avpkt->size; if the frame is visible, a shallow copy
 * of the current AVFrame is stored in *data and *data_size is set.
 * Returns a negative error code on header parse failure, get_buffer()
 * failure, or an interframe arriving before any keyframe.
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    // the frame is "referenced" if decoding it updates any reference slot
    // (last, golden or altref)
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    // least aggressive AVDiscard level at which this frame may be skipped
    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        goto skip_decode;   // still run the reference-slot bookkeeping below
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // pick a frame slot not currently serving as prev/golden/altref
    for (i = 0; i < 4; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (curframe->data[0])
        avctx->release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = avctx->get_buffer(avctx, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    // lazily allocated: depends on linesize, which is only known after get_buffer()
    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // motion-vector clamp margin, in quarter-pel units (16 px << 2)
    #define MARGIN (16 << 2)
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        // coefficient partitions are assigned to rows round-robin
        // (num_coeff_partitions is a power of two, so & works as modulo)
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        int mb_xy = mb_y*s->mb_width;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        memset(s->left_nnz, 0, sizeof(s->left_nnz));
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)   // 16 rows for luma, 8 for chroma
                    dst[i][y*curframe->linesize[i]-1] = 129;
            if (mb_y == 1) // top left edge is also 129
                s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* Prefetch the current frame, 4 MBs ahead */
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

            if (mb->mode <= MODE_I4x4)
                intra_predict(s, dst, mb, mb_x, mb_y);
            else
                inter_predict(s, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            if (!mb->skip) {
                idct_mb(s, dst, mb);
            } else {
                // skipped MB: clear the non-zero-count context so neighbours
                // decode their coefficients correctly
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset DC block predictors if they would exist if the mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            if (s->deblock_filter)
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        // filtering is deferred to the end of the row so prediction reads
        // unfiltered pixels within the row
        if (s->deblock_filter) {
            if (s->filter.simple)
                filter_mb_row_simple(s, mb_y);
            else
                filter_mb_row(s, mb_y);
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    // check if golden and altref are swapped
    if (s->update_altref == VP56_FRAME_GOLDEN &&
        s->update_golden == VP56_FRAME_GOLDEN2)
        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
    else {
        if (s->update_altref != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];

        if (s->update_golden != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    }

    if (s->update_last) // move cur->prev
        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];

    // release no longer referenced frames
    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            avctx->release_buffer(avctx, &s->frames[i]);

    if (!s->invisible) {
        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}
1653

    
1654
/**
 * One-time decoder initialization.
 * VP8 output is always 8-bit YUV 4:2:0; sets up the generic DSP helpers,
 * the shared H.264-style intra predictors and the VP8 DSP functions.
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx       = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}
1667

    
1668
/**
 * Free all decoder state. Teardown is identical to a flush, so the
 * flush path is reused: it releases the frame buffers and frees the
 * per-context allocations.
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);

    return 0;
}
1673

    
1674
AVCodec ff_vp8_decoder = {
1675
    "vp8",
1676
    AVMEDIA_TYPE_VIDEO,
1677
    CODEC_ID_VP8,
1678
    sizeof(VP8Context),
1679
    vp8_decode_init,
1680
    NULL,
1681
    vp8_decode_free,
1682
    vp8_decode_frame,
1683
    CODEC_CAP_DR1,
1684
    .flush = vp8_decode_flush,
1685
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1686
};