Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp8.c @ 8e624c1c

History | View | Annotate | Download (60.6 KB)

1
/**
2
 * VP8 compatible video decoder
3
 *
4
 * Copyright (C) 2010 David Conrad
5
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Jason Garrett-Glaser
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

    
25
#include "libavutil/imgutils.h"
26
#include "avcodec.h"
27
#include "vp8.h"
28
#include "vp8data.h"
29
#include "rectangle.h"
30

    
31
#if ARCH_ARM
32
#   include "arm/vp8.h"
33
#endif
34

    
35
/**
 * Release all reference frames and free per-frame decoder buffers.
 * Called on flush/seek and before re-allocating on a dimension change.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    /* Return every frame we still hold to the buffer pool (legacy API). */
    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0])
            avctx->release_buffer(avctx, &s->frames[i]);
    /* Drop the cur/prev/golden/altref pointers into s->frames[]. */
    memset(s->framep, 0, sizeof(s->framep));

    /* Free all per-dimension scratch buffers; av_freep() NULLs the pointers,
     * which update_dimensions() relies on to detect the first frame. */
    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    /* s->macroblocks aliases macroblocks_base + 1; clear it separately. */
    s->macroblocks        = NULL;
}
55

    
56
/**
 * (Re)allocate all per-dimension decoder state for a new frame size.
 *
 * Flushes existing frames/buffers first, then allocates macroblock metadata,
 * top-row prediction context and the segmentation map.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for bad dimensions,
 *         AVERROR(ENOMEM) if any allocation fails.
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    if (av_image_check_size(width, height, 0, s->avctx))
        return AVERROR_INVALIDDATA;

    /* Old buffers are sized for the old dimensions; drop everything. */
    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    /* Macroblock grid, rounded up to whole 16x16 blocks. */
    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    /* One row of macroblocks plus edge entries (hence the +mb_height*2+1);
     * the other buffers hold one row of top-prediction context each. */
    s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
    s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    /* Skip the first (edge) entry so mb[-1] accesses stay in bounds. */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}
83

    
84
/**
 * Parse segmentation header (RFC 6386 section 9.3): per-segment quantizer
 * and loop-filter deltas plus the segment-id tree probabilities.
 * Read order from the range coder must not change.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        /* absolute_vals: values replace the base quantizer instead of
         * being added to it (see get_quants()). */
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    /* Tree probabilities for decoding per-MB segment ids; 255 means
     * "not transmitted, use default". */
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
104

    
105
/**
 * Read per-reference-frame and per-mode loop filter level deltas
 * (RFC 6386 section 9.4).
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    /* One signed delta per reference frame type. */
    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);

    /* One signed delta per prediction mode class. */
    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
116

    
117
/**
 * Set up the DCT coefficient partitions (RFC 6386 section 9.5).
 *
 * The sizes of all partitions except the last are stored as 24-bit LE
 * values at the start of the partition data; the last partition takes
 * whatever remains.
 *
 * @return 0 on success, -1 if the buffer is too small for the declared sizes
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    /* 2-bit log2 of the partition count: 1, 2, 4 or 8 partitions. */
    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* Skip past the size table (3 bytes per partition except the last). */
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    /* Last partition spans the rest of the buffer. */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
142

    
143
/**
 * Read the quantizer header (RFC 6386 section 9.6) and build the dequant
 * tables for every segment: base AC index plus signed deltas for luma DC,
 * second-order (Y2) DC/AC, and chroma DC/AC.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            /* Segment quantizers are either absolute or relative to yac_qi
             * depending on the absolute_vals flag from the segment header. */
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        /* Lookup-table dequant factors; indices clipped to the table range.
         * Y2 DC is doubled and Y2 AC scaled by 155/100 per the spec. */
        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        /* Spec-mandated clamps: Y2 AC no smaller than 8, chroma DC no
         * larger than 132. */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
174

    
175
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 *
 * @param update flag read from the bitstream: update ref with current frame
 * @param ref    which reference is being decided (GOLDEN or GOLDEN2/altref)
 * @return the frame slot the reference should be updated from
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    /* 2-bit copy flag; 0 falls through to "no update". */
    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        /* Cross-copy: golden gets altref, altref gets golden. */
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
203

    
204
/**
 * Read the golden/altref update flags for an inter frame and resolve
 * which buffers they will be refreshed from (see ref_to_update()).
 * Both flags must be read before either ref_to_update() call, since
 * the latter may consume additional bits.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
214

    
215
/**
 * Decode the VP8 frame header: uncompressed data chunk, keyframe start code
 * and dimensions, then the compressed header (segmentation, loop filter,
 * partitions, quantizers, reference updates and probability updates).
 *
 * @param buf      frame data (starting at the uncompressed chunk)
 * @param buf_size number of bytes available in buf
 * @return 0 on success, negative AVERROR on invalid data or OOM
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* 3-byte uncompressed frame tag: keyframe flag, profile, show-frame
     * flag and the size of the first (header) partition. */
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    /* Profile 0 uses the 6-tap subpel filters; profiles 1-3 use bilinear. */
    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* Keyframes carry an extra 7-byte start-code/dimension block. */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* Keyframes reset all references and restore default probabilities. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        /* NOTE: parentheses matter here — assign first, then compare.
         * The previous code wrote (ret = update_dimensions(...) < 0),
         * which stored the comparison result and returned 1 on failure
         * instead of the proper AVERROR code. */
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* First partition: the compressed header itself. */
    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    /* Loop filter parameters. */
    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* 13.4: token probability updates. A single transmitted probability
     * updates every coefficient band that maps to the same index. */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        /* Inter-frame mode probabilities. */
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
351

    
352
/* Clamp a motion vector to the per-row search range (s->mv_min/mv_max),
 * preventing references outside the padded frame edges. */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}
357

    
358
/**
 * Motion vector coding, 17.1.
 *
 * Reads one MV component (x or y) from the range coder using probability
 * table p; returns the signed value. Bit read order is fixed by the spec —
 * do not reorder.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* Large MV: 10 magnitude bits, read low 3 ascending then
         * high bits descending, per the spec's bit order. */
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* Bit 3 is implicit (set) unless all high bits are zero. */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    /* Sign bit is only coded for nonzero magnitudes. */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
388

    
389
/**
 * Select the sub-MV probability table based on whether the left and top
 * neighbouring sub-MVs are zero/equal (16.4).
 *
 * @param left raw 32-bit value of the left neighbour sub-MV
 * @param top  raw 32-bit value of the top neighbour sub-MV
 */
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];    // both zero -> 4, equal nonzero -> 3
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];        // left zero -> 1, both nonzero -> 0
}
398

    
399
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)
{
    int part_idx;
    int n, num;
    /* Neighbouring macroblocks: top row is laid out at mb[+2],
     * the left neighbour at mb[-1] (see macroblocks_base layout). */
    VP8Macroblock *top_mb  = &mb[2];
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top = vp8_mbsplits[top_mb->partitioning],
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv  = top_mb->bmv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    /* Decode the partitioning tree: 4x4, 8x8, 16x8 or 8x16. */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    /* One MV per partition, predicted from the left/top neighbouring
     * sub-blocks (inside this MB when available, else the adjacent MB). */
    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        /* k is the raster index of the partition's first 4x4 block;
         * column 0 takes its left context from the left MB's last column. */
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        /* Top row takes its above context from the top MB's bottom row. */
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        /* Sub-MV mode tree: NEW (read components) / ZERO / ABOVE / LEFT. */
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
466

    
467
/**
 * Decode the motion vector(s) for an inter macroblock (16.2/16.3):
 * survey the top/left/top-left neighbours to build the nearest/near MV
 * candidates and their counts, then decode the MV mode tree
 * (ZERO / NEAREST / NEAR / NEW / SPLIT) conditioned on those counts.
 */
static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y)
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left: collect each inter neighbour's
     * MV (sign-flipped if its reference has the opposite sign bias), merge
     * duplicates, and weight top/left (+2) more than top-left (+1). */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* Reuse cnt[CNT_SPLITMV] as the split-mode context:
                 * how many neighbours are themselves split. */
                cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* mb->mv becomes the last sub-MV (used for prediction). */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1];
                } else {
                    /* NEW MV: best MV plus decoded residual components. */
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
556

    
557
/**
 * Decode the sixteen 4x4 intra prediction sub-modes for an I4x4 macroblock.
 * On keyframes the mode of each sub-block is context-coded from its top and
 * left neighbours (updating the stored top/left context as it goes);
 * on inter frames a single fixed probability table is used.
 */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,
                           int mb_x, int keyframe)
{
    uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
    if (keyframe) {
        int x, y;
        uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                /* Context = (above sub-mode, left sub-mode). */
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* The decoded mode becomes the context for the next blocks. */
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
581

    
582
/**
 * Decode per-macroblock mode information: segment id, skip flag, intra
 * prediction modes (keyframe and intra-in-inter) or reference frame and
 * motion vectors for inter macroblocks.
 *
 * @param segment in/out: persisted segment id for this MB position;
 *                updated only when the segmentation map is being refreshed
 */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        /* Keyframe: everything is intra; 16x16 mode from the intra tree. */
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb_x, 1);
        } else {
            /* Fill the 4x4 mode context with the derived sub-mode so
             * neighbouring I4x4 blocks see consistent context. */
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb_x, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
630

    
631
#ifndef decode_block_coeffs_internal
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16],
                                        uint8_t probs[8][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    /* The caller has already checked the first EOB, so jump straight into
     * the token decode; the loop head re-checks EOB on each iteration. */
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
            return i;

skip_eob:
        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
            if (++i == 16)
                return i; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(c, token_prob[8]);
                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        /* Sign bit, then dequantize: DC factor for index 0, AC otherwise. */
        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    return i;
}
#endif
696

    
697
/**
 * Thin wrapper around decode_block_coeffs_internal(): checks the initial
 * EOB (using the zero-neighbourhood context) so the common all-zero case
 * returns immediately without entering the token loop.
 *
 * @return 0 if no coeffs were decoded, else last coeff index plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                        uint8_t probs[8][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}
707

    
708
/**
 * Decode all DCT coefficients for one macroblock: optional luma DC (Y2)
 * block with inverse WHT, sixteen 4x4 luma blocks and eight 4x4 chroma
 * blocks. Updates the top (t_nnz) and left (l_nnz) non-zero context used
 * by neighbouring macroblocks.
 */
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;
    int block_dc = 0;

    /* Modes with a separate Y2 block: everything except I4x4 and split MV. */
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            /* DC-only WHT is cheaper when only one coefficient is set. */
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
        }
        /* Luma AC blocks then start at coeff 1 and use token context 0. */
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
768

    
769
/**
 * Save the bottom row of the current macroblock (16 luma bytes, and for the
 * normal filter 8 cb + 8 cr bytes) into top_border, to serve as the "above"
 * edge for intra prediction of the macroblock below.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    /* Chroma is only needed when the normal (non-simple) filter runs. */
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}
779

    
780
/**
 * Exchange (or copy) the saved top-border pixels with the row above the
 * current macroblock so intra prediction sees pre-loop-filter samples,
 * then restore them afterwards (the xchg flag selects the direction).
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    /* Point at the row directly above the macroblock. */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

/* Swap when xchg is set, otherwise one-way copy into the border buffer. */
#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    /* Top-right samples only exist if we're not in the last MB column. */
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}
810

    
811
static av_always_inline
812
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
813
{
814
    if (!mb_x) {
815
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
816
    } else {
817
        return mb_y ? mode : LEFT_DC_PRED8x8;
818
    }
819
}
820

    
821
static av_always_inline
822
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
823
{
824
    if (!mb_x) {
825
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
826
    } else {
827
        return mb_y ? mode : HOR_PRED8x8;
828
    }
829
}
830

    
831
static av_always_inline
832
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
833
{
834
    if (mode == DC_PRED8x8) {
835
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
836
    } else {
837
        return mode;
838
    }
839
}
840

    
841
static av_always_inline
842
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
843
{
844
    switch (mode) {
845
    case DC_PRED8x8:
846
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
847
    case VERT_PRED8x8:
848
        return !mb_y ? DC_127_PRED8x8 : mode;
849
    case HOR_PRED8x8:
850
        return !mb_x ? DC_129_PRED8x8 : mode;
851
    case PLANE_PRED8x8 /*TM*/:
852
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
853
    }
854
    return mode;
855
}
856

    
857
static av_always_inline
858
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
859
{
860
    if (!mb_x) {
861
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
862
    } else {
863
        return mb_y ? mode : HOR_VP8_PRED;
864
    }
865
}
866

    
867
/**
 * Validate a 4x4 intra mode when edge emulation is active. Either replaces
 * the mode with an edge-safe variant, or sets *copy_buf to request that the
 * caller predict into a small stack buffer with synthesized border pixels
 * (127 for missing top, 129 for missing left) and copy the result back.
 */
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        // interior left edge: top row exists, left doesn't -> buffered predict
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        // top frame edge: left may exist but top doesn't -> buffered predict
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
900

    
901
/**
 * Reconstruct an intra-coded macroblock: run luma (16x16 whole-MB or 16
 * separate 4x4 blocks) and chroma 8x8 prediction, then add the residual IDCT
 * for the 4x4 path. Borders are temporarily exchanged around the prediction
 * so it sees pre-loopfilter neighbour pixels.
 */
static av_always_inline
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz, tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        // whole-MB 16x16 luma prediction
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        // per-4x4-block luma prediction + residual
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = s->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                // 5 rows x 8 cols scratch block: row 0 = top border,
                // col 3 of each row = left border, prediction at +12
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        // predict into the scratch buffer with synthesized
                        // borders (127 top / 129 left), copy back below
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    // copy the 4x4 prediction out of the scratch buffer
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                // add residual: 1 = DC-only coefficient, >1 = full IDCT
                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    // chroma 8x8 prediction, same mode for both planes
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // restore the exchanged border pixels
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y))
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1021

    
1022
/* Per-subpel-phase (mv & 7) lookup used by the MC edge-emulation checks. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1028

    
1029
/**
 * Luma MC function.
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param src reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {
        // luma MVs are in quarter-pel; <<1 converts to the eighth-pel
        // phase the subpel tables/filters are indexed by
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // source block (plus filter taps) extends past the plane:
            // replicate edges into the scratch buffer first
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else
        // zero MV: plain copy, no subpel filtering needed
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
}
1073

    
1074
/**
 * Chroma MC function: same scheme as vp8_mc_luma but operates on both
 * chroma planes at once (they share the MV and subpel phase), with MVs
 * in eighth-pel units.
 *
 * @param dst1,src1 Cb destination/reference
 * @param dst2,src2 Cr destination/reference
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
                   uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            // both planes go through the same (shared) scratch buffer,
            // so each must be filtered before the other is emulated
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
            src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        // zero MV: plain copy for both planes
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
1112

    
1113
/**
 * Motion-compensate one rectangular partition of a macroblock:
 * luma at full resolution, then both chroma planes at half resolution
 * with the derived (and, for profile 3, full-pel-rounded) chroma MV.
 *
 * @param bx_off,by_off partition offset inside the MB, luma pixels
 * @param block_w,block_h partition size, luma pixels
 */
static av_always_inline
void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
                ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        // profile 3 (aka "fullpel chroma") drops the chroma subpel bits
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
                  ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
1143

    
1144
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        // estimated source position: current MV applied 4 MBs (64 px) ahead,
        // hence the +64 in the offsets below
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        // chroma: both planes are covered via the src[2]-src[1] stride trick
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}
1160

    
1161
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 *
 * Dispatches on the macroblock's partitioning: whole-MB, 16x8, 8x16, 8x8
 * (all via vp8_mc_part) or 4x4 split, where each luma sub-block has its own
 * MV and each chroma sub-block uses the rounded average of its four luma MVs.
 */
static av_always_inline
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                   int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
                            ref->data[0], &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                // sum the 2x2 group of luma MVs covering this chroma block
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                // round-to-nearest average of 4; the sign-bit term makes
                // the rounding symmetric for negative sums
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    // profile 3: full-pel chroma MVs only
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4,
                              ref->data[1], ref->data[2], &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
1245

    
1246
/**
 * Add the residual IDCT output for a non-intra-4x4 macroblock: luma rows
 * first (unless the MB is MODE_I4x4, whose residual was added during
 * prediction), then both chroma planes. The per-row nnz bytes are read
 * four at a time so all-DC rows can take the batched dc_add4 fast path.
 */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            // 4 nnz counts for this row of 4x4 blocks, one per byte
            uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // mixed row: per-block dispatch (1 = DC only, >1 = full IDCT)
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    // every coded block in the row is DC-only
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                // all 4 chroma blocks of this plane are DC-only
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
    }
}
1296

    
1297
/**
 * Compute the loop-filter strength for one macroblock (RFC 6386 ch. 15):
 * base level from segmentation or frame header, adjusted by the per-ref/
 * per-mode deltas, clipped to [0,63]; the interior (inner-edge) limit is
 * then derived from the level and the sharpness setting.
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;    // delta relative to frame level
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

/* Like av_clip for inputs 0 and max, where max is equal to (2^n-1).
 * Fixed macro hygiene: no trailing semicolon in the expansion (the old
 * one broke any use inside an expression) and the max argument is
 * parenthesized so expression arguments expand correctly. */
#define POW2CLIP(x,max) (((x) & ~(max)) ? (-(x))>>31 & (max) : (x))
    filter_level = POW2CLIP(filter_level, 63);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    // inner (sub-block) edges are filtered unless the MB is a skipped
    // whole-block prediction (no residual, single MV)
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1328

    
1329
/**
 * Run the normal (full) loop filter over one macroblock: macroblock-edge
 * filters on the left/top boundaries (skipped on the frame border), then
 * the weaker inner filters on the interior 4-pixel edges when enabled.
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    // high-edge-variance threshold by filter level; keyframes ([1]) use
    // slightly lower thresholds than interframes ([0])
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge (only if a left neighbour exists)
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // interior vertical edges at x = 4, 8, 12 (luma) and x = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge (only if a top neighbour exists)
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // interior horizontal edges at y = 4, 8, 12 (luma) and y = 4 (chroma)
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1398

    
1399
/**
 * Run the simple loop filter over one macroblock. The simple filter only
 * touches the luma plane and uses no high-edge-variance threshold.
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    // left MB edge (skipped on frame border), then interior vertical edges
    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    // top MB edge (skipped on frame border), then interior horizontal edges
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
1429

    
1430
/**
 * Apply the normal loop filter to every macroblock of one row, saving each
 * MB's bottom edge as the top border for the row below before filtering.
 */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *planes[3] = {
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8*mb_y*s->uvlinesize,
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8*mb_y*s->uvlinesize
    };
    int col;

    for (col = 0; col < s->mb_width; col++) {
        // border must be saved before filtering modifies the bottom row
        backup_mb_border(s->top_border[col+1], planes[0], planes[1], planes[2],
                         s->linesize, s->uvlinesize, 0);
        filter_mb(s, planes, strength + col, col, mb_y);
        planes[0] += 16;
        planes[1] += 8;
        planes[2] += 8;
    }
}
1448

    
1449
/**
 * Apply the simple (luma-only) loop filter to every macroblock of one row,
 * saving each MB's bottom luma edge as the border for the row below.
 */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *strength = s->filter_strength;
    uint8_t *y_ptr = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    int col;

    for (col = 0; col < s->mb_width; col++, y_ptr += 16) {
        // simple filter: no chroma, so only the luma border is backed up
        backup_mb_border(s->top_border[col+1], y_ptr, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, y_ptr, strength + col, col, mb_y);
    }
}
1461

    
1462
/**
 * Decode one VP8 frame from a packet.
 *
 * Parses the frame header, acquires a buffer for the current frame, decodes
 * every macroblock row (mode parsing, coefficient decoding, intra/inter
 * prediction, IDCT, optional in-loop deblocking), then updates the
 * last/golden/altref reference-frame pointers and releases frames that are
 * no longer referenced.
 *
 * @param data      output AVFrame (written only if the frame is visible)
 * @param data_size set to sizeof(AVFrame) when a frame is output, else untouched
 * @return avpkt->size on success, a negative error code on failure
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, mb_x, mb_y, i, y, referenced;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe);

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        return ret;

    // a frame is "referenced" if any later frame can predict from it
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    // pick the discard level at which this frame may be skipped entirely
    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // find a free frame buffer: one of the 4 slots not currently held as a
    // prev/golden/altref reference (there are only 3 references, so one of
    // the 4 slots is always available)
    for (i = 0; i < 4; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (curframe->data[0])
        avctx->release_buffer(avctx, curframe);

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE;
    curframe->reference = referenced ? 3 : 0;
    if ((ret = avctx->get_buffer(avctx, curframe))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        return ret;
    }

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    // NOTE(review): this returns after get_buffer(); the buffer stays attached to
    // the CURRENT slot and is recycled by the release above on the next call.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        return AVERROR_INVALIDDATA;
    }

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    // scratch line buffer for motion compensation reaching outside the frame;
    // allocated lazily since linesize is only known once we have a buffer
    if (!s->edge_emu_buffer)
        s->edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));

    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));
    if (s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // clamp motion vectors to the frame plus a 16-pixel margin (in 1/4-pel units)
    #define MARGIN (16 << 2)
    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        // coefficient partitions are assigned to rows round-robin
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
        VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        int mb_xy = mb_y*s->mb_width;
        uint8_t *dst[3] = {
            curframe->data[0] + 16*mb_y*s->linesize,
            curframe->data[1] +  8*mb_y*s->uvlinesize,
            curframe->data[2] +  8*mb_y*s->uvlinesize
        };

        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        memset(s->left_nnz, 0, sizeof(s->left_nnz));
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        // left edge of 129 for intra prediction
        if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
            for (i = 0; i < 3; i++)
                for (y = 0; y < 16>>!!i; y++)
                    dst[i][y*curframe->linesize[i]-1] = 129;
            if (mb_y == 1) // top left edge is also 129
                s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* Prefetch the current frame, 4 MBs ahead */
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

            if (!mb->skip)
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);

            if (mb->mode <= MODE_I4x4)
                intra_predict(s, dst, mb, mb_x, mb_y);
            else
                inter_predict(s, dst, mb, mb_x, mb_y);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

            if (!mb->skip) {
                idct_mb(s, dst, mb);
            } else {
                AV_ZERO64(s->left_nnz);
                AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

                // Reset DC block predictors if they would exist if the mb had coefficients
                if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                    s->left_nnz[8]      = 0;
                    s->top_nnz[mb_x][8] = 0;
                }
            }

            if (s->deblock_filter)
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

            dst[0] += 16;
            dst[1] += 8;
            dst[2] += 8;
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        // deblocking is done per-row, after the whole row is reconstructed
        if (s->deblock_filter) {
            if (s->filter.simple)
                filter_mb_row_simple(s, mb_y);
            else
                filter_mb_row(s, mb_y);
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    // check if golden and altref are swapped
    if (s->update_altref == VP56_FRAME_GOLDEN &&
        s->update_golden == VP56_FRAME_GOLDEN2)
        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
    else {
        if (s->update_altref != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];

        if (s->update_golden != VP56_FRAME_NONE)
            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    }

    if (s->update_last) // move cur->prev
        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];

    // release no longer referenced frames
    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            avctx->release_buffer(avctx, &s->frames[i]);

    // invisible frames (altref-only updates) are decoded but never output
    if (!s->invisible) {
        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;
}
1653

    
1654
/**
 * One-time decoder initialization: pixel format and DSP function tables.
 * Frame-size-dependent buffers are allocated later, on the first frame.
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;

    ctx->avctx     = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    /* generic DSP helpers, H.264-style intra predictors, VP8-specific DSP */
    dsputil_init(&ctx->dsp, avctx);
    ff_h264_pred_init(&ctx->hpc, CODEC_ID_VP8);
    ff_vp8dsp_init(&ctx->vp8dsp);

    return 0;
}
1667

    
1668
/**
 * Free the decoder. All buffers and reference frames are owned by the
 * context and released by the flush routine, so just delegate to it.
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);
    return 0;
}
1673

    
1674
AVCodec ff_vp8_decoder = {
1675
    "vp8",
1676
    AVMEDIA_TYPE_VIDEO,
1677
    CODEC_ID_VP8,
1678
    sizeof(VP8Context),
1679
    vp8_decode_init,
1680
    NULL,
1681
    vp8_decode_free,
1682
    vp8_decode_frame,
1683
    CODEC_CAP_DR1,
1684
    .flush = vp8_decode_flush,
1685
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1686
};