Revision 4773d904

View differences:

libavcodec/vp8.c
27 27
#include "vp8.h"
28 28
#include "vp8data.h"
29 29
#include "rectangle.h"
30
#include "thread.h"
30 31

  
31 32
#if ARCH_ARM
32 33
#   include "arm/vp8.h"
......
37 38
    VP8Context *s = avctx->priv_data;
38 39
    int i;
39 40

  
40
    for (i = 0; i < 4; i++)
41
        if (s->frames[i].data[0])
42
            avctx->release_buffer(avctx, &s->frames[i]);
41
    if (!avctx->is_copy) {
42
        for (i = 0; i < 5; i++)
43
            if (s->frames[i].data[0])
44
                ff_thread_release_buffer(avctx, &s->frames[i]);
45
    }
43 46
    memset(s->framep, 0, sizeof(s->framep));
44 47

  
45 48
    av_freep(&s->macroblocks_base);
......
55 58

  
56 59
static int update_dimensions(VP8Context *s, int width, int height)
57 60
{
58
    if (av_image_check_size(width, height, 0, s->avctx))
59
        return AVERROR_INVALIDDATA;
61
    if (width  != s->avctx->width ||
62
        height != s->avctx->height) {
63
        if (av_image_check_size(width, height, 0, s->avctx))
64
            return AVERROR_INVALIDDATA;
60 65

  
61
    vp8_decode_flush(s->avctx);
66
        vp8_decode_flush(s->avctx);
62 67

  
63
    avcodec_set_dimensions(s->avctx, width, height);
68
        avcodec_set_dimensions(s->avctx, width, height);
69
    }
64 70

  
65 71
    s->mb_width  = (s->avctx->coded_width +15) / 16;
66 72
    s->mb_height = (s->avctx->coded_height+15) / 16;
......
579 585
}
580 586

  
581 587
static av_always_inline
582
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment)
588
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref)
583 589
{
584 590
    VP56RangeCoder *c = &s->c;
585 591

  
586 592
    if (s->segmentation.update_map)
587 593
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
594
    else
595
        *segment = ref ? *ref : *segment;
588 596
    s->segment = *segment;
589 597

  
590 598
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
......
1043 1051
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1044 1052
 */
1045 1053
static av_always_inline
1046
void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv,
1054
void vp8_mc_luma(VP8Context *s, uint8_t *dst, AVFrame *ref, const VP56mv *mv,
1047 1055
                 int x_off, int y_off, int block_w, int block_h,
1048 1056
                 int width, int height, int linesize,
1049 1057
                 vp8_mc_func mc_func[3][3])
1050 1058
{
1059
    uint8_t *src = ref->data[0];
1060

  
1051 1061
    if (AV_RN32A(mv)) {
1052 1062

  
1053 1063
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
......
1057 1067
        y_off += mv->y >> 2;
1058 1068

  
1059 1069
        // edge emulation
1070
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1060 1071
        src += y_off * linesize + x_off;
1061 1072
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1062 1073
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
......
1066 1077
            src = s->edge_emu_buffer + mx_idx + linesize * my_idx;
1067 1078
        }
1068 1079
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
1069
    } else
1080
    } else {
1081
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1070 1082
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1083
    }
1071 1084
}
1072 1085

  
1073 1086
static av_always_inline
1074
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1,
1075
                   uint8_t *src2, const VP56mv *mv, int x_off, int y_off,
1087
void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, AVFrame *ref,
1088
                   const VP56mv *mv, int x_off, int y_off,
1076 1089
                   int block_w, int block_h, int width, int height, int linesize,
1077 1090
                   vp8_mc_func mc_func[3][3])
1078 1091
{
1092
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];
1093

  
1079 1094
    if (AV_RN32A(mv)) {
1080 1095
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1081 1096
        int my = mv->y&7, my_idx = subpel_idx[0][my];
......
1086 1101
        // edge emulation
1087 1102
        src1 += y_off * linesize + x_off;
1088 1103
        src2 += y_off * linesize + x_off;
1104
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1089 1105
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1090 1106
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1091 1107
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
......
1104 1120
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1105 1121
        }
1106 1122
    } else {
1123
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1107 1124
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1108 1125
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1109 1126
    }
......
1120 1137

  
1121 1138
    /* Y */
1122 1139
    vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off,
1123
                ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
1140
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1124 1141
                block_w, block_h, width, height, s->linesize,
1125 1142
                s->put_pixels_tab[block_w == 8]);
1126 1143

  
......
1134 1151
    width   >>= 1; height  >>= 1;
1135 1152
    block_w >>= 1; block_h >>= 1;
1136 1153
    vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off,
1137
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1],
1138
                  ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
1154
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1155
                  &uvmv, x_off + bx_off, y_off + by_off,
1139 1156
                  block_w, block_h, width, height, s->uvlinesize,
1140 1157
                  s->put_pixels_tab[1 + (block_w == 4)]);
1141 1158
}
......
1151 1168
        int my = (mb->mv.y>>2) + y_off;
1152 1169
        uint8_t **src= s->framep[ref]->data;
1153 1170
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1171
        /* For threading, a ff_thread_await_progress here might be useful, but
1172
         * it actually slows down the decoder. Since a bad prefetch doesn't
1173
         * generate bad decoder output, we don't run it here. */
1154 1174
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
1155 1175
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1156 1176
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
......
1182 1202
        for (y = 0; y < 4; y++) {
1183 1203
            for (x = 0; x < 4; x++) {
1184 1204
                vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4,
1185
                            ref->data[0], &bmv[4*y + x],
1205
                            ref, &bmv[4*y + x],
1186 1206
                            4*x + x_off, 4*y + y_off, 4, 4,
1187 1207
                            width, height, s->linesize,
1188 1208
                            s->put_pixels_tab[2]);
......
1208 1228
                    uvmv.y &= ~7;
1209 1229
                }
1210 1230
                vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4,
1211
                              dst[2] + 4*y*s->uvlinesize + x*4,
1212
                              ref->data[1], ref->data[2], &uvmv,
1231
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1213 1232
                              4*x + x_off, 4*y + y_off, 4, 4,
1214 1233
                              width, height, s->uvlinesize,
1215 1234
                              s->put_pixels_tab[2]);
......
1427 1446
    }
1428 1447
}
1429 1448

  
1430
static void filter_mb_row(VP8Context *s, int mb_y)
1449
static void filter_mb_row(VP8Context *s, AVFrame *curframe, int mb_y)
1431 1450
{
1432 1451
    VP8FilterStrength *f = s->filter_strength;
1433 1452
    uint8_t *dst[3] = {
1434
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
1435
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8*mb_y*s->uvlinesize,
1436
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8*mb_y*s->uvlinesize
1453
        curframe->data[0] + 16*mb_y*s->linesize,
1454
        curframe->data[1] +  8*mb_y*s->uvlinesize,
1455
        curframe->data[2] +  8*mb_y*s->uvlinesize
1437 1456
    };
1438 1457
    int mb_x;
1439 1458

  
......
1446 1465
    }
1447 1466
}
1448 1467

  
1449
static void filter_mb_row_simple(VP8Context *s, int mb_y)
1468
static void filter_mb_row_simple(VP8Context *s, AVFrame *curframe, int mb_y)
1450 1469
{
1451 1470
    VP8FilterStrength *f = s->filter_strength;
1452
    uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
1471
    uint8_t *dst = curframe->data[0] + 16*mb_y*s->linesize;
1453 1472
    int mb_x;
1454 1473

  
1455 1474
    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
......
1465 1484
    VP8Context *s = avctx->priv_data;
1466 1485
    int ret, mb_x, mb_y, i, y, referenced;
1467 1486
    enum AVDiscard skip_thresh;
1468
    AVFrame *av_uninit(curframe);
1487
    AVFrame *av_uninit(curframe), *prev_frame = s->framep[VP56_FRAME_CURRENT];
1469 1488

  
1470 1489
    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1471 1490
        return ret;
......
1482 1501
    }
1483 1502
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
1484 1503

  
1485
    for (i = 0; i < 4; i++)
1486
        if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1504
    // release no longer referenced frames
1505
    for (i = 0; i < 5; i++)
1506
        if (s->frames[i].data[0] &&
1507
            &s->frames[i] != prev_frame &&
1508
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1509
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1510
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1511
            ff_thread_release_buffer(avctx, &s->frames[i]);
1512

  
1513
    // find a free buffer
1514
    for (i = 0; i < 5; i++)
1515
        if (&s->frames[i] != prev_frame &&
1516
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1487 1517
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1488 1518
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
1489 1519
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
1490 1520
            break;
1491 1521
        }
1522
    if (i == 5) {
1523
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
1524
        abort();
1525
    }
1492 1526
    if (curframe->data[0])
1493
        avctx->release_buffer(avctx, curframe);
1527
        ff_thread_release_buffer(avctx, curframe);
1494 1528

  
1495 1529
    curframe->key_frame = s->keyframe;
1496 1530
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1497 1531
    curframe->reference = referenced ? 3 : 0;
1498
    if ((ret = avctx->get_buffer(avctx, curframe))) {
1532
    curframe->ref_index[0] = s->segmentation_map;
1533
    if ((ret = ff_thread_get_buffer(avctx, curframe))) {
1499 1534
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1500 1535
        return ret;
1501 1536
    }
1502 1537

  
1538
    // check if golden and altref are swapped
1539
    if (s->update_altref != VP56_FRAME_NONE) {
1540
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
1541
    } else {
1542
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
1543
    }
1544
    if (s->update_golden != VP56_FRAME_NONE) {
1545
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
1546
    } else {
1547
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
1548
    }
1549
    if (s->update_last) {
1550
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
1551
    } else {
1552
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
1553
    }
1554
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;
1555

  
1556
    ff_thread_finish_setup(avctx);
1557

  
1503 1558
    // Given that arithmetic probabilities are updated every frame, it's quite likely
1504 1559
    // that the values we have on a random interframe are complete junk if we didn't
1505 1560
    // start decode on a keyframe. So just don't display anything rather than junk.
......
1530 1585
    if (s->keyframe)
1531 1586
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1532 1587

  
1533
    #define MARGIN (16 << 2)
1588
#define MARGIN (16 << 2)
1534 1589
    s->mv_min.y = -MARGIN;
1535 1590
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
1536 1591

  
......
1559 1614

  
1560 1615
        s->mv_min.x = -MARGIN;
1561 1616
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
1617
        if (prev_frame && s->segmentation.enabled && s->segmentation.update_map)
1618
            ff_thread_await_progress(prev_frame, mb_y, 0);
1562 1619

  
1563 1620
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
1564 1621
            /* Prefetch the current frame, 4 MBs ahead */
1565 1622
            s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1566 1623
            s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
1567 1624

  
1568
            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);
1625
            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy,
1626
                           prev_frame ? prev_frame->ref_index[0] + mb_xy : NULL);
1569 1627

  
1570 1628
            prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
1571 1629

  
......
1605 1663
        }
1606 1664
        if (s->deblock_filter) {
1607 1665
            if (s->filter.simple)
1608
                filter_mb_row_simple(s, mb_y);
1666
                filter_mb_row_simple(s, curframe, mb_y);
1609 1667
            else
1610
                filter_mb_row(s, mb_y);
1668
                filter_mb_row(s, curframe, mb_y);
1611 1669
        }
1612 1670
        s->mv_min.y -= 64;
1613 1671
        s->mv_max.y -= 64;
1672

  
1673
        ff_thread_report_progress(curframe, mb_y, 0);
1614 1674
    }
1615 1675

  
1676
    ff_thread_report_progress(curframe, INT_MAX, 0);
1616 1677
skip_decode:
1617 1678
    // if future frames don't use the updated probabilities,
1618 1679
    // reset them to the values we saved
1619 1680
    if (!s->update_probabilities)
1620 1681
        s->prob[0] = s->prob[1];
1621 1682

  
1622
    // check if golden and altref are swapped
1623
    if (s->update_altref == VP56_FRAME_GOLDEN &&
1624
        s->update_golden == VP56_FRAME_GOLDEN2)
1625
        FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], s->framep[VP56_FRAME_GOLDEN2]);
1626
    else {
1627
        if (s->update_altref != VP56_FRAME_NONE)
1628
            s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
1629

  
1630
        if (s->update_golden != VP56_FRAME_NONE)
1631
            s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
1632
    }
1633

  
1634
    if (s->update_last) // move cur->prev
1635
        s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT];
1636

  
1637
    // release no longer referenced frames
1638
    for (i = 0; i < 4; i++)
1639
        if (s->frames[i].data[0] &&
1640
            &s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
1641
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
1642
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
1643
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1644
            avctx->release_buffer(avctx, &s->frames[i]);
1683
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
1645 1684

  
1646 1685
    if (!s->invisible) {
1647
        *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT];
1686
        *(AVFrame*)data = *curframe;
1648 1687
        *data_size = sizeof(AVFrame);
1649 1688
    }
1650 1689

  
......
1671 1710
    return 0;
1672 1711
}
1673 1712

  
1713
/**
 * Init hook for a per-thread copy of the decoder: points the copy's
 * VP8Context back at the AVCodecContext it belongs to. Nothing else is
 * initialized here.
 *
 * @param avctx codec context whose priv_data holds the VP8Context to fix up
 * @return always 0
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
1714
{
1715
    VP8Context *s = avctx->priv_data;
1716

  
1717
    s->avctx = avctx;
1718

  
1719
    return 0;
1720
}
1721

  
1722
/* Map a pointer into s_src->frames[] onto the element with the same index in
 * s->frames[]; a NULL pic stays NULL. Expects `s` and `s_src` to be in scope
 * wherever the macro is expanded.
 * NOTE(review): neither the argument nor the expansion is parenthesized, so
 * this is only safe with simple operands used as a plain right-hand side. */
#define REBASE(pic) \
1723
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
1724

  
1725
/**
 * Copy the decoder state that carries over between frames from one
 * thread's VP8Context (src) into another's (dst).
 *
 * Copied: the probability set, segmentation settings, loop-filter deltas,
 * sign biases, the whole frames[] array, and the four reference pointers.
 *
 * @param dst codec context whose priv_data (VP8Context) is written
 * @param src codec context whose priv_data (VP8Context) is read
 * @return always 0
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1726
{
1727
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
1728

  
1729
    /* Take src's prob[0] if it signalled update_probabilities, otherwise
     * its prob[1]. */
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
1730
    s->segmentation = s_src->segmentation;
1731
    s->lf_delta = s_src->lf_delta;
1732
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
1733

  
1734
    /* The AVFrame structs are copied by value, so src's next_framep[]
     * pointers (which point into s_src->frames[]) must be re-pointed at the
     * matching slots of s->frames[] via REBASE. */
    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
1735
    s->framep[0] = REBASE(s_src->next_framep[0]);
1736
    s->framep[1] = REBASE(s_src->next_framep[1]);
1737
    s->framep[2] = REBASE(s_src->next_framep[2]);
1738
    s->framep[3] = REBASE(s_src->next_framep[3]);
1739

  
1740
    return 0;
1741
}
1742

  
1674 1743
AVCodec ff_vp8_decoder = {
1675 1744
    "vp8",
1676 1745
    AVMEDIA_TYPE_VIDEO,
......
1680 1749
    NULL,
1681 1750
    vp8_decode_free,
1682 1751
    vp8_decode_frame,
1683
    CODEC_CAP_DR1,
1752
    CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
1684 1753
    .flush = vp8_decode_flush,
1685 1754
    .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
1755
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
1756
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
1686 1757
};
libavcodec/vp8.h
86 86
typedef struct {
87 87
    AVCodecContext *avctx;
88 88
    AVFrame *framep[4];
89
    AVFrame *next_framep[4];
89 90
    uint8_t *edge_emu_buffer;
90 91

  
91 92
    uint16_t mb_width;   /* number of horizontal MB */
......
235 236
    VP8DSPContext vp8dsp;
236 237
    H264PredContext hpc;
237 238
    vp8_mc_func put_pixels_tab[3][3][3];
238
    AVFrame frames[4];
239
    AVFrame frames[5];
239 240
} VP8Context;
240 241

  
241 242
#endif

Also available in: Unified diff