Revision c4211046 libavcodec/vp8.c

View differences:

libavcodec/vp8.c
123 123

  
124 124
    int mbskip_enabled;
125 125
    int sign_bias[4]; ///< one state [0, 1] per ref frame type
126
    int ref_count[3];
126 127

  
127 128
    /**
128 129
     * Base parameters for segmentation, i.e. per-macroblock parameters.
......
733 734
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
734 735
        else
735 736
            mb->ref_frame = VP56_FRAME_PREVIOUS;
737
        s->ref_count[mb->ref_frame-1]++;
736 738

  
737 739
        // motion vectors, 16.3
738 740
        find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt);
......
1081 1083

  
1082 1084
/* Fetch pixels for estimated mv 4 macroblocks ahead.
1083 1085
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1084
static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int x_off, int y_off, int ref)
1086
static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int ref)
1085 1087
{
1086
    int mx = mb->mv.x + x_off + 8;
1087
    int my = mb->mv.y + y_off;
1088
    uint8_t **src= s->framep[ref]->data;
1089
    int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1090
    s->dsp.prefetch(src[0]+off, s->linesize, 4);
1091
    off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1092
    s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1088
    /* Don't prefetch refs that haven't been used yet this frame. */
1089
    if (s->ref_count[ref-1]) {
1090
        int x_off = mb_x << 4, y_off = mb_y << 4;
1091
        int mx = mb->mv.x + x_off + 8;
1092
        int my = mb->mv.y + y_off;
1093
        uint8_t **src= s->framep[ref]->data;
1094
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1095
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
1096
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1097
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
1098
    }
1093 1099
}
1094 1100

  
1095 1101
/**
......
1103 1109
    AVFrame *ref = s->framep[mb->ref_frame];
1104 1110
    VP56mv *bmv = mb->bmv;
1105 1111

  
1106
    prefetch_motion(s, mb, mb_x, mb_y, x_off, y_off, VP56_FRAME_PREVIOUS);
1107

  
1108 1112
    if (mb->mode < VP8_MVMODE_SPLIT) {
1109 1113
        vp8_mc_part(s, dst, ref, x_off, y_off,
1110 1114
                    0, 0, 16, 16, width, height, &mb->mv);
......
1179 1183
                    8, 8, 8, 8, width, height, &bmv[3]);
1180 1184
        break;
1181 1185
    }
1182

  
1183
    prefetch_motion(s, mb, mb_x, mb_y, x_off, y_off, VP56_FRAME_GOLDEN);
1184 1186
}
1185 1187

  
1186 1188
static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst,
......
1458 1460

  
1459 1461
    // top edge of 127 for intra prediction
1460 1462
    memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
1463
    memset(s->ref_count, 0, sizeof(s->ref_count));
1461 1464

  
1462 1465
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
1463 1466
        VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
......
1490 1493

  
1491 1494
            decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb);
1492 1495

  
1496
            prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_PREVIOUS);
1497

  
1493 1498
            if (!mb->skip)
1494 1499
                decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
1495 1500
            else {
......
1502 1507
            else
1503 1508
                inter_predict(s, dst, mb, mb_x, mb_y);
1504 1509

  
1510
            prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_GOLDEN);
1511

  
1505 1512
            if (!mb->skip) {
1506 1513
                idct_mb(s, dst[0], dst[1], dst[2], mb);
1507 1514
            } else {
......
1518 1525
            if (s->deblock_filter)
1519 1526
                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
1520 1527

  
1528
            prefetch_motion(s, mb, mb_x, mb_y, VP56_FRAME_GOLDEN2);
1529

  
1521 1530
            dst[0] += 16;
1522 1531
            dst[1] += 8;
1523 1532
            dst[2] += 8;

Also available in: Unified diff