Revision 513fbd8e

View differences:

libavcodec/dsputil.c
3773 3773
    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
3774 3774
}
3775 3775

  
3776
/* No-op fallback installed into DSPContext.prefetch (see "c->prefetch= just_return").
 * The parameter list matches the prefetch function-pointer type exactly, so
 * calling it through that pointer with (mem, stride, h) is well defined;
 * all three arguments are ignored. */
static void just_return(void *mem, int stride, int h) { (void)mem; (void)stride; (void)h; }
3777

  
3776 3778
/* init static data */
3777 3779
void dsputil_static_init(void)
3778 3780
{
......
4054 4056
    c->inner_add_yblock = ff_snow_inner_add_yblock;
4055 4057
#endif
4056 4058

  
4059
    c->prefetch= just_return;
4060

  
4057 4061
#ifdef HAVE_MMX
4058 4062
    dsputil_init_mmx(c, avctx);
4059 4063
#endif
libavcodec/dsputil.h
343 343
    void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
344 344
    void (*horizontal_compose97i)(DWTELEM *b, int width);
345 345
    void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
346

  
347
    void (*prefetch)(void *mem, int stride, int h);
346 348
} DSPContext;
347 349

  
348 350
void dsputil_static_init(void);
libavcodec/h264.c
2752 2752
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
2753 2753
}
2754 2754

  
2755
/**
 * Pass prefetch hints for reference-picture pixels to the DSP prefetch hook.
 *
 * Uses the reference index and motion vector cached at scan8[0] for the
 * given list; when that reference index is negative nothing is done.
 *
 * @param h    decoder context (its embedded MpegEncContext supplies mb_x,
 *             mb_y, linesize, uvlinesize and the dsp function table)
 * @param list index used to select ref_cache / mv_cache / ref_list entries
 */
static inline void prefetch_motion(H264Context *h, int list){
2756
    /* fetch pixels for estimated mv 4 macroblocks ahead
2757
     * optimized for 64byte cache lines */
2758
    MpegEncContext * const s = &h->s;
2759
    const int refn = h->ref_cache[list][scan8[0]];
2760
    /* a negative reference index skips prefetching entirely */
    if(refn >= 0){
2761
        /* motion vector components are shifted right by 2 and added to the
         * macroblock position (16*mb_x + 8 horizontally, 16*mb_y vertically) */
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
2762
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
2763
        uint8_t **src= h->ref_list[list][refn].data;
2764
        /* the row offset (mb_x&3)*4 varies with mb_x; the +64 presumably is the
         * "4 macroblocks ahead" (4*16 pixels) from the comment above -- confirm */
        int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64;
2765
        /* 4 rows of plane 0, linesize bytes apart */
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
2766
        off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
2767
        /* stride = src[2]-src[1] with h = 2: an implementation that advances by
         * stride touches the same offset in src[1] and then in src[2].
         * NOTE(review): the pointer difference is converted to the int stride
         * parameter; verify it cannot exceed the range of int. */
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
2768
    }
2769
}
2770

  
2755 2771
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
2756 2772
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
2757 2773
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
......
2762 2778

  
2763 2779
    assert(IS_INTER(mb_type));
2764 2780

  
2781
    prefetch_motion(h, 0);
2782

  
2765 2783
    if(IS_16X16(mb_type)){
2766 2784
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
2767 2785
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
......
2833 2851
            }
2834 2852
        }
2835 2853
    }
2854

  
2855
    prefetch_motion(h, 1);
2836 2856
}
2837 2857

  
2838 2858
static void decode_init_vlc(H264Context *h){
libavcodec/i386/dsputil_mmx.c
2489 2489
    }
2490 2490
}
2491 2491

  
2492
/* PREFETCH(name, op) defines a function
 *     void name(void *mem, int stride, int h)
 * that executes the instruction `op` on the byte at mem, then advances the
 * pointer by stride bytes, repeating until --h reaches zero (h addresses in
 * total for h >= 1). No data is read or written by the C code itself.
 * NOTE(review): the do/while form always runs at least once and only stops
 * when --h becomes 0, so callers must pass h >= 1.
 * Two instances are generated: prefetch_mmx2 (prefetcht0) and
 * prefetch_3dnow (prefetch); the helper macro is undefined afterwards. */
#define PREFETCH(name, op) \
2493
void name(void *mem, int stride, int h){\
2494
    const uint8_t *p= mem;\
2495
    do{\
2496
        asm volatile(#op" %0" :: "m"(*p));\
2497
        p+= stride;\
2498
    }while(--h);\
2499
}
2500
PREFETCH(prefetch_mmx2,  prefetcht0)
2501
PREFETCH(prefetch_3dnow, prefetch)
2502
#undef PREFETCH
2503

  
2492 2504
#include "h264dsp_mmx.c"
2493 2505

  
2494 2506
/* external functions, from idct_mmx.c */
......
2749 2761
        c->h264_idct8_add= ff_h264_idct8_add_mmx;
2750 2762

  
2751 2763
        if (mm_flags & MM_MMXEXT) {
2764
            c->prefetch = prefetch_mmx2;
2765

  
2752 2766
            c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2753 2767
            c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2754 2768

  
......
2879 2893
            c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
2880 2894
#endif //CONFIG_ENCODERS
2881 2895
        } else if (mm_flags & MM_3DNOW) {
2896
            c->prefetch = prefetch_3dnow;
2897

  
2882 2898
            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2883 2899
            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2884 2900

  

Also available in: Unified diff