Revision baffa091 libavcodec/x86/dsputil_mmx.c

View differences:

libavcodec/x86/dsputil_mmx.c
1664 1664
static void just_return(void) { /* intentionally empty: does nothing and returns */ }
1665 1665
#endif
1666 1666

  
1667
static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1668
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){
1667
#if HAVE_YASM
1668
/**
 * Function type of an edge-emulation core routine.
 *
 * emulated_edge_mc() in this file calls such a routine as
 * core_fn(buf, src, linesize, start_y, end_y, block_h, start_x, end_x, block_w),
 * i.e. after it has clamped the block position and computed the
 * start/end offsets. All scalar arguments are passed as x86_reg.
 */
typedef void emu_edge_core_func (uint8_t *buf, const uint8_t *src,
1669
                                 x86_reg linesize, x86_reg start_y,
1670
                                 x86_reg end_y, x86_reg block_h,
1671
                                 x86_reg start_x, x86_reg end_x,
1672
                                 x86_reg block_w);
1673
/* Core routines defined outside this file; declared only under HAVE_YASM
 * (NOTE(review): presumably implemented in yasm assembly -- confirm). */
extern emu_edge_core_func ff_emu_edge_core_mmx;
1674
extern emu_edge_core_func ff_emu_edge_core_sse;
1675

  
1676
/**
 * Clamp the requested block position, compute which part of the block
 * overlaps the w x h area, and hand the actual copy/fill to core_fn.
 *
 * @param buf      destination buffer; passed to core_fn advanced by start_x
 * @param src      source pointer; NOTE(review): presumably addresses sample
 *                 (src_x, src_y) of the picture -- confirm against callers
 * @param linesize row stride used to step src vertically; also passed to core_fn
 * @param block_w  width of the block (asserted to be > 0)
 * @param block_h  height of the block (asserted to be > 0)
 * @param src_x    horizontal block position, clamped here to [1-block_w, w-1]
 * @param src_y    vertical block position, clamped here to [1-block_h, h-1]
 * @param w        horizontal bound used for clamping (presumably picture width)
 * @param h        vertical bound used for clamping (presumably picture height)
 * @param core_fn  routine performing the copy and edge fill
 */
static av_always_inline
1677
void emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize,
1678
                      int block_w, int block_h,
1679
                      int src_x, int src_y, int w, int h,
1680
                      emu_edge_core_func *core_fn)
1681
{
1682
    int start_y, start_x, end_y, end_x, src_y_add=0;
1683

  
1684
    // src_y outside the range: clamp it so exactly one block row still
    // overlaps, and record the row displacement in src_y_add; it is applied
    // to src further down (multiplied by linesize).
    if(src_y>= h){
1685
        src_y_add = h-1-src_y;
1686
        src_y=h-1;
1687
    }else if(src_y<=-block_h){
1688
        src_y_add = 1-block_h-src_y;
1689
        src_y=1-block_h;
1690
    }
1691
    // Same clamp horizontally; here the displacement is applied to src
    // immediately instead of being stored.
    if(src_x>= w){
1692
        src+= (w-1-src_x);
1693
        src_x=w-1;
1694
    }else if(src_x<=-block_w){
1695
        src+= (1-block_w-src_x);
1696
        src_x=1-block_w;
1697
    }
1698

  
1699
    // [start, end) is the part of the block, in block coordinates, that
    // lies inside the w x h area.
    start_y= FFMAX(0, -src_y);
1700
    start_x= FFMAX(0, -src_x);
1701
    end_y= FFMIN(block_h, h-src_y);
1702
    end_x= FFMIN(block_w, w-src_x);
1703
    assert(start_x < end_x && block_w > 0);
1704
    assert(start_y < end_y && block_h > 0);
1705

  
1706
    // fill in the to-be-copied part plus all above/below
1707
    // Move src to the first overlapping row/column (including the vertical
    // clamp displacement) and buf to the first overlapping column.
    src += (src_y_add+start_y)*linesize + start_x;
1708
    buf += start_x;
1709
    core_fn(buf, src, linesize, start_y, end_y, block_h, start_x, end_x, block_w);
1710
}
1711

  
1712
#if ARCH_X86_32
1713
/**
 * Non-inlined instance of emulated_edge_mc() that uses
 * ff_emu_edge_core_mmx as the core routine. All other arguments are
 * forwarded unchanged. Compiled only when ARCH_X86_32 is set.
 */
static av_noinline
1714
void emulated_edge_mc_mmx(uint8_t *buf, const uint8_t *src, int linesize,
1715
                          int block_w, int block_h,
1716
                          int src_x, int src_y, int w, int h)
1717
{
1718
    emulated_edge_mc(buf, src, linesize, block_w, block_h, src_x, src_y,
1719
                     w, h, &ff_emu_edge_core_mmx);
1720
}
1721
#endif
1722
/**
 * Non-inlined instance of emulated_edge_mc() that uses
 * ff_emu_edge_core_sse as the core routine. All other arguments are
 * forwarded unchanged.
 */
static av_noinline
1723
void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src, int linesize,
1724
                          int block_w, int block_h,
1725
                          int src_x, int src_y, int w, int h)
1726
{
1727
    emulated_edge_mc(buf, src, linesize, block_w, block_h, src_x, src_y,
1728
                     w, h, &ff_emu_edge_core_sse);
1729
}
1730
#endif /* HAVE_YASM */
1731

  
1732
/**
 * Function type of the edge-emulation callback taken by gmc().
 * The gmc_* wrappers in this file pass emulated_edge_mc_mmx,
 * emulated_edge_mc_sse or ff_emulated_edge_mc through a pointer of this type.
 */
typedef void emulated_edge_mc_func (uint8_t *dst, const uint8_t *src,
1733
                                    int linesize, int block_w, int block_h,
1734
                                    int src_x, int src_y, int w, int h);
1735

  
1736
static av_always_inline
1737
void gmc(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1738
         int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height,
1739
         emulated_edge_mc_func *emu_edge_fn)
1740
{
1669 1741
    const int w = 8;
1670 1742
    const int ix = ox>>(16+shift);
1671 1743
    const int iy = oy>>(16+shift);
......
1701 1773
    if( (unsigned)ix >= width-w ||
1702 1774
        (unsigned)iy >= height-h )
1703 1775
    {
1704
        ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
1776
        emu_edge_fn(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
1705 1777
        src = edge_buf;
1706 1778
    }
1707 1779

  
......
1782 1854
    }
1783 1855
}
1784 1856

  
1857
#if HAVE_YASM
1858
#if ARCH_X86_32
1859
/**
 * gmc() with emulated_edge_mc_mmx as the edge-emulation callback; every
 * other argument is forwarded unchanged. This variant is compiled when
 * both HAVE_YASM and ARCH_X86_32 are set.
 */
static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1860
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
1861
{
1862
    gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1863
        width, height, &emulated_edge_mc_mmx);
1864
}
1865
#endif
1866
/**
 * gmc() with emulated_edge_mc_sse as the edge-emulation callback; every
 * other argument is forwarded unchanged. Compiled when HAVE_YASM is set.
 */
static void gmc_sse(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1867
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
1868
{
1869
    gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1870
        width, height, &emulated_edge_mc_sse);
1871
}
1872
#else
1873
/**
 * Fallback gmc_mmx used when HAVE_YASM is not set: calls gmc() with
 * ff_emulated_edge_mc as the edge-emulation callback, forwarding every
 * other argument unchanged.
 */
static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1874
                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
1875
{
1876
    gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1877
        width, height, &ff_emulated_edge_mc);
1878
}
1879
#endif
1880

  
1785 1881
#define PREFETCH(name, op) \
1786 1882
static void name(void *mem, int stride, int h){\
1787 1883
    const uint8_t *p= mem;\
......
2626 2722
        SET_HPEL_FUNCS(avg, 1, 8, mmx);
2627 2723
        SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
2628 2724

  
2725
#if ARCH_X86_32 || !HAVE_YASM
2629 2726
        c->gmc= gmc_mmx;
2727
#endif
2728
#if ARCH_X86_32 && HAVE_YASM
2729
        c->emulated_edge_mc = emulated_edge_mc_mmx;
2730
#endif
2630 2731

  
2631 2732
        c->add_bytes= add_bytes_mmx;
2632 2733
        c->add_bytes_l2= add_bytes_l2_mmx;
......
2913 3014
#if HAVE_YASM
2914 3015
            c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
2915 3016
            c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
3017

  
3018
            c->emulated_edge_mc = emulated_edge_mc_sse;
3019
            c->gmc= gmc_sse;
2916 3020
#endif
2917 3021
        }
2918 3022
        if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit

Also available in: Unified diff