Revision 015f9f1a libavcodec/x86/dsputil_mmx.c

View differences:

libavcodec/x86/dsputil_mmx.c
2074 2074
    }
2075 2075
}
2076 2076

  
2077
static void vector_fmul_3dnow(float *dst, const float *src, int len){
2077
static void vector_fmul_3dnow(float *dst, const float *src0, const float *src1, int len){
2078 2078
    x86_reg i = (len-4)*4;
2079 2079
    __asm__ volatile(
2080 2080
        "1: \n\t"
2081
        "movq    (%1,%0), %%mm0 \n\t"
2082
        "movq   8(%1,%0), %%mm1 \n\t"
2083
        "pfmul   (%2,%0), %%mm0 \n\t"
2084
        "pfmul  8(%2,%0), %%mm1 \n\t"
2081
        "movq    (%2,%0), %%mm0 \n\t"
2082
        "movq   8(%2,%0), %%mm1 \n\t"
2083
        "pfmul   (%3,%0), %%mm0 \n\t"
2084
        "pfmul  8(%3,%0), %%mm1 \n\t"
2085 2085
        "movq   %%mm0,  (%1,%0) \n\t"
2086 2086
        "movq   %%mm1, 8(%1,%0) \n\t"
2087 2087
        "sub  $16, %0 \n\t"
2088 2088
        "jge 1b \n\t"
2089 2089
        "femms  \n\t"
2090 2090
        :"+r"(i)
2091
        :"r"(dst), "r"(src)
2091
        :"r"(dst), "r"(src0), "r"(src1)
2092 2092
        :"memory"
2093 2093
    );
2094 2094
}
2095
static void vector_fmul_sse(float *dst, const float *src, int len){
2095
static void vector_fmul_sse(float *dst, const float *src0, const float *src1, int len){
2096 2096
    x86_reg i = (len-8)*4;
2097 2097
    __asm__ volatile(
2098 2098
        "1: \n\t"
2099
        "movaps    (%1,%0), %%xmm0 \n\t"
2100
        "movaps  16(%1,%0), %%xmm1 \n\t"
2101
        "mulps     (%2,%0), %%xmm0 \n\t"
2102
        "mulps   16(%2,%0), %%xmm1 \n\t"
2099
        "movaps    (%2,%0), %%xmm0 \n\t"
2100
        "movaps  16(%2,%0), %%xmm1 \n\t"
2101
        "mulps     (%3,%0), %%xmm0 \n\t"
2102
        "mulps   16(%3,%0), %%xmm1 \n\t"
2103 2103
        "movaps  %%xmm0,   (%1,%0) \n\t"
2104 2104
        "movaps  %%xmm1, 16(%1,%0) \n\t"
2105 2105
        "sub  $32, %0 \n\t"
2106 2106
        "jge 1b \n\t"
2107 2107
        :"+r"(i)
2108
        :"r"(dst), "r"(src)
2108
        :"r"(dst), "r"(src0), "r"(src1)
2109 2109
        :"memory"
2110 2110
    );
2111 2111
}

Also available in: Unified diff