Revision e98750c3 libavcodec/i386/dsputil_mmx.c

View differences:

libavcodec/i386/dsputil_mmx.c
2066 2066
    );
2067 2067
}
2068 2068

  
2069
/**
 * Convert an array of floats to signed 16-bit integers using SSE2.
 *
 * Every loop iteration handles 8 samples: two cvtps2dq conversions of
 * 4 floats each (rounded according to the current MXCSR rounding mode),
 * one packssdw that narrows the 8 int32 results to int16 with signed
 * saturation, and one aligned 16-byte store.
 *
 * @param dst destination buffer; must be 16-byte aligned (movdqa store)
 * @param src source buffer; must be 16-byte aligned (cvtps2dq memory operand)
 * @param len number of samples to convert; must be a multiple of 8.
 *            Values <= 0 are treated as an empty request.
 */
static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
    /* The loop is do-while shaped: without this guard a zero length would
     * still read 8 floats and write 16 bytes past the requested range. */
    if(len <= 0)
        return;

    /* __asm__ instead of asm so the block also builds in strict ISO modes. */
    __asm__ volatile(
        /* len becomes the output size in bytes (2 bytes per sample) */
        "add        %0          , %0        \n\t"
        /* src += 4*samples bytes: point at the end of the input */
        "lea         (%2,%0,2)  , %2        \n\t"
        /* dst += 2*samples bytes: point at the end of the output */
        "add        %0          , %1        \n\t"
        /* negative byte offset that counts up towards zero */
        "neg        %0                      \n\t"
        "1:                                 \n\t"
        "cvtps2dq    (%2,%0,2)  , %%xmm0    \n\t"
        "cvtps2dq  16(%2,%0,2)  , %%xmm1    \n\t"
        "packssdw   %%xmm1      , %%xmm0    \n\t"
        "movdqa     %%xmm0      ,  (%1,%0)  \n\t"
        "add        $16         , %0        \n\t"
        /* keep looping while the offset is still negative */
        " js 1b                             \n\t"
        :"+r"(len), "+r"(dst), "+r"(src)
        :
        /* The asm writes through dst and overwrites xmm0/xmm1; tell the
         * compiler so it neither caches memory nor keeps values there. */
        :"memory", "xmm0", "xmm1"
    );
}
2085

  
2069 2086
extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
2070 2087
extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
2071 2088
extern void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
......
2441 2458
            c->vector_fmul_reverse = vector_fmul_reverse_sse;
2442 2459
            c->vector_fmul_add_add = vector_fmul_add_add_sse;
2443 2460
        }
2461
        if(mm_flags & MM_SSE2){
2462
            c->float_to_int16 = float_to_int16_sse2;
2463
        }
2444 2464
        if(mm_flags & MM_3DNOW)
2445 2465
            c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
2446 2466
    }

Also available in: Unified diff