Revision 559738ef

libavcodec/x86/dsputil_mmx.c
@@ -2012,8 +2012,8 @@
         "1: \n"\
         "movaps  (%3,%0), %%xmm0 \n"\
  stereo("movaps   %%xmm0, %%xmm1 \n")\
-        "mulps    %%xmm6, %%xmm0 \n"\
- stereo("mulps    %%xmm7, %%xmm1 \n")\
+        "mulps    %%xmm4, %%xmm0 \n"\
+ stereo("mulps    %%xmm5, %%xmm1 \n")\
         "lea 1024(%3,%0), %1 \n"\
         "mov %5, %2 \n"\
         "2: \n"\
@@ -2051,12 +2051,12 @@
         __asm__ volatile(
             "1: \n"
             "sub $8, %0 \n"
-            "movss     (%2,%0), %%xmm6 \n"
-            "movss    4(%2,%0), %%xmm7 \n"
-            "shufps $0, %%xmm6, %%xmm6 \n"
-            "shufps $0, %%xmm7, %%xmm7 \n"
-            "movaps %%xmm6,   (%1,%0,4) \n"
-            "movaps %%xmm7, 16(%1,%0,4) \n"
+            "movss     (%2,%0), %%xmm4 \n"
+            "movss    4(%2,%0), %%xmm5 \n"
+            "shufps $0, %%xmm4, %%xmm4 \n"
+            "shufps $0, %%xmm5, %%xmm5 \n"
+            "movaps %%xmm4,   (%1,%0,4) \n"
+            "movaps %%xmm5, 16(%1,%0,4) \n"
             "jg 1b \n"
             :"+&r"(j)
             :"r"(matrix_simd), "r"(matrix)
libavcodec/x86/dsputilenc_mmx.c
@@ -61,16 +61,16 @@
 static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
 {
     __asm__ volatile(
-        "pxor %%xmm7,      %%xmm7         \n\t"
+        "pxor %%xmm4,      %%xmm4         \n\t"
         "movq (%0),        %%xmm0         \n\t"
         "movq (%0, %2),    %%xmm1         \n\t"
         "movq (%0, %2,2),  %%xmm2         \n\t"
         "movq (%0, %3),    %%xmm3         \n\t"
         "lea (%0,%2,4), %0                \n\t"
-        "punpcklbw %%xmm7, %%xmm0         \n\t"
-        "punpcklbw %%xmm7, %%xmm1         \n\t"
-        "punpcklbw %%xmm7, %%xmm2         \n\t"
-        "punpcklbw %%xmm7, %%xmm3         \n\t"
+        "punpcklbw %%xmm4, %%xmm0         \n\t"
+        "punpcklbw %%xmm4, %%xmm1         \n\t"
+        "punpcklbw %%xmm4, %%xmm2         \n\t"
+        "punpcklbw %%xmm4, %%xmm3         \n\t"
         "movdqa %%xmm0,      (%1)         \n\t"
         "movdqa %%xmm1,    16(%1)         \n\t"
         "movdqa %%xmm2,    32(%1)         \n\t"
@@ -79,10 +79,10 @@
         "movq (%0, %2),    %%xmm1         \n\t"
         "movq (%0, %2,2),  %%xmm2         \n\t"
         "movq (%0, %3),    %%xmm3         \n\t"
-        "punpcklbw %%xmm7, %%xmm0         \n\t"
-        "punpcklbw %%xmm7, %%xmm1         \n\t"
-        "punpcklbw %%xmm7, %%xmm2         \n\t"
-        "punpcklbw %%xmm7, %%xmm3         \n\t"
+        "punpcklbw %%xmm4, %%xmm0         \n\t"
+        "punpcklbw %%xmm4, %%xmm1         \n\t"
+        "punpcklbw %%xmm4, %%xmm2         \n\t"
+        "punpcklbw %%xmm4, %%xmm3         \n\t"
         "movdqa %%xmm0,    64(%1)         \n\t"
         "movdqa %%xmm1,    80(%1)         \n\t"
         "movdqa %%xmm2,    96(%1)         \n\t"
