Revision b32c9ca9

View differences:

libavcodec/x86/h264_qpel_mmx.c
299 299
    int h=8;\
300 300
    __asm__ volatile(\
301 301
        "pxor %%mm7, %%mm7          \n\t"\
302
        "movq %0, %%mm6             \n\t"\
303
        :: "m"(ff_pw_5)\
304
    );\
305
    do{\
306
    __asm__ volatile(\
302
        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
303
        "1:                         \n\t"\
307 304
        "movq    (%0), %%mm0        \n\t"\
308 305
        "movq   1(%0), %%mm2        \n\t"\
309 306
        "movq %%mm0, %%mm1          \n\t"\
......
336 333
        "punpcklbw %%mm7, %%mm5     \n\t"\
337 334
        "paddw %%mm3, %%mm2         \n\t"\
338 335
        "paddw %%mm5, %%mm4         \n\t"\
339
        "movq %5, %%mm5             \n\t"\
336
        "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\
340 337
        "paddw %%mm5, %%mm2         \n\t"\
341 338
        "paddw %%mm5, %%mm4         \n\t"\
342 339
        "paddw %%mm2, %%mm0         \n\t"\
......
347 344
        "packuswb %%mm1, %%mm0      \n\t"\
348 345
        PAVGB" %%mm4, %%mm0         \n\t"\
349 346
        OP(%%mm0, (%1),%%mm5, q)\
350
        "add %4, %0                 \n\t"\
351
        "add %4, %1                 \n\t"\
352
        "add %3, %2                 \n\t"\
353
        : "+a"(src), "+c"(dst), "+d"(src2)\
354
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
355
          "m"(ff_pw_16)\
347
        "add %5, %0                 \n\t"\
348
        "add %5, %1                 \n\t"\
349
        "add %4, %2                 \n\t"\
350
        "decl %3                    \n\t"\
351
        "jg 1b                      \n\t"\
352
        : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
353
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
356 354
        : "memory"\
357 355
    );\
358
    }while(--h);\
359 356
}\
360 357
\
361 358
static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
......
697 694
    int h=8;\
698 695
    __asm__ volatile(\
699 696
        "pxor %%xmm7, %%xmm7        \n\t"\
700
        "movdqa %0, %%xmm6          \n\t"\
701
        :: "m"(ff_pw_5)\
702
    );\
703
    do{\
704
    __asm__ volatile(\
697
        "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\
698
        "1:                         \n\t"\
705 699
        "lddqu   -2(%0), %%xmm1     \n\t"\
706 700
        "movdqa  %%xmm1, %%xmm0     \n\t"\
707 701
        "punpckhbw %%xmm7, %%xmm1   \n\t"\
......
721 715
        "psllw   $2,     %%xmm2     \n\t"\
722 716
        "movq    (%2),   %%xmm3     \n\t"\
723 717
        "psubw   %%xmm1, %%xmm2     \n\t"\
724
        "paddw   %5,     %%xmm0     \n\t"\
718
        "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\
725 719
        "pmullw  %%xmm6, %%xmm2     \n\t"\
726 720
        "paddw   %%xmm0, %%xmm2     \n\t"\
727 721
        "psraw   $5,     %%xmm2     \n\t"\
728 722
        "packuswb %%xmm2, %%xmm2    \n\t"\
729 723
        "pavgb   %%xmm3, %%xmm2     \n\t"\
730 724
        OP(%%xmm2, (%1), %%xmm4, q)\
731
        "add %4, %0                 \n\t"\
732
        "add %4, %1                 \n\t"\
733
        "add %3, %2                 \n\t"\
734
        : "+a"(src), "+c"(dst), "+d"(src2)\
735
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
736
          "m"(ff_pw_16)\
725
        "add %5, %0                 \n\t"\
726
        "add %5, %1                 \n\t"\
727
        "add %4, %2                 \n\t"\
728
        "decl %3                    \n\t"\
729
        "jg 1b                      \n\t"\
730
        : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
731
        : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
737 732
        : "memory"\
738 733
    );\
739
    }while(--h);\
740 734
}\
741 735
QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
742 736
\

Also available in: Unified diff