Revision 11f18faf libavcodec/i386/dsputil_mmx.c

View differences:

libavcodec/i386/dsputil_mmx.c
453 453
        return sum;
454 454
}
455 455

  
456
/**
 * Add src to dst byte-wise: dst[i] += src[i] for every i in [0, w).
 * Each byte sum wraps modulo 256.
 *
 * Complete 16-byte chunks are handled by the MMX loop; the remaining
 * (at most 15) bytes are handled by the scalar loop that follows it.
 * A w that is zero or negative leaves dst untouched.
 *
 * The function uses %mm0/%mm1 and does not execute emms itself.
 *
 * @param dst destination buffer, read and written, at least w bytes
 * @param src source buffer, read only, at least w bytes
 * @param w   number of bytes to process
 */
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
    /* The index shares an addressing expression with the pointers, so it
     * must have pointer width; "long" is assumed to match it here
     * (true for ILP32 and LP64 x86 targets). */
    long i = 0;
    __asm__ volatile(
        /* Test the bound before the first iteration so that w < 16
         * never touches memory in the vector loop. */
        "jmp 2f                         \n\t"
        "1:                             \n\t"
        "movq  (%1, %0), %%mm0          \n\t"
        "movq  (%2, %0), %%mm1          \n\t"
        "paddb %%mm0, %%mm1             \n\t"
        "movq %%mm1, (%2, %0)           \n\t"
        "movq 8(%1, %0), %%mm0          \n\t"
        "movq 8(%2, %0), %%mm1          \n\t"
        "paddb %%mm0, %%mm1             \n\t"
        "movq %%mm1, 8(%2, %0)          \n\t"
        "add $16, %0                    \n\t"
        "2:                             \n\t"
        "cmp %3, %0                     \n\t"
        /* Signed compare: w-15 is negative for small w and must not be
         * interpreted as a huge unsigned bound. */
        " jl 1b                         \n\t"
        : "+r" (i)
        : "r"(src), "r"(dst), "r"((long)w - 15)
        /* The asm loads from src/dst and stores to dst behind the
         * compiler's back, and it modifies the flags. */
        : "memory", "cc"
    );
    /* Scalar tail for the bytes left over after the 16-byte chunks. */
    for(; i<w; i++)
        dst[i] += src[i];
}
477

  
478
/**
 * Byte-wise difference: dst[i] = src1[i] - src2[i] for every i in [0, w).
 * Each byte difference wraps modulo 256.
 *
 * Complete 16-byte chunks are handled by the MMX loop; the remaining
 * (at most 15) bytes are handled by the scalar loop that follows it.
 * A w that is zero or negative leaves dst untouched.
 *
 * The function uses %mm0/%mm1 and does not execute emms itself.
 *
 * @param dst  destination buffer, written, at least w bytes
 * @param src1 minuend buffer, read only, at least w bytes
 * @param src2 subtrahend buffer, read only, at least w bytes
 * @param w    number of bytes to process
 */
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    /* The index shares an addressing expression with the pointers, so it
     * must have pointer width; "long" is assumed to match it here
     * (true for ILP32 and LP64 x86 targets). */
    long i = 0;
    __asm__ volatile(
        /* Test the bound before the first iteration so that w < 16
         * never touches memory in the vector loop. */
        "jmp 2f                         \n\t"
        "1:                             \n\t"
        "movq  (%2, %0), %%mm0          \n\t"
        "movq  (%1, %0), %%mm1          \n\t"
        "psubb %%mm0, %%mm1             \n\t"
        "movq %%mm1, (%3, %0)           \n\t"
        "movq 8(%2, %0), %%mm0          \n\t"
        "movq 8(%1, %0), %%mm1          \n\t"
        "psubb %%mm0, %%mm1             \n\t"
        "movq %%mm1, 8(%3, %0)          \n\t"
        "add $16, %0                    \n\t"
        "2:                             \n\t"
        "cmp %4, %0                     \n\t"
        /* Signed compare: w-15 is negative for small w and must not be
         * interpreted as a huge unsigned bound. */
        " jl 1b                         \n\t"
        : "+r" (i)
        : "r"(src1), "r"(src2), "r"(dst), "r"((long)w - 15)
        /* The asm loads from src1/src2 and stores to dst behind the
         * compiler's back, and it modifies the flags. */
        : "memory", "cc"
    );
    /* Scalar tail for the bytes left over after the 16-byte chunks. */
    for(; i<w; i++)
        dst[i] = src1[i] - src2[i];
}
499

  
500

  
456 501
#if 0
457 502
/* No-op: performs no work and simply returns to its caller. */
static void just_return()
{
}
458 503
#endif
......
531 576
        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
532 577
        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
533 578
        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
579
        
580
        c->add_bytes= add_bytes_mmx;
581
        c->diff_bytes= diff_bytes_mmx;
534 582

  
535 583
        if (mm_flags & MM_MMXEXT) {
536 584
            c->pix_abs16x16     = pix_abs16x16_mmx2;

Also available in: Unified diff