Revision 594ff7cc

View differences:

libswscale/rgb2rgb_template.c
23 23
 * along with FFmpeg; if not, write to the Free Software
24 24
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 25
 *
26
 * the C code (not assembly, mmx, ...) of this file can be used
27
 * under the LGPL license too
26
 * The C code (not assembly, mmx, ...) of this file can be used
27
 * under the LGPL license.
28 28
 */
29 29

  
30 30
#include <stddef.h>
31 31
#include <inttypes.h> /* for __WORDSIZE */
32 32

  
33 33
#ifndef __WORDSIZE
34
// #warning You have misconfigured system and probably will lose performance!
34
// #warning You have a misconfigured system and will probably lose performance!
35 35
#define __WORDSIZE MP_WORDSIZE
36 36
#endif
37 37

  
......
68 68
#endif
69 69

  
70 70
#ifdef HAVE_3DNOW
71
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
71
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
72 72
#define EMMS     "femms"
73 73
#else
74 74
#define EMMS     "emms"
......
232 232
 Original by Strepto/Astral
233 233
 ported to gcc & bugfixed : A'rpi
234 234
 MMX2, 3DNOW optimization by Nick Kurshev
235
 32bit c version, and and&add trick by Michael Niedermayer
235
 32 bit C version, and and&add trick by Michael Niedermayer
236 236
*/
237 237
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size)
238 238
{
......
350 350
    end = s + src_size;
351 351
#ifdef HAVE_MMX
352 352
    mm_end = end - 15;
353
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
353
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
354 354
    asm volatile(
355 355
    "movq           %3, %%mm5   \n\t"
356 356
    "movq           %4, %%mm6   \n\t"
......
509 509
    end = s + src_size;
510 510
#ifdef HAVE_MMX
511 511
    mm_end = end - 15;
512
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
512
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
513 513
    asm volatile(
514 514
    "movq           %3, %%mm5   \n\t"
515 515
    "movq           %4, %%mm6   \n\t"
......
910 910
}
911 911

  
912 912
/*
913
  I use here less accurate approximation by simply
914
 left-shifting the input
915
  value and filling the low order bits with
916
 zeroes. This method improves png's
917
  compression but this scheme cannot reproduce white exactly, since it does not
918
  generate an all-ones maximum value; the net effect is to darken the
913
  I use a less accurate approximation here by simply left-shifting the input
914
  value and filling the low order bits with zeroes. This method improves PNG
915
  compression but this scheme cannot reproduce white exactly, since it does
916
  not generate an all-ones maximum value; the net effect is to darken the
919 917
  image slightly.
920 918

  
921 919
  The better method should be "left bit replication":
......
1271 1269
#endif
1272 1270
    while (s < end)
1273 1271
    {
1274
#if 0 //slightly slower on athlon
1272
#if 0 //slightly slower on Athlon
1275 1273
        int bgr= *s++;
1276 1274
        *((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9);
1277 1275
#else
......
1507 1505
    for (y=0; y<height; y++)
1508 1506
    {
1509 1507
#ifdef HAVE_MMX
1510
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
1508
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1511 1509
        asm volatile(
1512 1510
        "xor                 %%"REG_a", %%"REG_a"   \n\t"
1513 1511
        ASMALIGN(4)
......
1639 1637
}
1640 1638

  
1641 1639
/**
1642
 *
1643
 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
1644
 * problem for anyone then tell me, and ill fix it)
1640
 * Height should be a multiple of 2 and width should be a multiple of 16 (if
1641
 * this is a problem for anyone then tell me, and I will fix it).
1645 1642
 */
1646 1643
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1647 1644
                                      long width, long height,
......
1660 1657
    for (y=0; y<height; y++)
1661 1658
    {
1662 1659
#ifdef HAVE_MMX
1663
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)
1660
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
1664 1661
        asm volatile(
1665 1662
        "xor                %%"REG_a", %%"REG_a"    \n\t"
1666 1663
        ASMALIGN(4)
......
1695 1692
        : "%"REG_a
1696 1693
        );
1697 1694
#else
1698
//FIXME adapt the alpha asm code from yv12->yuy2
1695
//FIXME adapt the Alpha ASM code from yv12->yuy2
1699 1696

  
1700 1697
#if __WORDSIZE >= 64
1701 1698
        int i;
......
1746 1743
}
1747 1744

  
1748 1745
/**
1749
 *
1750
 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
1751
 * problem for anyone then tell me, and ill fix it)
1746
 * Height should be a multiple of 2 and width should be a multiple of 16 (if
1747
 * this is a problem for anyone then tell me, and I will fix it).
1752 1748
 */
1753 1749
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1754 1750
                                      long width, long height,
......
1759 1755
}
1760 1756

  
1761 1757
/**
1762
 *
1763
 * width should be a multiple of 16
1758
 * Width should be a multiple of 16.
1764 1759
 */
1765 1760
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst,
1766 1761
                                         long width, long height,
......
1770 1765
}
1771 1766

  
1772 1767
/**
1773
 *
1774
 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
1775
 * problem for anyone then tell me, and ill fix it)
1768
 * Height should be a multiple of 2 and width should be a multiple of 16 (if
1769
 * this is a problem for anyone then tell me, and I will fix it).
1776 1770
 */
1777 1771
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
1778 1772
                                      long width, long height,
......
2007 2001
}
2008 2002

  
2009 2003
/**
2010
 *
2011
 * height should be a multiple of 2 and width should be a multiple of 16 (if this is a
2012
 * problem for anyone then tell me, and ill fix it)
2013
 * chrominance data is only taken from every secound line others are ignored FIXME write HQ version
2004
 * Height should be a multiple of 2 and width should be a multiple of 16 (if
2005
 * this is a problem for anyone then tell me, and I will fix it).
2006
 * Chrominance data is only taken from every second line, others are ignored.
2007
 * FIXME: Write HQ version.
2014 2008
 */
2015 2009
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2016 2010
                                      long width, long height,
......
2133 2127
}
2134 2128

  
2135 2129
/**
2136
 *
2137
 * height should be a multiple of 2 and width should be a multiple of 2 (if this is a
2138
 * problem for anyone then tell me, and ill fix it)
2139
 * chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version
2130
 * Height should be a multiple of 2 and width should be a multiple of 2 (if
2131
 * this is a problem for anyone then tell me, and I will fix it).
2132
 * Chrominance data is only taken from every second line,
2133
 * others are ignored in the C version.
2134
 * FIXME: Write HQ version.
2140 2135
 */
2141 2136
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
2142 2137
                                       long width, long height,

Also available in: Unified diff