Revision 594ff7cc
libswscale/rgb2rgb_template.c  

23  23 
* along with FFmpeg; if not, write to the Free Software 
24  24 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 
25  25 
* 
26 
* the C code (not assembly, mmx, ...) of this file can be used


27 
* under the LGPL license too


26 
* The C code (not assembly, mmx, ...) of this file can be used


27 
* under the LGPL license.


28  28 
*/ 
29  29  
30  30 
#include <stddef.h> 
31  31 
#include <inttypes.h> /* for __WORDSIZE */ 
32  32  
33  33 
#ifndef __WORDSIZE 
34 
// #warning You have misconfigured system and probably will lose performance!


34 
// #warning You have a misconfigured system and will probably lose performance!


35  35 
#define __WORDSIZE MP_WORDSIZE 
36  36 
#endif 
37  37  
...  ...  
68  68 
#endif 
69  69  
70  70 
#ifdef HAVE_3DNOW 
71 
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */


71 
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */


72  72 
#define EMMS "femms" 
73  73 
#else 
74  74 
#define EMMS "emms" 
...  ...  
232  232 
Original by Strepto/Astral 
233  233 
ported to gcc & bugfixed : A'rpi 
234  234 
MMX2, 3DNOW optimization by Nick Kurshev 
235 
32bit c version, and and&add trick by Michael Niedermayer


235 
32 bit C version, and and&add trick by Michael Niedermayer


236  236 
*/ 
237  237 
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,long src_size) 
238  238 
{ 
...  ...  
350  350 
end = s + src_size; 
351  351 
#ifdef HAVE_MMX 
352  352 
mm_end = end  15; 
353 
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)


353 
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)


354  354 
asm volatile( 
355  355 
"movq %3, %%mm5 \n\t" 
356  356 
"movq %4, %%mm6 \n\t" 
...  ...  
509  509 
end = s + src_size; 
510  510 
#ifdef HAVE_MMX 
511  511 
mm_end = end  15; 
512 
#if 1 //is faster only if multiplies are reasonable fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)


512 
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)


513  513 
asm volatile( 
514  514 
"movq %3, %%mm5 \n\t" 
515  515 
"movq %4, %%mm6 \n\t" 
...  ...  
910  910 
} 
911  911  
912  912 
/* 
913 
I use here less accurate approximation by simply 

914 
left-shifting the input 

915 
value and filling the low order bits with 

916 
zeroes. This method improves png's 

917 
compression but this scheme cannot reproduce white exactly, since it does not 

918 
generate an all-ones maximum value; the net effect is to darken the 

913 
I use a less accurate approximation here by simply left-shifting the input 

914 
value and filling the low order bits with zeroes. This method improves PNG 

915 
compression but this scheme cannot reproduce white exactly, since it does 

916 
not generate an all-ones maximum value; the net effect is to darken the 

919  917 
image slightly. 
920  918  
921  919 
The better method should be "left bit replication": 
...  ...  
1271  1269 
#endif 
1272  1270 
while (s < end) 
1273  1271 
{ 
1274 
#if 0 //slightly slower on athlon


1272 
#if 0 //slightly slower on Athlon


1275  1273 
int bgr= *s++; 
1276  1274 
*((uint32_t*)d)++ = ((bgr&0x1F)<<3) + ((bgr&0x3E0)<<6) + ((bgr&0x7C00)<<9); 
1277  1275 
#else 
...  ...  
1507  1505 
for (y=0; y<height; y++) 
1508  1506 
{ 
1509  1507 
#ifdef HAVE_MMX 
1510 
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)


1508 
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)


1511  1509 
asm volatile( 
1512  1510 
"xor %%"REG_a", %%"REG_a" \n\t" 
1513  1511 
ASMALIGN(4) 
...  ...  
1639  1637 
} 
1640  1638  
1641  1639 
/** 
1642 
* 

1643 
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a 

1644 
* problem for anyone then tell me, and ill fix it) 

1640 
* Height should be a multiple of 2 and width should be a multiple of 16 (if 

1641 
* this is a problem for anyone then tell me, and I will fix it). 

1645  1642 
*/ 
1646  1643 
static inline void RENAME(yv12toyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 
1647  1644 
long width, long height, 
...  ...  
1660  1657 
for (y=0; y<height; y++) 
1661  1658 
{ 
1662  1659 
#ifdef HAVE_MMX 
1663 
//FIXME handle 2 lines a once (fewer prefetch, reuse some chrom, but very likely limited by mem anyway)


1660 
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)


1664  1661 
asm volatile( 
1665  1662 
"xor %%"REG_a", %%"REG_a" \n\t" 
1666  1663 
ASMALIGN(4) 
...  ...  
1695  1692 
: "%"REG_a 
1696  1693 
); 
1697  1694 
#else 
1698 
//FIXME adapt the alpha asm code from yv12->yuy2


1695 
//FIXME adapt the Alpha ASM code from yv12->yuy2


1699  1696  
1700  1697 
#if __WORDSIZE >= 64 
1701  1698 
int i; 
...  ...  
1746  1743 
} 
1747  1744  
1748  1745 
/** 
1749 
* 

1750 
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a 

1751 
* problem for anyone then tell me, and ill fix it) 

1746 
* Height should be a multiple of 2 and width should be a multiple of 16 (if 

1747 
* this is a problem for anyone then tell me, and I will fix it). 

1752  1748 
*/ 
1753  1749 
static inline void RENAME(yv12touyvy)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 
1754  1750 
long width, long height, 
...  ...  
1759  1755 
} 
1760  1756  
1761  1757 
/** 
1762 
* 

1763 
* width should be a multiple of 16 

1758 
* Width should be a multiple of 16. 

1764  1759 
*/ 
1765  1760 
static inline void RENAME(yuv422ptoyuy2)(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, uint8_t *dst, 
1766  1761 
long width, long height, 
...  ...  
1770  1765 
} 
1771  1766  
1772  1767 
/** 
1773 
* 

1774 
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a 

1775 
* problem for anyone then tell me, and ill fix it) 

1768 
* Height should be a multiple of 2 and width should be a multiple of 16 (if 

1769 
* this is a problem for anyone then tell me, and I will fix it). 

1776  1770 
*/ 
1777  1771 
static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 
1778  1772 
long width, long height, 
...  ...  
2007  2001 
} 
2008  2002  
2009  2003 
/** 
2010 
* 

2011 
* height should be a multiple of 2 and width should be a multiple of 16 (if this is a


2012 
* problem for anyone then tell me, and ill fix it)


2013 
* chrominance data is only taken from every secound line others are ignored FIXME write HQ version


2004 
* Height should be a multiple of 2 and width should be a multiple of 16 (if


2005 
* this is a problem for anyone then tell me, and I will fix it).


2006 
* Chrominance data is only taken from every second line, others are ignored.


2007 
* FIXME: Write HQ version.


2014  2008 
*/ 
2015  2009 
static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 
2016  2010 
long width, long height, 
...  ...  
2133  2127 
} 
2134  2128  
2135  2129 
/** 
2136 
* 

2137 
* height should be a multiple of 2 and width should be a multiple of 2 (if this is a 

2138 
* problem for anyone then tell me, and ill fix it) 

2139 
* chrominance data is only taken from every secound line others are ignored in the C version FIXME write HQ version 

2130 
* Height should be a multiple of 2 and width should be a multiple of 2 (if 

2131 
* this is a problem for anyone then tell me, and I will fix it). 

2132 
* Chrominance data is only taken from every second line, 

2133 
* others are ignored in the C version. 

2134 
* FIXME: Write HQ version. 

2140  2135 
*/ 
2141  2136 
static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, 
2142  2137 
long width, long height, 
Also available in: Unified diff