Revision da3b92c8 postproc/yuv2rgb_template.c

View differences:

--- a/postproc/yuv2rgb_template.c
+++ b/postproc/yuv2rgb_template.c
@@ -23,7 +23,7 @@
  * along with GNU Make; see the file COPYING. If not, write to
  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
  *
- * 15 and 24 bpp support from Michael Niedermayer (michaelni@gmx.at)
+ * 15,24 bpp and dithering from Michael Niedermayer (michaelni@gmx.at)
  */
 
 #include <stdio.h>
@@ -38,6 +38,8 @@
 #include "rgb2rgb.h"
 #include "../mmx_defs.h"
 
+#define DITHER1XBPP
+
 /* hope these constant values are cache line aligned */
 uint64_t __attribute__((aligned(8))) mmx_80w = 0x0080008000800080;
 uint64_t __attribute__((aligned(8))) mmx_10w = 0x1010101010101010;
@@ -58,6 +60,22 @@
 uint64_t __attribute__((aligned(8))) M24B=   0xFF0000FF0000FF00LL;
 uint64_t __attribute__((aligned(8))) M24C=   0x0000FF0000FF0000LL;
 
+// the volatile is required because gcc otherwise optimizes some writes away not knowing that these
+// are read in the asm block
+volatile uint64_t __attribute__((aligned(8))) b5Dither;
+volatile uint64_t __attribute__((aligned(8))) g5Dither;
+volatile uint64_t __attribute__((aligned(8))) g6Dither;
+volatile uint64_t __attribute__((aligned(8))) r5Dither;
+
+uint64_t __attribute__((aligned(8))) dither4[2]={
+	0x0103010301030103LL,
+	0x0200020002000200LL,};
+
+uint64_t __attribute__((aligned(8))) dither8[2]={
+	0x0602060206020602LL,
+	0x0004000400040004LL,};
+
+
 
 #define YUV2RGB \
 		     /* Do the multiply part of the conversion for even and odd pixels,
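The two tables added here are the rows of small 2x2 ordered-dither patterns, one row per scanline: dither4 carries values 0..3 (the range a 6-bit channel loses to truncation), dither8 the coarser 0..6 steps for 5-bit channels. Adding the pattern before the low bits are masked off turns banding into fine checkerboard noise. A minimal scalar sketch of the technique follows; the helper and matrix names are hypothetical, and the per-pixel values are the byte lanes of dither4/dither8 assuming little-endian lane order:

#include <stdint.h>

static const uint8_t dither_6bit[2][2] = { {3, 1}, {0, 2} };  /* dither4 rows */
static const uint8_t dither_5bit[2][2] = { {2, 6}, {4, 0} };  /* dither8 rows */

/* Reduce an 8-bit sample to `bits` significant bits with ordered dithering:
 * a saturating add of the dither value followed by truncation -- the scalar
 * equivalent of what paddusb + pand do per byte lane in the asm below. */
static uint8_t dither_reduce(uint8_t sample, int bits, int x, int y)
{
    int d = (bits == 6) ? dither_6bit[y & 1][x & 1]
                        : dither_5bit[y & 1][x & 1];
    int v = sample + d;
    if (v > 255)
        v = 255;                                 /* paddusb saturates */
    return (uint8_t)(v & (0xFF << (8 - bits)));  /* pand truncates */
}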
@@ -150,6 +168,11 @@
 	uint8_t *_pu = pu;
 	uint8_t *_pv = pv;
 
+	b5Dither= dither8[y&1];
+	g6Dither= dither4[y&1];
+	g5Dither= dither8[y&1];
+	r5Dither= dither8[(y+1)&1];
+
 	/* load data for start of next scan line */
 	__asm__ __volatile__ (
 		 "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
@@ -171,6 +194,11 @@
 */
 YUV2RGB
 
+#ifdef DITHER1XBPP
+			"paddusb b5Dither, %%mm0;"
+			"paddusb g6Dither, %%mm2;"
+			"paddusb r5Dither, %%mm1;"
+#endif
 		     /* mask unneeded bits off */
 		     "pand mmx_redmask, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
 		     "pand mmx_grnmask, %%mm2;" /* g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0 */
@@ -241,6 +269,11 @@
 	uint8_t *_pu = pu;
 	uint8_t *_pv = pv;
 
+	b5Dither= dither8[y&1];
+	g6Dither= dither4[y&1];
+	g5Dither= dither8[y&1];
+	r5Dither= dither8[(y+1)&1];
+
 	/* load data for start of next scan line */
 	__asm__ __volatile__ (
 		 "movd (%1), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
@@ -256,6 +289,12 @@
 	    __asm__ __volatile__ (
 YUV2RGB
 
+#ifdef DITHER1XBPP
+			"paddusb b5Dither, %%mm0	\n\t"
+			"paddusb g5Dither, %%mm2	\n\t"
+			"paddusb r5Dither, %%mm1	\n\t"
+#endif
+
 		     /* mask unneeded bits off */
 		     "pand mmx_redmask, %%mm0;" /* b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0 */
 		     "pand mmx_redmask, %%mm2;" /* g7g6g5g4 g3_0_0_0 g7g6g5g4 g3_0_0_0 */
