Revision 6e1c66bc postproc/swscale.c

View differences:

postproc/swscale.c
145 145
#define MIN(a,b) ((a) > (b) ? (b) : (a))
146 146
#define MAX(a,b) ((a) < (b) ? (b) : (a))
147 147

  
148
#ifdef ARCH_X86
148
#if defined(ARCH_X86) || defined(ARCH_X86_64)
149 149
static uint64_t attribute_used __attribute__((aligned(8))) bF8=       0xF8F8F8F8F8F8F8F8LL;
150 150
static uint64_t attribute_used __attribute__((aligned(8))) bFC=       0xFCFCFCFCFCFCFCFCLL;
151 151
static uint64_t __attribute__((aligned(8))) w10=       0x0010001000100010LL;
......
204 204
extern const uint8_t dither_8x8_73[8][8];
205 205
extern const uint8_t dither_8x8_220[8][8];
206 206

  
207
#ifdef ARCH_X86
207
#if defined(ARCH_X86) || defined(ARCH_X86_64)
208 208
void in_asm_used_var_warning_killer()
209 209
{
210 210
 volatile int i= bF8+bFC+w10+
......
679 679
#endif //HAVE_ALTIVEC
680 680
#endif //ARCH_POWERPC
681 681

  
682
#ifdef ARCH_X86
682
#if defined(ARCH_X86) || defined(ARCH_X86_64)
683 683

  
684 684
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
685 685
#define COMPILE_MMX
......
692 692
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
693 693
#define COMPILE_3DNOW
694 694
#endif
695
#endif //ARCH_X86
695
#endif //ARCH_X86 || ARCH_X86_64
696 696

  
697 697
#undef HAVE_MMX
698 698
#undef HAVE_MMX2
......
716 716
#endif
717 717
#endif //ARCH_POWERPC
718 718

  
719
#ifdef ARCH_X86
719
#if defined(ARCH_X86) || defined(ARCH_X86_64)
720 720

  
721 721
//X86 versions
722 722
/*
......
758 758
#include "swscale_template.c"
759 759
#endif
760 760

  
761
#endif //ARCH_X86
761
#endif //ARCH_X86 || ARCH_X86_64
762 762

  
763 763
// minor note: the HAVE_xyz is messed up after that line so don't use it
764 764

  
......
783 783
	int minFilterSize;
784 784
	double *filter=NULL;
785 785
	double *filter2=NULL;
786
#ifdef ARCH_X86
786
#if defined(ARCH_X86) || defined(ARCH_X86_64)
787 787
	if(flags & SWS_CPU_CAPS_MMX)
788 788
		asm volatile("emms\n\t"::: "memory"); //FIXME this shouldn't be required but it IS (even for non-MMX versions)
789 789
#endif
......
1142 1142
	free(filter);
1143 1143
}
1144 1144

  
1145
#ifdef ARCH_X86
1145
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1146 1146
static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1147 1147
{
1148 1148
	uint8_t *fragmentA;
1149
	int imm8OfPShufW1A;
1150
	int imm8OfPShufW2A;
1151
	int fragmentLengthA;
1149
	long imm8OfPShufW1A;
1150
	long imm8OfPShufW2A;
1151
	long fragmentLengthA;
1152 1152
	uint8_t *fragmentB;
1153
	int imm8OfPShufW1B;
1154
	int imm8OfPShufW2B;
1155
	int fragmentLengthB;
1153
	long imm8OfPShufW1B;
1154
	long imm8OfPShufW2B;
1155
	long fragmentLengthB;
1156 1156
	int fragmentPos;
1157 1157

  
1158 1158
	int xpos, i;
......
1165 1165
		"jmp 9f				\n\t"
1166 1166
	// Begin
1167 1167
		"0:				\n\t"
1168
		"movq (%%edx, %%eax), %%mm3	\n\t" 
1169
		"movd (%%ecx, %%esi), %%mm0	\n\t" 
1170
		"movd 1(%%ecx, %%esi), %%mm1	\n\t"
1168
		"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1169
		"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1170
		"movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t"
1171 1171
		"punpcklbw %%mm7, %%mm1		\n\t"
1172 1172
		"punpcklbw %%mm7, %%mm0		\n\t"
1173 1173
		"pshufw $0xFF, %%mm1, %%mm1	\n\t"
......
1175 1175
		"pshufw $0xFF, %%mm0, %%mm0	\n\t"
1176 1176
		"2:				\n\t"
1177 1177
		"psubw %%mm1, %%mm0		\n\t"
1178
		"movl 8(%%ebx, %%eax), %%esi	\n\t"
1178
		"mov 8(%%"REG_b", %%"REG_a"), %%"REG_S"\n\t"
1179 1179
		"pmullw %%mm3, %%mm0		\n\t"
1180 1180
		"psllw $7, %%mm1		\n\t"
1181 1181
		"paddw %%mm1, %%mm0		\n\t"
1182 1182

  
1183
		"movq %%mm0, (%%edi, %%eax)	\n\t"
1183
		"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1184 1184

  
1185
		"addl $8, %%eax			\n\t"
1185
		"add $8, %%"REG_a"		\n\t"
1186 1186
	// End
1187 1187
		"9:				\n\t"
1188 1188
//		"int $3\n\t"
1189
		"leal 0b, %0			\n\t"
1190
		"leal 1b, %1			\n\t"
1191
		"leal 2b, %2			\n\t"
1192
		"decl %1			\n\t"
1193
		"decl %2			\n\t"
1194
		"subl %0, %1			\n\t"
1195
		"subl %0, %2			\n\t"
1196
		"leal 9b, %3			\n\t"
1197
		"subl %0, %3			\n\t"
1189
		"lea 0b, %0			\n\t"
1190
		"lea 1b, %1			\n\t"
1191
		"lea 2b, %2			\n\t"
1192
		"dec %1				\n\t"
1193
		"dec %2				\n\t"
1194
		"sub %0, %1			\n\t"
1195
		"sub %0, %2			\n\t"
1196
		"lea 9b, %3			\n\t"
1197
		"sub %0, %3			\n\t"
1198 1198

  
1199 1199

  
1200 1200
		:"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
......
1205 1205
		"jmp 9f				\n\t"
1206 1206
	// Begin
1207 1207
		"0:				\n\t"
1208
		"movq (%%edx, %%eax), %%mm3	\n\t" 
1209
		"movd (%%ecx, %%esi), %%mm0	\n\t" 
1208
		"movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 
1209
		"movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 
1210 1210
		"punpcklbw %%mm7, %%mm0		\n\t"
1211 1211
		"pshufw $0xFF, %%mm0, %%mm1	\n\t"
1212 1212
		"1:				\n\t"
1213 1213
		"pshufw $0xFF, %%mm0, %%mm0	\n\t"
1214 1214
		"2:				\n\t"
1215 1215
		"psubw %%mm1, %%mm0		\n\t"
1216
		"movl 8(%%ebx, %%eax), %%esi	\n\t"
1216
		"mov 8(%%"REG_b", %%"REG_a"), %%"REG_S"\n\t"
1217 1217
		"pmullw %%mm3, %%mm0		\n\t"
1218 1218
		"psllw $7, %%mm1		\n\t"
1219 1219
		"paddw %%mm1, %%mm0		\n\t"
1220 1220

  
1221
		"movq %%mm0, (%%edi, %%eax)	\n\t"
1221
		"movq %%mm0, (%%"REG_D", %%"REG_a")\n\t"
1222 1222

  
1223
		"addl $8, %%eax			\n\t"
1223
		"add $8, %%"REG_a"		\n\t"
1224 1224
	// End
1225 1225
		"9:				\n\t"
1226 1226
//		"int $3\n\t"
1227
		"leal 0b, %0			\n\t"
1228
		"leal 1b, %1			\n\t"
1229
		"leal 2b, %2			\n\t"
1230
		"decl %1			\n\t"
1231
		"decl %2			\n\t"
1232
		"subl %0, %1			\n\t"
1233
		"subl %0, %2			\n\t"
1234
		"leal 9b, %3			\n\t"
1235
		"subl %0, %3			\n\t"
1227
		"lea 0b, %0			\n\t"
1228
		"lea 1b, %1			\n\t"
1229
		"lea 2b, %2			\n\t"
1230
		"dec %1				\n\t"
1231
		"dec %2				\n\t"
1232
		"sub %0, %1			\n\t"
1233
		"sub %0, %2			\n\t"
1234
		"lea 9b, %3			\n\t"
1235
		"sub %0, %3			\n\t"
1236 1236

  
1237 1237

  
1238 1238
		:"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
......
1313 1313
	}
1314 1314
	filterPos[i/2]= xpos>>16; // needed to jump to the next part
1315 1315
}
1316
#endif // ARCH_X86
1316
#endif // ARCH_X86 || ARCH_X86_64
1317 1317

  
1318 1318
static void globalInit(){
1319 1319
    // generating tables:
......
1327 1327
static SwsFunc getSwsFunc(int flags){
1328 1328
    
1329 1329
#ifdef RUNTIME_CPUDETECT
1330
#ifdef ARCH_X86
1330
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1331 1331
	// ordered by speed, fastest first
1332 1332
	if(flags & SWS_CPU_CAPS_MMX2)
1333 1333
		return swScale_MMX2;
......
1755 1755
	int unscaled, needsDither;
1756 1756
	int srcFormat, dstFormat;
1757 1757
	SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1758
#ifdef ARCH_X86
1758
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1759 1759
	if(flags & SWS_CPU_CAPS_MMX)
1760 1760
		asm volatile("emms\n\t"::: "memory");
1761 1761
#endif
......
1995 1995
				 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
1996 1996
				 srcFilter->chrH, dstFilter->chrH, c->param);
1997 1997

  
1998
#ifdef ARCH_X86
1998
#if defined(ARCH_X86) || defined(ARCH_X86_64)
1999 1999
// can't downscale !!!
2000 2000
		if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2001 2001
		{
......
2136 2136
		}
2137 2137
		else
2138 2138
		{
2139
#ifdef ARCH_X86
2139
#if defined(ARCH_X86) || defined(ARCH_X86_64)
2140 2140
			MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
2141 2141
#else
2142 2142
			if(flags & SWS_FAST_BILINEAR)

Also available in: Unified diff