Revision 36e8de07
libswscale/swscale_template.c  

385  385 
"packuswb %%mm6, %%mm5 \n\t"\ 
386  386 
"packuswb %%mm3, %%mm4 \n\t"\ 
387  387 
"pxor %%mm7, %%mm7 \n\t" 
388 
#if 0 

389 
/*
 * FULL_YSCALEYUV2RGB: inline-asm fragment (a sequence of string literals)
 * meant to be pasted at the start of an __asm__ statement.
 *
 * Operands referenced by this fragment:
 *   %0, %1  - bases of the two luma line buffers (commented buf0 / buf1)
 *   %2, %3  - bases of the two chroma line buffers (commented uvbuf0 / uvbuf1);
 *             a second chroma plane is read at byte offset VOF from each
 *   %6, %7  - blend weights (commented yalpha1 / uvalpha1); the low word of
 *             each is replicated into all four words of mm6 / mm5
 *
 * The fragment clears mm7 and REG_a, opens a loop at label "1:", loads four
 * 16-bit samples per buffer indexed by REG_a, blends the two lines, applies
 * the yCoeff/ubCoeff/ugCoeff/vrCoeff/vgCoeff multipliers and leaves the
 * saturated-to-byte results in mm3 (B), mm1 (G) and mm0 (R).
 *
 * The loop is NOT closed here: the code that expands this macro must store
 * the pixels, advance REG_a and emit the "jb 1b" branch itself.
 */
#define FULL_YSCALEYUV2RGB \ 

390 
"pxor %%mm7, %%mm7 \n\t" /* mm7 = 0*/\ 

391 
"movd %6, %%mm6 \n\t" /*yalpha1*/\ 

392 
"punpcklwd %%mm6, %%mm6 \n\t" /* replicate the low word ...*/\ 

393 
"punpcklwd %%mm6, %%mm6 \n\t" /* ... into all four words of mm6*/\ 

394 
"movd %7, %%mm5 \n\t" /*uvalpha1*/\ 

395 
"punpcklwd %%mm5, %%mm5 \n\t" /* replicate the low word ...*/\ 

396 
"punpcklwd %%mm5, %%mm5 \n\t" /* ... into all four words of mm5*/\ 

397 
"xor %%"REG_a", %%"REG_a" \n\t" /* index register = 0*/\ 

398 
ASMALIGN(4)\ 

399 
"1: \n\t"\ 

400 
"movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\ 

401 
"movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\ 

402 
"movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\ 

403 
"movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\ 

404 
"psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ 

405 
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ 

406 
"pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ 

407 
"pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ 

408 
"psraw $4, %%mm1 \n\t" /* buf1[eax] >>4*/\ 

409 
"movq "AV_STRINGIFY(VOF)"(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ 

410 
"psraw $4, %%mm3 \n\t" /* uvbuf1[eax] >>4*/\ 

411 
"paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ 

412 
"movq "AV_STRINGIFY(VOF)"(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ 

413 
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 + uvbuf1[eax](1-uvalpha1)*/\ 

414 
"psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ 

415 
"psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ 

416 
"psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ 

417 
"pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ 

418 
\ 

419 
\ 

420 
"pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ 

421 
"movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ 

422 
"pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\ 

423 
"psraw $4, %%mm0 \n\t" /* uvbuf1[eax+2048] >>4*/\ 

424 
"pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\ 

425 
"paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 + uvbuf1[eax+2048](1-uvalpha1)*/\ 

426 
"psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ 

427 
\ 

428 
\ 

429 
"movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ 

430 
"pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\ 

431 
"pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\ 

432 
"paddw %%mm1, %%mm3 \n\t" /* B*/\ 

433 
"paddw %%mm1, %%mm0 \n\t" /* R*/\ 

434 
"packuswb %%mm3, %%mm3 \n\t"\ 

435 
\ 

436 
"packuswb %%mm0, %%mm0 \n\t"\ 

437 
"paddw %%mm4, %%mm2 \n\t"\ 

438 
"paddw %%mm2, %%mm1 \n\t" /* G*/\ 

439 
\ 

440 
"packuswb %%mm1, %%mm1 \n\t" 

441 
#endif 

442  388  
443  389 
#define REAL_YSCALEYUV2PACKED(index, c) \ 
444  390 
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\ 
...  ...  
1213  1159 
int uvalpha1=4095uvalpha; 
1214  1160 
int i; 
1215  1161  
1216 
#if 0 //isn't used 

1217 
if (flags&SWS_FULL_CHR_H_INT) 

1218 
{ 

1219 
switch(dstFormat) 

1220 
{ 

1221 
#ifdef HAVE_MMX 

1222 
case PIX_FMT_RGB32: 

1223 
__asm__ volatile( 

1224  
1225  
1226 
FULL_YSCALEYUV2RGB 

1227 
"punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG 

1228 
"punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 

1229  
1230 
"movq %%mm3, %%mm1 \n\t" 

1231 
"punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 

1232 
"punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 

1233  
1234 
MOVNTQ(%%mm3, (%4, %%REGa, 4)) 

1235 
MOVNTQ(%%mm1, 8(%4, %%REGa, 4)) 

1236  
1237 
"add $4, %%"REG_a" \n\t" 

1238 
"cmp %5, %%"REG_a" \n\t" 

1239 
" jb 1b \n\t" 

1240  
1241 
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" ((long)dstW), 

1242 
"m" (yalpha1), "m" (uvalpha1) 

1243 
: "%"REG_a 

1244 
); 

1245 
break; 

1246 
case PIX_FMT_BGR24: 

1247 
__asm__ volatile( 

1248  
1249 
FULL_YSCALEYUV2RGB 

1250  
1251 
// lsb ... msb 

1252 
"punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG 

1253 
"punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 

1254  
1255 
"movq %%mm3, %%mm1 \n\t" 

1256 
"punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 

1257 
"punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 

1258  
1259 
"movq %%mm3, %%mm2 \n\t" // BGR0BGR0 

1260 
"psrlq $8, %%mm3 \n\t" // GR0BGR00 

1261 
"pand "MANGLE(bm00000111)", %%mm2 \n\t" // BGR00000 

1262 
"pand "MANGLE(bm11111000)", %%mm3 \n\t" // 000BGR00 

1263 
"por %%mm2, %%mm3 \n\t" // BGRBGR00 

1264 
"movq %%mm1, %%mm2 \n\t" 

1265 
"psllq $48, %%mm1 \n\t" // 000000BG 

1266 
"por %%mm1, %%mm3 \n\t" // BGRBGRBG 

1267  
1268 
"movq %%mm2, %%mm1 \n\t" // BGR0BGR0 

1269 
"psrld $16, %%mm2 \n\t" // R000R000 

1270 
"psrlq $24, %%mm1 \n\t" // 0BGR0000 

1271 
"por %%mm2, %%mm1 \n\t" // RBGRR000 

1272  
1273 
"mov %4, %%"REG_b" \n\t" 

1274 
"add %%"REG_a", %%"REG_b" \n\t" 

1275  
1276 
#ifdef HAVE_MMX2 

1277 
//FIXME Alignment 

1278 
"movntq %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" 

1279 
"movntq %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" 

1280 
#else 

1281 
"movd %%mm3, (%%"REG_b", %%"REG_a", 2) \n\t" 

1282 
"psrlq $32, %%mm3 \n\t" 

1283 
"movd %%mm3, 4(%%"REG_b", %%"REG_a", 2) \n\t" 

1284 
"movd %%mm1, 8(%%"REG_b", %%"REG_a", 2) \n\t" 

1285 
#endif 

1286 
"add $4, %%"REG_a" \n\t" 

1287 
"cmp %5, %%"REG_a" \n\t" 

1288 
" jb 1b \n\t" 

1289  
1290 
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), 

1291 
"m" (yalpha1), "m" (uvalpha1) 

1292 
: "%"REG_a, "%"REG_b 

1293 
); 

1294 
break; 

1295 
case PIX_FMT_BGR555: 

1296 
__asm__ volatile( 

1297  
1298 
FULL_YSCALEYUV2RGB 

1299 
#ifdef DITHER1XBPP 

1300 
"paddusb "MANGLE(g5Dither)", %%mm1 \n\t" 

1301 
"paddusb "MANGLE(r5Dither)", %%mm0 \n\t" 

1302 
"paddusb "MANGLE(b5Dither)", %%mm3 \n\t" 

1303 
#endif 

1304 
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G 

1305 
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B 

1306 
"punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R 

1307  
1308 
"psrlw $3, %%mm3 \n\t" 

1309 
"psllw $2, %%mm1 \n\t" 

1310 
"psllw $7, %%mm0 \n\t" 

1311 
"pand "MANGLE(g15Mask)", %%mm1 \n\t" 

1312 
"pand "MANGLE(r15Mask)", %%mm0 \n\t" 

1313  
1314 
"por %%mm3, %%mm1 \n\t" 

1315 
"por %%mm1, %%mm0 \n\t" 

1316  
1317 
MOVNTQ(%%mm0, (%4, %%REGa, 2)) 

1318  
1319 
"add $4, %%"REG_a" \n\t" 

1320 
"cmp %5, %%"REG_a" \n\t" 

1321 
" jb 1b \n\t" 

1322  
1323 
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 

1324 
"m" (yalpha1), "m" (uvalpha1) 

1325 
: "%"REG_a 

1326 
); 

1327 
break; 

1328 
case PIX_FMT_BGR565: 

1329 
__asm__ volatile( 

1330  
1331 
FULL_YSCALEYUV2RGB 

1332 
#ifdef DITHER1XBPP 

1333 
"paddusb "MANGLE(g5Dither)", %%mm1 \n\t" 

1334 
"paddusb "MANGLE(r5Dither)", %%mm0 \n\t" 

1335 
"paddusb "MANGLE(b5Dither)", %%mm3 \n\t" 

1336 
#endif 

1337 
"punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G 

1338 
"punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B 

1339 
"punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R 

1340  
1341 
"psrlw $3, %%mm3 \n\t" 

1342 
"psllw $3, %%mm1 \n\t" 

1343 
"psllw $8, %%mm0 \n\t" 

1344 
"pand "MANGLE(g16Mask)", %%mm1 \n\t" 

1345 
"pand "MANGLE(r16Mask)", %%mm0 \n\t" 

1346  
1347 
"por %%mm3, %%mm1 \n\t" 

1348 
"por %%mm1, %%mm0 \n\t" 

1349  
1350 
MOVNTQ(%%mm0, (%4, %%REGa, 2)) 

1351  
1352 
"add $4, %%"REG_a" \n\t" 

1353 
"cmp %5, %%"REG_a" \n\t" 

1354 
" jb 1b \n\t" 

1355  
1356 
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), 

1357 
"m" (yalpha1), "m" (uvalpha1) 

1358 
: "%"REG_a 

1359 
); 

1360 
break; 

1361 
#endif /* HAVE_MMX */ 

1362 
case PIX_FMT_BGR32: 

1363 
#ifndef HAVE_MMX 

1364 
case PIX_FMT_RGB32: 

1365 
#endif 

1366 
if (dstFormat==PIX_FMT_RGB32) 

1367 
{ 

1368 
int i; 

1369 
#ifdef WORDS_BIGENDIAN 

1370 
dest++; 

1371 
#endif 

1372 
for (i=0;i<dstW;i++){ 

1373 
// vertical linear interpolation && yuv2rgb in a single step: 

1374 
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 

1375 
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 

1376 
int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); 

1377 
dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; 

1378 
dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; 

1379 
dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; 

1380 
dest+= 4; 

1381 
} 

1382 
} 

1383 
else if (dstFormat==PIX_FMT_BGR24) 

1384 
{ 

1385 
int i; 

1386 
for (i=0;i<dstW;i++){ 

1387 
// vertical linear interpolation && yuv2rgb in a single step: 

1388 
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 

1389 
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 

1390 
int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); 

1391 
dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; 

1392 
dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; 

1393 
dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; 

1394 
dest+= 3; 

1395 
} 

1396 
} 

1397 
else if (dstFormat==PIX_FMT_BGR565) 

1398 
{ 

1399 
int i; 

1400 
for (i=0;i<dstW;i++){ 

1401 
// vertical linear interpolation && yuv2rgb in a single step: 

1402 
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 

1403 
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 

1404 
int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); 

1405  
1406 
((uint16_t*)dest)[i] = 

1407 
clip_table16b[(Y + yuvtab_40cf[U]) >>13]  

1408 
clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]  

1409 
clip_table16r[(Y + yuvtab_3343[V]) >>13]; 

1410 
} 

1411 
} 

1412 
else if (dstFormat==PIX_FMT_BGR555) 

1413 
{ 

1414 
int i; 

1415 
for (i=0;i<dstW;i++){ 

1416 
// vertical linear interpolation && yuv2rgb in a single step: 

1417 
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 

1418 
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 

1419 
int V=((uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19); 

1420  
1421 
((uint16_t*)dest)[i] = 

1422 
clip_table15b[(Y + yuvtab_40cf[U]) >>13]  

1423 
clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]  

1424 
clip_table15r[(Y + yuvtab_3343[V]) >>13]; 

1425 
} 

1426 
} 

1427 
}//FULL_UV_IPOL 

1428 
else 

1429 
{ 

1430 
#endif // if 0 

1431  1162 
#ifdef HAVE_MMX 
1432  1163 
if(!(c>flags & SWS_BITEXACT)){ 
1433  1164 
switch(c>dstFormat) 
Also available in: Unified diff