Revision 6858492e libswscale/swscale_template.c

View differences:

libswscale/swscale_template.c
644 644

  
645 645
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
646 646

  
647
/*
 * Inline-asm fragment that fetches eight 16-bit alpha samples for the
 * unscaled (single input line) RGB32 writers.
 * It reads two quadwords from asm operand %1 at byte offsets index*2 and
 * index*2+8 (abuf0[index] and abuf0[index+4]), arithmetic-shifts every
 * 16-bit lane right by 7 and packs both registers with unsigned saturation,
 * leaving the eight resulting bytes in %%mm7. %%mm1 is overwritten as scratch.
 * YSCALEYUV2RGB1_ALPHA is the usual two-level wrapper so that "index" is
 * macro-expanded before it is stringified with '#'.
 */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
648
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
649
    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
650
    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
651
    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
652
    "packuswb          %%mm1, %%mm7     \n\t"
653
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
654

  
647 655
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
648 656
    "movq       "#b", "#q2"     \n\t" /* B */\
649 657
    "movq       "#r", "#t"      \n\t" /* R */\
......
909 917

  
910 918

  
911 919
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
912
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
913
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
920
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t **alpSrc,
921
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
914 922
{
915 923
#if HAVE_MMX
916 924
    if(!(c->flags & SWS_BITEXACT)){
......
919 927
                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
920 928
                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
921 929
            }
930
            if (CONFIG_SWSCALE_ALPHA && aDest){
931
                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
932
            }
922 933

  
923 934
            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
924 935
        }else{
......
926 937
                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
927 938
                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
928 939
            }
940
            if (CONFIG_SWSCALE_ALPHA && aDest){
941
                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
942
            }
929 943

  
930 944
            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
931 945
        }
......
939 953
#else //HAVE_ALTIVEC
940 954
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
941 955
            chrFilter, chrSrc, chrFilterSize,
942
            dest, uDest, vDest, dstW, chrDstW);
956
            alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
943 957
#endif //!HAVE_ALTIVEC
944 958
}
945 959

  
......
952 966
             dest, uDest, dstW, chrDstW, dstFormat);
953 967
}
954 968

  
955
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
956
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
969
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, int16_t *alpSrc,
970
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
957 971
{
958 972
    int i;
959 973
#if HAVE_MMX
960 974
    if(!(c->flags & SWS_BITEXACT)){
961
        long p= uDest ? 3 : 1;
962
        uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
963
        uint8_t *dst[3]= {dest, uDest, vDest};
964
        x86_reg counter[3] = {dstW, chrDstW, chrDstW};
975
        long p= 4;
976
        uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
977
        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
978
        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
965 979

  
966 980
        if (c->flags & SWS_ACCURATE_RND){
967 981
            while(p--){
982
            if (dst[p]){
968 983
                __asm__ volatile(
969 984
                    YSCALEYUV2YV121_ACCURATE
970 985
                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
......
972 987
                    : "%"REG_a
973 988
                );
974 989
            }
990
            }
975 991
        }else{
976 992
            while(p--){
993
            if (dst[p]){
977 994
                __asm__ volatile(
978 995
                    YSCALEYUV2YV121
979 996
                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
......
981 998
                    : "%"REG_a
982 999
                );
983 1000
            }
1001
            }
984 1002
        }
985 1003
        return;
986 1004
    }
......
1013 1031
            uDest[i]= u;
1014 1032
            vDest[i]= v;
1015 1033
        }
1034

  
1035
    if (CONFIG_SWSCALE_ALPHA && aDest)
1036
        for (i=0; i<dstW; i++){
1037
            int val= (alpSrc[i]+64)>>7;
1038
            aDest[i]= av_clip_uint8(val);
1039
        }
1016 1040
}
1017 1041

  
1018 1042

  
......
1021 1045
 */
1022 1046
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1023 1047
                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1024
                                       uint8_t *dest, long dstW, long dstY)
1048
                                       int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
1025 1049
{
1026 1050
#if HAVE_MMX
1027 1051
    x86_reg dummy=0;
......
1029 1053
        if (c->flags & SWS_ACCURATE_RND){
1030 1054
            switch(c->dstFormat){
1031 1055
            case PIX_FMT_RGB32:
1056
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1057
                    YSCALEYUV2PACKEDX_ACCURATE
1058
                    YSCALEYUV2RGBX
1059
                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
1060
                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
1061
                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
1062
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1063
                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
1064
                    "psraw                        $3, %%mm1         \n\t"
1065
                    "psraw                        $3, %%mm7         \n\t"
1066
                    "packuswb                  %%mm7, %%mm1         \n\t"
1067
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1068

  
1069
                    YSCALEYUV2PACKEDX_END
1070
                }else{
1032 1071
                YSCALEYUV2PACKEDX_ACCURATE
1033 1072
                YSCALEYUV2RGBX
1034 1073
                "pcmpeqd %%mm7, %%mm7 \n\t"
1035 1074
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1036 1075

  
1037 1076
                YSCALEYUV2PACKEDX_END
1077
                }
1038 1078
                return;
1039 1079
            case PIX_FMT_BGR24:
1040 1080
                YSCALEYUV2PACKEDX_ACCURATE
......
1095 1135
            switch(c->dstFormat)
1096 1136
            {
1097 1137
            case PIX_FMT_RGB32:
1138
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1139
                    YSCALEYUV2PACKEDX
1140
                    YSCALEYUV2RGBX
1141
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1142
                    "psraw                        $3, %%mm1         \n\t"
1143
                    "psraw                        $3, %%mm7         \n\t"
1144
                    "packuswb                  %%mm7, %%mm1         \n\t"
1145
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1146
                    YSCALEYUV2PACKEDX_END
1147
                }else{
1098 1148
                YSCALEYUV2PACKEDX
1099 1149
                YSCALEYUV2RGBX
1100 1150
                "pcmpeqd %%mm7, %%mm7 \n\t"
1101 1151
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1102 1152
                YSCALEYUV2PACKEDX_END
1153
                }
1103 1154
                return;
1104 1155
            case PIX_FMT_BGR24:
1105 1156
                YSCALEYUV2PACKEDX
......
1161 1212
#if HAVE_ALTIVEC
1162 1213
    /* The following list of supported dstFormat values should
1163 1214
       match what's found in the body of ff_yuv2packedX_altivec() */
1164
    if (!(c->flags & SWS_BITEXACT) &&
1215
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
1165 1216
       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1166 1217
        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1167 1218
        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
......
1172 1223
#endif
1173 1224
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1174 1225
                       chrFilter, chrSrc, chrFilterSize,
1175
                       dest, dstW, dstY);
1226
                       alpSrc, dest, dstW, dstY);
1176 1227
}
1177 1228

  
1178 1229
/**
1179 1230
 * vertical bilinear scale YV12 to RGB
1180 1231
 */
1181 1232
static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
1182
                          uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1233
                          uint16_t *abuf0, uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1183 1234
{
1184 1235
    int  yalpha1=4095- yalpha;
1185 1236
    int uvalpha1=4095-uvalpha;
......
1191 1242
        {
1192 1243
            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1193 1244
            case PIX_FMT_RGB32:
1245
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1246
#if ARCH_X86_64
1247
                    __asm__ volatile(
1248
                    "mov        %4, %%"REG_b"               \n\t"
1249
                    YSCALEYUV2RGB(%%REGBP, %5)
1250
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1251
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1252
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1253
                    "packuswb            %%mm7, %%mm1       \n\t"
1254
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1255

  
1256
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1257
                    "a" (&c->redDither)
1258
                    ,"r" (abuf0), "r" (abuf1)
1259
                    : "%"REG_b, "%"REG_BP
1260
                    );
1261
#else
1262
                    *(uint16_t **)(&c->u_temp)=abuf0;
1263
                    *(uint16_t **)(&c->v_temp)=abuf1;
1264
                    __asm__ volatile(
1265
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1266
                    "mov        %4, %%"REG_b"               \n\t"
1267
                    "push %%"REG_BP"                        \n\t"
1268
                    YSCALEYUV2RGB(%%REGBP, %5)
1269
                    "push                   %0              \n\t"
1270
                    "push                   %1              \n\t"
1271
                    "mov          "U_TEMP"(%5), %0          \n\t"
1272
                    "mov          "V_TEMP"(%5), %1          \n\t"
1273
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1274
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1275
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1276
                    "packuswb            %%mm7, %%mm1       \n\t"
1277
                    "pop                    %1              \n\t"
1278
                    "pop                    %0              \n\t"
1279
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1280
                    "pop %%"REG_BP"                         \n\t"
1281
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1282

  
1283
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1284
                    "a" (&c->redDither)
1285
                    );
1286
#endif
1287
                }else{
1194 1288
                __asm__ volatile(
1195 1289
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1196 1290
                "mov        %4, %%"REG_b"               \n\t"
......
1204 1298
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1205 1299
                "a" (&c->redDither)
1206 1300
                );
1301
                }
1207 1302
                return;
1208 1303
            case PIX_FMT_BGR24:
1209 1304
                __asm__ volatile(
......
1279 1374
        }
1280 1375
    }
1281 1376
#endif //HAVE_MMX
1282
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1377
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1283 1378
}
1284 1379

  
1285 1380
/**
1286 1381
 * YV12 to RGB without scaling or interpolating
1287 1382
 */
1288 1383
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1289
                          uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1384
                          uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1290 1385
{
1291 1386
    const int yalpha1=0;
1292 1387
    int i;
......
1296 1391

  
1297 1392
    if (flags&SWS_FULL_CHR_H_INT)
1298 1393
    {
1299
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1394
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
1300 1395
        return;
1301 1396
    }
1302 1397

  
......
1307 1402
            switch(dstFormat)
1308 1403
            {
1309 1404
            case PIX_FMT_RGB32:
1405
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1406
                    __asm__ volatile(
1407
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1408
                    "mov        %4, %%"REG_b"               \n\t"
1409
                    "push %%"REG_BP"                        \n\t"
1410
                    YSCALEYUV2RGB1(%%REGBP, %5)
1411
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1412
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1413
                    "pop %%"REG_BP"                         \n\t"
1414
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1415

  
1416
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1417
                    "a" (&c->redDither)
1418
                    );
1419
                }else{
1310 1420
                __asm__ volatile(
1311 1421
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1312 1422
                "mov        %4, %%"REG_b"               \n\t"
......
1320 1430
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1321 1431
                "a" (&c->redDither)
1322 1432
                );
1433
                }
1323 1434
                return;
1324 1435
            case PIX_FMT_BGR24:
1325 1436
                __asm__ volatile(
......
1400 1511
            switch(dstFormat)
1401 1512
            {
1402 1513
            case PIX_FMT_RGB32:
1514
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1515
                    __asm__ volatile(
1516
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1517
                    "mov        %4, %%"REG_b"               \n\t"
1518
                    "push %%"REG_BP"                        \n\t"
1519
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1520
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1521
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1522
                    "pop %%"REG_BP"                         \n\t"
1523
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1524

  
1525
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1526
                    "a" (&c->redDither)
1527
                    );
1528
                }else{
1403 1529
                __asm__ volatile(
1404 1530
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1405 1531
                "mov        %4, %%"REG_b"               \n\t"
......
1413 1539
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1414 1540
                "a" (&c->redDither)
1415 1541
                );
1542
                }
1416 1543
                return;
1417 1544
            case PIX_FMT_BGR24:
1418 1545
                __asm__ volatile(
......
1492 1619
#endif /* HAVE_MMX */
1493 1620
    if (uvalpha < 2048)
1494 1621
    {
1495
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1622
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1496 1623
    }else{
1497
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1624
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1498 1625
    }
1499 1626
}
1500 1627

  
......
1642 1769
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1643 1770
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1644 1771

  
1772
/**
 * Extracts one byte from every 4-byte packed pixel: dst[i] = src[4*i] for
 * i in [0, width).
 * Which channel is extracted is decided by the caller through the src
 * offset; hyscale passes either src or src+3 depending on the 32-bit source
 * format when it is invoked for the alpha plane.
 *
 * @param dst    output buffer, one byte per pixel (at least width bytes)
 * @param src    pointer to the wanted byte of the first pixel
 * @param width  number of pixels to process
 * @param unused not read by this function
 */
static inline void RENAME(abgrToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
1773
    int i;
1774
    for (i=0; i<width; i++){
1775
        dst[i]= src[4*i];
1776
    }
1777
}
1778

  
1645 1779
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
1646 1780
static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
1647 1781
{\
......
2130 2264
                                   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
2131 2265
                                   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
2132 2266
                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2133
                                   int32_t *mmx2FilterPos, uint32_t *pal)
2267
                                   int32_t *mmx2FilterPos, uint32_t *pal, int isAlpha)
2134 2268
{
2135 2269
    if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
2136 2270
    {
......
2144 2278
    }
2145 2279
    else if (srcFormat==PIX_FMT_RGB32)
2146 2280
    {
2281
        if (isAlpha)
2282
            RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
2283
        else
2147 2284
        RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
2148 2285
        src= formatConvBuffer;
2149 2286
    }
2150 2287
    else if (srcFormat==PIX_FMT_RGB32_1)
2151 2288
    {
2289
        if (isAlpha)
2290
            RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
2291
        else
2152 2292
        RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
2153 2293
        src= formatConvBuffer;
2154 2294
    }
......
2169 2309
    }
2170 2310
    else if (srcFormat==PIX_FMT_BGR32)
2171 2311
    {
2312
        if (isAlpha)
2313
            RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
2314
        else
2172 2315
        RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
2173 2316
        src= formatConvBuffer;
2174 2317
    }
2175 2318
    else if (srcFormat==PIX_FMT_BGR32_1)
2176 2319
    {
2320
        if (isAlpha)
2321
            RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
2322
        else
2177 2323
        RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
2178 2324
        src= formatConvBuffer;
2179 2325
    }
......
2347 2493
#endif /* ARCH_X86 */
2348 2494
    }
2349 2495

  
2350
    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2496
    if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2351 2497
        int i;
2352 2498
        //FIXME all pal and rgb srcFormats could do this conversion as well
2353 2499
        //FIXME all scalers more complex than bilinear could do half of this transform
......
2683 2829
    int16_t *hChrFilter= c->hChrFilter;
2684 2830
    int32_t *lumMmxFilter= c->lumMmxFilter;
2685 2831
    int32_t *chrMmxFilter= c->chrMmxFilter;
2832
    int32_t *alpMmxFilter= c->alpMmxFilter;
2686 2833
    const int vLumFilterSize= c->vLumFilterSize;
2687 2834
    const int vChrFilterSize= c->vChrFilterSize;
2688 2835
    const int hLumFilterSize= c->hLumFilterSize;
2689 2836
    const int hChrFilterSize= c->hChrFilterSize;
2690 2837
    int16_t **lumPixBuf= c->lumPixBuf;
2691 2838
    int16_t **chrPixBuf= c->chrPixBuf;
2839
    int16_t **alpPixBuf= c->alpPixBuf;
2692 2840
    const int vLumBufSize= c->vLumBufSize;
2693 2841
    const int vChrBufSize= c->vChrBufSize;
2694 2842
    uint8_t *funnyYCode= c->funnyYCode;
......
2709 2857
    if (isPacked(c->srcFormat)){
2710 2858
        src[0]=
2711 2859
        src[1]=
2712
        src[2]= src[0];
2860
        src[2]=
2861
        src[3]= src[0];
2713 2862
        srcStride[0]=
2714 2863
        srcStride[1]=
2715
        srcStride[2]= srcStride[0];
2864
        srcStride[2]=
2865
        srcStride[3]= srcStride[0];
2716 2866
    }
2717 2867
    srcStride[1]<<= c->vChrDrop;
2718 2868
    srcStride[2]<<= c->vChrDrop;
......
2733 2883
    //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
2734 2884
    //dstStride[0],dstStride[1],dstStride[2]);
2735 2885

  
2736
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
2886
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0)
2737 2887
    {
2738 2888
        static int warnedAlready=0; //FIXME move this into the context perhaps
2739 2889
        if (flags & SWS_PRINT_INFO && !warnedAlready)
......
2762 2912
        const int chrDstY= dstY>>c->chrDstVSubSample;
2763 2913
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2764 2914
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
2915
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2765 2916

  
2766 2917
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2767 2918
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
......
2783 2934
            //Do horizontal scaling
2784 2935
            while(lastInLumBuf < lastLumSrcY)
2785 2936
            {
2786
                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2937
                uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2938
                uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2787 2939
                lumBufIndex++;
2788 2940
                //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf,  lastLumSrcY);
2789 2941
                assert(lumBufIndex < 2*vLumBufSize);
2790 2942
                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2791 2943
                assert(lastInLumBuf + 1 - srcSliceY >= 0);
2792 2944
                //printf("%d %d\n", lumBufIndex, vLumBufSize);
2793
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2945
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2794 2946
                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2795 2947
                                funnyYCode, c->srcFormat, formatConvBuffer,
2796
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
2948
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
2949
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2950
                    RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
2951
                                    flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2952
                                    funnyYCode, c->srcFormat, formatConvBuffer,
2953
                                    c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
2797 2954
                lastInLumBuf++;
2798 2955
            }
2799 2956
            while(lastInChrBuf < lastChrSrcY)
......
2827 2984
            //Do horizontal scaling
2828 2985
            while(lastInLumBuf+1 < srcSliceY + srcSliceH)
2829 2986
            {
2830
                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2987
                uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2988
                uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2831 2989
                lumBufIndex++;
2832 2990
                assert(lumBufIndex < 2*vLumBufSize);
2833 2991
                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2834 2992
                assert(lastInLumBuf + 1 - srcSliceY >= 0);
2835
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2993
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2836 2994
                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2837 2995
                                funnyYCode, c->srcFormat, formatConvBuffer,
2838
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
2996
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
2997
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2998
                    RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
2999
                                    flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
3000
                                    funnyYCode, c->srcFormat, formatConvBuffer,
3001
                                    c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
2839 3002
                lastInLumBuf++;
2840 3003
            }
2841 3004
            while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
......
2872 3035
        {
2873 3036
            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2874 3037
            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
3038
            int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2875 3039
#if HAVE_MMX
2876 3040
            int i;
2877 3041
        if (flags & SWS_ACCURATE_RND){
......
2882 3046
                          lumMmxFilter[s*i+APCK_COEF/4  ]=
2883 3047
                          lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
2884 3048
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
3049
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
3050
                    *(void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
3051
                    *(void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
3052
                              alpMmxFilter[s*i+APCK_COEF/4  ]=
3053
                              alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
3054
                }
2885 3055
            }
2886 3056
            for (i=0; i<vChrFilterSize; i+=2){
2887 3057
                *(void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
......
2898 3068
                lumMmxFilter[4*i+2]=
2899 3069
                lumMmxFilter[4*i+3]=
2900 3070
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
3071
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
3072
                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
3073
                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
3074
                    alpMmxFilter[4*i+2]=
3075
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
3076
                }
2901 3077
            }
2902 3078
            for (i=0; i<vChrFilterSize; i++)
2903 3079
            {
......
2925 3101
                {
2926 3102
                    int16_t *lumBuf = lumPixBuf[0];
2927 3103
                    int16_t *chrBuf= chrPixBuf[0];
2928
                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
3104
                    int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
3105
                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
2929 3106
                }
2930 3107
                else //General YV12
2931 3108
                {
2932 3109
                    RENAME(yuv2yuvX)(c,
2933 3110
                        vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
2934 3111
                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2935
                        dest, uDest, vDest, dstW, chrDstW);
3112
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2936 3113
                }
2937 3114
            }
2938 3115
            else
......
2946 3123
                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
2947 3124
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2948 3125
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2949
                            dest, dstW, dstY);
3126
                            alpSrcPtr, dest, dstW, dstY);
2950 3127
                    }else{
2951 3128
                        RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
3129
                            alpPixBuf ? *alpSrcPtr : NULL,
2952 3130
                            dest, dstW, chrAlpha, dstFormat, flags, dstY);
2953 3131
                    }
2954 3132
                }
......
2964 3142
                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
2965 3143
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2966 3144
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2967
                            dest, dstW, dstY);
3145
                            alpSrcPtr, dest, dstW, dstY);
2968 3146
                    }else{
2969 3147
                        RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
3148
                            alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2970 3149
                            dest, dstW, lumAlpha, chrAlpha, dstY);
2971 3150
                    }
2972 3151
                }
......
2976 3155
                        yuv2rgbXinC_full(c,
2977 3156
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2978 3157
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2979
                            dest, dstW, dstY);
3158
                            alpSrcPtr, dest, dstW, dstY);
2980 3159
                    }else{
2981 3160
                        RENAME(yuv2packedX)(c,
2982 3161
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2983 3162
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2984
                            dest, dstW, dstY);
3163
                            alpSrcPtr, dest, dstW, dstY);
2985 3164
                    }
2986 3165
                }
2987 3166
            }
......
2990 3169
        {
2991 3170
            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2992 3171
            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
3172
            int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2993 3173
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
2994 3174
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2995 3175
                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
......
3005 3185
                yuv2yuvXinC(
3006 3186
                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
3007 3187
                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3008
                    dest, uDest, vDest, dstW, chrDstW);
3188
                    alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
3009 3189
            }
3010 3190
            else
3011 3191
            {
......
3015 3195
                    yuv2rgbXinC_full(c,
3016 3196
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
3017 3197
                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3018
                        dest, dstW, dstY);
3198
                        alpSrcPtr, dest, dstW, dstY);
3019 3199
                }else{
3020 3200
                    yuv2packedXinC(c,
3021 3201
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
3022 3202
                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3023
                        dest, dstW, dstY);
3203
                        alpSrcPtr, dest, dstW, dstY);
3024 3204
                }
3025 3205
            }
3026 3206
        }

Also available in: Unified diff