Revision 9c77b26b

View differences:

libswscale/swscale_template.c
626 626
    "pxor              %%mm7, %%mm7     \n\t"
627 627
/* Forwards (index, c) unchanged to REAL_YSCALEYUV2RGB1b; going through this extra macro level means arguments that are themselves macros are expanded before the REAL_ macro receives them. */
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
628 628

  
629
#define REAL_WRITEBGR32(dst, dstw, index) \
630
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
631
    "movq      %%mm2, %%mm1     \n\t" /* B */\
632
    "movq      %%mm5, %%mm6     \n\t" /* R */\
633
    "punpcklbw %%mm4, %%mm2     \n\t" /* GBGBGBGB 0 */\
634
    "punpcklbw %%mm7, %%mm5     \n\t" /* 0R0R0R0R 0 */\
635
    "punpckhbw %%mm4, %%mm1     \n\t" /* GBGBGBGB 2 */\
636
    "punpckhbw %%mm7, %%mm6     \n\t" /* 0R0R0R0R 2 */\
637
    "movq      %%mm2, %%mm0     \n\t" /* GBGBGBGB 0 */\
638
    "movq      %%mm1, %%mm3     \n\t" /* GBGBGBGB 2 */\
639
    "punpcklwd %%mm5, %%mm0     \n\t" /* 0RGB0RGB 0 */\
640
    "punpckhwd %%mm5, %%mm2     \n\t" /* 0RGB0RGB 1 */\
641
    "punpcklwd %%mm6, %%mm1     \n\t" /* 0RGB0RGB 2 */\
642
    "punpckhwd %%mm6, %%mm3     \n\t" /* 0RGB0RGB 3 */\
629
/*
 * REAL_WRITEBGR32 (added side of this diff): interleaves the packed bytes
 * held in registers b, g, r and a into eight 4-byte pixels, passes the four
 * resulting quadwords to MOVNTQ with the address operand (dst, index, 4) at
 * displacements 0/8/16/24 (dst + index*4 in AT&T notation), then adds 8 to
 * index and jumps back to local label 1 while index is below dstw
 * (jb, i.e. an unsigned comparison).
 *
 *   dst           base operand handed to MOVNTQ
 *   dstw, index   operands stringified into the add/cmp instructions
 *   b, g, r, a    registers with the channel bytes; b and r are overwritten
 *                 (b ends up holding output quadword 1), g and a are only read
 *   q0, q2, q3    scratch registers that receive output quadwords 0, 2 and 3
 *   t             scratch register for the upper half of the a/r byte pairs
 *
 * Instruction operands are in AT&T order (source, destination).  The byte
 * diagrams on each line list the most-significant byte first, so B is the
 * lowest byte of every pixel.  MOVNTQ and the "1:" label targeted by
 * "jb 1b" are defined outside this view.
 */
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
630
    "movq       "#b", "#q2"     \n\t" /* q2 = B, saved for the high half */\
631
    "movq       "#r", "#t"      \n\t" /* t  = R, saved for the high half */\
632
    "punpcklbw  "#g", "#b"      \n\t" /* b  = GBGBGBGB 0 (low 4 bytes of g and b) */\
633
    "punpcklbw  "#a", "#r"      \n\t" /* r  = ARARARAR 0 (low 4 bytes of a and r) */\
634
    "punpckhbw  "#g", "#q2"     \n\t" /* q2 = GBGBGBGB 2 (high 4 bytes of g and b) */\
635
    "punpckhbw  "#a", "#t"      \n\t" /* t  = ARARARAR 2 (high 4 bytes of a and r) */\
636
    "movq       "#b", "#q0"     \n\t" /* q0 = copy of GBGBGBGB 0 */\
637
    "movq      "#q2", "#q3"     \n\t" /* q3 = copy of GBGBGBGB 2 */\
638
    "punpcklwd  "#r", "#q0"     \n\t" /* q0 = ARGBARGB 0 (pixels 0-1) */\
639
    "punpckhwd  "#r", "#b"      \n\t" /* b  = ARGBARGB 1 (pixels 2-3) */\
640
    "punpcklwd  "#t", "#q2"     \n\t" /* q2 = ARGBARGB 2 (pixels 4-5) */\
641
    "punpckhwd  "#t", "#q3"     \n\t" /* q3 = ARGBARGB 3 (pixels 6-7) */\
643 642
\
644
    MOVNTQ(%%mm0,   (dst, index, 4))\
645
    MOVNTQ(%%mm2,  8(dst, index, 4))\
646
    MOVNTQ(%%mm1, 16(dst, index, 4))\
647
    MOVNTQ(%%mm3, 24(dst, index, 4))\
643
    MOVNTQ(   q0,   (dst, index, 4))\
644
    MOVNTQ(    b,  8(dst, index, 4))\
645
    MOVNTQ(   q2, 16(dst, index, 4))\
646
    MOVNTQ(   q3, 24(dst, index, 4))\
648 647
\
649 648
    "add      $8, "#index"      \n\t"\
650 649
    "cmp "#dstw", "#index"      \n\t"\
651 650
    " jb      1b                \n\t"
652
#define WRITEBGR32(dst, dstw, index)  REAL_WRITEBGR32(dst, dstw, index)
651
/* Forwarding wrapper: all eleven arguments pass through one extra macro-expansion step here, so any that are themselves macros are expanded before REAL_WRITEBGR32 stringifies them with #. */
#define WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)  REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t)
653 652

  
654 653
#define REAL_WRITERGB16(dst, dstw, index) \
655 654
    "pand "MANGLE(bF8)", %%mm2  \n\t" /* B */\
......
1014 1013
            case PIX_FMT_RGB32:
1015 1014
                YSCALEYUV2PACKEDX_ACCURATE
1016 1015
                YSCALEYUV2RGBX
1017
                WRITEBGR32(%4, %5, %%REGa)
1016
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1018 1017

  
1019 1018
                YSCALEYUV2PACKEDX_END
1020 1019
                return;
......
1076 1075
            case PIX_FMT_RGB32:
1077 1076
                YSCALEYUV2PACKEDX
1078 1077
                YSCALEYUV2RGBX
1079
                WRITEBGR32(%4, %5, %%REGa)
1078
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1080 1079
                YSCALEYUV2PACKEDX_END
1081 1080
                return;
1082 1081
            case PIX_FMT_BGR24:
......
1171 1170
                "mov        %4, %%"REG_b"               \n\t"
1172 1171
                "push %%"REG_BP"                        \n\t"
1173 1172
                YSCALEYUV2RGB(%%REGBP, %5)
1174
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1173
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1175 1174
                "pop %%"REG_BP"                         \n\t"
1176 1175
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1177 1176

  
......
1283 1282
                "mov        %4, %%"REG_b"               \n\t"
1284 1283
                "push %%"REG_BP"                        \n\t"
1285 1284
                YSCALEYUV2RGB1(%%REGBP, %5)
1286
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1285
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1287 1286
                "pop %%"REG_BP"                         \n\t"
1288 1287
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1289 1288

  
......
1372 1371
                "mov        %4, %%"REG_b"               \n\t"
1373 1372
                "push %%"REG_BP"                        \n\t"
1374 1373
                YSCALEYUV2RGB1b(%%REGBP, %5)
1375
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP)
1374
                WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1376 1375
                "pop %%"REG_BP"                         \n\t"
1377 1376
                "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1378 1377

  

Also available in: Unified diff