Revision 6858492e

View differences:

libswscale/swscale.c
473 473

  
474 474
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
475 475
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
476
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
476
                               int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
477 477
{
478 478
    //FIXME Optimize (just quickly written not optimized..)
479 479
    int i;
......
502 502
            uDest[i]= av_clip_uint8(u>>19);
503 503
            vDest[i]= av_clip_uint8(v>>19);
504 504
        }
505

  
506
    if (CONFIG_SWSCALE_ALPHA && aDest)
507
        for (i=0; i<dstW; i++){
508
            int val=1<<18;
509
            int j;
510
            for (j=0; j<lumFilterSize; j++)
511
                val += alpSrc[j][i] * lumFilter[j];
512

  
513
            aDest[i]= av_clip_uint8(val>>19);
514
        }
515

  
505 516
}
506 517

  
507 518
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
......
555 566
        }
556 567
}
557 568

  
558
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
569
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
559 570
    for (i=0; i<(dstW>>1); i++){\
560 571
        int j;\
561 572
        int Y1 = 1<<18;\
562 573
        int Y2 = 1<<18;\
563 574
        int U  = 1<<18;\
564 575
        int V  = 1<<18;\
576
        int av_unused A1, A2;\
565 577
        type av_unused *r, *b, *g;\
566 578
        const int i2= 2*i;\
567 579
        \
......
579 591
        Y2>>=19;\
580 592
        U >>=19;\
581 593
        V >>=19;\
594
        if (alpha){\
595
            A1 = 1<<18;\
596
            A2 = 1<<18;\
597
            for (j=0; j<lumFilterSize; j++){\
598
                A1 += alpSrc[j][i2  ] * lumFilter[j];\
599
                A2 += alpSrc[j][i2+1] * lumFilter[j];\
600
            }\
601
            A1>>=19;\
602
            A2>>=19;\
603
        }\
582 604

  
583
#define YSCALE_YUV_2_PACKEDX_C(type) \
584
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
605
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
606
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
585 607
        if ((Y1|Y2|U|V)&256)\
586 608
        {\
587 609
            if (Y1>255)   Y1=255; \
......
592 614
            else if (U<0) U=0;    \
593 615
            if (V>255)    V=255;  \
594 616
            else if (V<0) V=0;    \
617
        }\
618
        if (alpha && ((A1|A2)&256)){\
619
            A1=av_clip_uint8(A1);\
620
            A2=av_clip_uint8(A2);\
595 621
        }
596 622

  
597
#define YSCALE_YUV_2_PACKEDX_FULL_C \
623
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
598 624
    for (i=0; i<dstW; i++){\
599 625
        int j;\
600 626
        int Y = 0;\
601 627
        int U = -128<<19;\
602 628
        int V = -128<<19;\
629
        int av_unused A;\
603 630
        int R,G,B;\
604 631
        \
605 632
        for (j=0; j<lumFilterSize; j++){\
......
612 639
        Y >>=10;\
613 640
        U >>=10;\
614 641
        V >>=10;\
642
        if (alpha){\
643
            A = rnd;\
644
            for (j=0; j<lumFilterSize; j++)\
645
                A += alpSrc[j][i     ] * lumFilter[j];\
646
            A >>=19;\
647
            if (A&256)\
648
                A = av_clip_uint8(A);\
649
        }\
615 650

  
616
#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
617
    YSCALE_YUV_2_PACKEDX_FULL_C\
651
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
652
    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
618 653
        Y-= c->yuv2rgb_y_offset;\
619 654
        Y*= c->yuv2rgb_y_coeff;\
620 655
        Y+= rnd;\
......
656 691
            else if (Y2<0)Y2=0;   \
657 692
        }
658 693

  
659
#define YSCALE_YUV_2_RGBX_C(type) \
660
    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
694
/* Expands YSCALE_YUV_2_PACKEDX_C(type,alpha) (the clipping variant) and then
 * points r, g and b at lookup tables selected by the chroma values:
 * r from c->table_rV[V], g from c->table_gU[U] + c->table_gV[V], and
 * b from c->table_bU[U], each cast to (type *).
 * 'alpha' is only forwarded to YSCALE_YUV_2_PACKEDX_C. */
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
695
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
661 696
    r = (type *)c->table_rV[V];   \
662 697
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
663 698
    b = (type *)c->table_bU[U];   \
664 699

  
665
#define YSCALE_YUV_2_PACKED2_C   \
700
/* Opens a loop over output pixel pairs (i counts pairs, i2 = 2*i); the loop's
 * closing brace is not part of this macro and must follow the user's code.
 * Per pair it blends two source lines:
 *   Y1, Y2 - buf0/buf1 weighted by yalpha1/yalpha, then >>19
 *   U, V   - uvbuf0/uvbuf1 weighted by uvalpha1/uvalpha, then >>19
 *            (V is read at offset VOFW in the same buffers)
 * r, g, b are declared (as type *) but not assigned here.
 * When 'alpha' is non-zero, A1/A2 are blended from abuf0/abuf1 with the luma
 * weights; otherwise A1/A2 stay uninitialized and must not be read. */
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
666 701
    for (i=0; i<(dstW>>1); i++){ \
667 702
        const int i2= 2*i;       \
668 703
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
669 704
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
670 705
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
671 706
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
707
        type av_unused *r, *b, *g;                                    \
708
        int av_unused A1, A2;                                         \
709
        if (alpha){\
710
            A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
711
            A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
712
        }\
672 713

  
673 714
#define YSCALE_YUV_2_GRAY16_2_C   \
674 715
    for (i=0; i<(dstW>>1); i++){ \
......
676 717
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
677 718
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
678 719

  
679
#define YSCALE_YUV_2_RGB2_C(type) \
680
    YSCALE_YUV_2_PACKED2_C\
681
    type *r, *b, *g;\
720
/* Expands YSCALE_YUV_2_PACKED2_C(type,alpha), which declares r, g and b, and
 * then assigns them the lookup tables selected by V and U
 * (c->table_rV, c->table_gU + c->table_gV, c->table_bU). */
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
721
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
682 722
    r = (type *)c->table_rV[V];\
683 723
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
684 724
    b = (type *)c->table_bU[U];\
685 725

  
686
#define YSCALE_YUV_2_PACKED1_C \
726
/* Opens a loop over output pixel pairs (i2 = 2*i); the closing brace is left
 * to the code that follows the macro use.
 * No blending between lines: Y1/Y2 come from buf0 and U/V from uvbuf1
 * (V at offset VOFW), each shifted right by 7.
 * r, g, b are declared (as type *) but not assigned here.
 * When 'alpha' is non-zero, A1/A2 are read from abuf0 and shifted right by 7;
 * otherwise A1/A2 stay uninitialized and must not be read. */
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
687 727
    for (i=0; i<(dstW>>1); i++){\
688 728
        const int i2= 2*i;\
689 729
        int Y1= buf0[i2  ]>>7;\
690 730
        int Y2= buf0[i2+1]>>7;\
691 731
        int U= (uvbuf1[i     ])>>7;\
692 732
        int V= (uvbuf1[i+VOFW])>>7;\
733
        type av_unused *r, *b, *g;\
734
        int av_unused A1, A2;\
735
        if (alpha){\
736
            A1= abuf0[i2  ]>>7;\
737
            A2= abuf0[i2+1]>>7;\
738
        }\
693 739

  
694 740
#define YSCALE_YUV_2_GRAY16_1_C \
695 741
    for (i=0; i<(dstW>>1); i++){\
......
697 743
        int Y1= buf0[i2  ]<<1;\
698 744
        int Y2= buf0[i2+1]<<1;\
699 745

  
700
#define YSCALE_YUV_2_RGB1_C(type) \
701
    YSCALE_YUV_2_PACKED1_C\
702
    type *r, *b, *g;\
746
/* Expands YSCALE_YUV_2_PACKED1_C(type,alpha), which declares r, g and b, and
 * then assigns them the lookup tables selected by V and U
 * (c->table_rV, c->table_gU + c->table_gV, c->table_bU). */
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
747
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
703 748
    r = (type *)c->table_rV[V];\
704 749
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
705 750
    b = (type *)c->table_bU[U];\
706 751

  
707
#define YSCALE_YUV_2_PACKED1B_C \
752
/* Same shape as YSCALE_YUV_2_PACKED1_C, except that chroma is the sum of the
 * two chroma lines: U/V = (uvbuf0 + uvbuf1) >> 8 (V at offset VOFW).
 * Luma (buf0 >> 7) and, when 'alpha' is non-zero, alpha (abuf0 >> 7) are
 * taken from a single line. With alpha == 0, A1/A2 stay uninitialized.
 * The loop opened here is closed by the code following the macro use. */
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
708 753
    for (i=0; i<(dstW>>1); i++){\
709 754
        const int i2= 2*i;\
710 755
        int Y1= buf0[i2  ]>>7;\
711 756
        int Y2= buf0[i2+1]>>7;\
712 757
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
713 758
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
759
        type av_unused *r, *b, *g;\
760
        int av_unused A1, A2;\
761
        if (alpha){\
762
            A1= abuf0[i2  ]>>7;\
763
            A2= abuf0[i2+1]>>7;\
764
        }\
714 765

  
715
#define YSCALE_YUV_2_RGB1B_C(type) \
716
    YSCALE_YUV_2_PACKED1B_C\
717
    type *r, *b, *g;\
766
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
767
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
718 768
    r = (type *)c->table_rV[V];\
719 769
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
720 770
    b = (type *)c->table_bU[U];\
......
772 822
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
773 823
    switch(c->dstFormat)\
774 824
    {\
775
    case PIX_FMT_RGB32:\
776
    case PIX_FMT_BGR32:\
777
    case PIX_FMT_RGB32_1:\
778
    case PIX_FMT_BGR32_1:\
779
        func(uint32_t)\
780
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
781
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
825
    case PIX_FMT_RGBA:\
826
    case PIX_FMT_BGRA:\
827
        if (CONFIG_SMALL){\
828
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
829
            func(uint32_t,needAlpha)\
830
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
831
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
832
            }\
833
        }else{\
834
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
835
                func(uint32_t,1)\
836
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
837
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
838
                }\
839
            }else{\
840
                func(uint32_t,0)\
841
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
842
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
843
                }\
844
            }\
845
        }\
846
        break;\
847
    case PIX_FMT_ARGB:\
848
    case PIX_FMT_ABGR:\
849
        if (CONFIG_SMALL){\
850
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
851
            func(uint32_t,needAlpha)\
852
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
853
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
854
            }\
855
        }else{\
856
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
857
                func(uint32_t,1)\
858
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
859
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
860
                }\
861
            }else{\
862
                func(uint32_t,0)\
863
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
864
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
865
                }\
866
            }\
782 867
        }                \
783 868
        break;\
784 869
    case PIX_FMT_RGB24:\
785
        func(uint8_t)\
870
        func(uint8_t,0)\
786 871
            ((uint8_t*)dest)[0]= r[Y1];\
787 872
            ((uint8_t*)dest)[1]= g[Y1];\
788 873
            ((uint8_t*)dest)[2]= b[Y1];\
......
793 878
        }\
794 879
        break;\
795 880
    case PIX_FMT_BGR24:\
796
        func(uint8_t)\
881
        func(uint8_t,0)\
797 882
            ((uint8_t*)dest)[0]= b[Y1];\
798 883
            ((uint8_t*)dest)[1]= g[Y1];\
799 884
            ((uint8_t*)dest)[2]= r[Y1];\
......
812 897
            const int dr2= dither_2x2_8[y&1    ][1];\
813 898
            const int dg2= dither_2x2_4[y&1    ][1];\
814 899
            const int db2= dither_2x2_8[(y&1)^1][1];\
815
            func(uint16_t)\
900
            func(uint16_t,0)\
816 901
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
817 902
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
818 903
            }\
......
827 912
            const int dr2= dither_2x2_8[y&1    ][1];\
828 913
            const int dg2= dither_2x2_8[y&1    ][0];\
829 914
            const int db2= dither_2x2_8[(y&1)^1][1];\
830
            func(uint16_t)\
915
            func(uint16_t,0)\
831 916
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
832 917
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
833 918
            }\
......
838 923
        {\
839 924
            const uint8_t * const d64= dither_8x8_73[y&7];\
840 925
            const uint8_t * const d32= dither_8x8_32[y&7];\
841
            func(uint8_t)\
926
            func(uint8_t,0)\
842 927
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
843 928
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
844 929
            }\
......
849 934
        {\
850 935
            const uint8_t * const d64= dither_8x8_73 [y&7];\
851 936
            const uint8_t * const d128=dither_8x8_220[y&7];\
852
            func(uint8_t)\
937
            func(uint8_t,0)\
853 938
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
854 939
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
855 940
            }\
......
860 945
        {\
861 946
            const uint8_t * const d64= dither_8x8_73 [y&7];\
862 947
            const uint8_t * const d128=dither_8x8_220[y&7];\
863
            func(uint8_t)\
948
            func(uint8_t,0)\
864 949
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
865 950
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
866 951
            }\
......
909 994

  
910 995
/**
 * Plain C path that writes one line in a packed output format.
 *
 * The whole body is the expansion of YSCALE_YUV_2_ANYRGB_C, which switches on
 * c->dstFormat and uses the loop index i declared here.
 *
 * In the new version alpSrc is the array of alpha source lines. It is read by
 * the YSCALE_YUV_2_RGBX_C path only when that macro is expanded with a
 * non-zero alpha argument. The second macro argument is instantiated as
 * YSCALE_YUV_2_PACKEDX_C(void,0), i.e. with alpha disabled, so that path
 * never reads alpSrc.
 */
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
911 996
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
912
                                  uint8_t *dest, int dstW, int y)
997
                                  int16_t **alpSrc, uint8_t *dest, int dstW, int y)
913 998
{
914 999
    int i;
915
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
1000
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
916 1001
}
917 1002

  
918 1003
static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
919 1004
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
920
                                    uint8_t *dest, int dstW, int y)
1005
                                    int16_t **alpSrc, uint8_t *dest, int dstW, int y)
921 1006
{
922 1007
    int i;
923 1008
    int step= fmt_depth(c->dstFormat)/8;
......
930 1015
    case PIX_FMT_RGB24:
931 1016
        aidx--;
932 1017
    case PIX_FMT_RGBA:
933
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
934
            dest[aidx]= 255;
935
            dest[0]= R>>22;
936
            dest[1]= G>>22;
937
            dest[2]= B>>22;
938
            dest+= step;
1018
        if (CONFIG_SMALL){
1019
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1020
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1021
                dest[aidx]= needAlpha ? A : 255;
1022
                dest[0]= R>>22;
1023
                dest[1]= G>>22;
1024
                dest[2]= B>>22;
1025
                dest+= step;
1026
            }
1027
        }else{
1028
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1029
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1030
                    dest[aidx]= A;
1031
                    dest[0]= R>>22;
1032
                    dest[1]= G>>22;
1033
                    dest[2]= B>>22;
1034
                    dest+= step;
1035
                }
1036
            }else{
1037
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1038
                    dest[aidx]= 255;
1039
                    dest[0]= R>>22;
1040
                    dest[1]= G>>22;
1041
                    dest[2]= B>>22;
1042
                    dest+= step;
1043
                }
1044
            }
939 1045
        }
940 1046
        break;
941 1047
    case PIX_FMT_ABGR:
......
944 1050
    case PIX_FMT_BGR24:
945 1051
        aidx--;
946 1052
    case PIX_FMT_BGRA:
947
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
948
            dest[aidx]= 255;
949
            dest[0]= B>>22;
950
            dest[1]= G>>22;
951
            dest[2]= R>>22;
952
            dest+= step;
1053
        if (CONFIG_SMALL){
1054
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1055
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1056
                dest[aidx]= needAlpha ? A : 255;
1057
                dest[0]= B>>22;
1058
                dest[1]= G>>22;
1059
                dest[2]= R>>22;
1060
                dest+= step;
1061
            }
1062
        }else{
1063
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1064
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1065
                    dest[aidx]= A;
1066
                    dest[0]= B>>22;
1067
                    dest[1]= G>>22;
1068
                    dest[2]= R>>22;
1069
                    dest+= step;
1070
                }
1071
            }else{
1072
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1073
                    dest[aidx]= 255;
1074
                    dest[0]= B>>22;
1075
                    dest[1]= G>>22;
1076
                    dest[2]= R>>22;
1077
                    dest+= step;
1078
                }
1079
            }
953 1080
        }
954 1081
        break;
955 1082
    default:
......
2644 2771
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2645 2772
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2646 2773
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2774
    if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
2775
        c->alpPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2647 2776
    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
2648 2777
    /* align at 16 bytes for AltiVec */
2649 2778
    for (i=0; i<c->vLumBufSize; i++)
2650 2779
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2651 2780
    for (i=0; i<c->vChrBufSize; i++)
2652 2781
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
2782
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
2783
        for (i=0; i<c->vLumBufSize; i++)
2784
            c->alpPixBuf[i]= c->alpPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2653 2785

  
2654 2786
    //try to avoid drawing green stuff between the right end and the stride end
2655 2787
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
......
3200 3332
        av_freep(&c->chrPixBuf);
3201 3333
    }
3202 3334

  
3335
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
3336
        for (i=0; i<c->vLumBufSize; i++)
3337
            av_freep(&c->alpPixBuf[i]);
3338
        av_freep(&c->alpPixBuf);
3339
    }
3340

  
3203 3341
    av_freep(&c->vLumFilter);
3204 3342
    av_freep(&c->vChrFilter);
3205 3343
    av_freep(&c->hLumFilter);
libswscale/swscale_internal.h
87 87

  
88 88
    int16_t **lumPixBuf;
89 89
    int16_t **chrPixBuf;
90
    int16_t **alpPixBuf;
90 91
    int16_t *hLumFilter;
91 92
    int16_t *hLumFilterPos;
92 93
    int16_t *hChrFilter;
......
156 157
#define VROUNDER_OFFSET       "11*8+4*4*256*2+16"
157 158
#define U_TEMP                "11*8+4*4*256*2+24"
158 159
#define V_TEMP                "11*8+4*4*256*2+32"
160
#define Y_TEMP                "11*8+4*4*256*2+40"
161
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
159 162

  
160 163
    uint64_t redDither   __attribute__((aligned(8)));
161 164
    uint64_t greenDither __attribute__((aligned(8)));
......
176 179
    uint64_t vRounder     __attribute__((aligned(8)));
177 180
    uint64_t u_temp       __attribute__((aligned(8)));
178 181
    uint64_t v_temp       __attribute__((aligned(8)));
182
    uint64_t y_temp       __attribute__((aligned(8)));
183
    int32_t  alpMmxFilter[4*MAX_FILTER_SIZE];
179 184

  
180 185
#if HAVE_ALTIVEC
181 186

  
libswscale/swscale_template.c
644 644

  
645 645
#define YSCALEYUV2RGB1b(index, c)  REAL_YSCALEYUV2RGB1b(index, c)
646 646

  
647
/* MMX asm fragment: loads two quadwords (4 + 4 16-bit samples) from the buffer
 * addressed by asm operand %1 at element offset 'index', arithmetic-shifts
 * every sample right by 7, and packs the eight results into bytes in %%mm7.
 * %%mm1 is used as scratch and is overwritten.
 * NOTE(review): operand %1 is expected to be the alpha line (abuf0) per the
 * inline comments -- confirm at each use site. */
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
648
    "movq  (%1, "#index", 2), %%mm7     \n\t" /* abuf0[index  ]     */\
649
    "movq 8(%1, "#index", 2), %%mm1     \n\t" /* abuf0[index+4]     */\
650
    "psraw                $7, %%mm7     \n\t" /* abuf0[index  ] >>7 */\
651
    "psraw                $7, %%mm1     \n\t" /* abuf0[index+4] >>7 */\
652
    "packuswb          %%mm1, %%mm7     \n\t" /* pack both halves to bytes with unsigned saturation */
653
/* Extra level of indirection so that a macro passed as 'index' is expanded
 * before REAL_YSCALEYUV2RGB1_ALPHA stringifies it with #index. */
#define YSCALEYUV2RGB1_ALPHA(index) REAL_YSCALEYUV2RGB1_ALPHA(index)
654

  
647 655
#define REAL_WRITEBGR32(dst, dstw, index, b, g, r, a, q0, q2, q3, t) \
648 656
    "movq       "#b", "#q2"     \n\t" /* B */\
649 657
    "movq       "#r", "#t"      \n\t" /* R */\
......
909 917

  
910 918

  
911 919
static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
912
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
913
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
920
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, int16_t **alpSrc,
921
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
914 922
{
915 923
#if HAVE_MMX
916 924
    if(!(c->flags & SWS_BITEXACT)){
......
919 927
                YSCALEYUV2YV12X_ACCURATE(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
920 928
                YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
921 929
            }
930
            if (CONFIG_SWSCALE_ALPHA && aDest){
931
                YSCALEYUV2YV12X_ACCURATE(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
932
            }
922 933

  
923 934
            YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
924 935
        }else{
......
926 937
                YSCALEYUV2YV12X(   "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
927 938
                YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
928 939
            }
940
            if (CONFIG_SWSCALE_ALPHA && aDest){
941
                YSCALEYUV2YV12X(   "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
942
            }
929 943

  
930 944
            YSCALEYUV2YV12X("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
931 945
        }
......
939 953
#else //HAVE_ALTIVEC
940 954
yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
941 955
            chrFilter, chrSrc, chrFilterSize,
942
            dest, uDest, vDest, dstW, chrDstW);
956
            alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW);
943 957
#endif //!HAVE_ALTIVEC
944 958
}
945 959

  
......
952 966
             dest, uDest, dstW, chrDstW, dstFormat);
953 967
}
954 968

  
955
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc,
956
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, long dstW, long chrDstW)
969
static inline void RENAME(yuv2yuv1)(SwsContext *c, int16_t *lumSrc, int16_t *chrSrc, int16_t *alpSrc,
970
                                    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
957 971
{
958 972
    int i;
959 973
#if HAVE_MMX
960 974
    if(!(c->flags & SWS_BITEXACT)){
961
        long p= uDest ? 3 : 1;
962
        uint8_t *src[3]= {lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
963
        uint8_t *dst[3]= {dest, uDest, vDest};
964
        x86_reg counter[3] = {dstW, chrDstW, chrDstW};
975
        long p= 4;
976
        uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
977
        uint8_t *dst[4]= {aDest, dest, uDest, vDest};
978
        x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
965 979

  
966 980
        if (c->flags & SWS_ACCURATE_RND){
967 981
            while(p--){
982
            if (dst[p]){
968 983
                __asm__ volatile(
969 984
                    YSCALEYUV2YV121_ACCURATE
970 985
                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
......
972 987
                    : "%"REG_a
973 988
                );
974 989
            }
990
            }
975 991
        }else{
976 992
            while(p--){
993
            if (dst[p]){
977 994
                __asm__ volatile(
978 995
                    YSCALEYUV2YV121
979 996
                    :: "r" (src[p]), "r" (dst[p] + counter[p]),
......
981 998
                    : "%"REG_a
982 999
                );
983 1000
            }
1001
            }
984 1002
        }
985 1003
        return;
986 1004
    }
......
1013 1031
            uDest[i]= u;
1014 1032
            vDest[i]= v;
1015 1033
        }
1034

  
1035
    if (CONFIG_SWSCALE_ALPHA && aDest)
1036
        for (i=0; i<dstW; i++){
1037
            int val= (alpSrc[i]+64)>>7;
1038
            aDest[i]= av_clip_uint8(val);
1039
        }
1016 1040
}
1017 1041

  
1018 1042

  
......
1021 1045
 */
1022 1046
static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
1023 1047
                                       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
1024
                                       uint8_t *dest, long dstW, long dstY)
1048
                                       int16_t **alpSrc, uint8_t *dest, long dstW, long dstY)
1025 1049
{
1026 1050
#if HAVE_MMX
1027 1051
    x86_reg dummy=0;
......
1029 1053
        if (c->flags & SWS_ACCURATE_RND){
1030 1054
            switch(c->dstFormat){
1031 1055
            case PIX_FMT_RGB32:
1056
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1057
                    YSCALEYUV2PACKEDX_ACCURATE
1058
                    YSCALEYUV2RGBX
1059
                    "movq                      %%mm2, "U_TEMP"(%0)  \n\t"
1060
                    "movq                      %%mm4, "V_TEMP"(%0)  \n\t"
1061
                    "movq                      %%mm5, "Y_TEMP"(%0)  \n\t"
1062
                    YSCALEYUV2PACKEDX_ACCURATE_YA(ALP_MMX_FILTER_OFFSET)
1063
                    "movq               "Y_TEMP"(%0), %%mm5         \n\t"
1064
                    "psraw                        $3, %%mm1         \n\t"
1065
                    "psraw                        $3, %%mm7         \n\t"
1066
                    "packuswb                  %%mm7, %%mm1         \n\t"
1067
                    WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
1068

  
1069
                    YSCALEYUV2PACKEDX_END
1070
                }else{
1032 1071
                YSCALEYUV2PACKEDX_ACCURATE
1033 1072
                YSCALEYUV2RGBX
1034 1073
                "pcmpeqd %%mm7, %%mm7 \n\t"
1035 1074
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1036 1075

  
1037 1076
                YSCALEYUV2PACKEDX_END
1077
                }
1038 1078
                return;
1039 1079
            case PIX_FMT_BGR24:
1040 1080
                YSCALEYUV2PACKEDX_ACCURATE
......
1095 1135
            switch(c->dstFormat)
1096 1136
            {
1097 1137
            case PIX_FMT_RGB32:
1138
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1139
                    YSCALEYUV2PACKEDX
1140
                    YSCALEYUV2RGBX
1141
                    YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
1142
                    "psraw                        $3, %%mm1         \n\t"
1143
                    "psraw                        $3, %%mm7         \n\t"
1144
                    "packuswb                  %%mm7, %%mm1         \n\t"
1145
                    WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1146
                    YSCALEYUV2PACKEDX_END
1147
                }else{
1098 1148
                YSCALEYUV2PACKEDX
1099 1149
                YSCALEYUV2RGBX
1100 1150
                "pcmpeqd %%mm7, %%mm7 \n\t"
1101 1151
                WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1102 1152
                YSCALEYUV2PACKEDX_END
1153
                }
1103 1154
                return;
1104 1155
            case PIX_FMT_BGR24:
1105 1156
                YSCALEYUV2PACKEDX
......
1161 1212
#if HAVE_ALTIVEC
1162 1213
    /* The following list of supported dstFormat values should
1163 1214
       match what's found in the body of ff_yuv2packedX_altivec() */
1164
    if (!(c->flags & SWS_BITEXACT) &&
1215
    /* The guarded branch is additionally skipped whenever an alpha line buffer
       is in use (c->alpPixBuf set). The trailing && is required: without it
       the parenthesized format list below parses as a call on !c->alpPixBuf
       and HAVE_ALTIVEC builds fail to compile.
       NOTE(review): presumably the AltiVec routine has no alpha support -- confirm. */
    if (!(c->flags & SWS_BITEXACT) && !c->alpPixBuf &&
1165 1216
       (c->dstFormat==PIX_FMT_ABGR  || c->dstFormat==PIX_FMT_BGRA  ||
1166 1217
        c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 ||
1167 1218
        c->dstFormat==PIX_FMT_RGBA  || c->dstFormat==PIX_FMT_ARGB))
......
1172 1223
#endif
1173 1224
        yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize,
1174 1225
                       chrFilter, chrSrc, chrFilterSize,
1175
                       dest, dstW, dstY);
1226
                       alpSrc, dest, dstW, dstY);
1176 1227
}
1177 1228

  
1178 1229
/**
1179 1230
 * vertical bilinear scale YV12 to RGB
1180 1231
 */
1181 1232
static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
1182
                          uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1233
                          uint16_t *abuf0, uint16_t *abuf1, uint8_t *dest, int dstW, int yalpha, int uvalpha, int y)
1183 1234
{
1184 1235
    int  yalpha1=4095- yalpha;
1185 1236
    int uvalpha1=4095-uvalpha;
......
1191 1242
        {
1192 1243
            //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
1193 1244
            case PIX_FMT_RGB32:
1245
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1246
#if ARCH_X86_64
1247
                    __asm__ volatile(
1248
                    "mov        %4, %%"REG_b"               \n\t"
1249
                    YSCALEYUV2RGB(%%REGBP, %5)
1250
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %6, %7)
1251
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1252
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1253
                    "packuswb            %%mm7, %%mm1       \n\t"
1254
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1255

  
1256
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1257
                    "a" (&c->redDither)
1258
                    ,"r" (abuf0), "r" (abuf1)
1259
                    : "%"REG_b, "%"REG_BP
1260
                    );
1261
#else
1262
                    *(uint16_t **)(&c->u_temp)=abuf0;
1263
                    *(uint16_t **)(&c->v_temp)=abuf1;
1264
                    __asm__ volatile(
1265
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1266
                    "mov        %4, %%"REG_b"               \n\t"
1267
                    "push %%"REG_BP"                        \n\t"
1268
                    YSCALEYUV2RGB(%%REGBP, %5)
1269
                    "push                   %0              \n\t"
1270
                    "push                   %1              \n\t"
1271
                    "mov          "U_TEMP"(%5), %0          \n\t"
1272
                    "mov          "V_TEMP"(%5), %1          \n\t"
1273
                    YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1)
1274
                    "psraw                  $3, %%mm1       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1275
                    "psraw                  $3, %%mm7       \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
1276
                    "packuswb            %%mm7, %%mm1       \n\t"
1277
                    "pop                    %1              \n\t"
1278
                    "pop                    %0              \n\t"
1279
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
1280
                    "pop %%"REG_BP"                         \n\t"
1281
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1282

  
1283
                    :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1284
                    "a" (&c->redDither)
1285
                    );
1286
#endif
1287
                }else{
1194 1288
                __asm__ volatile(
1195 1289
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1196 1290
                "mov        %4, %%"REG_b"               \n\t"
......
1204 1298
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1205 1299
                "a" (&c->redDither)
1206 1300
                );
1301
                }
1207 1302
                return;
1208 1303
            case PIX_FMT_BGR24:
1209 1304
                __asm__ volatile(
......
1279 1374
        }
1280 1375
    }
1281 1376
#endif //HAVE_MMX
1282
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C, YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1377
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C(void,0), YSCALE_YUV_2_GRAY16_2_C, YSCALE_YUV_2_MONO2_C)
1283 1378
}
1284 1379

  
1285 1380
/**
1286 1381
 * YV12 to RGB without scaling or interpolating
1287 1382
 */
1288 1383
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
1289
                          uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1384
                          uint16_t *abuf0, uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
1290 1385
{
1291 1386
    const int yalpha1=0;
1292 1387
    int i;
......
1296 1391

  
1297 1392
    if (flags&SWS_FULL_CHR_H_INT)
1298 1393
    {
1299
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
1394
        RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
1300 1395
        return;
1301 1396
    }
1302 1397

  
......
1307 1402
            switch(dstFormat)
1308 1403
            {
1309 1404
            case PIX_FMT_RGB32:
1405
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1406
                    __asm__ volatile(
1407
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1408
                    "mov        %4, %%"REG_b"               \n\t"
1409
                    "push %%"REG_BP"                        \n\t"
1410
                    YSCALEYUV2RGB1(%%REGBP, %5)
1411
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1412
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1413
                    "pop %%"REG_BP"                         \n\t"
1414
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1415

  
1416
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1417
                    "a" (&c->redDither)
1418
                    );
1419
                }else{
1310 1420
                __asm__ volatile(
1311 1421
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1312 1422
                "mov        %4, %%"REG_b"               \n\t"
......
1320 1430
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1321 1431
                "a" (&c->redDither)
1322 1432
                );
1433
                }
1323 1434
                return;
1324 1435
            case PIX_FMT_BGR24:
1325 1436
                __asm__ volatile(
......
1400 1511
            switch(dstFormat)
1401 1512
            {
1402 1513
            case PIX_FMT_RGB32:
1514
                if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1515
                    __asm__ volatile(
1516
                    "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1517
                    "mov        %4, %%"REG_b"               \n\t"
1518
                    "push %%"REG_BP"                        \n\t"
1519
                    YSCALEYUV2RGB1b(%%REGBP, %5)
1520
                    YSCALEYUV2RGB1_ALPHA(%%REGBP)
1521
                    WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
1522
                    "pop %%"REG_BP"                         \n\t"
1523
                    "mov "ESP_OFFSET"(%5), %%"REG_b"        \n\t"
1524

  
1525
                    :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1526
                    "a" (&c->redDither)
1527
                    );
1528
                }else{
1403 1529
                __asm__ volatile(
1404 1530
                "mov %%"REG_b", "ESP_OFFSET"(%5)        \n\t"
1405 1531
                "mov        %4, %%"REG_b"               \n\t"
......
1413 1539
                :: "c" (buf0), "d" (buf1), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
1414 1540
                "a" (&c->redDither)
1415 1541
                );
1542
                }
1416 1543
                return;
1417 1544
            case PIX_FMT_BGR24:
1418 1545
                __asm__ volatile(
......
1492 1619
#endif /* HAVE_MMX */
1493 1620
    if (uvalpha < 2048)
1494 1621
    {
1495
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1622
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1496 1623
    }else{
1497
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C, YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1624
        YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
1498 1625
    }
1499 1626
}
1500 1627

  
......
1642 1769
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY    , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
1643 1770
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY    , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
1644 1771

  
1772
/**
 * Extract one byte per 4-byte pixel from src into dst.
 *
 * Writes dst[i] = src[4*i] for i in [0, width). The byte picked is whatever
 * src points at, so the caller selects the channel by offsetting src
 * (presumably the alpha byte of a packed 32-bit RGB pixel -- confirm at the
 * call sites).
 *
 * @param dst    output buffer, receives width bytes
 * @param src    input pixels, read with a stride of 4 bytes
 * @param width  number of pixels to process
 * @param unused not referenced by this function
 */
static inline void RENAME(abgrToA)(uint8_t *dst, uint8_t *src, long width, uint32_t *unused){
1773
    int i;
1774
    for (i=0; i<width; i++){
1775
        dst[i]= src[4*i]; // first byte of each 4-byte group, relative to src
1776
    }
1777
}
1778

  
1645 1779
#define BGR2UV(type, name, shr, shg, shb, maska, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S)\
1646 1780
static inline void RENAME(name)(uint8_t *dstU, uint8_t *dstV, uint8_t *src, uint8_t *dummy, long width, uint32_t *unused)\
1647 1781
{\
......
2130 2264
                                   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
2131 2265
                                   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
2132 2266
                                   int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2133
                                   int32_t *mmx2FilterPos, uint32_t *pal)
2267
                                   int32_t *mmx2FilterPos, uint32_t *pal, int isAlpha)
2134 2268
{
2135 2269
    if (srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
2136 2270
    {
......
2144 2278
    }
2145 2279
    else if (srcFormat==PIX_FMT_RGB32)
2146 2280
    {
2281
        if (isAlpha)
2282
            RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
2283
        else
2147 2284
        RENAME(bgr32ToY)(formatConvBuffer, src, srcW, pal);
2148 2285
        src= formatConvBuffer;
2149 2286
    }
2150 2287
    else if (srcFormat==PIX_FMT_RGB32_1)
2151 2288
    {
2289
        if (isAlpha)
2290
            RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
2291
        else
2152 2292
        RENAME(bgr32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
2153 2293
        src= formatConvBuffer;
2154 2294
    }
......
2169 2309
    }
2170 2310
    else if (srcFormat==PIX_FMT_BGR32)
2171 2311
    {
2312
        if (isAlpha)
2313
            RENAME(abgrToA)(formatConvBuffer, src+3, srcW, pal);
2314
        else
2172 2315
        RENAME(rgb32ToY)(formatConvBuffer, src, srcW, pal);
2173 2316
        src= formatConvBuffer;
2174 2317
    }
2175 2318
    else if (srcFormat==PIX_FMT_BGR32_1)
2176 2319
    {
2320
        if (isAlpha)
2321
            RENAME(abgrToA)(formatConvBuffer, src, srcW, pal);
2322
        else
2177 2323
        RENAME(rgb32ToY)(formatConvBuffer, src+ALT32_CORR, srcW, pal);
2178 2324
        src= formatConvBuffer;
2179 2325
    }
......
2347 2493
#endif /* ARCH_X86 */
2348 2494
    }
2349 2495

  
2350
    if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2496
    if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){
2351 2497
        int i;
2352 2498
        //FIXME all pal and rgb srcFormats could do this convertion as well
2353 2499
        //FIXME all scalers more complex than bilinear could do half of this transform
......
2683 2829
    int16_t *hChrFilter= c->hChrFilter;
2684 2830
    int32_t *lumMmxFilter= c->lumMmxFilter;
2685 2831
    int32_t *chrMmxFilter= c->chrMmxFilter;
2832
    int32_t *alpMmxFilter= c->alpMmxFilter;
2686 2833
    const int vLumFilterSize= c->vLumFilterSize;
2687 2834
    const int vChrFilterSize= c->vChrFilterSize;
2688 2835
    const int hLumFilterSize= c->hLumFilterSize;
2689 2836
    const int hChrFilterSize= c->hChrFilterSize;
2690 2837
    int16_t **lumPixBuf= c->lumPixBuf;
2691 2838
    int16_t **chrPixBuf= c->chrPixBuf;
2839
    int16_t **alpPixBuf= c->alpPixBuf;
2692 2840
    const int vLumBufSize= c->vLumBufSize;
2693 2841
    const int vChrBufSize= c->vChrBufSize;
2694 2842
    uint8_t *funnyYCode= c->funnyYCode;
......
2709 2857
    if (isPacked(c->srcFormat)){
2710 2858
        src[0]=
2711 2859
        src[1]=
2712
        src[2]= src[0];
2860
        src[2]=
2861
        src[3]= src[0];
2713 2862
        srcStride[0]=
2714 2863
        srcStride[1]=
2715
        srcStride[2]= srcStride[0];
2864
        srcStride[2]=
2865
        srcStride[3]= srcStride[0];
2716 2866
    }
2717 2867
    srcStride[1]<<= c->vChrDrop;
2718 2868
    srcStride[2]<<= c->vChrDrop;
......
2733 2883
    //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
2734 2884
    //dstStride[0],dstStride[1],dstStride[2]);
2735 2885

  
2736
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
2886
    if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0)
2737 2887
    {
2738 2888
        static int warnedAlready=0; //FIXME move this into the context perhaps
2739 2889
        if (flags & SWS_PRINT_INFO && !warnedAlready)
......
2762 2912
        const int chrDstY= dstY>>c->chrDstVSubSample;
2763 2913
        unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
2764 2914
        unsigned char *vDest=dst[2]+dstStride[2]*chrDstY;
2915
        unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL;
2765 2916

  
2766 2917
        const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input
2767 2918
        const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input
......
2783 2934
            //Do horizontal scaling
2784 2935
            while(lastInLumBuf < lastLumSrcY)
2785 2936
            {
2786
                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2937
                uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2938
                uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2787 2939
                lumBufIndex++;
2788 2940
                //printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf,  lastLumSrcY);
2789 2941
                assert(lumBufIndex < 2*vLumBufSize);
2790 2942
                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2791 2943
                assert(lastInLumBuf + 1 - srcSliceY >= 0);
2792 2944
                //printf("%d %d\n", lumBufIndex, vLumBufSize);
2793
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2945
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2794 2946
                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2795 2947
                                funnyYCode, c->srcFormat, formatConvBuffer,
2796
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
2948
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
2949
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2950
                    RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
2951
                                    flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2952
                                    funnyYCode, c->srcFormat, formatConvBuffer,
2953
                                    c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
2797 2954
                lastInLumBuf++;
2798 2955
            }
2799 2956
            while(lastInChrBuf < lastChrSrcY)
......
2827 2984
            //Do horizontal scaling
2828 2985
            while(lastInLumBuf+1 < srcSliceY + srcSliceH)
2829 2986
            {
2830
                uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2987
                uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
2988
                uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
2831 2989
                lumBufIndex++;
2832 2990
                assert(lumBufIndex < 2*vLumBufSize);
2833 2991
                assert(lastInLumBuf + 1 - srcSliceY < srcSliceH);
2834 2992
                assert(lastInLumBuf + 1 - srcSliceY >= 0);
2835
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
2993
                RENAME(hyscale)(c, lumPixBuf[ lumBufIndex ], dstW, src1, srcW, lumXInc,
2836 2994
                                flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
2837 2995
                                funnyYCode, c->srcFormat, formatConvBuffer,
2838
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
2996
                                c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 0);
2997
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf)
2998
                    RENAME(hyscale)(c, alpPixBuf[ lumBufIndex ], dstW, src2, srcW, lumXInc,
2999
                                    flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
3000
                                    funnyYCode, c->srcFormat, formatConvBuffer,
3001
                                    c->lumMmx2Filter, c->lumMmx2FilterPos, pal, 1);
2839 3002
                lastInLumBuf++;
2840 3003
            }
2841 3004
            while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
......
2872 3035
        {
2873 3036
            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2874 3037
            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
3038
            int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2875 3039
#if HAVE_MMX
2876 3040
            int i;
2877 3041
        if (flags & SWS_ACCURATE_RND){
......
2882 3046
                          lumMmxFilter[s*i+APCK_COEF/4  ]=
2883 3047
                          lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i    ]
2884 3048
                    + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
3049
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
3050
                    *(void**)&alpMmxFilter[s*i              ]= alpSrcPtr[i  ];
3051
                    *(void**)&alpMmxFilter[s*i+APCK_PTR2/4  ]= alpSrcPtr[i+(vLumFilterSize>1)];
3052
                              alpMmxFilter[s*i+APCK_COEF/4  ]=
3053
                              alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4  ];
3054
                }
2885 3055
            }
2886 3056
            for (i=0; i<vChrFilterSize; i+=2){
2887 3057
                *(void**)&chrMmxFilter[s*i              ]= chrSrcPtr[i  ];
......
2898 3068
                lumMmxFilter[4*i+2]=
2899 3069
                lumMmxFilter[4*i+3]=
2900 3070
                    ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
3071
                if (CONFIG_SWSCALE_ALPHA && alpPixBuf){
3072
                    alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
3073
                    alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
3074
                    alpMmxFilter[4*i+2]=
3075
                    alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
3076
                }
2901 3077
            }
2902 3078
            for (i=0; i<vChrFilterSize; i++)
2903 3079
            {
......
2925 3101
                {
2926 3102
                    int16_t *lumBuf = lumPixBuf[0];
2927 3103
                    int16_t *chrBuf= chrPixBuf[0];
2928
                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW);
3104
                    int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
3105
                    RENAME(yuv2yuv1)(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
2929 3106
                }
2930 3107
                else //General YV12
2931 3108
                {
2932 3109
                    RENAME(yuv2yuvX)(c,
2933 3110
                        vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
2934 3111
                        vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2935
                        dest, uDest, vDest, dstW, chrDstW);
3112
                        alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
2936 3113
                }
2937 3114
            }
2938 3115
            else
......
2946 3123
                        yuv2rgbXinC_full(c, //FIXME write a packed1_full function
2947 3124
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2948 3125
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2949
                            dest, dstW, dstY);
3126
                            alpSrcPtr, dest, dstW, dstY);
2950 3127
                    }else{
2951 3128
                        RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
3129
                            alpPixBuf ? *alpSrcPtr : NULL,
2952 3130
                            dest, dstW, chrAlpha, dstFormat, flags, dstY);
2953 3131
                    }
2954 3132
                }
......
2964 3142
                        yuv2rgbXinC_full(c, //FIXME write a packed2_full function
2965 3143
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2966 3144
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2967
                            dest, dstW, dstY);
3145
                            alpSrcPtr, dest, dstW, dstY);
2968 3146
                    }else{
2969 3147
                        RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
3148
                            alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
2970 3149
                            dest, dstW, lumAlpha, chrAlpha, dstY);
2971 3150
                    }
2972 3151
                }
......
2976 3155
                        yuv2rgbXinC_full(c,
2977 3156
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2978 3157
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2979
                            dest, dstW, dstY);
3158
                            alpSrcPtr, dest, dstW, dstY);
2980 3159
                    }else{
2981 3160
                        RENAME(yuv2packedX)(c,
2982 3161
                            vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2983 3162
                            vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2984
                            dest, dstW, dstY);
3163
                            alpSrcPtr, dest, dstW, dstY);
2985 3164
                    }
2986 3165
                }
2987 3166
            }
......
2990 3169
        {
2991 3170
            int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2992 3171
            int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
3172
            int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
2993 3173
            if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){
2994 3174
                const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
2995 3175
                if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
......
3005 3185
                yuv2yuvXinC(
3006 3186
                    vLumFilter+dstY*vLumFilterSize   , lumSrcPtr, vLumFilterSize,
3007 3187
                    vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3008
                    dest, uDest, vDest, dstW, chrDstW);
3188
                    alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
3009 3189
            }
3010 3190
            else
3011 3191
            {
......
3015 3195
                    yuv2rgbXinC_full(c,
3016 3196
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
3017 3197
                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3018
                        dest, dstW, dstY);
3198
                        alpSrcPtr, dest, dstW, dstY);
3019 3199
                }else{
3020 3200
                    yuv2packedXinC(c,
3021 3201
                        vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
3022 3202
                        vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
3023
                        dest, dstW, dstY);
3203
                        alpSrcPtr, dest, dstW, dstY);
3024 3204
                }
3025 3205
            }
3026 3206
        }

Also available in: Unified diff