Revision 6858492e libswscale/swscale.c

View differences:

libswscale/swscale.c
473 473

  
474 474
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
475 475
                               int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
476
                               uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
476
                               int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW)
477 477
{
478 478
    //FIXME Optimize (just quickly written not optimized..)
479 479
    int i;
......
502 502
            uDest[i]= av_clip_uint8(u>>19);
503 503
            vDest[i]= av_clip_uint8(v>>19);
504 504
        }
505

  
506
    if (CONFIG_SWSCALE_ALPHA && aDest)
507
        for (i=0; i<dstW; i++){
508
            int val=1<<18;
509
            int j;
510
            for (j=0; j<lumFilterSize; j++)
511
                val += alpSrc[j][i] * lumFilter[j];
512

  
513
            aDest[i]= av_clip_uint8(val>>19);
514
        }
515

  
505 516
}
506 517

  
507 518
static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
......
555 566
        }
556 567
}
557 568

  
558
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type) \
569
#define YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha) \
559 570
    for (i=0; i<(dstW>>1); i++){\
560 571
        int j;\
561 572
        int Y1 = 1<<18;\
562 573
        int Y2 = 1<<18;\
563 574
        int U  = 1<<18;\
564 575
        int V  = 1<<18;\
576
        int av_unused A1, A2;\
565 577
        type av_unused *r, *b, *g;\
566 578
        const int i2= 2*i;\
567 579
        \
......
579 591
        Y2>>=19;\
580 592
        U >>=19;\
581 593
        V >>=19;\
594
        if (alpha){\
595
            A1 = 1<<18;\
596
            A2 = 1<<18;\
597
            for (j=0; j<lumFilterSize; j++){\
598
                A1 += alpSrc[j][i2  ] * lumFilter[j];\
599
                A2 += alpSrc[j][i2+1] * lumFilter[j];\
600
            }\
601
            A1>>=19;\
602
            A2>>=19;\
603
        }\
582 604

  
583
#define YSCALE_YUV_2_PACKEDX_C(type) \
584
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type)\
605
#define YSCALE_YUV_2_PACKEDX_C(type,alpha) \
606
        YSCALE_YUV_2_PACKEDX_NOCLIP_C(type,alpha)\
585 607
        if ((Y1|Y2|U|V)&256)\
586 608
        {\
587 609
            if (Y1>255)   Y1=255; \
......
592 614
            else if (U<0) U=0;    \
593 615
            if (V>255)    V=255;  \
594 616
            else if (V<0) V=0;    \
617
        }\
618
        if (alpha && ((A1|A2)&256)){\
619
            A1=av_clip_uint8(A1);\
620
            A2=av_clip_uint8(A2);\
595 621
        }
596 622

  
597
#define YSCALE_YUV_2_PACKEDX_FULL_C \
623
#define YSCALE_YUV_2_PACKEDX_FULL_C(rnd,alpha) \
598 624
    for (i=0; i<dstW; i++){\
599 625
        int j;\
600 626
        int Y = 0;\
601 627
        int U = -128<<19;\
602 628
        int V = -128<<19;\
629
        int av_unused A;\
603 630
        int R,G,B;\
604 631
        \
605 632
        for (j=0; j<lumFilterSize; j++){\
......
612 639
        Y >>=10;\
613 640
        U >>=10;\
614 641
        V >>=10;\
642
        if (alpha){\
643
            A = rnd;\
644
            for (j=0; j<lumFilterSize; j++)\
645
                A += alpSrc[j][i     ] * lumFilter[j];\
646
            A >>=19;\
647
            if (A&256)\
648
                A = av_clip_uint8(A);\
649
        }\
615 650

  
616
#define YSCALE_YUV_2_RGBX_FULL_C(rnd) \
617
    YSCALE_YUV_2_PACKEDX_FULL_C\
651
#define YSCALE_YUV_2_RGBX_FULL_C(rnd,alpha) \
652
    YSCALE_YUV_2_PACKEDX_FULL_C(rnd>>3,alpha)\
618 653
        Y-= c->yuv2rgb_y_offset;\
619 654
        Y*= c->yuv2rgb_y_coeff;\
620 655
        Y+= rnd;\
......
656 691
            else if (Y2<0)Y2=0;   \
657 692
        }
658 693

  
659
#define YSCALE_YUV_2_RGBX_C(type) \
660
    YSCALE_YUV_2_PACKEDX_C(type)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
694
#define YSCALE_YUV_2_RGBX_C(type,alpha) \
695
    YSCALE_YUV_2_PACKEDX_C(type,alpha)  /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
661 696
    r = (type *)c->table_rV[V];   \
662 697
    g = (type *)(c->table_gU[U] + c->table_gV[V]); \
663 698
    b = (type *)c->table_bU[U];   \
664 699

  
665
#define YSCALE_YUV_2_PACKED2_C   \
700
#define YSCALE_YUV_2_PACKED2_C(type,alpha)   \
666 701
    for (i=0; i<(dstW>>1); i++){ \
667 702
        const int i2= 2*i;       \
668 703
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>19;           \
669 704
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;           \
670 705
        int U= (uvbuf0[i     ]*uvalpha1+uvbuf1[i     ]*uvalpha)>>19;  \
671 706
        int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;  \
707
        type av_unused *r, *b, *g;                                    \
708
        int av_unused A1, A2;                                         \
709
        if (alpha){\
710
            A1= (abuf0[i2  ]*yalpha1+abuf1[i2  ]*yalpha)>>19;         \
711
            A2= (abuf0[i2+1]*yalpha1+abuf1[i2+1]*yalpha)>>19;         \
712
        }\
672 713

  
673 714
#define YSCALE_YUV_2_GRAY16_2_C   \
674 715
    for (i=0; i<(dstW>>1); i++){ \
......
676 717
        int Y1= (buf0[i2  ]*yalpha1+buf1[i2  ]*yalpha)>>11;           \
677 718
        int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>11;           \
678 719

  
679
#define YSCALE_YUV_2_RGB2_C(type) \
680
    YSCALE_YUV_2_PACKED2_C\
681
    type *r, *b, *g;\
720
#define YSCALE_YUV_2_RGB2_C(type,alpha) \
721
    YSCALE_YUV_2_PACKED2_C(type,alpha)\
682 722
    r = (type *)c->table_rV[V];\
683 723
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
684 724
    b = (type *)c->table_bU[U];\
685 725

  
686
#define YSCALE_YUV_2_PACKED1_C \
726
#define YSCALE_YUV_2_PACKED1_C(type,alpha) \
687 727
    for (i=0; i<(dstW>>1); i++){\
688 728
        const int i2= 2*i;\
689 729
        int Y1= buf0[i2  ]>>7;\
690 730
        int Y2= buf0[i2+1]>>7;\
691 731
        int U= (uvbuf1[i     ])>>7;\
692 732
        int V= (uvbuf1[i+VOFW])>>7;\
733
        type av_unused *r, *b, *g;\
734
        int av_unused A1, A2;\
735
        if (alpha){\
736
            A1= abuf0[i2  ]>>7;\
737
            A2= abuf0[i2+1]>>7;\
738
        }\
693 739

  
694 740
#define YSCALE_YUV_2_GRAY16_1_C \
695 741
    for (i=0; i<(dstW>>1); i++){\
......
697 743
        int Y1= buf0[i2  ]<<1;\
698 744
        int Y2= buf0[i2+1]<<1;\
699 745

  
700
#define YSCALE_YUV_2_RGB1_C(type) \
701
    YSCALE_YUV_2_PACKED1_C\
702
    type *r, *b, *g;\
746
#define YSCALE_YUV_2_RGB1_C(type,alpha) \
747
    YSCALE_YUV_2_PACKED1_C(type,alpha)\
703 748
    r = (type *)c->table_rV[V];\
704 749
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
705 750
    b = (type *)c->table_bU[U];\
706 751

  
707
#define YSCALE_YUV_2_PACKED1B_C \
752
#define YSCALE_YUV_2_PACKED1B_C(type,alpha) \
708 753
    for (i=0; i<(dstW>>1); i++){\
709 754
        const int i2= 2*i;\
710 755
        int Y1= buf0[i2  ]>>7;\
711 756
        int Y2= buf0[i2+1]>>7;\
712 757
        int U= (uvbuf0[i     ] + uvbuf1[i     ])>>8;\
713 758
        int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\
759
        type av_unused *r, *b, *g;\
760
        int av_unused A1, A2;\
761
        if (alpha){\
762
            A1= abuf0[i2  ]>>7;\
763
            A2= abuf0[i2+1]>>7;\
764
        }\
714 765

  
715
#define YSCALE_YUV_2_RGB1B_C(type) \
716
    YSCALE_YUV_2_PACKED1B_C\
717
    type *r, *b, *g;\
766
#define YSCALE_YUV_2_RGB1B_C(type,alpha) \
767
    YSCALE_YUV_2_PACKED1B_C(type,alpha)\
718 768
    r = (type *)c->table_rV[V];\
719 769
    g = (type *)(c->table_gU[U] + c->table_gV[V]);\
720 770
    b = (type *)c->table_bU[U];\
......
772 822
#define YSCALE_YUV_2_ANYRGB_C(func, func2, func_g16, func_monoblack)\
773 823
    switch(c->dstFormat)\
774 824
    {\
775
    case PIX_FMT_RGB32:\
776
    case PIX_FMT_BGR32:\
777
    case PIX_FMT_RGB32_1:\
778
    case PIX_FMT_BGR32_1:\
779
        func(uint32_t)\
780
            ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
781
            ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
825
    case PIX_FMT_RGBA:\
826
    case PIX_FMT_BGRA:\
827
        if (CONFIG_SMALL){\
828
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
829
            func(uint32_t,needAlpha)\
830
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? (A1<<24) : 0);\
831
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? (A2<<24) : 0);\
832
            }\
833
        }else{\
834
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
835
                func(uint32_t,1)\
836
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (A1<<24);\
837
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (A2<<24);\
838
                }\
839
            }else{\
840
                func(uint32_t,0)\
841
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
842
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
843
                }\
844
            }\
845
        }\
846
        break;\
847
    case PIX_FMT_ARGB:\
848
    case PIX_FMT_ABGR:\
849
        if (CONFIG_SMALL){\
850
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;\
851
            func(uint32_t,needAlpha)\
852
                ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + (needAlpha ? A1 : 0);\
853
                ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + (needAlpha ? A2 : 0);\
854
            }\
855
        }else{\
856
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){\
857
                func(uint32_t,1)\
858
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1] + A1;\
859
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2] + A2;\
860
                }\
861
            }else{\
862
                func(uint32_t,0)\
863
                    ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
864
                    ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
865
                }\
866
            }\
782 867
        }                \
783 868
        break;\
784 869
    case PIX_FMT_RGB24:\
785
        func(uint8_t)\
870
        func(uint8_t,0)\
786 871
            ((uint8_t*)dest)[0]= r[Y1];\
787 872
            ((uint8_t*)dest)[1]= g[Y1];\
788 873
            ((uint8_t*)dest)[2]= b[Y1];\
......
793 878
        }\
794 879
        break;\
795 880
    case PIX_FMT_BGR24:\
796
        func(uint8_t)\
881
        func(uint8_t,0)\
797 882
            ((uint8_t*)dest)[0]= b[Y1];\
798 883
            ((uint8_t*)dest)[1]= g[Y1];\
799 884
            ((uint8_t*)dest)[2]= r[Y1];\
......
812 897
            const int dr2= dither_2x2_8[y&1    ][1];\
813 898
            const int dg2= dither_2x2_4[y&1    ][1];\
814 899
            const int db2= dither_2x2_8[(y&1)^1][1];\
815
            func(uint16_t)\
900
            func(uint16_t,0)\
816 901
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
817 902
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
818 903
            }\
......
827 912
            const int dr2= dither_2x2_8[y&1    ][1];\
828 913
            const int dg2= dither_2x2_8[y&1    ][0];\
829 914
            const int db2= dither_2x2_8[(y&1)^1][1];\
830
            func(uint16_t)\
915
            func(uint16_t,0)\
831 916
                ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
832 917
                ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
833 918
            }\
......
838 923
        {\
839 924
            const uint8_t * const d64= dither_8x8_73[y&7];\
840 925
            const uint8_t * const d32= dither_8x8_32[y&7];\
841
            func(uint8_t)\
926
            func(uint8_t,0)\
842 927
                ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
843 928
                ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
844 929
            }\
......
849 934
        {\
850 935
            const uint8_t * const d64= dither_8x8_73 [y&7];\
851 936
            const uint8_t * const d128=dither_8x8_220[y&7];\
852
            func(uint8_t)\
937
            func(uint8_t,0)\
853 938
                ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
854 939
                                 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
855 940
            }\
......
860 945
        {\
861 946
            const uint8_t * const d64= dither_8x8_73 [y&7];\
862 947
            const uint8_t * const d128=dither_8x8_220[y&7];\
863
            func(uint8_t)\
948
            func(uint8_t,0)\
864 949
                ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
865 950
                ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
866 951
            }\
......
909 994

  
910 995
static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
911 996
                                  int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
912
                                  uint8_t *dest, int dstW, int y)
997
                                  int16_t **alpSrc, uint8_t *dest, int dstW, int y)
913 998
{
914 999
    int i;
915
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
1000
    YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C)
916 1001
}
917 1002

  
918 1003
static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
919 1004
                                    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
920
                                    uint8_t *dest, int dstW, int y)
1005
                                    int16_t **alpSrc, uint8_t *dest, int dstW, int y)
921 1006
{
922 1007
    int i;
923 1008
    int step= fmt_depth(c->dstFormat)/8;
......
930 1015
    case PIX_FMT_RGB24:
931 1016
        aidx--;
932 1017
    case PIX_FMT_RGBA:
933
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
934
            dest[aidx]= 255;
935
            dest[0]= R>>22;
936
            dest[1]= G>>22;
937
            dest[2]= B>>22;
938
            dest+= step;
1018
        if (CONFIG_SMALL){
1019
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1020
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1021
                dest[aidx]= needAlpha ? A : 255;
1022
                dest[0]= R>>22;
1023
                dest[1]= G>>22;
1024
                dest[2]= B>>22;
1025
                dest+= step;
1026
            }
1027
        }else{
1028
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1029
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1030
                    dest[aidx]= A;
1031
                    dest[0]= R>>22;
1032
                    dest[1]= G>>22;
1033
                    dest[2]= B>>22;
1034
                    dest+= step;
1035
                }
1036
            }else{
1037
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1038
                    dest[aidx]= 255;
1039
                    dest[0]= R>>22;
1040
                    dest[1]= G>>22;
1041
                    dest[2]= B>>22;
1042
                    dest+= step;
1043
                }
1044
            }
939 1045
        }
940 1046
        break;
941 1047
    case PIX_FMT_ABGR:
......
944 1050
    case PIX_FMT_BGR24:
945 1051
        aidx--;
946 1052
    case PIX_FMT_BGRA:
947
        YSCALE_YUV_2_RGBX_FULL_C(1<<21)
948
            dest[aidx]= 255;
949
            dest[0]= B>>22;
950
            dest[1]= G>>22;
951
            dest[2]= R>>22;
952
            dest+= step;
1053
        if (CONFIG_SMALL){
1054
            int needAlpha = CONFIG_SWSCALE_ALPHA && c->alpPixBuf;
1055
            YSCALE_YUV_2_RGBX_FULL_C(1<<21, needAlpha)
1056
                dest[aidx]= needAlpha ? A : 255;
1057
                dest[0]= B>>22;
1058
                dest[1]= G>>22;
1059
                dest[2]= R>>22;
1060
                dest+= step;
1061
            }
1062
        }else{
1063
            if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
1064
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 1)
1065
                    dest[aidx]= A;
1066
                    dest[0]= B>>22;
1067
                    dest[1]= G>>22;
1068
                    dest[2]= R>>22;
1069
                    dest+= step;
1070
                }
1071
            }else{
1072
                YSCALE_YUV_2_RGBX_FULL_C(1<<21, 0)
1073
                    dest[aidx]= 255;
1074
                    dest[0]= B>>22;
1075
                    dest[1]= G>>22;
1076
                    dest[2]= R>>22;
1077
                    dest+= step;
1078
                }
1079
            }
953 1080
        }
954 1081
        break;
955 1082
    default:
......
2644 2771
    // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2645 2772
    c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2646 2773
    c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2774
    if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat))
2775
        c->alpPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2647 2776
    //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
2648 2777
    /* align at 16 bytes for AltiVec */
2649 2778
    for (i=0; i<c->vLumBufSize; i++)
2650 2779
        c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2651 2780
    for (i=0; i<c->vChrBufSize; i++)
2652 2781
        c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
2782
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf)
2783
        for (i=0; i<c->vLumBufSize; i++)
2784
            c->alpPixBuf[i]= c->alpPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2653 2785

  
2654 2786
    //try to avoid drawing green stuff between the right end and the stride end
2655 2787
    for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2);
......
3200 3332
        av_freep(&c->chrPixBuf);
3201 3333
    }
3202 3334

  
3335
    if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){
3336
        for (i=0; i<c->vLumBufSize; i++)
3337
            av_freep(&c->alpPixBuf[i]);
3338
        av_freep(&c->alpPixBuf);
3339
    }
3340

  
3203 3341
    av_freep(&c->vLumFilter);
3204 3342
    av_freep(&c->vChrFilter);
3205 3343
    av_freep(&c->hLumFilter);

Also available in: Unified diff