Revision 622348f9 libavcodec/i386/dsputil_mmx.c

View differences:

libavcodec/i386/dsputil_mmx.c
22 22
#include "../dsputil.h"
23 23
#include "../simple_idct.h"
24 24

  
25
//#undef NDEBUG
26
//#include <assert.h>
27

  
25 28
/* 32-entry byte table, only declared here (no initializer in this view);
 * presumably defined with the H.263 code -- confirm. */
extern const uint8_t ff_h263_loop_filter_strength[32];
26 29

  
27 30
/* File-scope global (not static), so it is visible to other translation units. */
int mm_flags; /* multimedia extension flags */
......
747 750
    return tmp;
748 751
}
749 752

  
753
/**
 * Vertical sum of absolute differences inside one 16-pixel-wide block (plain MMX).
 *
 * Sums |pix[x + (y+1)*line_size] - pix[x + y*line_size]| for x in 0..15 over
 * h-1 consecutive row pairs: one pair before the loop and two per iteration.
 *
 * @param v         unused
 * @param pix       first row of the block; asserted to have its low 3 bits clear
 * @param dummy     unused
 * @param line_size byte distance between rows; asserted to be a multiple of 8
 * @param h         number of rows. The counter is decremented by 2 before a
 *                  loop whose body always runs at least once and which exits
 *                  only on exactly zero, so h is presumably expected to be even
 *                  and at least 4 -- confirm against callers.
 * @return the accumulated sum, reduced to its low 16 bits
 *
 * The asm uses mm0-mm7 and ecx; only ecx is listed as clobbered, and no emms
 * is executed here. It uses 32-bit "addl" on the pointer operand.
 */
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
754
    int tmp;
755
    
756
    assert( (((int)pix) & 7) == 0);
757
    assert((line_size &7) ==0);
758
    
759
/* SUM(in0, in1, out0, out1): in0/in1 hold the previous row (16 bytes).
 * Loads the next row, advances %0 by line_size, keeps a copy of the new row in
 * out0/out1, forms the per-byte absolute difference (two saturating
 * subtractions OR-ed together), widens bytes to words against mm7 (zero in
 * this function) and adds the four word vectors into the accumulator mm6.
 * in0/in1, mm2 and mm3 are destroyed. */
#define SUM(in0, in1, out0, out1) \
760
      "movq (%0), %%mm2\n"\
761
      "movq 8(%0), %%mm3\n"\
762
      "addl %2,%0\n"\
763
      "movq %%mm2, " #out0 "\n"\
764
      "movq %%mm3, " #out1 "\n"\
765
      "psubusb " #in0 ", %%mm2\n"\
766
      "psubusb " #in1 ", %%mm3\n"\
767
      "psubusb " #out0 ", " #in0 "\n"\
768
      "psubusb " #out1 ", " #in1 "\n"\
769
      "por %%mm2, " #in0 "\n"\
770
      "por %%mm3, " #in1 "\n"\
771
      "movq " #in0 ", %%mm2\n"\
772
      "movq " #in1 ", %%mm3\n"\
773
      "punpcklbw %%mm7, " #in0 "\n"\
774
      "punpcklbw %%mm7, " #in1 "\n"\
775
      "punpckhbw %%mm7, %%mm2\n"\
776
      "punpckhbw %%mm7, %%mm3\n"\
777
      "paddw " #in1 ", " #in0 "\n"\
778
      "paddw %%mm3, %%mm2\n"\
779
      "paddw %%mm2, " #in0 "\n"\
780
      "paddw " #in0 ", %%mm6\n"
781

  
782
    
783
  /* Operands: %0 = pix (read/write), %1 = tmp (out), %2 = line_size, %3 = h (memory). */
  asm volatile (
784
      /* ecx = row counter, mm6 = accumulator, mm7 = zero for unpacking */
      "movl %3,%%ecx\n"
785
      "pxor %%mm6,%%mm6\n"
786
      "pxor %%mm7,%%mm7\n"
787
      /* prime mm0/mm1 with the first row */
      "movq (%0),%%mm0\n"
788
      "movq 8(%0),%%mm1\n"
789
      "addl %2,%0\n"
790
      "subl $2, %%ecx\n"
791
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
792
      "1:\n"
793
      
794
      /* two rows per iteration; the register pairs swap roles each step */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
795
      
796
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
797
      
798
      "subl $2, %%ecx\n"
799
      "jnz 1b\n"
800

  
801
      /* fold the four 16-bit lanes of mm6 into the low word of mm0 */
      "movq %%mm6,%%mm0\n"
802
      "psrlq $32, %%mm6\n"
803
      "paddw %%mm6,%%mm0\n"
804
      "movq %%mm0,%%mm6\n"
805
      "psrlq $16, %%mm0\n"
806
      "paddw %%mm6,%%mm0\n"
807
      "movd %%mm0,%1\n"
808
      : "+r" (pix), "=r"(tmp) 
809
      : "r" (line_size) , "m" (h)
810
      : "%ecx");
811
    /* only the low word holds the total; the upper lanes carry partial sums */
    return tmp & 0xFFFF;
812
}
813
#undef SUM
814

  
815
/**
 * Vertical sum of absolute differences inside one 16-pixel-wide block, using
 * the psadbw instruction.
 *
 * Same row-pair scheme as the plain MMX variant: one row pair before the loop
 * and two per loop iteration, h-1 pairs in total.
 *
 * @param v         unused
 * @param pix       first row of the block; asserted to have its low 3 bits clear
 * @param dummy     unused
 * @param line_size byte distance between rows; asserted to be a multiple of 8
 * @param h         number of rows; the loop exits only when the counter hits
 *                  exactly zero after an initial decrement by 2, so h is
 *                  presumably even and at least 4 -- confirm against callers.
 * @return the low 32 bits of the accumulator mm6, unmasked
 *
 * The asm uses mm0-mm7 and ecx; only ecx is listed as clobbered, and no emms
 * is executed here. It uses 32-bit "addl" on the pointer operand.
 */
static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
816
    int tmp;
817
    
818
    assert( (((int)pix) & 7) == 0);
819
    assert((line_size &7) ==0);
820
    
821
/* SUM(in0, in1, out0, out1): in0/in1 hold the previous row. Loads the next
 * row straight into out0/out1, advances %0 by line_size, replaces in0/in1 by
 * their psadbw result against the new row and adds both results to mm6. */
#define SUM(in0, in1, out0, out1) \
822
      "movq (%0), " #out0 "\n"\
823
      "movq 8(%0), " #out1 "\n"\
824
      "addl %2,%0\n"\
825
      "psadbw " #out0 ", " #in0 "\n"\
826
      "psadbw " #out1 ", " #in1 "\n"\
827
      "paddw " #in1 ", " #in0 "\n"\
828
      "paddw " #in0 ", %%mm6\n"
829

  
830
  /* Operands: %0 = pix (read/write), %1 = tmp (out), %2 = line_size, %3 = h (memory). */
  asm volatile (
831
      "movl %3,%%ecx\n"
832
      "pxor %%mm6,%%mm6\n"
833
      /* mm7 is cleared here but not referenced by SUM in this function */
      "pxor %%mm7,%%mm7\n"
834
      /* prime mm0/mm1 with the first row */
      "movq (%0),%%mm0\n"
835
      "movq 8(%0),%%mm1\n"
836
      "addl %2,%0\n"
837
      "subl $2, %%ecx\n"
838
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
839
      "1:\n"
840
      
841
      /* two rows per iteration; the register pairs swap roles each step */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
842
      
843
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
844
      
845
      "subl $2, %%ecx\n"
846
      "jnz 1b\n"
847

  
848
      /* no lane folding: the running total is read directly from mm6 */
      "movd %%mm6,%1\n"
849
      : "+r" (pix), "=r"(tmp) 
850
      : "r" (line_size) , "m" (h)
851
      : "%ecx");
852
    return tmp;
853
}
854
#undef SUM
855

  
856
/**
 * Vertical sum of absolute differences of the residual between two
 * 16-pixel-wide blocks (plain MMX).
 *
 * For each row the per-byte residual pix1 - pix2 is formed with a wrapping
 * byte subtract and XOR-ed with 0x80; the function then accumulates the
 * absolute difference between the biased residuals of consecutive rows, for
 * h-1 row pairs. Because the subtraction wraps modulo 256, the result is not
 * exact for every input; the init code in this file installs this function
 * only when CODEC_FLAG_BITEXACT is not set.
 *
 * @param v         unused
 * @param pix1      first block; asserted to have its low 3 bits clear
 * @param pix2      second block; asserted to have its low 3 bits clear
 * @param line_size byte distance between rows (shared by both blocks);
 *                  asserted to be a multiple of 8
 * @param h         number of rows; the loop exits only when the counter hits
 *                  exactly zero after an initial decrement by 2, so h is
 *                  presumably even and at least 4 -- confirm against callers.
 * @return the accumulated sum masked to its low 15 bits (0x7FFF)
 *
 * The asm uses mm0-mm7 and ecx; only ecx is listed as clobbered, and no emms
 * is executed here. It uses 32-bit "addl" on the pointer operands.
 */
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
857
    int tmp;
858
    
859
    assert( (((int)pix1) & 7) == 0);
860
    assert( (((int)pix2) & 7) == 0);
861
    assert((line_size &7) ==0);
862
    
863
/* SUM(in0, in1, out0, out1): in0/in1 hold the previous row's biased residual.
 * Loads the next row of both blocks, advances %0 and %1 by line_size, builds
 * the new biased residual (psubb, then pxor with mm7), saves it in out0/out1,
 * then forms the per-byte absolute difference against in0/in1 and adds the
 * widened words into mm6.
 * mm7 holds 0x80 in every byte here, so the unpack puts 0x80 in the high byte
 * of each word; exactly four such words are added per lane, so that filler
 * contributes 4 * 0x8000 = 0x20000, which wraps to zero in a 16-bit lane. */
#define SUM(in0, in1, out0, out1) \
864
      "movq (%0),%%mm2\n"\
865
      "movq (%1)," #out0 "\n"\
866
      "movq 8(%0),%%mm3\n"\
867
      "movq 8(%1)," #out1 "\n"\
868
      "addl %3,%0\n"\
869
      "addl %3,%1\n"\
870
      "psubb " #out0 ", %%mm2\n"\
871
      "psubb " #out1 ", %%mm3\n"\
872
      "pxor %%mm7, %%mm2\n"\
873
      "pxor %%mm7, %%mm3\n"\
874
      "movq %%mm2, " #out0 "\n"\
875
      "movq %%mm3, " #out1 "\n"\
876
      "psubusb " #in0 ", %%mm2\n"\
877
      "psubusb " #in1 ", %%mm3\n"\
878
      "psubusb " #out0 ", " #in0 "\n"\
879
      "psubusb " #out1 ", " #in1 "\n"\
880
      "por %%mm2, " #in0 "\n"\
881
      "por %%mm3, " #in1 "\n"\
882
      "movq " #in0 ", %%mm2\n"\
883
      "movq " #in1 ", %%mm3\n"\
884
      "punpcklbw %%mm7, " #in0 "\n"\
885
      "punpcklbw %%mm7, " #in1 "\n"\
886
      "punpckhbw %%mm7, %%mm2\n"\
887
      "punpckhbw %%mm7, %%mm3\n"\
888
      "paddw " #in1 ", " #in0 "\n"\
889
      "paddw %%mm3, %%mm2\n"\
890
      "paddw %%mm2, " #in0 "\n"\
891
      "paddw " #in0 ", %%mm6\n"
892

  
893
    
894
  /* Operands: %0 = pix1, %1 = pix2 (both read/write), %2 = tmp (out),
   * %3 = line_size, %4 = h (memory). */
  asm volatile (
895
      "movl %4,%%ecx\n"
896
      "pxor %%mm6,%%mm6\n"
897
      /* build mm7 = 0x80 in every byte: all ones -> 0x8000 per word ->
       * signed-saturating pack to bytes */
      "pcmpeqw %%mm7,%%mm7\n"
898
      "psllw $15, %%mm7\n"
899
      "packsswb %%mm7, %%mm7\n"
900
      /* prime mm0/mm1 with the biased residual of the first row */
      "movq (%0),%%mm0\n"
901
      "movq (%1),%%mm2\n"
902
      "movq 8(%0),%%mm1\n"
903
      "movq 8(%1),%%mm3\n"
904
      "addl %3,%0\n"
905
      "addl %3,%1\n"
906
      "subl $2, %%ecx\n"
907
      "psubb %%mm2, %%mm0\n"
908
      "psubb %%mm3, %%mm1\n"
909
      "pxor %%mm7, %%mm0\n"
910
      "pxor %%mm7, %%mm1\n"
911
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
912
      "1:\n"
913
      
914
      /* two rows per iteration; the register pairs swap roles each step */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
915
      
916
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
917
      
918
      "subl $2, %%ecx\n"
919
      "jnz 1b\n"
920

  
921
      /* fold the four 16-bit lanes of mm6 into the low word of mm0 */
      "movq %%mm6,%%mm0\n"
922
      "psrlq $32, %%mm6\n"
923
      "paddw %%mm6,%%mm0\n"
924
      "movq %%mm0,%%mm6\n"
925
      "psrlq $16, %%mm0\n"
926
      "paddw %%mm6,%%mm0\n"
927
      "movd %%mm0,%2\n"
928
      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
929
      : "r" (line_size) , "m" (h)
930
      : "%ecx");
931
    /* NOTE(review): the mask keeps 15 bits here, versus 16 in the intra
     * variant; the reason is not visible in this file -- confirm intent. */
    return tmp & 0x7FFF;
932
}
933
#undef SUM
934

  
935
/**
 * Vertical sum of absolute differences of the residual between two
 * 16-pixel-wide blocks, using the psadbw instruction.
 *
 * For each row the per-byte residual pix1 - pix2 is formed with a wrapping
 * byte subtract and XOR-ed with 0x80; psadbw then sums the absolute
 * differences between the biased residuals of consecutive rows, for h-1 row
 * pairs. Because the subtraction wraps modulo 256, the result is not exact
 * for every input; the init code in this file installs this function only
 * when CODEC_FLAG_BITEXACT is not set.
 *
 * @param v         unused
 * @param pix1      first block; asserted to have its low 3 bits clear
 * @param pix2      second block; asserted to have its low 3 bits clear
 * @param line_size byte distance between rows (shared by both blocks);
 *                  asserted to be a multiple of 8
 * @param h         number of rows; the loop exits only when the counter hits
 *                  exactly zero after an initial decrement by 2, so h is
 *                  presumably even and at least 4 -- confirm against callers.
 * @return the low 32 bits of the accumulator mm6, unmasked
 *
 * The asm uses mm0-mm7 and ecx; only ecx is listed as clobbered, and no emms
 * is executed here. It uses 32-bit "addl" on the pointer operands.
 */
static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
936
    int tmp;
937
    
938
    assert( (((int)pix1) & 7) == 0);
939
    assert( (((int)pix2) & 7) == 0);
940
    assert((line_size &7) ==0);
941
    
942
/* SUM(in0, in1, out0, out1): in0/in1 hold the previous row's biased residual.
 * Loads the next row of pix1 into out0/out1 and of pix2 into mm2/mm3, advances
 * both pointers by line_size, turns out0/out1 into the new biased residual
 * (psubb, then pxor with mm7), replaces in0/in1 by their psadbw result
 * against it and adds both results to mm6. */
#define SUM(in0, in1, out0, out1) \
943
      "movq (%0)," #out0 "\n"\
944
      "movq (%1),%%mm2\n"\
945
      "movq 8(%0)," #out1 "\n"\
946
      "movq 8(%1),%%mm3\n"\
947
      "addl %3,%0\n"\
948
      "addl %3,%1\n"\
949
      "psubb %%mm2, " #out0 "\n"\
950
      "psubb %%mm3, " #out1 "\n"\
951
      "pxor %%mm7, " #out0 "\n"\
952
      "pxor %%mm7, " #out1 "\n"\
953
      "psadbw " #out0 ", " #in0 "\n"\
954
      "psadbw " #out1 ", " #in1 "\n"\
955
      "paddw " #in1 ", " #in0 "\n"\
956
      "paddw " #in0 ", %%mm6\n"
957

  
958
  /* Operands: %0 = pix1, %1 = pix2 (both read/write), %2 = tmp (out),
   * %3 = line_size, %4 = h (memory). */
  asm volatile (
959
      "movl %4,%%ecx\n"
960
      "pxor %%mm6,%%mm6\n"
961
      /* build mm7 = 0x80 in every byte: all ones -> 0x8000 per word ->
       * signed-saturating pack to bytes */
      "pcmpeqw %%mm7,%%mm7\n"
962
      "psllw $15, %%mm7\n"
963
      "packsswb %%mm7, %%mm7\n"
964
      /* prime mm0/mm1 with the biased residual of the first row */
      "movq (%0),%%mm0\n"
965
      "movq (%1),%%mm2\n"
966
      "movq 8(%0),%%mm1\n"
967
      "movq 8(%1),%%mm3\n"
968
      "addl %3,%0\n"
969
      "addl %3,%1\n"
970
      "subl $2, %%ecx\n"
971
      "psubb %%mm2, %%mm0\n"
972
      "psubb %%mm3, %%mm1\n"
973
      "pxor %%mm7, %%mm0\n"
974
      "pxor %%mm7, %%mm1\n"
975
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
976
      "1:\n"
977
      
978
      /* two rows per iteration; the register pairs swap roles each step */
      SUM(%%mm4, %%mm5, %%mm0, %%mm1)
979
      
980
      SUM(%%mm0, %%mm1, %%mm4, %%mm5)
981
      
982
      "subl $2, %%ecx\n"
983
      "jnz 1b\n"
984

  
985
      /* no lane folding: the running total is read directly from mm6 */
      "movd %%mm6,%2\n"
986
      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
987
      : "r" (line_size) , "m" (h)
988
      : "%ecx");
989
    return tmp;
990
}
991
#undef SUM
992

  
750 993
static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
751 994
    int i=0;
752 995
    asm volatile(
......
1874 2117
        
1875 2118
	c->pix_norm1 = pix_norm1_mmx;
1876 2119
	c->sse[0] = sse16_mmx;
2120
        c->vsad[4]= vsad_intra16_mmx;
2121

  
2122
        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2123
            c->vsad[0] = vsad16_mmx;
2124
        }
1877 2125
#endif //CONFIG_ENCODERS
1878 2126

  
1879 2127
        c->h263_v_loop_filter= h263_v_loop_filter_mmx;
......
1897 2145
#ifdef CONFIG_ENCODERS
1898 2146
            c->hadamard8_diff[0]= hadamard8_diff16_mmx2;
1899 2147
            c->hadamard8_diff[1]= hadamard8_diff_mmx2;
2148
            c->vsad[4]= vsad_intra16_mmx2;
1900 2149
#endif //CONFIG_ENCODERS
1901 2150

  
1902 2151
            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
......
1906 2155
                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
1907 2156
                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
1908 2157
                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2158
                c->vsad[0] = vsad16_mmx2;
1909 2159
            }
1910 2160

  
1911 2161
#if 1

Also available in: Unified diff