Revision 565344e7 libavcodec/x86/vp8dsp.asm

View differences:

libavcodec/x86/vp8dsp.asm
930 930
; void vp8_idct_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
931 931
;-----------------------------------------------------------------------------
932 932

  
933
; calculate %1=%2+%1; %2=%2-%1 (using the original value of %1), with %3=temp register
934
%macro SUMSUB 3 ; %1, %2 = operands (both overwritten), %3 = scratch register
935
    mova      %3, %1 ; keep the original %1; the next instruction overwrites it
936
    paddw     %1, %2 ; %1 = %1 + %2 (packed word add)
937
    psubw     %2, %3 ; %2 = %2 - original %1 (packed word subtract)
938
%endmacro
939

  
940 933
; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
941 934
;           this macro assumes that m6/m7 have words for 20091/17734 loaded
942 935
%macro VP8_MULTIPLY_SUMSUB 4
......
968 961
    SWAP                 %4,  %3
969 962
%endmacro
970 963

  
971
; transpose a 4x4 table
972
%macro TRANSPOSE4x4 5 ; inputs in m%1-m%4, m%5 = scratch; columns are built in m%1/m%4/m%5/m%3, then renamed by the SWAPs below
973
    mova      m%5, m%1 ; keep a copy of input %1 before it is overwritten
974
    punpcklwd m%1, m%2 ; interleave low words of inputs %1 and %2
975
    punpckhwd m%5, m%2 ; interleave high words of inputs %1 and %2
976
    mova      m%2, m%3 ; m%2 is free now: keep a copy of input %3 in it
977
    punpcklwd m%3, m%4 ; interleave low words of inputs %3 and %4
978
    punpckhwd m%2, m%4 ; interleave high words of inputs %3 and %4
979
    mova      m%4, m%1 ; m%4 is free now: copy the %1/%2 low-word interleave
980
    punpckldq m%1, m%3 ;col0
981
    punpckhdq m%4, m%3 ;col1
982
    mova      m%3, m%5 ; m%3 is free now: copy the %1/%2 high-word interleave
983
    punpckldq m%5, m%2 ;col2
984
    punpckhdq m%3, m%2 ;col3
985
    SWAP       %4,  %2 ; NOTE(review): SWAP is defined outside this view; presumably it renames
986
    SWAP       %4,  %5 ; registers (no data move) so that col0..col3 end up addressable as
987
    SWAP       %4,  %3 ; m%1/m%2/m%3/m%4 and the scratch as m%5 - confirm against its definition
988
%endmacro
989

  
990 964
INIT_MMX
991 965
cglobal vp8_idct_add_mmx, 3, 3
992 966
    ; load block data

Also available in: Unified diff