Revision 004cda8e

View differences:

libavcodec/x86/vp8dsp-init.c
195 195

  
196 196
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
197 197
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
198
extern void ff_vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]);
198 199
#endif
199 200

  
200 201
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
......
237 238
    /* note that 4-tap width=16 functions are missing because w=16
238 239
     * is only used for luma, and luma is always a copy or sixtap. */
239 240
    if (mm_flags & FF_MM_MMX2) {
241
        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmxext;
240 242
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
241 243
        VP8_MC_FUNC(1, 8, mmxext);
242 244
        VP8_MC_FUNC(1, 4, mmxext);
libavcodec/x86/vp8dsp.asm
21 21
;******************************************************************************
22 22

  
23 23
%include "x86inc.asm"
24
%include "x86util.asm"
24 25

  
25 26
SECTION_RODATA
26 27

  
......
141 142
filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,  7, 7,  8,  8,  9
142 143
filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8,  9, 9, 10, 10, 11
143 144

  
145
cextern pw_3
144 146
cextern pw_4
145 147
cextern pw_64
146 148

  
......
920 922
    pextrd     [r1], xmm2, 2
921 923
    pextrd  [r1+r2], xmm2, 3
922 924
    RET
925

  
926
;-----------------------------------------------------------------------------
927
; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
928
;-----------------------------------------------------------------------------
929

  
930
%macro SCATTER_WHT 1
; SCATTER_WHT word_index
;   Extracts 16-bit word number %1 from each of m0, m1, m2 and m3 and stores
;   the four words at r0 + 2*16*0, r0 + 2*16*1, r0 + 2*16*2 and r0 + 2*16*3,
;   i.e. at four destinations spaced 32 bytes apart.
;   Clobbers r1 and r2 (used as scratch for the extracted words); r0 is left
;   unchanged, so the caller must advance it between invocations.
;   NOTE(review): the 2*16 stride presumably equals one 16-coefficient block
;   of 2-byte DCTELEMs -- confirm against the DCTELEM typedef.
931
    pextrw r1d, m0, %1
932
    pextrw r2d, m1, %1
933
    mov [r0+2*16*0], r1w
934
    mov [r0+2*16*1], r2w
; second pair: same pattern for m2/m3, reusing r1/r2 as scratch
935
    pextrw r1d, m2, %1
936
    pextrw r2d, m3, %1
937
    mov [r0+2*16*2], r1w
938
    mov [r0+2*16*3], r2w
939
%endmacro
940

  
941
%macro HADAMARD4_1D 4
; HADAMARD4_1D a, b, c, d
;   Arguments are register *numbers* (they are pasted after "m" below).
;   Applies two SUMSUB_BADC stages to the four registers, then renames the
;   registers with SWAP.
;   NOTE(review): SUMSUB_BADC and SWAP are not defined in this view
;   (presumably they come from the included x86util.asm / x86inc.asm);
;   presumably each SUMSUB_BADC performs sum/difference butterflies on its two
;   register pairs and SWAP permutes register names at assembly time so the
;   results end up in natural order -- confirm against those definitions.
942
    SUMSUB_BADC m%2, m%1, m%4, m%3
943
    SUMSUB_BADC m%4, m%2, m%3, m%1
944
    SWAP %1, %4, %3
945
%endmacro
946

  
947
INIT_MMX
; Per the prototype comment above this section: r0 = block, r1 = dc
; (assuming x86inc's cglobal maps the C arguments to r0, r1 in order).
; "2,3" presumably declares 2 arguments and 3 general-purpose registers;
; r2 is used only as scratch inside SCATTER_WHT.
; Flow: load 4 qwords from dc -> HADAMARD4_1D -> TRANSPOSE4x4W -> add pw_3 to
; m0 -> HADAMARD4_1D -> arithmetic >>3 on all four registers -> scatter the
; resulting 16-bit words through r0.
948
cglobal vp8_luma_dc_wht_mmxext, 2,3
; Load 32 bytes from r1 into m0..m3. This must complete before the first
; SCATTER_WHT, which overwrites r1.
949
    movq          m0, [r1]
950
    movq          m1, [r1+8]
951
    movq          m2, [r1+16]
952
    movq          m3, [r1+24]
; first 1-D pass
953
    HADAMARD4_1D  0, 1, 2, 3
; NOTE(review): TRANSPOSE4x4W is defined outside this view; presumably it
; transposes the 4x4 word matrix held in m0..m3 using m4 as a temporary.
954
    TRANSPOSE4x4W 0, 1, 2, 3, 4
; pw_3 is added to m0 only, before the second pass.
; NOTE(review): presumably this is the rounding bias for the >>3 below and
; reaches every output through the butterflies -- confirm.
955
    paddw         m0, [pw_3]
; second 1-D pass
956
    HADAMARD4_1D  0, 1, 2, 3
; arithmetic right shift by 3 of every 16-bit word in m0..m3
957
    psraw         m0, 3
958
    psraw         m1, 3
959
    psraw         m2, 3
960
    psraw         m3, 3
; Scatter: for word index 0..3, write that word of m0..m3 to four
; destinations 32 bytes apart, then advance r0 by 2*16*4 = 128 bytes so the
; next group of four follows directly after the previous one.
961
    SCATTER_WHT   0
962
    add           r0, 2*16*4
963
    SCATTER_WHT   1
964
    add           r0, 2*16*4
965
    SCATTER_WHT   2
966
    add           r0, 2*16*4
967
    SCATTER_WHT   3
968
    RET

Also available in: Unified diff