Revision b10fa1bb libavcodec/x86/dsputil_yasm.asm

View differences:

libavcodec/x86/dsputil_yasm.asm
99 99

  
100 100

  
101 101

  
102
; SCALARPRODUCT <suffix>
; Emits three int16 vector routines whose symbol names end in <suffix>:
; add_int16, sub_int16 and scalarproduct_int16. The vector register width
; comes from mmsize, which is set outside this macro.
; All three loops share the same shape: the pointers are advanced to the
; end of the arrays and a negative byte index counts up towards zero.
; Each pass consumes mmsize*2 bytes and the test is at the bottom, so the
; body always runs at least once and there is no tail handling.
; NOTE(review): callers presumably pass a positive order that is a multiple
; of mmsize (in int16 elements) - confirm.
; NOTE(review): order is an int in the C prototypes, yet the full-width
; orderq is shifted and used for addressing. On x86-64 this relies on the
; upper half of the register being clean unless cglobal takes care of it -
; confirm.
; NOTE(review): cglobal, mova, movu, REP_RET and RET are macros defined
; elsewhere. The three numbers after the cglobal name are presumably
; argument count, general-purpose register count and vector register
; count - confirm.
%macro SCALARPRODUCT 1
103
; void add_int16(int16_t * v1, int16_t * v2, int order)
; (generated name carries the macro suffix)
; In-place element-wise add: v1[i] += v2[i]. paddw wraps on overflow.
; v1 is used as a direct memory operand and stored with mova, v2 is loaded
; with movu; presumably v1 must be aligned and v2 need not be - confirm.
104
cglobal add_int16_%1, 3,3,2, v1, v2, order
105
    shl orderq, 1                       ; element count -> byte count (2 bytes per int16)
106
    add v1q, orderq                     ; v1q = one past the end of v1
107
    add v2q, orderq                     ; v2q = one past the end of v2
108
    neg orderq                          ; orderq = negative byte index from the end
109
.loop:
110
    movu    m0, [v2q + orderq]
111
    movu    m1, [v2q + orderq + mmsize]
112
    paddw   m0, [v1q + orderq]          ; m0 = v2 + v1, first vector
113
    paddw   m1, [v1q + orderq + mmsize] ; m1 = v2 + v1, second vector
114
    mova    [v1q + orderq], m0          ; result written back over v1
115
    mova    [v1q + orderq + mmsize], m1
116
    add     orderq, mmsize*2            ; advance two vectors; sets flags for jl
117
    jl .loop                            ; repeat while the index is still negative
118
    REP_RET
119

  
120
; void sub_int16(int16_t * v1, int16_t * v2, int order)
; (generated name carries the macro suffix)
; In-place element-wise subtract: v1[i] -= v2[i]. psubw wraps on overflow.
; Same pointer/index setup as add_int16. v1 is loaded and stored with mova,
; v2 is loaded with movu.
121
cglobal sub_int16_%1, 3,3,4, v1, v2, order
122
    shl orderq, 1                       ; element count -> byte count
123
    add v1q, orderq                     ; v1q = one past the end of v1
124
    add v2q, orderq                     ; v2q = one past the end of v2
125
    neg orderq                          ; orderq = negative byte index from the end
126
.loop:
127
    movu    m2, [v2q + orderq]          ; m2/m3 = subtrahend (v2)
128
    movu    m3, [v2q + orderq + mmsize]
129
    mova    m0, [v1q + orderq]          ; m0/m1 = minuend (v1)
130
    mova    m1, [v1q + orderq + mmsize]
131
    psubw   m0, m2                      ; m0 = v1 - v2
132
    psubw   m1, m3
133
    mova    [v1q + orderq], m0          ; result written back over v1
134
    mova    [v1q + orderq + mmsize], m1
135
    add     orderq, mmsize*2            ; advance two vectors; sets flags for jl
136
    jl .loop                            ; repeat while the index is still negative
137
    REP_RET
138

  
139
; int scalarproduct_int16_<suffix>(int16_t * v1, int16_t * v2, int order, int shift)
; Returns the dot product of v1 and v2 in eax. Products are accumulated as
; 32-bit lanes in m2. The arithmetic right shift is applied to the last two
; partial sums just before they are added together, not to each product.
; Only three arguments are requested from cglobal; shift is read through
; shiftm instead.
; v1 is loaded with movu while v2 is a direct memory operand of pmaddwd;
; presumably v2 must be aligned in the 16-byte build - confirm.
140
cglobal scalarproduct_int16_%1, 3,3,4, v1, v2, order, shift
141
    shl orderq, 1                       ; element count -> byte count
142
    add v1q, orderq                     ; v1q = one past the end of v1
143
    add v2q, orderq                     ; v2q = one past the end of v2
144
    neg orderq                          ; orderq = negative byte index from the end
145
    movd    m3, shiftm                  ; m3 = shift count for psrad
146
    pxor    m2, m2                      ; m2 = 0, the 32-bit lane accumulator
147
.loop:
148
    movu    m0, [v1q + orderq]
149
    movu    m1, [v1q + orderq + mmsize]
150
    pmaddwd m0, [v2q + orderq]          ; multiply int16 pairs, add neighbours into int32
151
    pmaddwd m1, [v2q + orderq + mmsize]
152
    paddd   m2, m0                      ; accumulate both vectors of partial sums
153
    paddd   m2, m1
154
    add     orderq, mmsize*2            ; advance two vectors; sets flags for jl
155
    jl .loop                            ; repeat while the index is still negative
156
%if mmsize == 16
157
    movhlps m0, m2                      ; m0 low half = m2 high half
158
    paddd   m2, m0                      ; fold four lanes down to the low two
159
    psrad   m2, m3                      ; arithmetic shift of the partial sums
160
    pshuflw m0, m2, 0x4e                ; 0x4e swaps the two low dwords
161
%else
162
    psrad   m2, m3                      ; arithmetic shift of the two partial sums
163
    pshufw  m0, m2, 0x4e                ; 0x4e swaps the two dwords
164
%endif
165
    paddd   m2, m0                      ; low dword = sum of the two shifted partial sums
166
    movd   eax, m2                      ; return value = low dword of m2
167
    RET
168
%endmacro
169

  
170
; Expand SCALARPRODUCT twice so that both an mmx2-suffixed and an
; sse2-suffixed set of add_int16 / sub_int16 / scalarproduct_int16 exists.
; INIT_MMX and INIT_XMM are defined elsewhere; presumably they switch the
; m0..mN aliases and mmsize between 8-byte and 16-byte registers - confirm.
INIT_MMX
171
SCALARPRODUCT mmx2
172
INIT_XMM
173
SCALARPRODUCT sse2
174

  
175

  
176

  
102 177
; void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
103 178
cglobal add_hfyu_median_prediction_mmx2, 6,6,0, dst, top, diff, w, left, left_top
104 179
    movq    mm0, [topq]

Also available in: Unified diff