Revision 2966cc18 libavcodec/x86/h264_deblock_sse2.asm

View differences:

libavcodec/x86/h264_deblock_sse2.asm
234 234
%macro DEBLOCK_P0_Q0 0
235 235
    mova    m5, m1
236 236
    pxor    m5, m2           ; p0^q0
237
    pand    m5, [pb_01 GLOBAL] ; (p0^q0)&1
237
    pand    m5, [pb_01] ; (p0^q0)&1
238 238
    pcmpeqb m4, m4
239 239
    pxor    m3, m4
240 240
    pavgb   m3, m0           ; (p1 - q1 + 256)>>1
241
    pavgb   m3, [pb_03 GLOBAL] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
241
    pavgb   m3, [pb_03] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
242 242
    pxor    m4, m1
243 243
    pavgb   m4, m2           ; (q0 - p0 + 256)>>1
244 244
    pavgb   m3, m5
245 245
    paddusb m3, m4           ; d+128+33
246
    mova    m6, [pb_a1 GLOBAL]
246
    mova    m6, [pb_a1]
247 247
    psubusb m6, m3
248
    psubusb m3, [pb_a1 GLOBAL]
248
    psubusb m3, [pb_a1]
249 249
    pminub  m6, m7
250 250
    pminub  m3, m7
251 251
    psubusb m1, m6
......
263 263
    pavgb   %6, m2
264 264
    pavgb   %2, %6             ; avg(p2,avg(p0,q0))
265 265
    pxor    %6, %3
266
    pand    %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
266
    pand    %6, [pb_01] ; (p2^avg(p0,q0))&1
267 267
    psubusb %2, %6             ; (p2+((p0+q0+1)>>1))>>1
268 268
    mova    %6, %1
269 269
    psubusb %6, %5
......
612 612
    %define mask0 spill(2)
613 613
    %define mask1p spill(3)
614 614
    %define mask1q spill(4)
615
    %define mpb_00 [pb_00 GLOBAL]
616
    %define mpb_01 [pb_01 GLOBAL]
615
    %define mpb_00 [pb_00]
616
    %define mpb_01 [pb_01]
617 617
%endif
618 618

  
619 619
;-----------------------------------------------------------------------------
......
637 637
    mova    q1, [r0+r1]
638 638
%ifdef ARCH_X86_64
639 639
    pxor    mpb_00, mpb_00
640
    mova    mpb_01, [pb_01 GLOBAL]
640
    mova    mpb_01, [pb_01]
641 641
    LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
642 642
    SWAP    7, 12 ; m12=mask0
643 643
    pavgb   t5, mpb_00
......
656 656
    LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
657 657
    mova    m4, t5
658 658
    mova    mask0, m7
659
    pavgb   m4, [pb_00 GLOBAL]
660
    pavgb   m4, [pb_01 GLOBAL] ; alpha/4+1
659
    pavgb   m4, [pb_00]
660
    pavgb   m4, [pb_01] ; alpha/4+1
661 661
    DIFF_GT2 p0, q0, m4, m6, m7 ; m6 = |p0-q0| > alpha/4+1
662 662
    pand    m6, mask0
663 663
    DIFF_GT2 p0, p2, m5, m4, m7 ; m4 = |p2-p0| > beta-1

Also available in: Unified diff