Revision 602a4cb2 libavcodec/x86/h264_intrapred.asm

View differences:

libavcodec/x86/h264_intrapred.asm
1605 1605
    por        mm0, mm1
1606 1606
    movq [r0+r3*1], mm0
1607 1607
    RET
1608

  
1609
;-----------------------------------------------------------------------------
; PRED8x8L_DOWN_RIGHT <suffix>
;
; Emits the function pred8x8l_down_right_<suffix> (declared with 4 arguments
; and 5 general-purpose registers via cglobal).
;
; Register roles as used by the code below:
;   r0 - block pointer; moved up by one r3 step on entry, so "row k" below
;        means (original r0 + k*r3) and row -1 is the row above the block
;   r1 - tested for zero to decide whether the top-left byte is replaced
;        (presumably a has_topleft flag - confirm against the C prototype)
;   r2 - tested for zero to decide whether the top-right byte is replaced
;        (presumably a has_topright flag - confirm against the C prototype)
;   r3 - row step (presumably the stride in bytes)
;   r4 - scratch; holds the row -1 pointer
; r1 and r2 are reused as row pointers once the flag tests are done.
;
; Byte names used in the comments:
;   l0..l7 - the byte immediately left of rows 0..7
;   lt     - the byte immediately left of row -1
;   t0..t7 - the 8 bytes at row -1
;
; PALIGNR and PRED4x4_LOWPASS are macros defined outside this view; the
; comments assume PALIGNR dst, src, n, tmp behaves like the palignr
; instruction (tmp being scratch for emulated variants) and that
; PRED4x4_LOWPASS is a 3-tap smoothing filter writing its first operand
; - TODO confirm both against their definitions.
;
; NOTE(review): r1/r2 are tested at full register width. If these arguments
; are 32-bit ints on a 64-bit ABI, the upper half is unspecified; confirm
; and consider testing r1d/r2d instead.
;-----------------------------------------------------------------------------
%macro PRED8x8L_DOWN_RIGHT 1
1610
cglobal pred8x8l_down_right_%1, 4,5
1611
; r0 -> row -1
    sub          r0, r3
1612
; r4 -> row 1 (temporary base so that row 2 can be addressed as r4+r3)
    lea          r4, [r0+r3*2]
1613
; Gather the left-edge bytes. Each 8-byte load ends just before the row
; start, so the wanted byte sits in the top byte of the register; the
; punpckh* cascade then packs those top bytes together.
    movq        mm0, [r0+r3*1-8]
1614
; top word of mm0 = (l0, lt)
    punpckhbw   mm0, [r0+r3*0-8]
1615
    movq        mm1, [r4+r3*1-8]
1616
; top word of mm1 = (l2, l1)
    punpckhbw   mm1, [r0+r3*2-8]
1617
; keep the row -1 pointer in r4 while r0 walks down the block
    mov          r4, r0
1618
; top dword of mm1 = l2 l1 l0 lt
    punpckhwd   mm1, mm0
1619
; r0 -> row 3
    lea          r0, [r0+r3*4]
1620
    movq        mm2, [r0+r3*1-8]
1621
; top word of mm2 = (l4, l3)
    punpckhbw   mm2, [r0+r3*0-8]
1622
; r0 -> row 5
    lea          r0, [r0+r3*2]
1623
    movq        mm3, [r0+r3*1-8]
1624
; top word of mm3 = (l6, l5)
    punpckhbw   mm3, [r0+r3*0-8]
1625
; top dword of mm3 = l6 l5 l4 l3
    punpckhwd   mm3, mm2
1626
; mm3 = l6 l5 l4 l3 l2 l1 l0 lt (low byte first)
    punpckhdq   mm3, mm1
1627
; r0 -> row 7
    lea          r0, [r0+r3*2]
1628
; top byte of mm0 = l7
    movq        mm0, [r0+r3*0-8]
1629
; mm1 = t0..t7
    movq        mm1, [r4]
1630
; r0 -> row -1 again
    mov          r0, r4
1631
    movq        mm4, mm3
1632
    movq        mm2, mm3
1633
; mm4 = l7 l6 l5 l4 l3 l2 l1 l0 (edge shifted one position towards l7)
    PALIGNR     mm4, mm0, 7, mm0
1634
; mm1 = l5 l4 l3 l2 l1 l0 lt t0 (edge shifted one position towards the top)
    PALIGNR     mm1, mm2, 1, mm2
1635
; r1 == 0: patch the top-left byte first, otherwise go straight to the filter
    test        r1, r1
1636
    jz .fix_lt_1
1637
; NOTE(review): jz + jmp here is equivalent to a single "jnz .do_left"
; falling through into .fix_lt_1.
    jmp .do_left
1638
.fix_lt_1:
1639
; Replace lt (byte 6 of mm1) with l0: XOR-ing in (lt ^ l0) flips it to l0.
    movq        mm5, mm3
1640
    pxor        mm5, mm4
1641
; isolate byte 7 of the XOR, which is lt ^ l0 ...
    psrlq       mm5, 56
1642
; ... and move it to byte 6, where lt lives in mm1
    psllq       mm5, 48
1643
    pxor        mm1, mm5
1644
    jmp .do_left
1645
; .fix_lt_2 and .fix_tr_1 are only reached by the jumps that follow the
; top-row loads further down; at that point mm3 = t0..t7, mm2 = top row
; shifted towards lt, mm1 = top row shifted towards the top-right byte.
.fix_lt_2:
1646
; Replace the low byte of mm2 (the byte from [r0-1]) with t0.
    movq        mm5, mm3
1647
    pxor        mm5, mm2
1648
; keep only the low byte of the XOR
    psllq       mm5, 56
1649
    psrlq       mm5, 56
1650
    pxor        mm2, mm5
1651
; r2 != 0: top-right is usable as loaded; otherwise fall into .fix_tr_1
    test         r2, r2
1652
    jnz .do_top
1653
.fix_tr_1:
1654
; Replace the top byte of mm1 (the byte from [r0+8]) with t7.
    movq        mm5, mm3
1655
    pxor        mm5, mm1
1656
; keep only the top byte of the XOR
    psrlq       mm5, 56
1657
    psllq       mm5, 56
1658
    pxor        mm1, mm5
1659
    jmp .do_top
1660
.do_left:
1661
; save mm4; it is restored right after the macro call below
    movq        mm0, mm4
1662
; filter the left edge into mm2 (mm5 is passed as the macro's temporary)
    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
1663
    movq        mm4, mm0
1664
    movq        mm7, mm2
1665
; xmm3 low qword = first filtered left-edge vector, high qword zeroed
    movq2dq    xmm3, mm2
1666
; second filter pass, result in mm1; only its low byte is used below
    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
1667
; move that low byte to the top byte and clear the rest
    psllq       mm1, 56
1668
; mm7 = that byte followed by bytes 0..6 of the first filtered vector
    PALIGNR     mm7, mm1, 7, mm3
1669
; xmm1 low qword = the shifted left-edge vector, high qword zeroed
    movq2dq    xmm1, mm7
1670
; Load the top edge: 8 bytes ending at [r0-1], the 8 bytes at row -1 and
; the 8 bytes following them.
    movq        mm0, [r0-8]
1671
    movq        mm3, [r0]
1672
    movq        mm1, [r0+8]
1673
    movq        mm2, mm3
1674
    movq        mm4, mm3
1675
; mm2 = lt t0 t1 t2 t3 t4 t5 t6
    PALIGNR     mm2, mm0, 7, mm0
1676
; mm1 = t1 t2 t3 t4 t5 t6 t7 followed by the byte at [r0+8]
    PALIGNR     mm1, mm4, 1, mm4
1677
; r1 == 0: substitute t0 for lt (and then check r2 inside .fix_lt_2)
    test         r1, r1
1678
    jz .fix_lt_2
1679
; r2 == 0: substitute t7 for the top-right byte
    test         r2, r2
1680
    jz .fix_tr_1
1681
.do_top:
1682
; filter the top edge into mm4
    PRED4x4_LOWPASS mm4, mm2, mm1, mm3, mm5
1683
    movq2dq   xmm4, mm4
1684
; The flag tests are finished, so r1/r2 become row pointers: r1 -> row 1
    lea         r1, [r0+r3*2]
1685
    movdqa    xmm0, xmm3
1686
; move the filtered top edge into the high qword
    pslldq    xmm4, 8
1687
; xmm3 = filtered left edge (low qword) | filtered top edge (high qword)
    por       xmm3, xmm4
1688
; r2 -> row 3
    lea         r2, [r1+r3*2]
1689
; filtered top edge shifted up by one more byte (now starting at byte 9)
    pslldq    xmm4, 1
1690
    por       xmm1, xmm4
1691
; The next three shifts isolate byte 7 of xmm0 and relocate it to byte 8,
; filling the one-byte gap left in xmm1 between its two halves.
    psrldq    xmm0, 7
1692
    pslldq    xmm0, 15
1693
    psrldq    xmm0, 7
1694
    por       xmm1, xmm0
1695
; r0 -> row 5
    lea         r0, [r2+r3*2]
1696
    movdqa    xmm2, xmm3
1697
; xmm2 = combined edge vector shifted down by one byte
    psrldq    xmm2, 1
1698
; Switch the macro layer to XMM mode for the rest of the function
; (presumably so PRED4x4_LOWPASS, movq and RET map to their SSE2 forms -
; confirm against the x86inc definitions).
INIT_XMM
1699
; final filter over the 16-byte edge vector; result in xmm0
    PRED4x4_LOWPASS xmm0, xmm1, xmm2, xmm3, xmm4
1700
    movdqa    xmm1, xmm0
1701
; xmm1 = xmm0 shifted down by one byte
    psrldq    xmm1, 1
1702
; Store 8 bytes per row from row 7 up to row 0. xmm0 supplies the
; odd-numbered rows and xmm1 the even-numbered ones; each register is
; shifted by two bytes between consecutive stores.
; row 7
    movq [r0+r3*2], xmm0
1703
; row 6
    movq [r0+r3*1], xmm1
1704
    psrldq    xmm0, 2
1705
    psrldq    xmm1, 2
1706
; row 5
    movq [r2+r3*2], xmm0
1707
; row 4
    movq [r2+r3*1], xmm1
1708
    psrldq    xmm0, 2
1709
    psrldq    xmm1, 2
1710
; row 3
    movq [r1+r3*2], xmm0
1711
; row 2
    movq [r1+r3*1], xmm1
1712
    psrldq    xmm0, 2
1713
    psrldq    xmm1, 2
1714
; row 1 (r4 still points at row -1)
    movq [r4+r3*2], xmm0
1715
; row 0
    movq [r4+r3*1], xmm1
1716
    RET
1717
%endmacro
1718

  
1719
; Instantiate PRED8x8L_DOWN_RIGHT twice. Each expansion starts in MMX mode
; (the macro body switches to XMM mode itself via INIT_XMM, so INIT_MMX has
; to be issued again before the second expansion). The two variants differ
; only in which PALIGNR implementation is selected; PALIGNR_MMX and
; PALIGNR_SSSE3 are defined outside this view.
INIT_MMX
1720
; sse2 variant: PALIGNR expands to PALIGNR_MMX
%define PALIGNR PALIGNR_MMX
1721
; emits pred8x8l_down_right_sse2
PRED8x8L_DOWN_RIGHT sse2
1722
INIT_MMX
1723
; ssse3 variant: PALIGNR expands to PALIGNR_SSSE3
%define PALIGNR PALIGNR_SSSE3
1724
; emits pred8x8l_down_right_ssse3
PRED8x8L_DOWN_RIGHT ssse3
1608 1725
%endif
1609 1726

  
1610 1727
;-----------------------------------------------------------------------------

Also available in: Unified diff