Revision 84dc2d8a libavcodec/x86/h264dsp_mmx.c

View differences:

libavcodec/x86/h264dsp_mmx.c
@@ -20,8 +20,8 @@
 
 #include "dsputil_mmx.h"
 
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3  ) = 0x0307030703070307ULL;
 
 /***********************************/
 /* IDCT */
@@ -157,12 +157,12 @@
 static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
 {
     int i;
-    DECLARE_ALIGNED_8(int16_t, b2)[64];
+    DECLARE_ALIGNED(8, int16_t, b2)[64];
 
     block[0] += 32;
 
     for(i=0; i<2; i++){
-        DECLARE_ALIGNED_8(uint64_t, tmp);
+        DECLARE_ALIGNED(8, uint64_t, tmp);
 
         h264_idct8_1d(block+4*i);
 
@@ -628,7 +628,7 @@
 
 static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
 {
-    DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
+    DECLARE_ALIGNED(8, uint64_t, tmp0)[2];
 
     __asm__ volatile(
         "movq    (%2,%4), %%mm0    \n\t" //p1
@@ -690,7 +690,7 @@
 {
     //FIXME: could cut some load/stores by merging transpose with filter
     // also, it only needs to transpose 6x8
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
     int i;
     for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
         if((tc0[0] & tc0[1]) < 0)
@@ -734,7 +734,7 @@
 static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
@@ -784,7 +784,7 @@
 static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
     //FIXME: could cut some load/stores by merging transpose with filter
-    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
+    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
     transpose4x4(trans, pix-2, 8, stride);
     transpose4x4(trans+4, pix-2+4*stride, 8, stride);
     h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
@@ -815,7 +815,7 @@
     for( dir=1; dir>=0; dir-- ) {
         const x86_reg d_idx = dir ? -8 : -1;
         const int mask_mv = dir ? mask_mv1 : mask_mv0;
-        DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+        DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
         int b_idx, edge;
         for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
             __asm__ volatile(
@@ -2106,7 +2106,7 @@
 #endif
 
 /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
-DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
+DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
     0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
 };
 
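Every hunk in this revision makes the same mechanical substitution: the fixed-alignment helper DECLARE_ALIGNED_8(t, v) is replaced by the generic DECLARE_ALIGNED(n, t, v), which takes the alignment in bytes as its first argument. As a rough sketch of what the two spellings amount to on a GCC-style compiler (the real definitions live in libavutil/mem.h and also cover other compilers; the expansion shown below is an assumption, not the exact FFmpeg macro):

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the GCC/Clang branch only (assumed expansion). */
    #define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v

    /* The helper retired by this revision simply hard-coded n = 8. */
    #define DECLARE_ALIGNED_8(t, v) DECLARE_ALIGNED(8, t, v)

    /* Same declaration as in the first hunk, written with the new spelling. */
    DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1) = 0x0103010301030103ULL;

    int main(void)
    {
        /* Local aligned array, as in ff_h264_idct8_add_mmx() above. */
        DECLARE_ALIGNED(8, int16_t, b2)[64];
        printf("ff_pb_3_1 8-byte aligned: %d\n", (int)((uintptr_t)&ff_pb_3_1 % 8 == 0));
        printf("b2        8-byte aligned: %d\n", (int)((uintptr_t)b2 % 8 == 0));
        return 0;
    }

Under that reading the declarations expand to the same thing before and after the patch; only the spelling moves the alignment value out of the macro name and into an argument.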

  

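The comment on h264_rnd_reg in the last hunk describes its layout: each rounding constant is stored next to its "div 8" counterpart, so code holding a pointer p to an even entry reaches the matching div-8 value as p[1]. A small stand-alone illustration of that indexing convention (the table values are copied from the hunk; how the MMX code actually consumes them is not shown here):

    #include <stdint.h>
    #include <stdio.h>

    /* Values copied from the hunk: 32 and 32/8 = 4 per 16-bit lane,
     * then 28 and 3 per 16-bit lane. */
    static const uint64_t h264_rnd_reg[4] = {
        0x0020002000200020ULL, 0x0004000400040004ULL,
        0x001C001C001C001CULL, 0x0003000300030003ULL
    };

    static void show(const uint64_t *p)
    {
        /* p[0] is the rounding constant, p[1] the matching "div 8" constant. */
        printf("rnd = %2u, rnd div 8 = %u (per 16-bit lane)\n",
               (unsigned)(p[0] & 0xffff), (unsigned)(p[1] & 0xffff));
    }

    int main(void)
    {
        show(h264_rnd_reg);        /* 32 / 4 pair */
        show(h264_rnd_reg + 2);    /* 28 / 3 pair */
        return 0;
    }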