Statistics
| Branch: | Revision:

ffmpeg / libavcodec / x86 / png_mmx.c @ 6d4c49a2

History | View | Annotate | Download (4.39 KB)

1
/*
 * MMX optimized PNG utils
 * Copyright (c) 2008 Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */
22

    
23
#include "libavutil/cpu.h"
24
#include "libavutil/x86_cpu.h"
25
#include "libavcodec/dsputil.h"
26
#include "libavcodec/png.h"
27
#include "dsputil_mmx.h"
28

    
29
//#undef NDEBUG
30
//#include <assert.h>
31

    
32
static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w)
33
{
34
    x86_reg i=0;
35
    __asm__ volatile(
36
        "jmp 2f                         \n\t"
37
        "1:                             \n\t"
38
        "movq   (%2, %0), %%mm0         \n\t"
39
        "movq  8(%2, %0), %%mm1         \n\t"
40
        "paddb  (%3, %0), %%mm0         \n\t"
41
        "paddb 8(%3, %0), %%mm1         \n\t"
42
        "movq %%mm0,  (%1, %0)          \n\t"
43
        "movq %%mm1, 8(%1, %0)          \n\t"
44
        "add $16, %0                    \n\t"
45
        "2:                             \n\t"
46
        "cmp %4, %0                     \n\t"
47
        " js 1b                         \n\t"
48
        : "+r" (i)
49
        : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
50
    );
51
    for(; i<w; i++)
52
        dst[i] = src1[i] + src2[i];
53
}
54

    
55
/*
 * PAETH(cpu, abs3) expands to the definition of
 *     static void add_paeth_prediction_<cpu>(dst, src, top, w, bpp)
 *
 * The generated function starts at byte offset 0 and advances by bpp while
 * the offset is <= w-3. Every step handles 4 bytes, widened to 16-bit words:
 *   A = left value       (initially dst at offset -bpp, afterwards the
 *                         previous step's result kept in mm0)
 *   B = above value      (top at the current offset)
 *   C = upper-left value (top one step earlier; initially top at -bpp)
 * Per word it selects A if |C-B| <= min(|C-A|, |2C-A-B|), otherwise B if
 * |C-A| <= |2C-A-B|, otherwise C, adds the selection to the src byte, masks
 * the sum with ff_pw_255 and stores the 4 packed bytes to dst.
 *
 * Note that dst and top are read bpp bytes in front of the given pointers,
 * and that every step reads and writes 4 bytes regardless of bpp.
 *
 * abs3 must be an asm string fragment that replaces the words in mm3, mm4
 * and mm5 by their absolute values and leaves mm7 equal to zero.
 */
#define PAETH(cpu, abs3)\
static void add_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
{\
    x86_reg pos  = -bpp;\
    x86_reg last = w-3;\
    __asm__ volatile(\
        /* mm7 = 0, the high half used when widening bytes to words */\
        "pxor      %%mm7, %%mm7 \n"\
        /* mm0 = A and mm1 = C seed, both taken bpp bytes before the row */\
        "movd    (%1,%0), %%mm0 \n"\
        "movd    (%2,%0), %%mm1 \n"\
        "punpcklbw %%mm7, %%mm0 \n"\
        "punpcklbw %%mm7, %%mm1 \n"\
        /* offset becomes 0 */\
        "add       %4, %0 \n"\
        "1: \n"\
        /* mm2 = C (the previous B), mm1 = new B from top */\
        "movq      %%mm1, %%mm2 \n"\
        "movd    (%2,%0), %%mm1 \n"\
        "movq      %%mm2, %%mm3 \n"\
        "punpcklbw %%mm7, %%mm1 \n"\
        "movq      %%mm2, %%mm4 \n"\
        /* mm3 = C-B, mm4 = C-A, mm5 = (C-B)+(C-A) */\
        "psubw     %%mm1, %%mm3 \n"\
        "psubw     %%mm0, %%mm4 \n"\
        "movq      %%mm3, %%mm5 \n"\
        "paddw     %%mm4, %%mm5 \n"\
        /* mm3, mm4, mm5 -> absolute values */\
        abs3\
        /* mm6 = min(|C-A|, |2C-A-B|) */\
        "movq      %%mm4, %%mm6 \n"\
        "pminsw    %%mm5, %%mm6 \n"\
        /* mm3 = mask of words where A is rejected (|C-B| > that minimum) */\
        "pcmpgtw   %%mm6, %%mm3 \n"\
        /* mm4 = mask of words where |C-A| > |2C-A-B| */\
        "pcmpgtw   %%mm5, %%mm4 \n"\
        "movq      %%mm4, %%mm6 \n"\
        /* mm4 = words taking C, mm6 = words taking B */\
        "pand      %%mm3, %%mm4 \n"\
        "pandn     %%mm3, %%mm6 \n"\
        /* mm3 = A in the words taking A, zero elsewhere */\
        "pandn     %%mm0, %%mm3 \n"\
        /* A is consumed, so mm0 can now receive the 4 src bytes */\
        "movd    (%3,%0), %%mm0 \n"\
        /* mm6 = B where selected, mm2 = C where selected */\
        "pand      %%mm1, %%mm6 \n"\
        "pand      %%mm4, %%mm2 \n"\
        "punpcklbw %%mm7, %%mm0 \n"\
        "movq      %6,    %%mm5 \n"\
        /* mm0 = src + selected predictor */\
        "paddw     %%mm6, %%mm0 \n"\
        "paddw     %%mm2, %%mm3 \n"\
        "paddw     %%mm3, %%mm0 \n"\
        /* mask with ff_pw_255 (presumably 0x00FF in every word, which */\
        /* would reduce the sum modulo 256 - confirm at its definition) */\
        "pand      %%mm5, %%mm0 \n"\
        /* pack a copy to bytes and store; mm0 stays widened as next A */\
        "movq      %%mm0, %%mm3 \n"\
        "packuswb  %%mm3, %%mm3 \n"\
        "movd      %%mm3, (%1,%0) \n"\
        /* advance by bpp and repeat while offset <= w-3 */\
        "add       %4, %0 \n"\
        "cmp       %5, %0 \n"\
        "jle 1b \n"\
        :"+r"(pos)\
        :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(last),\
         "m"(ff_pw_255)\
        :"memory"\
    );\
}
107

    
108
/*
 * asm fragment for PAETH(): replace the signed 16-bit words in mm3, mm4 and
 * mm5 by their absolute values, each computed as max(x, 0 - x) with
 * psubw/pmaxsw. Expects mm7 to be zero on entry and leaves it zero on exit;
 * mm6 is overwritten.
 */
#define ABS3_MMX2\
        /* mm5 = max(mm5, 0 - mm5), using the zero already held in mm7 */\
        "psubw     %%mm5, %%mm7 \n"\
        "pmaxsw    %%mm7, %%mm5 \n"\
        /* fresh zeros, then mm6 = 0 - mm3 and mm7 = 0 - mm4 */\
        "pxor      %%mm6, %%mm6 \n"\
        "pxor      %%mm7, %%mm7 \n"\
        "psubw     %%mm3, %%mm6 \n"\
        "psubw     %%mm4, %%mm7 \n"\
        /* mm3 = max(mm3, -mm3), mm4 = max(mm4, -mm4) */\
        "pmaxsw    %%mm6, %%mm3 \n"\
        "pmaxsw    %%mm7, %%mm4 \n"\
        /* restore the zero in mm7 */\
        "pxor      %%mm7, %%mm7 \n"
118

    
119
/*
 * asm fragment for PAETH(): replace the signed 16-bit words in mm3, mm4 and
 * mm5 by their absolute values with one pabsw per register. No other
 * register is touched.
 */
#define ABS3_SSSE3\
        /* in-place absolute value of each of the three registers */\
        "pabsw     %%mm3, %%mm3 \n"\
        "pabsw     %%mm4, %%mm4 \n"\
        "pabsw     %%mm5, %%mm5 \n"
123

    
124
PAETH(mmx2, ABS3_MMX2)
125
#if HAVE_SSSE3
126
PAETH(ssse3, ABS3_SSSE3)
127
#endif
128

    
129
/**
 * Install the x86 SIMD routines of this file into a PNG decoder context.
 *
 * Nothing is changed unless av_get_cpu_flags() reports AV_CPU_FLAG_MMX2.
 * When it does, s->add_bytes_l2 is set to the MMX row adder and
 * s->add_paeth_prediction to the SSSE3 variant if that one was compiled in
 * (HAVE_SSSE3) and AV_CPU_FLAG_SSSE3 is reported, otherwise to the MMX2
 * variant.
 *
 * @param s decoder context whose function pointers are updated
 */
void ff_png_init_mmx(PNGDecContext *s)
{
    const int cpu_flags = av_get_cpu_flags();

    /* both routines are only installed on CPUs reporting MMX2 */
    if (!(cpu_flags & AV_CPU_FLAG_MMX2)) {
        return;
    }

    s->add_bytes_l2 = add_bytes_l2_mmx;

#if HAVE_SSSE3
    /* prefer the pabsw based variant where available */
    if (cpu_flags & AV_CPU_FLAG_SSSE3) {
        s->add_paeth_prediction = add_paeth_prediction_ssse3;
        return;
    }
#endif
    s->add_paeth_prediction = add_paeth_prediction_mmx2;
}