ffmpeg / libavcodec / x86 / dsputil_mmx.h @ 05aec7bb
History | View | Annotate | Download (6.3 KB)
1 |
/*
|
---|---|
2 |
* MMX optimized DSP utils
|
3 |
* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
#ifndef AVCODEC_X86_DSPUTIL_MMX_H
|
23 |
#define AVCODEC_X86_DSPUTIL_MMX_H
|
24 |
|
25 |
#include <stdint.h> |
26 |
#include "libavcodec/dsputil.h" |
27 |
|
28 |
/* 128-bit constant stored as two consecutive 64-bit halves. */
typedef struct { uint64_t a, b; } xmm_reg;

/*
 * Constants shared by the x86 SIMD routines.  They are only declared here;
 * the definitions live in another translation unit.  Entries typed uint64_t
 * are 64 bits wide, entries typed xmm_reg are 128 bits wide.
 *
 * NOTE(review): the names suggest ff_pw_N = packed 16-bit words holding N,
 * ff_pb_XX = packed bytes holding hex XX and ff_pd_N = packed doubles
 * holding N -- confirm against the definitions before relying on it.
 */
extern const uint64_t ff_bone;
extern const uint64_t ff_wtwo;

extern const uint64_t ff_pdw_80000000[2];

extern const uint64_t ff_pw_3;
extern const uint64_t ff_pw_4;
extern const xmm_reg  ff_pw_5;
extern const xmm_reg  ff_pw_8;
extern const uint64_t ff_pw_15;
extern const xmm_reg  ff_pw_16;
extern const uint64_t ff_pw_20;
extern const xmm_reg  ff_pw_28;
extern const xmm_reg  ff_pw_32;
extern const uint64_t ff_pw_42;
extern const xmm_reg  ff_pw_64;
extern const uint64_t ff_pw_96;
extern const uint64_t ff_pw_128;
extern const uint64_t ff_pw_255;

extern const uint64_t ff_pb_1;
extern const uint64_t ff_pb_3;
extern const uint64_t ff_pb_7;
extern const uint64_t ff_pb_1F;
extern const uint64_t ff_pb_3F;
extern const uint64_t ff_pb_81;
extern const uint64_t ff_pb_A1;
extern const uint64_t ff_pb_FC;

extern const double ff_pd_1[2];
extern const double ff_pd_2[2];
61 |
|
62 |
/*
 * Expands to the text of four "movq" loads for use inside an inline-asm
 * template.  Row k (k = 0..3) is read from address k*stride+in and placed
 * in a, b, c and d respectively.
 *
 * Every argument is stringified verbatim, so `stride` must be something the
 * assembler can multiply (e.g. a literal), `in` is the textual memory operand
 * that follows the displacement, and a..d are destination operands as they
 * should appear in the template (e.g. %%mm0).
 */
#define LOAD4(stride,in,a,b,c,d)\
    "movq 0*" #stride "+" #in ", " #a "\n\t"\
    "movq 1*" #stride "+" #in ", " #b "\n\t"\
    "movq 2*" #stride "+" #in ", " #c "\n\t"\
    "movq 3*" #stride "+" #in ", " #d "\n\t"
67 |
|
68 |
/*
 * Expands to the text of four "movq" stores for use inside an inline-asm
 * template.  a, b, c and d are written to addresses 0*stride+out,
 * 1*stride+out, 2*stride+out and 3*stride+out respectively.
 *
 * Every argument is stringified verbatim: `stride` must be something the
 * assembler can multiply, `out` is the textual memory operand that follows
 * the displacement, and a..d are source operands as they should appear in
 * the template (e.g. %%mm0).
 */
#define STORE4(stride,out,a,b,c,d)\
    "movq " #a ", 0*" #stride "+" #out "\n\t"\
    "movq " #b ", 1*" #stride "+" #out "\n\t"\
    "movq " #c ", 2*" #stride "+" #out "\n\t"\
    "movq " #d ", 3*" #stride "+" #out "\n\t"
73 |
|
74 |
/*
 * In-place sum/difference of two registers of packed 16-bit words.
 * On exit: a = a + b and b = b - a, where the right-hand sides are the
 * values on entry.
 *
 * The difference is produced without a temporary: b is first doubled and
 * the freshly computed sum is then subtracted, 2*b - (a + b) = b - a.
 */
#define SUMSUB_BA( a, b ) \
    "paddw " #b ", " #a " \n\t"\
    "paddw " #b ", " #b " \n\t"\
    "psubw " #a ", " #b " \n\t"
79 |
|
80 |
/*
 * One interleave step used by the transpose macros below.
 *
 *   a, b : inputs; a is overwritten with the interleaved low halves
 *   t    : output; receives the interleaved high halves
 *   n    : suffix selecting the punpckl/punpckh element size (bw, wd, dq, qdq)
 *   m    : suffix selecting the register-copy instruction (q -> movq,
 *          dqa -> movdqa)
 *
 * b is left untouched.  The macro ends on its last instruction, with no
 * trailing line continuation, so the line that follows the definition can
 * never be spliced into it.
 */
#define SBUTTERFLY(a,b,t,n,m)\
    "mov" #m " " #a ", " #t " \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */
84 |
|
85 |
/*
 * 4x4 transpose of 16-bit elements held in four 64-bit registers a, b, c, d,
 * built from four SBUTTERFLY steps; t is an extra register.
 *
 * With input rows a=abcd, b=efgh, c=ijkl, d=mnop the transposed rows end up
 * in a, d, t, c (in that order); b holds an intermediate value on exit.
 */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
|
90 |
|
91 |
/*
 * Transposes eight rows of four bytes (a..h) into four rows of eight bytes.
 *
 *   a, b, c, d : registers; each is first byte-interleaved with e, f, g, h
 *   e, f, g, h : only ever used as punpcklbw sources, so they can be memory
 *   t          : extra register
 *
 * out: a, d, t, c (rows 0..3); b holds an intermediate value on exit.
 */
#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
    "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
    "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
    "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 c3 g3 */\
    "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
    SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
                               /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
    SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
                               /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
    SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
                               /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
    SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
                               /* c= a3 b3 c3 d3 e3 f3 g3 h3 */
|
106 |
|
107 |
/*
 * 8x8 transpose of 16-bit elements held in eight 128-bit registers a..h,
 * built from SBUTTERFLY steps at word, doubleword and quadword granularity.
 * Two variants are provided because the sequence needs one more temporary
 * than the eight registers it is given.
 */
#if ARCH_X86_64
/*
 * x86-64 variant: %%xmm8 is hard-coded as the extra temporary and is
 * overwritten; the `t` argument is accepted but not used.
 */
// permutes 01234567 -> 05736421
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa %%xmm8, " #g " \n\t"
#else
/*
 * Variant without a ninth register: `t` must be the text of a memory operand
 * with room for two 128-bit values.  It is accessed both as `t` and as
 * `16t` (the literal 16 is pasted in front of it as a displacement), and h
 * is spilled to and reloaded from those two slots.  All accesses use movdqa,
 * which requires 16-byte-aligned memory operands.
 */
#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
    "movdqa " #h ", " #t " \n\t"\
    SBUTTERFLY(a,b,h,wd,dqa)\
    "movdqa " #h ", 16" #t " \n\t"\
    "movdqa " #t ", " #h " \n\t"\
    SBUTTERFLY(c,d,b,wd,dqa)\
    SBUTTERFLY(e,f,d,wd,dqa)\
    SBUTTERFLY(g,h,f,wd,dqa)\
    SBUTTERFLY(a,c,h,dq,dqa)\
    "movdqa " #h ", " #t " \n\t"\
    "movdqa 16" #t ", " #h " \n\t"\
    SBUTTERFLY(h,b,c,dq,dqa)\
    SBUTTERFLY(e,g,b,dq,dqa)\
    SBUTTERFLY(d,f,g,dq,dqa)\
    SBUTTERFLY(a,e,f,qdq,dqa)\
    SBUTTERFLY(h,d,e,qdq,dqa)\
    "movdqa " #h ", 16" #t " \n\t"\
    "movdqa " #t ", " #h " \n\t"\
    SBUTTERFLY(h,b,d,qdq,dqa)\
    SBUTTERFLY(c,g,b,qdq,dqa)\
    "movdqa 16" #t ", " #g " \n\t"
#endif
|
146 |
|
147 |
/*
 * Sets every 16-bit word of register `regd` to 1.
 *
 * pcmpeqd of a register with itself yields all-ones; the logical right shift
 * of each word by 15 then leaves only bit 0 set.  `regd` is the bare register
 * name (the "%%" prefix is supplied here).
 *
 * The asm statement declares no outputs, inputs or clobbers, so the compiler
 * is not told that `regd` changes.  The expansion has no trailing semicolon;
 * the caller supplies it.
 */
#define MOVQ_WONE(regd) \
    __asm__ volatile ( \
        "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
        "psrlw $15, %%" #regd ::)
151 |
|
152 |
void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
|
153 |
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
|
154 |
|
155 |
void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
156 |
void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
157 |
void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); |
158 |
|
159 |
void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
|
160 |
void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
|
161 |
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); |
162 |
void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); |
163 |
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); |
164 |
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); |
165 |
|
166 |
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
|
167 |
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); |
168 |
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); |
169 |
|
170 |
void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, |
171 |
double *autoc);
|
172 |
|
173 |
void ff_mmx_idct(DCTELEM *block);
|
174 |
void ff_mmxext_idct(DCTELEM *block);
|
175 |
|
176 |
#endif /* AVCODEC_X86_DSPUTIL_MMX_H */ |