ffmpeg / libavcodec / i386 / fft_3dn.c @ b550bfaa
History  View  Annotate  Download (4.2 KB)
1 
/*


2 
* FFT/MDCT transform with 3DNow! optimizations

3 
* Copyright (c) 2006 Zuxy MENG Jie, Loren Merritt

4 
* Based on fft_sse.c copyright (c) 2002 Fabrice Bellard.

5 
*

6 
* This file is part of FFmpeg.

7 
*

8 
* FFmpeg is free software; you can redistribute it and/or

9 
* modify it under the terms of the GNU Lesser General Public

10 
* License as published by the Free Software Foundation; either

11 
* version 2.1 of the License, or (at your option) any later version.

12 
*

13 
* FFmpeg is distributed in the hope that it will be useful,

14 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

15 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

16 
* Lesser General Public License for more details.

17 
*

18 
* You should have received a copy of the GNU Lesser General Public

19 
* License along with FFmpeg; if not, write to the Free Software

20 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

21 
*/

22 
#include "dsputil.h" 
23  
24 
/*
 * 64-bit XOR mask with only the sign bit of the upper 32-bit element set.
 * ff_fft_calc_3dn() loads it into %mm7 when s->inverse is zero and applies
 * it with pxor, which negates the upper packed float and leaves the lower
 * one untouched.
 *
 * The bit is built as an unsigned value and then converted, because
 * shifting 1 into the sign bit of a signed int (1 << 31) is undefined
 * behaviour.  The conversion back to int wraps modulo 2^32 on GCC.
 */
static const int p1m1[2] __attribute__((aligned(8))) =
    { 0, (int)(1U << 31) };
26  
27 
/*
 * 64-bit XOR mask with only the sign bit of the lower 32-bit element set.
 * ff_fft_calc_3dn() loads it into %mm7 when s->inverse is non-zero and
 * applies it with pxor, which negates the lower packed float and leaves
 * the upper one untouched.
 *
 * The bit is built as an unsigned value and then converted, because
 * shifting 1 into the sign bit of a signed int (1 << 31) is undefined
 * behaviour.  The conversion back to int wraps modulo 2^32 on GCC.
 */
static const int m1p1[2] __attribute__((aligned(8))) =
    { (int)(1U << 31), 0 };
29  
30 
/**
 * In-place FFT using 3DNow! instructions.
 *
 * @param s  transform context; this function reads s->nbits (log2 of the
 *           transform size), s->inverse (selects the sign mask) and
 *           s->exptab1 (twiddle factor table consumed by the later passes).
 * @param z  buffer of 1 << s->nbits complex values, overwritten with the
 *           result.
 *
 * NOTE(review): the byte offsets used below (8 << ln, nloops*8) assume
 * sizeof(FFTComplex) == 8, i.e. two packed 32-bit floats -- confirm against
 * dsputil.h.
 * NOTE(review): s->nbits is assumed to be >= 3; a smaller value makes the
 * shift count in "1 << (ln - 3)" negative -- confirm the caller guarantees it.
 *
 * %mm7 is loaded in the first asm statement and is expected to stay intact
 * until the second one; no C code that could touch the MMX registers is
 * placed between them.
 */
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z)
{
    int ln = s->nbits;
    long i, j;
    long nblocks, nloops;
    FFTComplex *p, *cptr;

    /* Load the direction-dependent sign mask into %mm7. */
    asm volatile(
        /* FEMMS is not a must here but recommended by AMD */
        "femms \n\t"
        "movq %0, %%mm7 \n\t"
        ::"m"(*(s->inverse ? m1p1 : p1m1))
    );

    /*
     * First two passes, fused: walk the buffer from the end towards the
     * start, 32 bytes per iteration.  %0 is the remaining byte count and
     * doubles as the offset into z; MMX/3DNow! instructions leave EFLAGS
     * alone, so the final "jg" still tests the result of the "sub".
     */
    i = 8 << ln;
    asm volatile(
        "1: \n\t"
        "sub $32, %0 \n\t"
        "movq (%0,%1), %%mm0 \n\t"
        "movq 16(%0,%1), %%mm1 \n\t"
        "movq 8(%0,%1), %%mm2 \n\t"
        "movq 24(%0,%1), %%mm3 \n\t"
        "movq %%mm0, %%mm4 \n\t"
        "movq %%mm1, %%mm5 \n\t"
        "pfadd %%mm2, %%mm0 \n\t"
        "pfadd %%mm3, %%mm1 \n\t"
        "pfsub %%mm2, %%mm4 \n\t"
        "pfsub %%mm3, %%mm5 \n\t"
        "movq %%mm0, %%mm2 \n\t"
        /* the two unpacks swap the halves of %mm5 (old %mm6 data is unused) */
        "punpckldq %%mm5, %%mm6 \n\t"
        "punpckhdq %%mm6, %%mm5 \n\t"
        "movq %%mm4, %%mm3 \n\t"
        /* flip the sign of one half according to the transform direction */
        "pxor %%mm7, %%mm5 \n\t"
        "pfadd %%mm1, %%mm0 \n\t"
        "pfadd %%mm5, %%mm4 \n\t"
        "pfsub %%mm1, %%mm2 \n\t"
        "pfsub %%mm5, %%mm3 \n\t"
        "movq %%mm0, (%0,%1) \n\t"
        "movq %%mm4, 8(%0,%1) \n\t"
        "movq %%mm2, 16(%0,%1) \n\t"
        "movq %%mm3, 24(%0,%1) \n\t"
        "jg 1b \n\t"
        :"+r"(i)
        :"r"(z)
        /* the loop stores through z, which is only passed as a register */
        :"memory"
    );
    /* pass 2 .. ln-1 */

    nblocks = 1 << (ln - 3);
    nloops = 1 << 2;
    cptr = s->exptab1;
    do {
        p = z;
        j = nblocks;
        do {
            /*
             * One butterfly block: %1 points at the first half (p), %2 at
             * the second half (p + nloops), %3 at the twiddle table, which
             * is indexed at twice the data offset.  16 bytes of each half
             * are processed per iteration, again counting %0 down to zero.
             */
            i = nloops*8;
            asm volatile(
                "1: \n\t"
                "sub $16, %0 \n\t"
                "movq (%1,%0), %%mm0 \n\t"
                "movq 8(%1,%0), %%mm1 \n\t"
                "movq (%2,%0), %%mm2 \n\t"
                "movq 8(%2,%0), %%mm3 \n\t"
                "movq %%mm2, %%mm4 \n\t"
                "movq %%mm3, %%mm5 \n\t"
                "punpckldq %%mm2, %%mm2 \n\t"
                "punpckldq %%mm3, %%mm3 \n\t"
                "punpckhdq %%mm4, %%mm4 \n\t"
                "punpckhdq %%mm5, %%mm5 \n\t"
                "pfmul (%3,%0,2), %%mm2 \n\t" // cre*re cim*re
                "pfmul 8(%3,%0,2), %%mm3 \n\t"
                "pfmul 16(%3,%0,2), %%mm4 \n\t" // -cim*im cre*im
                "pfmul 24(%3,%0,2), %%mm5 \n\t"
                "pfadd %%mm2, %%mm4 \n\t" // cre*re-cim*im cim*re+cre*im
                "pfadd %%mm3, %%mm5 \n\t"
                "movq %%mm0, %%mm2 \n\t"
                "movq %%mm1, %%mm3 \n\t"
                "pfadd %%mm4, %%mm0 \n\t"
                "pfadd %%mm5, %%mm1 \n\t"
                "pfsub %%mm4, %%mm2 \n\t"
                "pfsub %%mm5, %%mm3 \n\t"
                "movq %%mm0, (%1,%0) \n\t"
                "movq %%mm1, 8(%1,%0) \n\t"
                "movq %%mm2, (%2,%0) \n\t"
                "movq %%mm3, 8(%2,%0) \n\t"
                "jg 1b \n\t"
                :"+r"(i)
                :"r"(p), "r"(p + nloops), "r"(cptr)
                /* the loop stores through p and p + nloops */
                :"memory"
            );
            p += nloops*2;
        } while (--j);
        /* next pass: half as many blocks, each twice as long */
        cptr += nloops*2;
        nblocks >>= 1;
        nloops <<= 1;
    } while (nblocks != 0);
    /* leave MMX state so that x87 floating point code can run again */
    asm volatile("femms");
}