ffmpeg / libavcodec / i386 / fft_3dn.c @ 40d0e665
History  View  Annotate  Download (4.24 KB)
1 
/*


2 
* FFT/MDCT transform with 3DNow! optimizations

3 
* Copyright (c) 2006 Zuxy MENG Jie, Loren Merritt

4 
* Based on fft_sse.c copyright (c) 2002 Fabrice Bellard.

5 
*

6 
* This file is part of FFmpeg.

7 
*

8 
* FFmpeg is free software; you can redistribute it and/or

9 
* modify it under the terms of the GNU Lesser General Public

10 
* License as published by the Free Software Foundation; either

11 
* version 2.1 of the License, or (at your option) any later version.

12 
*

13 
* FFmpeg is distributed in the hope that it will be useful,

14 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

15 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

16 
* Lesser General Public License for more details.

17 
*

18 
* You should have received a copy of the GNU Lesser General Public

19 
* License along with FFmpeg; if not, write to the Free Software

20 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

21 
*/

22 
#include "dsputil.h" 
23 
#include "x86_cpu.h" 
24  
25 
/* Sign-bit mask { 0, 0x80000000 }: pxor with this flips the sign of the
 * second (imaginary) float of an MMX register, forward-transform case.
 * 1U << 31 avoids the undefined behavior of left-shifting a signed int
 * into its sign bit. */
static const int p1m1[2] __attribute__((aligned(8))) =
    { 0, (int)(1U << 31) };
27  
28 
/* Sign-bit mask { 0x80000000, 0 }: pxor with this flips the sign of the
 * first (real) float of an MMX register, inverse-transform case.
 * 1U << 31 avoids the undefined behavior of left-shifting a signed int
 * into its sign bit. */
static const int m1p1[2] __attribute__((aligned(8))) =
    { (int)(1U << 31), 0 };
30  
31 
/*
 * In-place complex FFT of 1 << s->nbits points using AMD 3DNow!
 * SIMD instructions (two single-precision floats per MMX register).
 *
 * s: FFT context — s->nbits is log2 of the transform length,
 *    s->inverse selects forward vs. inverse transform, s->exptab1
 *    points to the pre-interleaved twiddle-factor table built at init.
 * z: array of 1 << s->nbits complex values, transformed in place.
 *    NOTE(review): input is presumably in the permuted order produced
 *    by the generic FFT driver — confirm against the scalar ff_fft_calc.
 */
void ff_fft_calc_3dn(FFTContext *s, FFTComplex *z)
{
    int ln = s->nbits;
    long j;
    x86_reg i;
    long nblocks, nloops;
    FFTComplex *p, *cptr;

    asm volatile(
        /* FEMMS is not a must here but recommended by AMD */
        "femms \n\t"
        /* mm7 = sign-flip mask applied by pxor in pass 0/1: m1p1 flips
         * the real half for the inverse transform, p1m1 the imaginary
         * half for the forward transform */
        "movq %0, %%mm7 \n\t"
        ::"m"(*(s->inverse ? m1p1 : p1m1))
    );

    /* passes 0 and 1 fused: radix-4 butterflies over groups of four
     * complex values (32 bytes), walking the array from the end down;
     * the "sub $32" sets the flags that "jg 1b" tests */
    i = 8 << ln;
    asm volatile(
        "1: \n\t"
        "sub $32, %0 \n\t"
        "movq    (%0,%1), %%mm0 \n\t"
        "movq  16(%0,%1), %%mm1 \n\t"
        "movq   8(%0,%1), %%mm2 \n\t"
        "movq  24(%0,%1), %%mm3 \n\t"
        "movq      %%mm0, %%mm4 \n\t"
        "movq      %%mm1, %%mm5 \n\t"
        "pfadd     %%mm2, %%mm0 \n\t"
        "pfadd     %%mm3, %%mm1 \n\t"
        "pfsub     %%mm2, %%mm4 \n\t"
        "pfsub     %%mm3, %%mm5 \n\t"
        "movq      %%mm0, %%mm2 \n\t"
        /* swap the two dwords of mm5 (re <-> im) using mm6 as scratch;
         * mm6's prior contents never reach the result */
        "punpckldq %%mm5, %%mm6 \n\t"
        "punpckhdq %%mm6, %%mm5 \n\t"
        "movq      %%mm4, %%mm3 \n\t"
        "pxor      %%mm7, %%mm5 \n\t"
        "pfadd     %%mm1, %%mm0 \n\t"
        "pfadd     %%mm5, %%mm4 \n\t"
        "pfsub     %%mm1, %%mm2 \n\t"
        "pfsub     %%mm5, %%mm3 \n\t"
        "movq      %%mm0,   (%0,%1) \n\t"
        "movq      %%mm4,  8(%0,%1) \n\t"
        "movq      %%mm2, 16(%0,%1) \n\t"
        "movq      %%mm3, 24(%0,%1) \n\t"
        "jg 1b \n\t"
        :"+r"(i)
        :"r"(z)
    );
    /* pass 2 .. ln-1: classic butterfly passes, block count halving and
     * butterfly span doubling each pass */

    nblocks = 1 << (ln-3);
    nloops  = 1 << 2;
    cptr = s->exptab1;
    do {
        p = z;
        j = nblocks;
        do {
            i = nloops*8;
            asm volatile(
                "1: \n\t"
                "sub $16, %0 \n\t"
                "movq    (%1,%0), %%mm0 \n\t"
                "movq   8(%1,%0), %%mm1 \n\t"
                "movq    (%2,%0), %%mm2 \n\t"
                "movq   8(%2,%0), %%mm3 \n\t"
                "movq      %%mm2, %%mm4 \n\t"
                "movq      %%mm3, %%mm5 \n\t"
                /* duplicate re into both lanes (mm2/mm3) and im into
                 * both lanes (mm4/mm5) for the complex multiply */
                "punpckldq %%mm2, %%mm2 \n\t"
                "punpckldq %%mm3, %%mm3 \n\t"
                "punpckhdq %%mm4, %%mm4 \n\t"
                "punpckhdq %%mm5, %%mm5 \n\t"
                "pfmul   (%3,%0,2), %%mm2 \n\t" // cre*re cim*re
                "pfmul  8(%3,%0,2), %%mm3 \n\t"
                "pfmul 16(%3,%0,2), %%mm4 \n\t" // -cim*im cre*im
                "pfmul 24(%3,%0,2), %%mm5 \n\t"
                "pfadd     %%mm2, %%mm4 \n\t" // cre*re-cim*im cim*re+cre*im
                "pfadd     %%mm3, %%mm5 \n\t"
                "movq      %%mm0, %%mm2 \n\t"
                "movq      %%mm1, %%mm3 \n\t"
                "pfadd     %%mm4, %%mm0 \n\t"
                "pfadd     %%mm5, %%mm1 \n\t"
                "pfsub     %%mm4, %%mm2 \n\t"
                "pfsub     %%mm5, %%mm3 \n\t"
                "movq      %%mm0,  (%1,%0) \n\t"
                "movq      %%mm1, 8(%1,%0) \n\t"
                "movq      %%mm2,  (%2,%0) \n\t"
                "movq      %%mm3, 8(%2,%0) \n\t"
                "jg 1b \n\t"
                :"+r"(i)
                :"r"(p), "r"(p + nloops), "r"(cptr)
            );
            p += nloops*2;
        } while (--j); /* pre-decrement is essential: plain "while (j)"
                        * never terminates since j is not modified */
        cptr += nloops*2;
        nblocks >>= 1;
        nloops  <<= 1;
    } while (nblocks != 0);
    /* leave MMX state so subsequent x87 FPU code works */
    asm volatile("femms");
}