/*
 * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
 * Copyright (c) 2004 Michael Niedermayer
 *
 * MMX optimization by Michael Niedermayer <michaelni@gmx.at>
 * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

24  
25 
#define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0)) 
26  
27 
static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale) 
28 
{ 
29 
long i=0; 
30  
31 
assert(FFABS(scale) < MAX_ABS); 
32 
scale<<= 16 + SCALE_OFFSET  BASIS_SHIFT + RECON_SHIFT;

33  
34 
SET_RND(mm6); 
35 
asm volatile( 
36 
"pxor %%mm7, %%mm7 \n\t"

37 
"movd %4, %%mm5 \n\t"

38 
"punpcklwd %%mm5, %%mm5 \n\t"

39 
"punpcklwd %%mm5, %%mm5 \n\t"

40 
ASMALIGN(4)

41 
"1: \n\t"

42 
"movq (%1, %0), %%mm0 \n\t"

43 
"movq 8(%1, %0), %%mm1 \n\t"

44 
PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6) 
45 
"paddw (%2, %0), %%mm0 \n\t"

46 
"paddw 8(%2, %0), %%mm1 \n\t"

47 
"psraw $6, %%mm0 \n\t"

48 
"psraw $6, %%mm1 \n\t"

49 
"pmullw (%3, %0), %%mm0 \n\t"

50 
"pmullw 8(%3, %0), %%mm1 \n\t"

51 
"pmaddwd %%mm0, %%mm0 \n\t"

52 
"pmaddwd %%mm1, %%mm1 \n\t"

53 
"paddd %%mm1, %%mm0 \n\t"

54 
"psrld $4, %%mm0 \n\t"

55 
"paddd %%mm0, %%mm7 \n\t"

56 
"add $16, %0 \n\t"

57 
"cmp $128, %0 \n\t" //FIXME optimize & bench 
58 
" jb 1b \n\t"

59 
PHADDD(%%mm7, %%mm6) 
60 
"psrld $2, %%mm7 \n\t"

61 
"movd %%mm7, %0 \n\t"

62  
63 
: "+r" (i)

64 
: "r"(basis), "r"(rem), "r"(weight), "g"(scale) 
65 
); 
66 
return i;

67 
} 
68  
69 
static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) 
70 
{ 
71 
long i=0; 
72  
73 
if(FFABS(scale) < MAX_ABS){

74 
scale<<= 16 + SCALE_OFFSET  BASIS_SHIFT + RECON_SHIFT;

75 
SET_RND(mm6); 
76 
asm volatile( 
77 
"movd %3, %%mm5 \n\t"

78 
"punpcklwd %%mm5, %%mm5 \n\t"

79 
"punpcklwd %%mm5, %%mm5 \n\t"

80 
ASMALIGN(4)

81 
"1: \n\t"

82 
"movq (%1, %0), %%mm0 \n\t"

83 
"movq 8(%1, %0), %%mm1 \n\t"

84 
PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6) 
85 
"paddw (%2, %0), %%mm0 \n\t"

86 
"paddw 8(%2, %0), %%mm1 \n\t"

87 
"movq %%mm0, (%2, %0) \n\t"

88 
"movq %%mm1, 8(%2, %0) \n\t"

89 
"add $16, %0 \n\t"

90 
"cmp $128, %0 \n\t" // FIXME optimize & bench 
91 
" jb 1b \n\t"

92  
93 
: "+r" (i)

94 
: "r"(basis), "r"(rem), "g"(scale) 
95 
); 
96 
}else{

97 
for(i=0; i<8*8; i++){ 
98 
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT  RECON_SHIFT1)))>>(BASIS_SHIFT  RECON_SHIFT); 
99 
} 
100 
} 
101 
} 