/*
 * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
 * Copyright (c) 2004 Michael Niedermayer
 *
 * MMX optimization by Michael Niedermayer <michaelni@gmx.at>
 * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/* This header intentionally has no multiple inclusion guards. It is meant to
 * be included multiple times and generates different code depending on the
 * value of certain #defines. */
29 
/* Upper bound on |scale| accepted by the fixed-point (PMULHRW-based) paths
 * below; derived from SCALE_OFFSET, which the including file #defines.
 * NOTE(review): presumably chosen so the shifted scale fits the 16-bit
 * rounded multiply without overflow — confirm against the including file. */
#define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0))
static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale) 
32 
{ 
33 
x86_reg i=0;

34  
35 
assert(FFABS(scale) < MAX_ABS); 
36 
scale<<= 16 + SCALE_OFFSET  BASIS_SHIFT + RECON_SHIFT;

37  
38 
SET_RND(mm6); 
39 
__asm__ volatile(

40 
"pxor %%mm7, %%mm7 \n\t"

41 
"movd %4, %%mm5 \n\t"

42 
"punpcklwd %%mm5, %%mm5 \n\t"

43 
"punpcklwd %%mm5, %%mm5 \n\t"

44 
ASMALIGN(4)

45 
"1: \n\t"

46 
"movq (%1, %0), %%mm0 \n\t"

47 
"movq 8(%1, %0), %%mm1 \n\t"

48 
PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6) 
49 
"paddw (%2, %0), %%mm0 \n\t"

50 
"paddw 8(%2, %0), %%mm1 \n\t"

51 
"psraw $6, %%mm0 \n\t"

52 
"psraw $6, %%mm1 \n\t"

53 
"pmullw (%3, %0), %%mm0 \n\t"

54 
"pmullw 8(%3, %0), %%mm1 \n\t"

55 
"pmaddwd %%mm0, %%mm0 \n\t"

56 
"pmaddwd %%mm1, %%mm1 \n\t"

57 
"paddd %%mm1, %%mm0 \n\t"

58 
"psrld $4, %%mm0 \n\t"

59 
"paddd %%mm0, %%mm7 \n\t"

60 
"add $16, %0 \n\t"

61 
"cmp $128, %0 \n\t" //FIXME optimize & bench 
62 
" jb 1b \n\t"

63 
PHADDD(%%mm7, %%mm6) 
64 
"psrld $2, %%mm7 \n\t"

65 
"movd %%mm7, %0 \n\t"

66  
67 
: "+r" (i)

68 
: "r"(basis), "r"(rem), "r"(weight), "g"(scale) 
69 
); 
70 
return i;

71 
} 
static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) 
74 
{ 
75 
x86_reg i=0;

76  
77 
if(FFABS(scale) < MAX_ABS){

78 
scale<<= 16 + SCALE_OFFSET  BASIS_SHIFT + RECON_SHIFT;

79 
SET_RND(mm6); 
80 
__asm__ volatile(

81 
"movd %3, %%mm5 \n\t"

82 
"punpcklwd %%mm5, %%mm5 \n\t"

83 
"punpcklwd %%mm5, %%mm5 \n\t"

84 
ASMALIGN(4)

85 
"1: \n\t"

86 
"movq (%1, %0), %%mm0 \n\t"

87 
"movq 8(%1, %0), %%mm1 \n\t"

88 
PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6) 
89 
"paddw (%2, %0), %%mm0 \n\t"

90 
"paddw 8(%2, %0), %%mm1 \n\t"

91 
"movq %%mm0, (%2, %0) \n\t"

92 
"movq %%mm1, 8(%2, %0) \n\t"

93 
"add $16, %0 \n\t"

94 
"cmp $128, %0 \n\t" // FIXME optimize & bench 
95 
" jb 1b \n\t"

96  
97 
: "+r" (i)

98 
: "r"(basis), "r"(rem), "g"(scale) 
99 
); 
100 
}else{

101 
for(i=0; i<8*8; i++){ 
102 
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT  RECON_SHIFT1)))>>(BASIS_SHIFT  RECON_SHIFT); 
103 
} 
104 
} 
105 
} 