/**
 * @file
 * MMX-optimized functions for the VP6 decoder
 *
 * Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

23 | |||

24 | #include "libavutil/x86_cpu.h" |
||

25 | #include "libavcodec/dsputil.h" |
||

26 | #include "dsputil_mmx.h" |
||

27 | #include "vp6dsp_mmx.h" |
||

28 | |||

29 | |||

30 | ```
/**
 * Apply one 4-tap filter step to 8 pixels and store the 8 clamped results.
 *
 * in1..in4 are byte offsets (stringized into the addressing mode) from the
 * source pointer %0 selecting the four taps; the caller uses -1,0,1,2 for the
 * horizontal pass and -8,0,8,16 (one 8-byte row apart) for the vertical pass.
 *
 * Operand contract: %0 = source pointer, %1 = destination pointer,
 * %2 = table of 4 word-broadcast weights (4 x 8 bytes).
 * Register contract set up by the caller: %%mm7 = 0 (zero for byte->word
 * unpacking), %%mm6 = ff_pw_64 (rounding bias added before the >>7 shift).
 * Clobbers %%mm0-%%mm5.  Low 4 pixels are processed in mm0/mm1/mm2,
 * high 4 pixels in parallel in mm3/mm4/mm5.
 */
#define DIAG4_MMX(in1,in2,in3,in4) \
    "movq "#in1"(%0), %%mm0 \n\t" /* load 8 pixels, tap 1 */ \
    "movq "#in2"(%0), %%mm1 \n\t" /* load 8 pixels, tap 2 */ \
    "movq %%mm0, %%mm3 \n\t" \
    "movq %%mm1, %%mm4 \n\t" \
    "punpcklbw %%mm7, %%mm0 \n\t" /* low 4 bytes -> words */ \
    "punpcklbw %%mm7, %%mm1 \n\t" \
    "punpckhbw %%mm7, %%mm3 \n\t" /* high 4 bytes -> words */ \
    "punpckhbw %%mm7, %%mm4 \n\t" \
    "pmullw 0(%2), %%mm0 \n\t" /* src[x-8 ] * biweight [0] */ \
    "pmullw 8(%2), %%mm1 \n\t" /* src[x   ] * biweight [1] */ \
    "pmullw 0(%2), %%mm3 \n\t" /* src[x-8 ] * biweight [0] */ \
    "pmullw 8(%2), %%mm4 \n\t" /* src[x   ] * biweight [1] */ \
    "paddw %%mm1, %%mm0 \n\t" \
    "paddw %%mm4, %%mm3 \n\t" \
    "movq "#in3"(%0), %%mm1 \n\t" /* load 8 pixels, tap 3 */ \
    "movq "#in4"(%0), %%mm2 \n\t" /* load 8 pixels, tap 4 */ \
    "movq %%mm1, %%mm4 \n\t" \
    "movq %%mm2, %%mm5 \n\t" \
    "punpcklbw %%mm7, %%mm1 \n\t" \
    "punpcklbw %%mm7, %%mm2 \n\t" \
    "punpckhbw %%mm7, %%mm4 \n\t" \
    "punpckhbw %%mm7, %%mm5 \n\t" \
    "pmullw 16(%2), %%mm1 \n\t" /* src[x+8 ] * biweight [2] */ \
    "pmullw 24(%2), %%mm2 \n\t" /* src[x+16] * biweight [3] */ \
    "pmullw 16(%2), %%mm4 \n\t" /* src[x+8 ] * biweight [2] */ \
    "pmullw 24(%2), %%mm5 \n\t" /* src[x+16] * biweight [3] */ \
    "paddw %%mm2, %%mm1 \n\t" \
    "paddw %%mm5, %%mm4 \n\t" \
    "paddsw %%mm1, %%mm0 \n\t" /* saturating: combine tap pairs */ \
    "paddsw %%mm4, %%mm3 \n\t" \
    "paddsw %%mm6, %%mm0 \n\t" /* Add 64 */ \
    "paddsw %%mm6, %%mm3 \n\t" /* Add 64 */ \
    "psraw $7, %%mm0 \n\t" /* >> 7: undo the 7-bit weight scale */ \
    "psraw $7, %%mm3 \n\t" \
    "packuswb %%mm3, %%mm0 \n\t" /* words -> bytes, clamp to 0..255 */ \
    "movq %%mm0, (%1) \n\t"
||

67 | |||

68 | void ff_vp6_filter_diag4_mmx(uint8_t *dst, uint8_t *src, int stride, |
||

69 | const int16_t *h_weights, const int16_t *v_weights) |
||

70 | { |
||

71 | uint8_t tmp[8*11], *t = tmp; |
||

72 | int16_t weights[4*4]; |
||

73 | ```
int i;
``` |
||

74 | src -= stride; |
||

75 | |||

76 | for (i=0; i<4*4; i++) |
||

77 | ```
weights[i] = h_weights[i>>2];
``` |
||

78 | |||

79 | ```
__asm__ volatile(
``` |
||

80 | ```
"pxor %%mm7, %%mm7 \n\t"
``` |
||

81 | "movq "MANGLE(ff_pw_64)", %%mm6 \n\t" |
||

82 | ```
"1: \n\t"
``` |
||

83 | DIAG4_MMX(-1,0,1,2) |
||

84 | ```
"add $8, %1 \n\t"
``` |
||

85 | ```
"add %3, %0 \n\t"
``` |
||

86 | ```
"decl %4 \n\t"
``` |
||

87 | ```
"jnz 1b \n\t"
``` |
||

88 | : "+r"(src), "+r"(t) |
||

89 | : "r"(weights), "r"((x86_reg)stride), "r"(11) |
||

90 | ```
: "memory");
``` |
||

91 | |||

92 | ```
t = tmp + 8;
``` |
||

93 | for (i=0; i<4*4; i++) |
||

94 | ```
weights[i] = v_weights[i>>2];
``` |
||

95 | |||

96 | ```
__asm__ volatile(
``` |
||

97 | ```
"pxor %%mm7, %%mm7 \n\t"
``` |
||

98 | "movq "MANGLE(ff_pw_64)", %%mm6 \n\t" |
||

99 | ```
"1: \n\t"
``` |
||

100 | DIAG4_MMX(-8,0,8,16) |
||

101 | ```
"add $8, %0 \n\t"
``` |
||

102 | ```
"add %3, %1 \n\t"
``` |
||

103 | ```
"decl %4 \n\t"
``` |
||

104 | ```
"jnz 1b \n\t"
``` |
||

105 | : "+r"(t), "+r"(dst) |
||

106 | : "r"(weights), "r"((x86_reg)stride), "r"(8) |
||

107 | ```
: "memory");
``` |
||

108 | } |