## ffmpeg / libavcodec / ppc / idct_altivec.c @ 1ee076b1

History | View | Annotate | Download (10.6 KB)

1 | 05c4072b | Michael Niedermayer | ```
/*
``` |
---|---|---|---|

2 | ```
* Copyright (c) 2001 Michel Lespinasse
``` |
||

3 | ```
*
``` |
||

4 | b78e7197 | Diego Biurrun | ```
* This file is part of FFmpeg.
``` |

5 | ```
*
``` |
||

6 | ```
* FFmpeg is free software; you can redistribute it and/or
``` |
||

7 | 05c4072b | Michael Niedermayer | ```
* modify it under the terms of the GNU Lesser General Public
``` |

8 | ```
* License as published by the Free Software Foundation; either
``` |
||

9 | b78e7197 | Diego Biurrun | ```
* version 2.1 of the License, or (at your option) any later version.
``` |

10 | 05c4072b | Michael Niedermayer | ```
*
``` |

11 | b78e7197 | Diego Biurrun | ```
* FFmpeg is distributed in the hope that it will be useful,
``` |

12 | 05c4072b | Michael Niedermayer | ```
* but WITHOUT ANY WARRANTY; without even the implied warranty of
``` |

13 | ```
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
``` |
||

14 | ```
* Lesser General Public License for more details.
``` |
||

15 | ```
*
``` |
||

16 | ```
* You should have received a copy of the GNU Lesser General Public
``` |
||

17 | b78e7197 | Diego Biurrun | ```
* License along with FFmpeg; if not, write to the Free Software
``` |

18 | 5509bffa | Diego Biurrun | ```
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
``` |

19 | 05c4072b | Michael Niedermayer | ```
*/
``` |

20 | |||

21 | ```
/*
``` |
||

22 | ```
* NOTE: This code is based on GPL code from the libmpeg2 project. The
``` |
||

23 | ```
* author, Michel Lespinasse, has given explicit permission to release
``` |
||

24 | 952f18ff | Diego Biurrun | ```
* under LGPL as part of FFmpeg.
``` |

25 | 05c4072b | Michael Niedermayer | ```
*/
``` |

26 | |||

27 | ```
/*
``` |
||

28 | 952f18ff | Diego Biurrun | ```
* FFmpeg integration by Dieter Shirley
``` |

29 | 05c4072b | Michael Niedermayer | ```
*
``` |

30 | 952f18ff | Diego Biurrun | ```
* This file is a direct copy of the AltiVec IDCT module from the libmpeg2
``` |

31 | ```
* project. I've deleted all of the libmpeg2-specific code, renamed the
``` |
||

32 | ```
* functions and reordered the function parameters. The only change to the
``` |
||

33 | ```
* IDCT function itself was to factor out the partial transposition, and to
``` |
||

34 | ```
* perform a full transpose at the end of the function.
``` |
||

35 | 05c4072b | Michael Niedermayer | ```
*/
``` |

36 | |||

37 | |||

38 | #include <stdlib.h> /* malloc(), free() */ |
||

39 | #include <string.h> |
||

40 | 5137235e | Diego Biurrun | #include "config.h" |

41 | ```
#if HAVE_ALTIVEC_H
``` |
||

42 | #include <altivec.h> |
||

43 | ```
#endif
``` |
||

44 | 245976da | Diego Biurrun | #include "libavcodec/dsputil.h" |

45 | a6b4448c | Luca Barbato | #include "types_altivec.h" |

46 | ddb8c2c0 | Måns Rullgård | #include "dsputil_altivec.h" |

47 | 05c4072b | Michael Niedermayer | |

/*
 * IDCT_HALF: one pass of the 8-point butterfly network.
 *
 * Inputs  (read):    vx0..vx7, the multipliers a0, a1, a2, ma2, c4, mc4
 *                    and the all-zero vector `zero`.
 * Outputs (written): vy0..vy7.
 * Scratch:           t0..t8 (several are deliberately reused between
 *                    stages, so the statement order below must be kept).
 *
 * All of these names must already be declared in the expanding scope; the
 * macro expands to a plain sequence of statements (the last one ends with
 * its own semicolon), so it is used as `IDCT_HALF` without a trailing `;`.
 */
#define IDCT_HALF                                       \
    /* 1st stage */                                     \
    t1 = vec_mradds (a1, vx7, vx1 );                    \
    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));    \
    t7 = vec_mradds (a2, vx5, vx3);                     \
    t3 = vec_mradds (ma2, vx3, vx5);                    \
                                                        \
    /* 2nd stage */                                     \
    t5 = vec_adds (vx0, vx4);                           \
    t0 = vec_subs (vx0, vx4);                           \
    t2 = vec_mradds (a0, vx6, vx2);                     \
    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));    \
    t6 = vec_adds (t8, t3);                             \
    t3 = vec_subs (t8, t3);                             \
    t8 = vec_subs (t1, t7);                             \
    t1 = vec_adds (t1, t7);                             \
                                                        \
    /* 3rd stage */                                     \
    t7 = vec_adds (t5, t2);                             \
    t2 = vec_subs (t5, t2);                             \
    t5 = vec_adds (t0, t4);                             \
    t0 = vec_subs (t0, t4);                             \
    t4 = vec_subs (t8, t3);                             \
    t3 = vec_adds (t8, t3);                             \
                                                        \
    /* 4th stage */                                     \
    vy0 = vec_adds (t7, t1);                            \
    vy7 = vec_subs (t7, t1);                            \
    vy1 = vec_mradds (c4, t3, t5);                      \
    vy6 = vec_mradds (mc4, t3, t5);                     \
    vy2 = vec_mradds (c4, t4, t0);                      \
    vy5 = vec_mradds (mc4, t4, t0);                     \
    vy3 = vec_adds (t2, t6);                            \
    vy4 = vec_subs (t2, t6);

82 | |||

83 | 115329f1 | Diego Biurrun | |

/*
 * IDCT: complete 8x8 inverse DCT, expanded inline in the caller.
 *
 * Requirements on the expanding scope:
 *   - `block` must index eight vec_s16 rows (block[0]..block[7]);
 *   - `constants` (the table defined in this file) must be visible;
 *   - none of the local names declared below may already exist.
 *
 * The macro DECLARES its working variables, so it has to appear where
 * declarations are allowed and cannot be wrapped in do { } while (0):
 * the callers read vx0..vx7 (and `zero`) after the expansion.
 *
 * Steps:
 *   1. Splat the scalar multipliers out of constants[0]; the last two
 *      16-bit entries are read as one 32-bit word and splatted as `bias`.
 *   2. Prescale each input row: shift left by 4, then vec_mradds with the
 *      per-row table (constants[1] for rows 0/4, [2] for rows 1/7,
 *      [3] for rows 2/6, [4] for rows 3/5).
 *   3. First IDCT_HALF pass (vx -> vy).
 *   4. Three rounds of vec_mergeh/vec_mergel, i.e. the usual 8x8 transpose
 *      of 16-bit elements; `bias` is added to row 0 in the last round
 *      (presumably the rounding term for the final shift).
 *   5. Second IDCT_HALF pass (vx -> vy).
 *   6. Arithmetic shift right by 6; the results are left in vx0..vx7.
 */
#define IDCT                                                            \
    vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;                     \
    vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;                     \
    vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias;                       \
    vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8;                         \
    vec_u16 shift;                                                      \
                                                                        \
    c4 = vec_splat (constants[0], 0);                                   \
    a0 = vec_splat (constants[0], 1);                                   \
    a1 = vec_splat (constants[0], 2);                                   \
    a2 = vec_splat (constants[0], 3);                                   \
    mc4 = vec_splat (constants[0], 4);                                  \
    ma2 = vec_splat (constants[0], 5);                                  \
    bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3);               \
                                                                        \
    zero = vec_splat_s16 (0);                                           \
    shift = vec_splat_u16 (4);                                          \
                                                                        \
    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);    \
    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);    \
    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);    \
    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);    \
    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);    \
    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);    \
    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);    \
    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);    \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    vx0 = vec_mergeh (vy0, vy4);                                        \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    vy0 = vec_mergeh (vx0, vx4);                                        \
    vy1 = vec_mergel (vx0, vx4);                                        \
    vy2 = vec_mergeh (vx1, vx5);                                        \
    vy3 = vec_mergel (vx1, vx5);                                        \
    vy4 = vec_mergeh (vx2, vx6);                                        \
    vy5 = vec_mergel (vx2, vx6);                                        \
    vy6 = vec_mergeh (vx3, vx7);                                        \
    vy7 = vec_mergel (vx3, vx7);                                        \
                                                                        \
    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);                       \
    vx1 = vec_mergel (vy0, vy4);                                        \
    vx2 = vec_mergeh (vy1, vy5);                                        \
    vx3 = vec_mergel (vy1, vy5);                                        \
    vx4 = vec_mergeh (vy2, vy6);                                        \
    vx5 = vec_mergel (vy2, vy6);                                        \
    vx6 = vec_mergeh (vy3, vy7);                                        \
    vx7 = vec_mergel (vy3, vy7);                                        \
                                                                        \
    IDCT_HALF                                                           \
                                                                        \
    shift = vec_splat_u16 (6);                                          \
    vx0 = vec_sra (vy0, shift);                                         \
    vx1 = vec_sra (vy1, shift);                                         \
    vx2 = vec_sra (vy2, shift);                                         \
    vx3 = vec_sra (vy3, shift);                                         \
    vx4 = vec_sra (vy4, shift);                                         \
    vx5 = vec_sra (vy5, shift);                                         \
    vx6 = vec_sra (vy6, shift);                                         \
    vx7 = vec_sra (vy7, shift);

151 | |||

152 | 3b991c54 | Romain Dolbeau | |

153 | a6b4448c | Luca Barbato | static const vec_s16 constants[5] = { |

154 | 80a61f08 | Diego Biurrun | {23170, 13573, 6518, 21895, -23170, -21895, 32, 31}, |

155 | {16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725}, |
||

156 | {22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521}, |
||

157 | {21407, 29692, 27969, 25172, 21407, 25172, 27969, 29692}, |
||

158 | {19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722} |
||

159 | 3b991c54 | Romain Dolbeau | }; |

160 | 05c4072b | Michael Niedermayer | |

161 | ddb8c2c0 | Måns Rullgård | void idct_put_altivec(uint8_t* dest, int stride, int16_t *blk) |

162 | 05c4072b | Michael Niedermayer | { |

163 | ddb8c2c0 | Måns Rullgård | vec_s16 *block = (vec_s16*)blk; |

164 | a6b4448c | Luca Barbato | vec_u8 tmp; |

165 | 05c4072b | Michael Niedermayer | |

166 | IDCT |
||

167 | |||

168 | bb270c08 | Diego Biurrun | ```
#define COPY(dest,src) \
``` |

169 | tmp = vec_packsu (src, src); \ |
||

170 | a6b4448c | Luca Barbato | vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \ |

171 | vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest); |
||

172 | 05c4072b | Michael Niedermayer | |

173 | bb270c08 | Diego Biurrun | COPY (dest, vx0) dest += stride; |

174 | COPY (dest, vx1) dest += stride; |
||

175 | COPY (dest, vx2) dest += stride; |
||

176 | COPY (dest, vx3) dest += stride; |
||

177 | COPY (dest, vx4) dest += stride; |
||

178 | COPY (dest, vx5) dest += stride; |
||

179 | COPY (dest, vx6) dest += stride; |
||

180 | 05c4072b | Michael Niedermayer | COPY (dest, vx7) |

181 | } |
||

182 | |||

183 | ddb8c2c0 | Måns Rullgård | void idct_add_altivec(uint8_t* dest, int stride, int16_t *blk) |

184 | 05c4072b | Michael Niedermayer | { |

185 | ddb8c2c0 | Måns Rullgård | vec_s16 *block = (vec_s16*)blk; |

186 | a6b4448c | Luca Barbato | vec_u8 tmp; |

187 | vec_s16 tmp2, tmp3; |
||

188 | vec_u8 perm0; |
||

189 | vec_u8 perm1; |
||

190 | vec_u8 p0, p1, p; |
||

191 | 05c4072b | Michael Niedermayer | |

192 | IDCT |
||

193 | |||

194 | ```
p0 = vec_lvsl (0, dest);
``` |
||

195 | p1 = vec_lvsl (stride, dest); |
||

196 | ```
p = vec_splat_u8 (-1);
``` |
||

197 | perm0 = vec_mergeh (p, p0); |
||

198 | perm1 = vec_mergeh (p, p1); |
||

199 | |||

200 | bb270c08 | Diego Biurrun | ```
#define ADD(dest,src,perm) \
``` |

201 | ```
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
``` |
||

202 | ```
tmp = vec_ld (0, dest); \
``` |
||

203 | a6b4448c | Luca Barbato | tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm); \ |

204 | bb270c08 | Diego Biurrun | tmp3 = vec_adds (tmp2, src); \ |

205 | tmp = vec_packsu (tmp3, tmp3); \ |
||

206 | a6b4448c | Luca Barbato | vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \ |

207 | vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest); |
||

208 | 05c4072b | Michael Niedermayer | |

209 | bb270c08 | Diego Biurrun | ADD (dest, vx0, perm0) dest += stride; |

210 | ADD (dest, vx1, perm1) dest += stride; |
||

211 | ADD (dest, vx2, perm0) dest += stride; |
||

212 | ADD (dest, vx3, perm1) dest += stride; |
||

213 | ADD (dest, vx4, perm0) dest += stride; |
||

214 | ADD (dest, vx5, perm1) dest += stride; |
||

215 | ADD (dest, vx6, perm0) dest += stride; |
||

216 | 05c4072b | Michael Niedermayer | ADD (dest, vx7, perm1) |

217 | } |