## ffmpeg / libavcodec / simple_idct.c @ 63613fe6

History | View | Annotate | Download (15.8 KB)

1 | 37e8dcda | Arpi | ```
/*
``` |
---|---|---|---|

2 | ff4ec49e | Fabrice Bellard | ```
* Simple IDCT
``` |

3 | ```
*
``` |
||

4 | ```
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
``` |
||

5 | ```
*
``` |
||

6 | b78e7197 | Diego Biurrun | ```
* This file is part of FFmpeg.
``` |

7 | ```
*
``` |
||

8 | ```
* FFmpeg is free software; you can redistribute it and/or
``` |
||

9 | ff4ec49e | Fabrice Bellard | ```
* modify it under the terms of the GNU Lesser General Public
``` |

10 | ```
* License as published by the Free Software Foundation; either
``` |
||

11 | b78e7197 | Diego Biurrun | ```
* version 2.1 of the License, or (at your option) any later version.
``` |

12 | ff4ec49e | Fabrice Bellard | ```
*
``` |

13 | b78e7197 | Diego Biurrun | ```
* FFmpeg is distributed in the hope that it will be useful,
``` |

14 | ff4ec49e | Fabrice Bellard | ```
* but WITHOUT ANY WARRANTY; without even the implied warranty of
``` |

15 | ```
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
``` |
||

16 | ```
* Lesser General Public License for more details.
``` |
||

17 | ```
*
``` |
||

18 | ```
* You should have received a copy of the GNU Lesser General Public
``` |
||

19 | b78e7197 | Diego Biurrun | ```
* License along with FFmpeg; if not, write to the Free Software
``` |

20 | 5509bffa | Diego Biurrun | ```
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
``` |

21 | ff4ec49e | Fabrice Bellard | ```
*/
``` |

22 | 115329f1 | Diego Biurrun | |

23 | 983e3246 | Michael Niedermayer | ```
/**
``` |

24 | bad5537e | Diego Biurrun | ```
* @file libavcodec/simple_idct.c
``` |

25 | 983e3246 | Michael Niedermayer | ```
* simpleidct in C.
``` |

26 | ```
*/
``` |
||

27 | 115329f1 | Diego Biurrun | |

28 | 37e8dcda | Arpi | ```
/*
``` |

29 | ff4ec49e | Fabrice Bellard | ```
based upon some outcommented c code from mpeg2dec (idct_mmx.c
``` |

30 | 115329f1 | Diego Biurrun | ```
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
``` |

31 | ff4ec49e | Fabrice Bellard | ```
*/
``` |

32 | 6000abfa | Fabrice Bellard | #include "avcodec.h" |

33 | d36a2466 | Fabrice Bellard | #include "dsputil.h" |

34 | edf7c2b2 | Måns Rullgård | #include "mathops.h" |

35 | 37e8dcda | Arpi | #include "simple_idct.h" |

36 | |||

37 | ```
#if 0
``` |
||

38 | ```
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
``` |
||

39 | ```
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
``` |
||

40 | ```
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
``` |
||

41 | ```
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
``` |
||

42 | ```
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
``` |
||

43 | ```
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
``` |
||

44 | ```
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
``` |
||

45 | ```
#define ROW_SHIFT 8
``` |
||

46 | ```
#define COL_SHIFT 17
``` |
||

47 | ```
#else
``` |
||

48 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||

49 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||

50 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||

51 | ccf589a8 | Michael Niedermayer | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |

52 | 37e8dcda | Arpi | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |

53 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||

54 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||

55 | #define ROW_SHIFT 11 |
||

56 | #define COL_SHIFT 20 // 6 |
||

57 | ```
#endif
``` |
||

58 | 4973971e | Michael Niedermayer | |

59 | 0e15384d | Michael Niedermayer | static inline void idctRowCondDC (DCTELEM * row) |

60 | 37e8dcda | Arpi | { |

61 | bb270c08 | Diego Biurrun | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |

62 | b250f9c6 | Aurelien Jacobs | ```
#if HAVE_FAST_64BIT
``` |

63 | 412ba501 | Fabrice Bellard | uint64_t temp; |

64 | ```
#else
``` |
||

65 | uint32_t temp; |
||

66 | ```
#endif
``` |
||

67 | 4973971e | Michael Niedermayer | |

68 | b250f9c6 | Aurelien Jacobs | ```
#if HAVE_FAST_64BIT
``` |

69 | 63613fe6 | Måns Rullgård | ```
#if HAVE_BIGENDIAN
``` |

70 | 412ba501 | Fabrice Bellard | #define ROW0_MASK 0xffff000000000000LL |

71 | ```
#else
``` |
||

72 | #define ROW0_MASK 0xffffLL |
||

73 | ```
#endif
``` |
||

74 | 0e15384d | Michael Niedermayer | if(sizeof(DCTELEM)==2){ |

75 | 115329f1 | Diego Biurrun | if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | |

76 | 0e15384d | Michael Niedermayer | ((uint64_t *)row)[1]) == 0) { |

77 | temp = (row[0] << 3) & 0xffff; |
||

78 | ```
temp += temp << 16;
``` |
||

79 | ```
temp += temp << 32;
``` |
||

80 | ```
((uint64_t *)row)[0] = temp;
``` |
||

81 | ```
((uint64_t *)row)[1] = temp;
``` |
||

82 | ```
return;
``` |
||

83 | bb270c08 | Diego Biurrun | } |

84 | 0e15384d | Michael Niedermayer | ```
}else{
``` |

85 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { |
||

86 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; |
||

87 | ```
return;
``` |
||

88 | } |
||

89 | } |
||

90 | 412ba501 | Fabrice Bellard | ```
#else
``` |

91 | 0e15384d | Michael Niedermayer | if(sizeof(DCTELEM)==2){ |

92 | if (!(((uint32_t*)row)[1] | |
||

93 | ```
((uint32_t*)row)[2] |
``` |
||

94 | 115329f1 | Diego Biurrun | ```
((uint32_t*)row)[3] |
``` |

95 | 0e15384d | Michael Niedermayer | ```
row[1])) {
``` |

96 | temp = (row[0] << 3) & 0xffff; |
||

97 | ```
temp += temp << 16;
``` |
||

98 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
||

99 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
||

100 | ```
return;
``` |
||

101 | } |
||

102 | ```
}else{
``` |
||

103 | if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) { |
||

104 | row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3; |
||

105 | ```
return;
``` |
||

106 | } |
||

107 | } |
||

108 | 412ba501 | Fabrice Bellard | ```
#endif
``` |

109 | 4973971e | Michael Niedermayer | |

110 | 412ba501 | Fabrice Bellard | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |

111 | bb270c08 | Diego Biurrun | a1 = a0; |

112 | a2 = a0; |
||

113 | a3 = a0; |
||

114 | 412ba501 | Fabrice Bellard | |

115 | ```
/* no need to optimize : gcc does it */
``` |
||

116 | ```
a0 += W2 * row[2];
``` |
||

117 | ```
a1 += W6 * row[2];
``` |
||

118 | ```
a2 -= W6 * row[2];
``` |
||

119 | ```
a3 -= W2 * row[2];
``` |
||

120 | |||

121 | edf7c2b2 | Måns Rullgård | ```
b0 = MUL16(W1, row[1]);
``` |

122 | 412ba501 | Fabrice Bellard | ```
MAC16(b0, W3, row[3]);
``` |

123 | edf7c2b2 | Måns Rullgård | ```
b1 = MUL16(W3, row[1]);
``` |

124 | 412ba501 | Fabrice Bellard | ```
MAC16(b1, -W7, row[3]);
``` |

125 | edf7c2b2 | Måns Rullgård | ```
b2 = MUL16(W5, row[1]);
``` |

126 | 412ba501 | Fabrice Bellard | ```
MAC16(b2, -W1, row[3]);
``` |

127 | edf7c2b2 | Måns Rullgård | ```
b3 = MUL16(W7, row[1]);
``` |

128 | 412ba501 | Fabrice Bellard | ```
MAC16(b3, -W5, row[3]);
``` |

129 | |||

130 | b250f9c6 | Aurelien Jacobs | ```
#if HAVE_FAST_64BIT
``` |

131 | 412ba501 | Fabrice Bellard | ```
temp = ((uint64_t*)row)[1];
``` |

132 | ```
#else
``` |
||

133 | temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3]; |
||

134 | ```
#endif
``` |
||

135 | bb270c08 | Diego Biurrun | if (temp != 0) { |

136 | 412ba501 | Fabrice Bellard | a0 += W4*row[4] + W6*row[6]; |

137 | a1 += - W4*row[4] - W2*row[6]; |
||

138 | a2 += - W4*row[4] + W2*row[6]; |
||

139 | a3 += W4*row[4] - W6*row[6]; |
||

140 | |||

141 | ```
MAC16(b0, W5, row[5]);
``` |
||

142 | ```
MAC16(b0, W7, row[7]);
``` |
||

143 | 115329f1 | Diego Biurrun | |

144 | 412ba501 | Fabrice Bellard | ```
MAC16(b1, -W1, row[5]);
``` |

145 | ```
MAC16(b1, -W5, row[7]);
``` |
||

146 | 115329f1 | Diego Biurrun | |

147 | 412ba501 | Fabrice Bellard | ```
MAC16(b2, W7, row[5]);
``` |

148 | ```
MAC16(b2, W3, row[7]);
``` |
||

149 | 115329f1 | Diego Biurrun | |

150 | 412ba501 | Fabrice Bellard | ```
MAC16(b3, W3, row[5]);
``` |

151 | ```
MAC16(b3, -W1, row[7]);
``` |
||

152 | bb270c08 | Diego Biurrun | } |

153 | |||

154 | ```
row[0] = (a0 + b0) >> ROW_SHIFT;
``` |
||

155 | ```
row[7] = (a0 - b0) >> ROW_SHIFT;
``` |
||

156 | ```
row[1] = (a1 + b1) >> ROW_SHIFT;
``` |
||

157 | ```
row[6] = (a1 - b1) >> ROW_SHIFT;
``` |
||

158 | ```
row[2] = (a2 + b2) >> ROW_SHIFT;
``` |
||

159 | ```
row[5] = (a2 - b2) >> ROW_SHIFT;
``` |
||

160 | ```
row[3] = (a3 + b3) >> ROW_SHIFT;
``` |
||

161 | ```
row[4] = (a3 - b3) >> ROW_SHIFT;
``` |
||

162 | 4973971e | Michael Niedermayer | } |

163 | |||

164 | 115329f1 | Diego Biurrun | static inline void idctSparseColPut (uint8_t *dest, int line_size, |

165 | 0e15384d | Michael Niedermayer | DCTELEM * col) |

166 | d36a2466 | Fabrice Bellard | { |

167 | bb270c08 | Diego Biurrun | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |

168 | 55fde95e | Måns Rullgård | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |

169 | d36a2466 | Fabrice Bellard | |

170 | ```
/* XXX: I did that only to give same values as previous code */
``` |
||

171 | bb270c08 | Diego Biurrun | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |

172 | a1 = a0; |
||

173 | a2 = a0; |
||

174 | a3 = a0; |
||

175 | d36a2466 | Fabrice Bellard | |

176 | a0 += + W2*col[8*2]; |
||

177 | a1 += + W6*col[8*2]; |
||

178 | a2 += - W6*col[8*2]; |
||

179 | a3 += - W2*col[8*2]; |
||

180 | |||

181 | edf7c2b2 | Måns Rullgård | b0 = MUL16(W1, col[8*1]); |

182 | b1 = MUL16(W3, col[8*1]); |
||

183 | b2 = MUL16(W5, col[8*1]); |
||

184 | b3 = MUL16(W7, col[8*1]); |
||

185 | d36a2466 | Fabrice Bellard | |

186 | MAC16(b0, + W3, col[8*3]); |
||

187 | MAC16(b1, - W7, col[8*3]); |
||

188 | MAC16(b2, - W1, col[8*3]); |
||

189 | MAC16(b3, - W5, col[8*3]); |
||

190 | |||

191 | bb270c08 | Diego Biurrun | if(col[8*4]){ |

192 | d36a2466 | Fabrice Bellard | a0 += + W4*col[8*4]; |

193 | a1 += - W4*col[8*4]; |
||

194 | a2 += - W4*col[8*4]; |
||

195 | a3 += + W4*col[8*4]; |
||

196 | bb270c08 | Diego Biurrun | } |

197 | d36a2466 | Fabrice Bellard | |

198 | bb270c08 | Diego Biurrun | if (col[8*5]) { |

199 | d36a2466 | Fabrice Bellard | MAC16(b0, + W5, col[8*5]); |

200 | MAC16(b1, - W1, col[8*5]); |
||

201 | MAC16(b2, + W7, col[8*5]); |
||

202 | MAC16(b3, + W3, col[8*5]); |
||

203 | bb270c08 | Diego Biurrun | } |

204 | d36a2466 | Fabrice Bellard | |

205 | bb270c08 | Diego Biurrun | if(col[8*6]){ |

206 | d36a2466 | Fabrice Bellard | a0 += + W6*col[8*6]; |

207 | a1 += - W2*col[8*6]; |
||

208 | a2 += + W2*col[8*6]; |
||

209 | a3 += - W6*col[8*6]; |
||

210 | bb270c08 | Diego Biurrun | } |

211 | d36a2466 | Fabrice Bellard | |

212 | bb270c08 | Diego Biurrun | if (col[8*7]) { |

213 | d36a2466 | Fabrice Bellard | MAC16(b0, + W7, col[8*7]); |

214 | MAC16(b1, - W5, col[8*7]); |
||

215 | MAC16(b2, + W3, col[8*7]); |
||

216 | MAC16(b3, - W1, col[8*7]); |
||

217 | bb270c08 | Diego Biurrun | } |

218 | d36a2466 | Fabrice Bellard | |

219 | ```
dest[0] = cm[(a0 + b0) >> COL_SHIFT];
``` |
||

220 | dest += line_size; |
||

221 | ```
dest[0] = cm[(a1 + b1) >> COL_SHIFT];
``` |
||

222 | dest += line_size; |
||

223 | ```
dest[0] = cm[(a2 + b2) >> COL_SHIFT];
``` |
||

224 | dest += line_size; |
||

225 | ```
dest[0] = cm[(a3 + b3) >> COL_SHIFT];
``` |
||

226 | dest += line_size; |
||

227 | ```
dest[0] = cm[(a3 - b3) >> COL_SHIFT];
``` |
||

228 | dest += line_size; |
||

229 | ```
dest[0] = cm[(a2 - b2) >> COL_SHIFT];
``` |
||

230 | dest += line_size; |
||

231 | ```
dest[0] = cm[(a1 - b1) >> COL_SHIFT];
``` |
||

232 | dest += line_size; |
||

233 | ```
dest[0] = cm[(a0 - b0) >> COL_SHIFT];
``` |
||

234 | } |
||

235 | |||

236 | 115329f1 | Diego Biurrun | static inline void idctSparseColAdd (uint8_t *dest, int line_size, |

237 | 0e15384d | Michael Niedermayer | DCTELEM * col) |

238 | 37e8dcda | Arpi | { |

239 | bb270c08 | Diego Biurrun | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |

240 | 55fde95e | Måns Rullgård | uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |

241 | 37e8dcda | Arpi | |

242 | 412ba501 | Fabrice Bellard | ```
/* XXX: I did that only to give same values as previous code */
``` |

243 | bb270c08 | Diego Biurrun | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |

244 | a1 = a0; |
||

245 | a2 = a0; |
||

246 | a3 = a0; |
||

247 | 37e8dcda | Arpi | |

248 | 412ba501 | Fabrice Bellard | a0 += + W2*col[8*2]; |

249 | a1 += + W6*col[8*2]; |
||

250 | a2 += - W6*col[8*2]; |
||

251 | a3 += - W2*col[8*2]; |
||

252 | 4973971e | Michael Niedermayer | |

253 | edf7c2b2 | Måns Rullgård | b0 = MUL16(W1, col[8*1]); |

254 | b1 = MUL16(W3, col[8*1]); |
||

255 | b2 = MUL16(W5, col[8*1]); |
||

256 | b3 = MUL16(W7, col[8*1]); |
||

257 | 37e8dcda | Arpi | |

258 | 412ba501 | Fabrice Bellard | MAC16(b0, + W3, col[8*3]); |

259 | MAC16(b1, - W7, col[8*3]); |
||

260 | MAC16(b2, - W1, col[8*3]); |
||

261 | MAC16(b3, - W5, col[8*3]); |
||

262 | 4973971e | Michael Niedermayer | |

263 | bb270c08 | Diego Biurrun | if(col[8*4]){ |

264 | 412ba501 | Fabrice Bellard | a0 += + W4*col[8*4]; |

265 | a1 += - W4*col[8*4]; |
||

266 | a2 += - W4*col[8*4]; |
||

267 | a3 += + W4*col[8*4]; |
||

268 | bb270c08 | Diego Biurrun | } |

269 | 4973971e | Michael Niedermayer | |

270 | bb270c08 | Diego Biurrun | if (col[8*5]) { |

271 | 412ba501 | Fabrice Bellard | MAC16(b0, + W5, col[8*5]); |

272 | MAC16(b1, - W1, col[8*5]); |
||

273 | MAC16(b2, + W7, col[8*5]); |
||

274 | MAC16(b3, + W3, col[8*5]); |
||

275 | bb270c08 | Diego Biurrun | } |

276 | 4973971e | Michael Niedermayer | |

277 | bb270c08 | Diego Biurrun | if(col[8*6]){ |

278 | 412ba501 | Fabrice Bellard | a0 += + W6*col[8*6]; |

279 | a1 += - W2*col[8*6]; |
||

280 | a2 += + W2*col[8*6]; |
||

281 | a3 += - W6*col[8*6]; |
||

282 | bb270c08 | Diego Biurrun | } |

283 | 412ba501 | Fabrice Bellard | |

284 | bb270c08 | Diego Biurrun | if (col[8*7]) { |

285 | 412ba501 | Fabrice Bellard | MAC16(b0, + W7, col[8*7]); |

286 | MAC16(b1, - W5, col[8*7]); |
||

287 | MAC16(b2, + W3, col[8*7]); |
||

288 | MAC16(b3, - W1, col[8*7]); |
||

289 | bb270c08 | Diego Biurrun | } |

290 | 412ba501 | Fabrice Bellard | |

291 | d36a2466 | Fabrice Bellard | dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; |

292 | dest += line_size; |
||

293 | dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; |
||

294 | dest += line_size; |
||

295 | dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; |
||

296 | dest += line_size; |
||

297 | dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; |
||

298 | dest += line_size; |
||

299 | dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; |
||

300 | dest += line_size; |
||

301 | dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; |
||

302 | dest += line_size; |
||

303 | dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
||

304 | dest += line_size; |
||

305 | dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
||

306 | 37e8dcda | Arpi | } |

307 | |||

308 | 0e15384d | Michael Niedermayer | static inline void idctSparseCol (DCTELEM * col) |

309 | 86748dbc | Michael Niedermayer | { |

310 | bb270c08 | Diego Biurrun | ```
int a0, a1, a2, a3, b0, b1, b2, b3;
``` |

311 | 86748dbc | Michael Niedermayer | |

312 | ```
/* XXX: I did that only to give same values as previous code */
``` |
||

313 | bb270c08 | Diego Biurrun | a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |

314 | a1 = a0; |
||

315 | a2 = a0; |
||

316 | a3 = a0; |
||

317 | 86748dbc | Michael Niedermayer | |

318 | a0 += + W2*col[8*2]; |
||

319 | a1 += + W6*col[8*2]; |
||

320 | a2 += - W6*col[8*2]; |
||

321 | a3 += - W2*col[8*2]; |
||

322 | |||

323 | edf7c2b2 | Måns Rullgård | b0 = MUL16(W1, col[8*1]); |

324 | b1 = MUL16(W3, col[8*1]); |
||

325 | b2 = MUL16(W5, col[8*1]); |
||

326 | b3 = MUL16(W7, col[8*1]); |
||

327 | 86748dbc | Michael Niedermayer | |

328 | MAC16(b0, + W3, col[8*3]); |
||

329 | MAC16(b1, - W7, col[8*3]); |
||

330 | MAC16(b2, - W1, col[8*3]); |
||

331 | MAC16(b3, - W5, col[8*3]); |
||

332 | |||

333 | bb270c08 | Diego Biurrun | if(col[8*4]){ |

334 | 86748dbc | Michael Niedermayer | a0 += + W4*col[8*4]; |

335 | a1 += - W4*col[8*4]; |
||

336 | a2 += - W4*col[8*4]; |
||

337 | a3 += + W4*col[8*4]; |
||

338 | bb270c08 | Diego Biurrun | } |

339 | 86748dbc | Michael Niedermayer | |

340 | bb270c08 | Diego Biurrun | if (col[8*5]) { |

341 | 86748dbc | Michael Niedermayer | MAC16(b0, + W5, col[8*5]); |

342 | MAC16(b1, - W1, col[8*5]); |
||

343 | MAC16(b2, + W7, col[8*5]); |
||

344 | MAC16(b3, + W3, col[8*5]); |
||

345 | bb270c08 | Diego Biurrun | } |

346 | 86748dbc | Michael Niedermayer | |

347 | bb270c08 | Diego Biurrun | if(col[8*6]){ |

348 | 86748dbc | Michael Niedermayer | a0 += + W6*col[8*6]; |

349 | a1 += - W2*col[8*6]; |
||

350 | a2 += + W2*col[8*6]; |
||

351 | a3 += - W6*col[8*6]; |
||

352 | bb270c08 | Diego Biurrun | } |

353 | 86748dbc | Michael Niedermayer | |

354 | bb270c08 | Diego Biurrun | if (col[8*7]) { |

355 | 86748dbc | Michael Niedermayer | MAC16(b0, + W7, col[8*7]); |

356 | MAC16(b1, - W5, col[8*7]); |
||

357 | MAC16(b2, + W3, col[8*7]); |
||

358 | MAC16(b3, - W1, col[8*7]); |
||

359 | bb270c08 | Diego Biurrun | } |

360 | 86748dbc | Michael Niedermayer | |

361 | ```
col[0 ] = ((a0 + b0) >> COL_SHIFT);
``` |
||

362 | ```
col[8 ] = ((a1 + b1) >> COL_SHIFT);
``` |
||

363 | ```
col[16] = ((a2 + b2) >> COL_SHIFT);
``` |
||

364 | ```
col[24] = ((a3 + b3) >> COL_SHIFT);
``` |
||

365 | ```
col[32] = ((a3 - b3) >> COL_SHIFT);
``` |
||

366 | ```
col[40] = ((a2 - b2) >> COL_SHIFT);
``` |
||

367 | ```
col[48] = ((a1 - b1) >> COL_SHIFT);
``` |
||

368 | ```
col[56] = ((a0 - b0) >> COL_SHIFT);
``` |
||

369 | } |
||

370 | |||

371 | 59e6f60a | Aurelien Jacobs | void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |

372 | d36a2466 | Fabrice Bellard | { |

373 | ```
int i;
``` |
||

374 | for(i=0; i<8; i++) |
||

375 | ```
idctRowCondDC(block + i*8);
``` |
||

376 | 115329f1 | Diego Biurrun | |

377 | d36a2466 | Fabrice Bellard | for(i=0; i<8; i++) |

378 | idctSparseColPut(dest + i, line_size, block + i); |
||

379 | } |
||

380 | |||

381 | 59e6f60a | Aurelien Jacobs | void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |

382 | 412ba501 | Fabrice Bellard | { |

383 | ```
int i;
``` |
||

384 | for(i=0; i<8; i++) |
||

385 | ```
idctRowCondDC(block + i*8);
``` |
||

386 | 115329f1 | Diego Biurrun | |

387 | 412ba501 | Fabrice Bellard | for(i=0; i<8; i++) |

388 | d36a2466 | Fabrice Bellard | idctSparseColAdd(dest + i, line_size, block + i); |

389 | 4973971e | Michael Niedermayer | } |

390 | cd4af68a | Zdenek Kabelac | |

391 | 59e6f60a | Aurelien Jacobs | ```
void ff_simple_idct(DCTELEM *block)
``` |

392 | 86748dbc | Michael Niedermayer | { |

393 | ```
int i;
``` |
||

394 | for(i=0; i<8; i++) |
||

395 | ```
idctRowCondDC(block + i*8);
``` |
||

396 | 115329f1 | Diego Biurrun | |

397 | 86748dbc | Michael Niedermayer | for(i=0; i<8; i++) |

398 | idctSparseCol(block + i); |
||

399 | } |
||

400 | |||

401 | 9bf71516 | Fabrice Bellard | ```
/* 2x4x8 idct */
``` |

402 | |||

403 | #define CN_SHIFT 12 |
||

404 | #define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) |
||

405 | 652f0197 | Fabrice Bellard | #define C1 C_FIX(0.6532814824) |

406 | #define C2 C_FIX(0.2705980501) |
||

407 | 9bf71516 | Fabrice Bellard | |

408 | 652f0197 | Fabrice Bellard | ```
/* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized,
``` |

409 | ```
and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
``` |
||

410 | #define C_SHIFT (4+1+12) |
||

411 | 9bf71516 | Fabrice Bellard | |

412 | fda767bb | Kostya Shishkov | static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col) |

413 | 9bf71516 | Fabrice Bellard | { |

414 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

415 | 55fde95e | Måns Rullgård | ```
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
``` |

416 | 9bf71516 | Fabrice Bellard | |

417 | a0 = col[8*0]; |
||

418 | a1 = col[8*2]; |
||

419 | a2 = col[8*4]; |
||

420 | a3 = col[8*6]; |
||

421 | 652f0197 | Fabrice Bellard | c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |

422 | c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
||

423 | 9bf71516 | Fabrice Bellard | c1 = a1 * C1 + a3 * C2; |

424 | c3 = a1 * C2 - a3 * C1; |
||

425 | ```
dest[0] = cm[(c0 + c1) >> C_SHIFT];
``` |
||

426 | dest += line_size; |
||

427 | ```
dest[0] = cm[(c2 + c3) >> C_SHIFT];
``` |
||

428 | dest += line_size; |
||

429 | ```
dest[0] = cm[(c2 - c3) >> C_SHIFT];
``` |
||

430 | dest += line_size; |
||

431 | ```
dest[0] = cm[(c0 - c1) >> C_SHIFT];
``` |
||

432 | } |
||

433 | |||

434 | ```
#define BF(k) \
``` |
||

435 | {\ |
||

436 | ```
int a0, a1;\
``` |
||

437 | a0 = ptr[k];\ |
||

438 | ```
a1 = ptr[8 + k];\
``` |
||

439 | ptr[k] = a0 + a1;\ |
||

440 | ```
ptr[8 + k] = a0 - a1;\
``` |
||

441 | } |
||

442 | |||

443 | ```
/* only used by DV codec. The input must be interlaced. 128 is added
``` |
||

444 | ```
to the pixels before clamping to avoid systematic error
``` |
||

445 | ```
(1024*sqrt(2)) offset would be needed otherwise. */
``` |
||

446 | ```
/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
``` |
||

447 | ```
compensate the extra butterfly stage - I don't have the full DV
``` |
||

448 | ```
specification */
``` |
||

449 | 59e6f60a | Aurelien Jacobs | void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |

450 | 9bf71516 | Fabrice Bellard | { |

451 | ```
int i;
``` |
||

452 | 0e15384d | Michael Niedermayer | DCTELEM *ptr; |

453 | 115329f1 | Diego Biurrun | |

454 | 9bf71516 | Fabrice Bellard | ```
/* butterfly */
``` |

455 | ptr = block; |
||

456 | for(i=0;i<4;i++) { |
||

457 | ```
BF(0);
``` |
||

458 | ```
BF(1);
``` |
||

459 | ```
BF(2);
``` |
||

460 | ```
BF(3);
``` |
||

461 | ```
BF(4);
``` |
||

462 | ```
BF(5);
``` |
||

463 | ```
BF(6);
``` |
||

464 | ```
BF(7);
``` |
||

465 | ptr += 2 * 8; |
||

466 | } |
||

467 | |||

468 | ```
/* IDCT8 on each line */
``` |
||

469 | for(i=0; i<8; i++) { |
||

470 | ```
idctRowCondDC(block + i*8);
``` |
||

471 | } |
||

472 | |||

473 | ```
/* IDCT4 and store */
``` |
||

474 | for(i=0;i<8;i++) { |
||

475 | fda767bb | Kostya Shishkov | ```
idct4col_put(dest + i, 2 * line_size, block + i);
``` |

476 | idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i); |
||

477 | 9bf71516 | Fabrice Bellard | } |

478 | } |
||

479 | 1457ab52 | Michael Niedermayer | |

480 | ```
/* 8x4 & 4x8 WMV2 IDCT */
``` |
||

481 | ```
#undef CN_SHIFT
``` |
||

482 | ```
#undef C_SHIFT
``` |
||

483 | ```
#undef C_FIX
``` |
||

484 | ```
#undef C1
``` |
||

485 | ```
#undef C2
``` |
||

486 | #define CN_SHIFT 12 |
||

487 | #define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) |
||

488 | #define C1 C_FIX(0.6532814824) |
||

489 | #define C2 C_FIX(0.2705980501) |
||

490 | #define C3 C_FIX(0.5) |
||

491 | #define C_SHIFT (4+1+12) |
||

492 | 0c1a9eda | Zdenek Kabelac | static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |

493 | 1457ab52 | Michael Niedermayer | { |

494 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

495 | 55fde95e | Måns Rullgård | ```
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
``` |

496 | 1457ab52 | Michael Niedermayer | |

497 | a0 = col[8*0]; |
||

498 | a1 = col[8*1]; |
||

499 | a2 = col[8*2]; |
||

500 | a3 = col[8*3]; |
||

501 | c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); |
||

502 | c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); |
||

503 | c1 = a1 * C1 + a3 * C2; |
||

504 | c3 = a1 * C2 - a3 * C1; |
||

505 | dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; |
||

506 | dest += line_size; |
||

507 | dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; |
||

508 | dest += line_size; |
||

509 | dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; |
||

510 | dest += line_size; |
||

511 | dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; |
||

512 | } |
||

513 | |||

514 | #define RN_SHIFT 15 |
||

515 | #define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) |
||

516 | #define R1 R_FIX(0.6532814824) |
||

517 | #define R2 R_FIX(0.2705980501) |
||

518 | #define R3 R_FIX(0.5) |
||

519 | #define R_SHIFT 11 |
||

520 | 0e15384d | Michael Niedermayer | static inline void idct4row(DCTELEM *row) |

521 | 1457ab52 | Michael Niedermayer | { |

522 | ```
int c0, c1, c2, c3, a0, a1, a2, a3;
``` |
||

523 | 55fde95e | Måns Rullgård | ```
//const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
``` |

524 | 1457ab52 | Michael Niedermayer | |

525 | ```
a0 = row[0];
``` |
||

526 | ```
a1 = row[1];
``` |
||

527 | ```
a2 = row[2];
``` |
||

528 | ```
a3 = row[3];
``` |
||

529 | c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); |
||

530 | c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); |
||

531 | c1 = a1 * R1 + a3 * R2; |
||

532 | c3 = a1 * R2 - a3 * R1; |
||

533 | ```
row[0]= (c0 + c1) >> R_SHIFT;
``` |
||

534 | ```
row[1]= (c2 + c3) >> R_SHIFT;
``` |
||

535 | ```
row[2]= (c2 - c3) >> R_SHIFT;
``` |
||

536 | ```
row[3]= (c0 - c1) >> R_SHIFT;
``` |
||

537 | } |
||

538 | |||

539 | 59e6f60a | Aurelien Jacobs | void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block) |

540 | 1457ab52 | Michael Niedermayer | { |

541 | ```
int i;
``` |
||

542 | |||

543 | ```
/* IDCT8 on each line */
``` |
||

544 | for(i=0; i<4; i++) { |
||

545 | ```
idctRowCondDC(block + i*8);
``` |
||

546 | } |
||

547 | |||

548 | ```
/* IDCT4 and store */
``` |
||

549 | for(i=0;i<8;i++) { |
||

550 | idct4col_add(dest + i, line_size, block + i); |
||

551 | } |
||

552 | } |
||

553 | |||

554 | 59e6f60a | Aurelien Jacobs | void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block) |

555 | 1457ab52 | Michael Niedermayer | { |

556 | ```
int i;
``` |
||

557 | |||

558 | ```
/* IDCT4 on each line */
``` |
||

559 | for(i=0; i<8; i++) { |
||

560 | ```
idct4row(block + i*8);
``` |
||

561 | } |
||

562 | |||

563 | ```
/* IDCT8 and store */
``` |
||

564 | for(i=0; i<4; i++){ |
||

565 | idctSparseColAdd(dest + i, line_size, block + i); |
||

566 | } |
||

567 | } |
||

568 | |||

569 | 91823a6e | Kostya Shishkov | void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block) |

570 | { |
||

571 | ```
int i;
``` |
||

572 | |||

573 | ```
/* IDCT4 on each line */
``` |
||

574 | for(i=0; i<4; i++) { |
||

575 | ```
idct4row(block + i*8);
``` |
||

576 | } |
||

577 | |||

578 | ```
/* IDCT4 and store */
``` |
||

579 | for(i=0; i<4; i++){ |
||

580 | idct4col_add(dest + i, line_size, block + i); |
||

581 | } |
||

582 | } |