## ffmpeg / libavcodec / imgresample.c @ 11e29a41

History | View | Annotate | Download (22.4 KB)

1 | de6d9b64 | Fabrice Bellard | ```
/*
``` |
---|---|---|---|

2 | ```
* High quality image resampling with polyphase filters
``` |
||

3 | ff4ec49e | Fabrice Bellard | ```
* Copyright (c) 2001 Fabrice Bellard.
``` |

4 | de6d9b64 | Fabrice Bellard | ```
*
``` |

5 | ff4ec49e | Fabrice Bellard | ```
* This library is free software; you can redistribute it and/or
``` |

6 | ```
* modify it under the terms of the GNU Lesser General Public
``` |
||

7 | ```
* License as published by the Free Software Foundation; either
``` |
||

8 | ```
* version 2 of the License, or (at your option) any later version.
``` |
||

9 | de6d9b64 | Fabrice Bellard | ```
*
``` |

10 | ff4ec49e | Fabrice Bellard | ```
* This library is distributed in the hope that it will be useful,
``` |

11 | de6d9b64 | Fabrice Bellard | ```
* but WITHOUT ANY WARRANTY; without even the implied warranty of
``` |

12 | ff4ec49e | Fabrice Bellard | ```
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
``` |

13 | ```
* Lesser General Public License for more details.
``` |
||

14 | de6d9b64 | Fabrice Bellard | ```
*
``` |

15 | ff4ec49e | Fabrice Bellard | ```
* You should have received a copy of the GNU Lesser General Public
``` |

16 | ```
* License along with this library; if not, write to the Free Software
``` |
||

17 | ```
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
``` |
||

18 | de6d9b64 | Fabrice Bellard | ```
*/
``` |

19 | 983e3246 | Michael Niedermayer | |

20 | ```
/**
``` |
||

21 | ```
* @file imgresample.c
``` |
||

22 | ```
* High quality image resampling with polyphase filters .
``` |
||

23 | ```
*/
``` |
||

24 | |||

25 | de6d9b64 | Fabrice Bellard | #include "avcodec.h" |

26 | 6000abfa | Fabrice Bellard | #include "dsputil.h" |

27 | de6d9b64 | Fabrice Bellard | |

28 | 54329dd5 | Nick Kurshev | ```
#ifdef USE_FASTMEMCPY
``` |

29 | #include "fastmemcpy.h" |
||

30 | ```
#endif
``` |
||

31 | |||

32 | de6d9b64 | Fabrice Bellard | #define NB_COMPONENTS 3 |

33 | |||

34 | #define PHASE_BITS 4 |
||

35 | #define NB_PHASES (1 << PHASE_BITS) |
||

36 | #define NB_TAPS 4 |
||

37 | #define FCENTER 1 /* index of the center of the filter */ |
||

38 | ab6d194a | Michael Niedermayer | ```
//#define TEST 1 /* Test it */
``` |

39 | de6d9b64 | Fabrice Bellard | |

40 | #define POS_FRAC_BITS 16 |
||

41 | #define POS_FRAC (1 << POS_FRAC_BITS) |
||

42 | ```
/* 6 bits precision is needed for MMX */
``` |
||

43 | #define FILTER_BITS 8 |
||

44 | |||

45 | #define LINE_BUF_HEIGHT (NB_TAPS * 4) |
||

46 | |||

47 | ```
struct ImgReSampleContext {
``` |
||

48 | ab6d194a | Michael Niedermayer | ```
int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
``` |

49 | de6d9b64 | Fabrice Bellard | ```
int h_incr, v_incr;
``` |

50 | 0c1a9eda | Zdenek Kabelac | ```
int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
``` |

51 | ```
int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
``` |
||

52 | uint8_t *line_buf; |
||

53 | de6d9b64 | Fabrice Bellard | }; |

54 | |||

55 | static inline int get_phase(int pos) |
||

56 | { |
||

57 | return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); |
||

58 | } |
||

59 | |||

60 | ```
/* This function must be optimized */
``` |
||

61 | da64ecc3 | Drew Hess | static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src, |

62 | int src_width, int src_start, int src_incr, |
||

63 | int16_t *filters) |
||

64 | de6d9b64 | Fabrice Bellard | { |

65 | ```
int src_pos, phase, sum, i;
``` |
||

66 | da64ecc3 | Drew Hess | ```
const uint8_t *s;
``` |

67 | 0c1a9eda | Zdenek Kabelac | int16_t *filter; |

68 | de6d9b64 | Fabrice Bellard | |

69 | src_pos = src_start; |
||

70 | for(i=0;i<dst_width;i++) { |
||

71 | ```
#ifdef TEST
``` |
||

72 | ```
/* test */
``` |
||

73 | if ((src_pos >> POS_FRAC_BITS) < 0 || |
||

74 | (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) |
||

75 | 02ac3136 | Philip Gladstone | av_abort(); |

76 | de6d9b64 | Fabrice Bellard | ```
#endif
``` |

77 | s = src + (src_pos >> POS_FRAC_BITS); |
||

78 | phase = get_phase(src_pos); |
||

79 | filter = filters + phase * NB_TAPS; |
||

80 | #if NB_TAPS == 4 |
||

81 | sum = s[0] * filter[0] + |
||

82 | s[1] * filter[1] + |
||

83 | s[2] * filter[2] + |
||

84 | s[3] * filter[3]; |
||

85 | ```
#else
``` |
||

86 | { |
||

87 | ```
int j;
``` |
||

88 | ```
sum = 0;
``` |
||

89 | for(j=0;j<NB_TAPS;j++) |
||

90 | sum += s[j] * filter[j]; |
||

91 | } |
||

92 | ```
#endif
``` |
||

93 | sum = sum >> FILTER_BITS; |
||

94 | if (sum < 0) |
||

95 | ```
sum = 0;
``` |
||

96 | else if (sum > 255) |
||

97 | ```
sum = 255;
``` |
||

98 | ```
dst[0] = sum;
``` |
||

99 | src_pos += src_incr; |
||

100 | dst++; |
||

101 | } |
||

102 | } |
||

103 | |||

104 | ```
/* This function must be optimized */
``` |
||

105 | da64ecc3 | Drew Hess | static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src, |

106 | ```
int wrap, int16_t *filter)
``` |
||

107 | de6d9b64 | Fabrice Bellard | { |

108 | ```
int sum, i;
``` |
||

109 | da64ecc3 | Drew Hess | ```
const uint8_t *s;
``` |

110 | de6d9b64 | Fabrice Bellard | |

111 | s = src; |
||

112 | for(i=0;i<dst_width;i++) { |
||

113 | #if NB_TAPS == 4 |
||

114 | sum = s[0 * wrap] * filter[0] + |
||

115 | s[1 * wrap] * filter[1] + |
||

116 | s[2 * wrap] * filter[2] + |
||

117 | s[3 * wrap] * filter[3]; |
||

118 | ```
#else
``` |
||

119 | { |
||

120 | ```
int j;
``` |
||

121 | 0c1a9eda | Zdenek Kabelac | uint8_t *s1 = s; |

122 | de6d9b64 | Fabrice Bellard | |

123 | ```
sum = 0;
``` |
||

124 | for(j=0;j<NB_TAPS;j++) { |
||

125 | ```
sum += s1[0] * filter[j];
``` |
||

126 | s1 += wrap; |
||

127 | } |
||

128 | } |
||

129 | ```
#endif
``` |
||

130 | sum = sum >> FILTER_BITS; |
||

131 | if (sum < 0) |
||

132 | ```
sum = 0;
``` |
||

133 | else if (sum > 255) |
||

134 | ```
sum = 255;
``` |
||

135 | ```
dst[0] = sum;
``` |
||

136 | dst++; |
||

137 | s++; |
||

138 | } |
||

139 | } |
||

140 | |||

141 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

142 | de6d9b64 | Fabrice Bellard | |

143 | #include "i386/mmx.h" |
||

144 | |||

145 | ```
#define FILTER4(reg) \
``` |
||

146 | {\ |
||

147 | s = src + (src_pos >> POS_FRAC_BITS);\ |
||

148 | phase = get_phase(src_pos);\ |
||

149 | filter = filters + phase * NB_TAPS;\ |
||

150 | movq_m2r(*s, reg);\ |
||

151 | punpcklbw_r2r(mm7, reg);\ |
||

152 | movq_m2r(*filter, mm6);\ |
||

153 | pmaddwd_r2r(reg, mm6);\ |
||

154 | movq_r2r(mm6, reg);\ |
||

155 | ```
psrlq_i2r(32, reg);\
``` |
||

156 | paddd_r2r(mm6, reg);\ |
||

157 | psrad_i2r(FILTER_BITS, reg);\ |
||

158 | src_pos += src_incr;\ |
||

159 | } |
||

160 | |||

161 | #define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq); |
||

162 | |||

163 | ```
/* XXX: do four pixels at a time */
``` |
||

164 | da64ecc3 | Drew Hess | static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |

165 | const uint8_t *src, int src_width, |
||

166 | 0c1a9eda | Zdenek Kabelac | int src_start, int src_incr, int16_t *filters) |

167 | de6d9b64 | Fabrice Bellard | { |

168 | ```
int src_pos, phase;
``` |
||

169 | da64ecc3 | Drew Hess | ```
const uint8_t *s;
``` |

170 | 0c1a9eda | Zdenek Kabelac | int16_t *filter; |

171 | de6d9b64 | Fabrice Bellard | mmx_t tmp; |

172 | |||

173 | src_pos = src_start; |
||

174 | pxor_r2r(mm7, mm7); |
||

175 | |||

176 | while (dst_width >= 4) { |
||

177 | |||

178 | FILTER4(mm0); |
||

179 | FILTER4(mm1); |
||

180 | FILTER4(mm2); |
||

181 | FILTER4(mm3); |
||

182 | |||

183 | packuswb_r2r(mm7, mm0); |
||

184 | packuswb_r2r(mm7, mm1); |
||

185 | packuswb_r2r(mm7, mm3); |
||

186 | packuswb_r2r(mm7, mm2); |
||

187 | movq_r2m(mm0, tmp); |
||

188 | dst[0] = tmp.ub[0]; |
||

189 | movq_r2m(mm1, tmp); |
||

190 | dst[1] = tmp.ub[0]; |
||

191 | movq_r2m(mm2, tmp); |
||

192 | dst[2] = tmp.ub[0]; |
||

193 | movq_r2m(mm3, tmp); |
||

194 | dst[3] = tmp.ub[0]; |
||

195 | ```
dst += 4;
``` |
||

196 | ```
dst_width -= 4;
``` |
||

197 | } |
||

198 | while (dst_width > 0) { |
||

199 | FILTER4(mm0); |
||

200 | packuswb_r2r(mm7, mm0); |
||

201 | movq_r2m(mm0, tmp); |
||

202 | dst[0] = tmp.ub[0]; |
||

203 | dst++; |
||

204 | dst_width--; |
||

205 | } |
||

206 | emms(); |
||

207 | } |
||

208 | |||

209 | da64ecc3 | Drew Hess | static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, |

210 | ```
int wrap, int16_t *filter)
``` |
||

211 | de6d9b64 | Fabrice Bellard | { |

212 | ```
int sum, i, v;
``` |
||

213 | da64ecc3 | Drew Hess | ```
const uint8_t *s;
``` |

214 | de6d9b64 | Fabrice Bellard | mmx_t tmp; |

215 | ```
mmx_t coefs[4];
``` |
||

216 | |||

217 | for(i=0;i<4;i++) { |
||

218 | v = filter[i]; |
||

219 | ```
coefs[i].uw[0] = v;
``` |
||

220 | ```
coefs[i].uw[1] = v;
``` |
||

221 | ```
coefs[i].uw[2] = v;
``` |
||

222 | ```
coefs[i].uw[3] = v;
``` |
||

223 | } |
||

224 | |||

225 | pxor_r2r(mm7, mm7); |
||

226 | s = src; |
||

227 | while (dst_width >= 4) { |
||

228 | ```
movq_m2r(s[0 * wrap], mm0);
``` |
||

229 | punpcklbw_r2r(mm7, mm0); |
||

230 | ```
movq_m2r(s[1 * wrap], mm1);
``` |
||

231 | punpcklbw_r2r(mm7, mm1); |
||

232 | ```
movq_m2r(s[2 * wrap], mm2);
``` |
||

233 | punpcklbw_r2r(mm7, mm2); |
||

234 | ```
movq_m2r(s[3 * wrap], mm3);
``` |
||

235 | punpcklbw_r2r(mm7, mm3); |
||

236 | |||

237 | ```
pmullw_m2r(coefs[0], mm0);
``` |
||

238 | ```
pmullw_m2r(coefs[1], mm1);
``` |
||

239 | ```
pmullw_m2r(coefs[2], mm2);
``` |
||

240 | ```
pmullw_m2r(coefs[3], mm3);
``` |
||

241 | |||

242 | paddw_r2r(mm1, mm0); |
||

243 | paddw_r2r(mm3, mm2); |
||

244 | paddw_r2r(mm2, mm0); |
||

245 | psraw_i2r(FILTER_BITS, mm0); |
||

246 | |||

247 | packuswb_r2r(mm7, mm0); |
||

248 | movq_r2m(mm0, tmp); |
||

249 | |||

250 | 0c1a9eda | Zdenek Kabelac | ```
*(uint32_t *)dst = tmp.ud[0];
``` |

251 | de6d9b64 | Fabrice Bellard | ```
dst += 4;
``` |

252 | ```
s += 4;
``` |
||

253 | ```
dst_width -= 4;
``` |
||

254 | } |
||

255 | while (dst_width > 0) { |
||

256 | sum = s[0 * wrap] * filter[0] + |
||

257 | s[1 * wrap] * filter[1] + |
||

258 | s[2 * wrap] * filter[2] + |
||

259 | s[3 * wrap] * filter[3]; |
||

260 | sum = sum >> FILTER_BITS; |
||

261 | if (sum < 0) |
||

262 | ```
sum = 0;
``` |
||

263 | else if (sum > 255) |
||

264 | ```
sum = 255;
``` |
||

265 | ```
dst[0] = sum;
``` |
||

266 | dst++; |
||

267 | s++; |
||

268 | dst_width--; |
||

269 | } |
||

270 | emms(); |
||

271 | } |
||

272 | ```
#endif
``` |
||

273 | |||

274 | 404d2241 | Brian Foley | ```
#ifdef HAVE_ALTIVEC
``` |

275 | typedef union { |
||

276 | vector unsigned char v; |
||

277 | unsigned char c[16]; |
||

278 | } vec_uc_t; |
||

279 | |||

280 | typedef union { |
||

281 | vector signed short v; |
||

282 | signed short s[8]; |
||

283 | } vec_ss_t; |
||

284 | |||

285 | da64ecc3 | Drew Hess | void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src, |

286 | ```
int wrap, int16_t *filter)
``` |
||

287 | 404d2241 | Brian Foley | { |

288 | ```
int sum, i;
``` |
||

289 | da64ecc3 | Drew Hess | ```
const uint8_t *s;
``` |

290 | 404d2241 | Brian Foley | vector unsigned char *tv, tmp, dstv, zero; |

291 | vec_ss_t srchv[4], srclv[4], fv[4]; |
||

292 | vector signed short zeros, sumhv, sumlv; |
||

293 | s = src; |
||

294 | |||

295 | for(i=0;i<4;i++) |
||

296 | { |
||

297 | ```
/*
``` |
||

298 | ```
The vec_madds later on does an implicit >>15 on the result.
``` |
||

299 | ```
Since FILTER_BITS is 8, and we have 15 bits of magnitude in
``` |
||

300 | ```
a signed short, we have just enough bits to pre-shift our
``` |
||

301 | ```
filter constants <<7 to compensate for vec_madds.
``` |
||

302 | ```
*/
``` |
||

303 | fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
||

304 | ```
fv[i].v = vec_splat(fv[i].v, 0);
``` |
||

305 | } |
||

306 | |||

307 | ```
zero = vec_splat_u8(0);
``` |
||

308 | ```
zeros = vec_splat_s16(0);
``` |
||

309 | |||

310 | |||

311 | ```
/*
``` |
||

312 | ```
When we're resampling, we'd ideally like both our input buffers,
``` |
||

313 | ```
and output buffers to be 16-byte aligned, so we can do both aligned
``` |
||

314 | ```
reads and writes. Sadly we can't always have this at the moment, so
``` |
||

315 | ```
we opt for aligned writes, as unaligned writes have a huge overhead.
``` |
||

316 | ```
To do this, do enough scalar resamples to get dst 16-byte aligned.
``` |
||

317 | ```
*/
``` |
||

318 | 9e4e1659 | Philip Gladstone | i = (-(int)dst) & 0xf; |

319 | 404d2241 | Brian Foley | while(i>0) { |

320 | sum = s[0 * wrap] * filter[0] + |
||

321 | s[1 * wrap] * filter[1] + |
||

322 | s[2 * wrap] * filter[2] + |
||

323 | s[3 * wrap] * filter[3]; |
||

324 | sum = sum >> FILTER_BITS; |
||

325 | if (sum<0) sum = 0; else if (sum>255) sum=255; |
||

326 | ```
dst[0] = sum;
``` |
||

327 | dst++; |
||

328 | s++; |
||

329 | dst_width--; |
||

330 | i--; |
||

331 | } |
||

332 | |||

333 | ```
/* Do our altivec resampling on 16 pixels at once. */
``` |
||

334 | while(dst_width>=16) { |
||

335 | ```
/*
``` |
||

336 | ```
Read 16 (potentially unaligned) bytes from each of
``` |
||

337 | ```
4 lines into 4 vectors, and split them into shorts.
``` |
||

338 | ```
Interleave the multipy/accumulate for the resample
``` |
||

339 | ```
filter with the loads to hide the 3 cycle latency
``` |
||

340 | ```
the vec_madds have.
``` |
||

341 | ```
*/
``` |
||

342 | tv = (vector unsigned char *) &s[0 * wrap]; |
||

343 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
||

344 | srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
||

345 | srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
||

346 | sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
||

347 | sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
||

348 | |||

349 | tv = (vector unsigned char *) &s[1 * wrap]; |
||

350 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
||

351 | srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
||

352 | srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
||

353 | sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
||

354 | sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
||

355 | |||

356 | tv = (vector unsigned char *) &s[2 * wrap]; |
||

357 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
||

358 | srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
||

359 | srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
||

360 | sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
||

361 | sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
||

362 | |||

363 | tv = (vector unsigned char *) &s[3 * wrap]; |
||

364 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
||

365 | srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
||

366 | srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
||

367 | sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
||

368 | sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
||

369 | |||

370 | ```
/*
``` |
||

371 | ```
Pack the results into our destination vector,
``` |
||

372 | ```
and do an aligned write of that back to memory.
``` |
||

373 | ```
*/
``` |
||

374 | dstv = vec_packsu(sumhv, sumlv) ; |
||

375 | vec_st(dstv, 0, (vector unsigned char *) dst); |
||

376 | |||

377 | ```
dst+=16;
``` |
||

378 | ```
s+=16;
``` |
||

379 | ```
dst_width-=16;
``` |
||

380 | } |
||

381 | |||

382 | ```
/*
``` |
||

383 | ```
If there are any leftover pixels, resample them
``` |
||

384 | ```
with the slow scalar method.
``` |
||

385 | ```
*/
``` |
||

386 | while(dst_width>0) { |
||

387 | sum = s[0 * wrap] * filter[0] + |
||

388 | s[1 * wrap] * filter[1] + |
||

389 | s[2 * wrap] * filter[2] + |
||

390 | s[3 * wrap] * filter[3]; |
||

391 | sum = sum >> FILTER_BITS; |
||

392 | if (sum<0) sum = 0; else if (sum>255) sum=255; |
||

393 | ```
dst[0] = sum;
``` |
||

394 | dst++; |
||

395 | s++; |
||

396 | dst_width--; |
||

397 | } |
||

398 | } |
||

399 | ```
#endif
``` |
||

400 | |||

401 | de6d9b64 | Fabrice Bellard | ```
/* slow version to handle limit cases. Does not need optimisation */
``` |

402 | da64ecc3 | Drew Hess | static void h_resample_slow(uint8_t *dst, int dst_width, |

403 | const uint8_t *src, int src_width, |
||

404 | 0c1a9eda | Zdenek Kabelac | int src_start, int src_incr, int16_t *filters) |

405 | de6d9b64 | Fabrice Bellard | { |

406 | ```
int src_pos, phase, sum, j, v, i;
``` |
||

407 | da64ecc3 | Drew Hess | ```
const uint8_t *s, *src_end;
``` |

408 | 0c1a9eda | Zdenek Kabelac | int16_t *filter; |

409 | de6d9b64 | Fabrice Bellard | |

410 | src_end = src + src_width; |
||

411 | src_pos = src_start; |
||

412 | for(i=0;i<dst_width;i++) { |
||

413 | s = src + (src_pos >> POS_FRAC_BITS); |
||

414 | phase = get_phase(src_pos); |
||

415 | filter = filters + phase * NB_TAPS; |
||

416 | ```
sum = 0;
``` |
||

417 | for(j=0;j<NB_TAPS;j++) { |
||

418 | ```
if (s < src)
``` |
||

419 | ```
v = src[0];
``` |
||

420 | else if (s >= src_end) |
||

421 | ```
v = src_end[-1];
``` |
||

422 | ```
else
``` |
||

423 | ```
v = s[0];
``` |
||

424 | sum += v * filter[j]; |
||

425 | s++; |
||

426 | } |
||

427 | sum = sum >> FILTER_BITS; |
||

428 | if (sum < 0) |
||

429 | ```
sum = 0;
``` |
||

430 | else if (sum > 255) |
||

431 | ```
sum = 255;
``` |
||

432 | ```
dst[0] = sum;
``` |
||

433 | src_pos += src_incr; |
||

434 | dst++; |
||

435 | } |
||

436 | } |
||

437 | |||

438 | da64ecc3 | Drew Hess | static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src, |

439 | int src_width, int src_start, int src_incr, |
||

440 | int16_t *filters) |
||

441 | de6d9b64 | Fabrice Bellard | { |

442 | ```
int n, src_end;
``` |
||

443 | |||

444 | if (src_start < 0) { |
||

445 | n = (0 - src_start + src_incr - 1) / src_incr; |
||

446 | h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); |
||

447 | dst += n; |
||

448 | dst_width -= n; |
||

449 | src_start += n * src_incr; |
||

450 | } |
||

451 | src_end = src_start + dst_width * src_incr; |
||

452 | ```
if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
``` |
||

453 | n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / |
||

454 | src_incr; |
||

455 | ```
} else {
``` |
||

456 | n = dst_width; |
||

457 | } |
||

458 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

459 | de6d9b64 | Fabrice Bellard | if ((mm_flags & MM_MMX) && NB_TAPS == 4) |

460 | h_resample_fast4_mmx(dst, n, |
||

461 | src, src_width, src_start, src_incr, filters); |
||

462 | ```
else
``` |
||

463 | ```
#endif
``` |
||

464 | h_resample_fast(dst, n, |
||

465 | src, src_width, src_start, src_incr, filters); |
||

466 | ```
if (n < dst_width) {
``` |
||

467 | dst += n; |
||

468 | dst_width -= n; |
||

469 | src_start += n * src_incr; |
||

470 | h_resample_slow(dst, dst_width, |
||

471 | src, src_width, src_start, src_incr, filters); |
||

472 | } |
||

473 | } |
||

474 | |||

475 | static void component_resample(ImgReSampleContext *s, |
||

476 | 0c1a9eda | Zdenek Kabelac | uint8_t *output, int owrap, int owidth, int oheight, |

477 | uint8_t *input, int iwrap, int iwidth, int iheight) |
||

478 | de6d9b64 | Fabrice Bellard | { |

479 | ```
int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
``` |
||

480 | 0c1a9eda | Zdenek Kabelac | uint8_t *new_line, *src_line; |

481 | de6d9b64 | Fabrice Bellard | |

482 | ```
last_src_y = - FCENTER - 1;
``` |
||

483 | ```
/* position of the bottom of the filter in the source image */
``` |
||

484 | src_y = (last_src_y + NB_TAPS) * POS_FRAC; |
||

485 | ```
ring_y = NB_TAPS; /* position in ring buffer */
``` |
||

486 | for(y=0;y<oheight;y++) { |
||

487 | ```
/* apply horizontal filter on new lines from input if needed */
``` |
||

488 | src_y1 = src_y >> POS_FRAC_BITS; |
||

489 | ```
while (last_src_y < src_y1) {
``` |
||

490 | ```
if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
``` |
||

491 | ring_y = NB_TAPS; |
||

492 | last_src_y++; |
||

493 | ab6d194a | Michael Niedermayer | ```
/* handle limit conditions : replicate line (slightly
``` |

494 | ```
inefficient because we filter multiple times) */
``` |
||

495 | de6d9b64 | Fabrice Bellard | y1 = last_src_y; |

496 | if (y1 < 0) { |
||

497 | ```
y1 = 0;
``` |
||

498 | } else if (y1 >= iheight) { |
||

499 | ```
y1 = iheight - 1;
``` |
||

500 | } |
||

501 | src_line = input + y1 * iwrap; |
||

502 | new_line = s->line_buf + ring_y * owidth; |
||

503 | ```
/* apply filter and handle limit cases correctly */
``` |
||

504 | h_resample(new_line, owidth, |
||

505 | src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, |
||

506 | &s->h_filters[0][0]); |
||

507 | ```
/* handle ring buffer wraping */
``` |
||

508 | ```
if (ring_y >= LINE_BUF_HEIGHT) {
``` |
||

509 | memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, |
||

510 | new_line, owidth); |
||

511 | } |
||

512 | } |
||

513 | ```
/* apply vertical filter */
``` |
||

514 | phase_y = get_phase(src_y); |
||

515 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

516 | de6d9b64 | Fabrice Bellard | ```
/* desactivated MMX because loss of precision */
``` |

517 | if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) |
||

518 | v_resample4_mmx(output, owidth, |
||

519 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

520 | ```
&s->v_filters[phase_y][0]);
``` |
||

521 | 404d2241 | Brian Foley | ```
else
``` |

522 | ```
#endif
``` |
||

523 | ```
#ifdef HAVE_ALTIVEC
``` |
||

524 | 00a7d8d6 | Dieter | if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6) |

525 | 404d2241 | Brian Foley | v_resample16_altivec(output, owidth, |

526 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

527 | ```
&s->v_filters[phase_y][0]);
``` |
||

528 | de6d9b64 | Fabrice Bellard | ```
else
``` |

529 | ```
#endif
``` |
||

530 | v_resample(output, owidth, |
||

531 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

532 | ```
&s->v_filters[phase_y][0]);
``` |
||

533 | |||

534 | src_y += s->v_incr; |
||

535 | output += owrap; |
||

536 | } |
||

537 | } |
||

538 | |||

539 | ```
/* XXX: the following filter is quite naive, but it seems to suffice
``` |
||

540 | ```
for 4 taps */
``` |
||

541 | 0c1a9eda | Zdenek Kabelac | static void build_filter(int16_t *filter, float factor) |

542 | de6d9b64 | Fabrice Bellard | { |

543 | ```
int ph, i, v;
``` |
||

544 | ```
float x, y, tab[NB_TAPS], norm, mult;
``` |
||

545 | |||

546 | ```
/* if upsampling, only need to interpolate, no filter */
``` |
||

547 | if (factor > 1.0) |
||

548 | factor = 1.0; |
||

549 | |||

550 | for(ph=0;ph<NB_PHASES;ph++) { |
||

551 | ```
norm = 0;
``` |
||

552 | for(i=0;i<NB_TAPS;i++) { |
||

553 | |||

554 | x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; |
||

555 | if (x == 0) |
||

556 | y = 1.0; |
||

557 | ```
else
``` |
||

558 | y = sin(x) / x; |
||

559 | tab[i] = y; |
||

560 | norm += y; |
||

561 | } |
||

562 | |||

563 | ```
/* normalize so that an uniform color remains the same */
``` |
||

564 | mult = (float)(1 << FILTER_BITS) / norm; |
||

565 | for(i=0;i<NB_TAPS;i++) { |
||

566 | ```
v = (int)(tab[i] * mult);
``` |
||

567 | filter[ph * NB_TAPS + i] = v; |
||

568 | } |
||

569 | } |
||

570 | } |
||

571 | |||

572 | ImgReSampleContext *img_resample_init(int owidth, int oheight, |
||

573 | int iwidth, int iheight) |
||

574 | { |
||

575 | ab6d194a | Michael Niedermayer | return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0); |

576 | } |
||

577 | |||

578 | ImgReSampleContext *img_resample_full_init(int owidth, int oheight, |
||

579 | int iwidth, int iheight, |
||

580 | int topBand, int bottomBand, |
||

581 | int leftBand, int rightBand) |
||

582 | { |
||

583 | de6d9b64 | Fabrice Bellard | ImgReSampleContext *s; |

584 | |||

585 | ```
s = av_mallocz(sizeof(ImgReSampleContext));
``` |
||

586 | ```
if (!s)
``` |
||

587 | return NULL; |
||

588 | s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); |
||

589 | ```
if (!s->line_buf)
``` |
||

590 | ```
goto fail;
``` |
||

591 | |||

592 | s->owidth = owidth; |
||

593 | s->oheight = oheight; |
||

594 | s->iwidth = iwidth; |
||

595 | s->iheight = iheight; |
||

596 | ab6d194a | Michael Niedermayer | s->topBand = topBand; |

597 | s->bottomBand = bottomBand; |
||

598 | s->leftBand = leftBand; |
||

599 | s->rightBand = rightBand; |
||

600 | de6d9b64 | Fabrice Bellard | |

601 | ab6d194a | Michael Niedermayer | s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth; |

602 | s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight; |
||

603 | de6d9b64 | Fabrice Bellard | |

604 | ab6d194a | Michael Niedermayer | build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand)); |

605 | build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand)); |
||

606 | de6d9b64 | Fabrice Bellard | |

607 | ```
return s;
``` |
||

608 | ```
fail:
``` |
||

609 | 6000abfa | Fabrice Bellard | av_free(s); |

610 | de6d9b64 | Fabrice Bellard | return NULL; |

611 | } |
||

612 | |||

613 | ```
void img_resample(ImgReSampleContext *s,
``` |
||

614 | da64ecc3 | Drew Hess | ```
AVPicture *output, const AVPicture *input)
``` |

615 | de6d9b64 | Fabrice Bellard | { |

616 | ```
int i, shift;
``` |
||

617 | |||

618 | for(i=0;i<3;i++) { |
||

619 | shift = (i == 0) ? 0 : 1; |
||

620 | component_resample(s, output->data[i], output->linesize[i], |
||

621 | s->owidth >> shift, s->oheight >> shift, |
||

622 | ab6d194a | Michael Niedermayer | input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift), |

623 | input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift), |
||

624 | (s->iheight - s->topBand - s->bottomBand) >> shift); |
||

625 | de6d9b64 | Fabrice Bellard | } |

626 | } |
||

627 | |||

628 | ```
void img_resample_close(ImgReSampleContext *s)
``` |
||

629 | { |
||

630 | 6000abfa | Fabrice Bellard | av_free(s->line_buf); |

631 | av_free(s); |
||

632 | de6d9b64 | Fabrice Bellard | } |

633 | |||

634 | ```
#ifdef TEST
``` |
||

635 | |||

636 | void *av_mallocz(int size) |
||

637 | { |
||

638 | ```
void *ptr;
``` |
||

639 | ptr = malloc(size); |
||

640 | ```
memset(ptr, 0, size);
``` |
||

641 | ```
return ptr;
``` |
||

642 | } |
||

643 | |||

644 | ab6d194a | Michael Niedermayer | void av_free(void *ptr) |

645 | { |
||

646 | ```
/* XXX: this test should not be needed on most libcs */
``` |
||

647 | ```
if (ptr)
``` |
||

648 | free(ptr); |
||

649 | } |
||

650 | |||

651 | de6d9b64 | Fabrice Bellard | ```
/* input */
``` |

652 | #define XSIZE 256 |
||

653 | #define YSIZE 256 |
||

654 | 0c1a9eda | Zdenek Kabelac | uint8_t img[XSIZE * YSIZE]; |

655 | de6d9b64 | Fabrice Bellard | |

656 | ```
/* output */
``` |
||

657 | #define XSIZE1 512 |
||

658 | #define YSIZE1 512 |
||

659 | 0c1a9eda | Zdenek Kabelac | uint8_t img1[XSIZE1 * YSIZE1]; |

660 | uint8_t img2[XSIZE1 * YSIZE1]; |
||

661 | de6d9b64 | Fabrice Bellard | |

662 | 0c1a9eda | Zdenek Kabelac | void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize) |

663 | de6d9b64 | Fabrice Bellard | { |

664 | FILE *f; |
||

665 | ```
f=fopen(filename,"w");
``` |
||

666 | fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255); |
||

667 | ```
fwrite(img,1, xsize * ysize,f);
``` |
||

668 | fclose(f); |
||

669 | } |
||

670 | |||

671 | 0c1a9eda | Zdenek Kabelac | static void dump_filter(int16_t *filter) |

672 | de6d9b64 | Fabrice Bellard | { |

673 | ```
int i, ph;
``` |
||

674 | |||

675 | for(ph=0;ph<NB_PHASES;ph++) { |
||

676 | ```
printf("%2d: ", ph);
``` |
||

677 | for(i=0;i<NB_TAPS;i++) { |
||

678 | printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); |
||

679 | } |
||

680 | ```
printf("\n");
``` |
||

681 | } |
||

682 | } |
||

683 | |||

684 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

685 | 6acce86b | Michael Niedermayer | ```
int mm_flags;
``` |

686 | de6d9b64 | Fabrice Bellard | ```
#endif
``` |

687 | |||

688 | int main(int argc, char **argv) |
||

689 | { |
||

690 | ```
int x, y, v, i, xsize, ysize;
``` |
||

691 | ImgReSampleContext *s; |
||

692 | float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; |
||

693 | char buf[256]; |
||

694 | |||

695 | ```
/* build test image */
``` |
||

696 | for(y=0;y<YSIZE;y++) { |
||

697 | for(x=0;x<XSIZE;x++) { |
||

698 | if (x < XSIZE/2 && y < YSIZE/2) { |
||

699 | if (x < XSIZE/4 && y < YSIZE/4) { |
||

700 | if ((x % 10) <= 6 && |
||

701 | (y % 10) <= 6) |
||

702 | ```
v = 0xff;
``` |
||

703 | ```
else
``` |
||

704 | ```
v = 0x00;
``` |
||

705 | } else if (x < XSIZE/4) { |
||

706 | if (x & 1) |
||

707 | ```
v = 0xff;
``` |
||

708 | ```
else
``` |
||

709 | ```
v = 0;
``` |
||

710 | } else if (y < XSIZE/4) { |
||

711 | if (y & 1) |
||

712 | ```
v = 0xff;
``` |
||

713 | ```
else
``` |
||

714 | ```
v = 0;
``` |
||

715 | ```
} else {
``` |
||

716 | if (y < YSIZE*3/8) { |
||

717 | if ((y+x) & 1) |
||

718 | ```
v = 0xff;
``` |
||

719 | ```
else
``` |
||

720 | ```
v = 0;
``` |
||

721 | ```
} else {
``` |
||

722 | if (((x+3) % 4) <= 1 && |
||

723 | ((y+3) % 4) <= 1) |
||

724 | ```
v = 0xff;
``` |
||

725 | ```
else
``` |
||

726 | ```
v = 0x00;
``` |
||

727 | } |
||

728 | } |
||

729 | } else if (x < XSIZE/2) { |
||

730 | v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); |
||

731 | } else if (y < XSIZE/2) { |
||

732 | v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); |
||

733 | ```
} else {
``` |
||

734 | ```
v = ((x + y - XSIZE) * 255) / XSIZE;
``` |
||

735 | } |
||

736 | ab6d194a | Michael Niedermayer | img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v; |

737 | de6d9b64 | Fabrice Bellard | } |

738 | } |
||

739 | ```
save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
``` |
||

740 | for(i=0;i<sizeof(factors)/sizeof(float);i++) { |
||

741 | fact = factors[i]; |
||

742 | ```
xsize = (int)(XSIZE * fact);
``` |
||

743 | ab6d194a | Michael Niedermayer | ysize = (int)((YSIZE - 100) * fact); |

744 | 6acce86b | Michael Niedermayer | s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0); |

745 | de6d9b64 | Fabrice Bellard | ```
printf("Factor=%0.2f\n", fact);
``` |

746 | dump_filter(&s->h_filters[0][0]); |
||

747 | component_resample(s, img1, xsize, xsize, ysize, |
||

748 | ab6d194a | Michael Niedermayer | img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); |

749 | de6d9b64 | Fabrice Bellard | img_resample_close(s); |

750 | |||

751 | ```
sprintf(buf, "/tmp/out%d.pgm", i);
``` |
||

752 | save_pgm(buf, img1, xsize, ysize); |
||

753 | } |
||

754 | |||

755 | ```
/* mmx test */
``` |
||

756 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

757 | de6d9b64 | Fabrice Bellard | ```
printf("MMX test\n");
``` |

758 | fact = 0.72; |
||

759 | ```
xsize = (int)(XSIZE * fact);
``` |
||

760 | ```
ysize = (int)(YSIZE * fact);
``` |
||

761 | mm_flags = MM_MMX; |
||

762 | s = img_resample_init(xsize, ysize, XSIZE, YSIZE); |
||

763 | component_resample(s, img1, xsize, xsize, ysize, |
||

764 | img, XSIZE, XSIZE, YSIZE); |
||

765 | |||

766 | ```
mm_flags = 0;
``` |
||

767 | s = img_resample_init(xsize, ysize, XSIZE, YSIZE); |
||

768 | component_resample(s, img2, xsize, xsize, ysize, |
||

769 | img, XSIZE, XSIZE, YSIZE); |
||

770 | if (memcmp(img1, img2, xsize * ysize) != 0) { |
||

771 | ```
fprintf(stderr, "mmx error\n");
``` |
||

772 | ```
exit(1);
``` |
||

773 | } |
||

774 | ```
printf("MMX OK\n");
``` |
||

775 | ```
#endif
``` |
||

776 | return 0; |
||

777 | } |
||

778 | |||

779 | `#endif` |