/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include "avcodec.h"


20 | 6000abfa | Fabrice Bellard | #include "dsputil.h" |



#ifdef USE_FASTMEMCPY
#ifdef USE_FASTMEMCPY
``` |

#include "fastmemcpy.h"


#endif
#endif
``` |


/* CPU feature flags tested below against MM_MMX / MM_ALTIVEC. */
extern int mm_flags;

#define NB_COMPONENTS 3

/* Filter bank geometry: NB_PHASES sub-pixel phases of NB_TAPS taps each. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */

//#define TEST 1 /* Test it */

/* Source positions are fixed point with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)

/* 6 bits precision is needed for MMX */
#define FILTER_BITS 8

/* Number of filtered lines kept in the ring buffer (plus NB_TAPS guard lines). */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)




42 | ```
struct ImgReSampleContext {



43 | ab6d194a | Michael Niedermayer | ```
int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
``` |

int h_incr, v_incr;
int h_incr, v_incr;
``` |

45 | ```
INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */



46 | ```
INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */



UINT8 *line_buf;


};




50 | static inline int get_phase(int pos) |


51 | { |


52 | return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1); |


53 | } |




55 | ```
/* This function must be optimized */



56 | static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width, |


57 | int src_start, int src_incr, INT16 *filters) |


58 | { |


59 | ```
int src_pos, phase, sum, i;



60 | UINT8 *s; |


61 | INT16 *filter; |




63 | src_pos = src_start; |


64 | for(i=0;i<dst_width;i++) { |


#ifdef TEST
#ifdef TEST



/* test */
/* test */



67 | if ((src_pos >> POS_FRAC_BITS) < 0 || |


68 | (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS)) |


69 | 02ac3136 | Philip Gladstone | av_abort(); |

#endif
#endif
``` |

71 | s = src + (src_pos >> POS_FRAC_BITS); |


72 | phase = get_phase(src_pos); |


73 | filter = filters + phase * NB_TAPS; |


74 | #if NB_TAPS == 4 |


75 | sum = s[0] * filter[0] + |


76 | s[1] * filter[1] + |


77 | s[2] * filter[2] + |


78 | s[3] * filter[3]; |


#else
#else



80 | { |


int j;
int j;



sum = 0;
sum = 0;



83 | for(j=0;j<NB_TAPS;j++) |


84 | sum += s[j] * filter[j]; |


85 | } |


#endif
#endif



87 | sum = sum >> FILTER_BITS; |


88 | if (sum < 0) |


sum = 0;
sum = 0;



90 | else if (sum > 255) |


sum = 255;
sum = 255;



92 | ```
dst[0] = sum;



93 | src_pos += src_incr; |


94 | dst++; |


95 | } |


96 | } |




98 | ```
/* This function must be optimized */



99 | static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap, |


100 | INT16 *filter) |


101 | { |


int sum, i;
int sum, i;



103 | UINT8 *s; |




105 | s = src; |


106 | for(i=0;i<dst_width;i++) { |


107 | #if NB_TAPS == 4 |


108 | sum = s[0 * wrap] * filter[0] + |


109 | s[1 * wrap] * filter[1] + |


110 | s[2 * wrap] * filter[2] + |


111 | s[3 * wrap] * filter[3]; |


#else
#else



113 | { |


int j;
int j;



115 | UINT8 *s1 = s; |




sum = 0;
sum = 0;



118 | for(j=0;j<NB_TAPS;j++) { |


119 | ```
sum += s1[0] * filter[j];



120 | s1 += wrap; |


121 | } |


122 | } |


#endif
#endif



124 | sum = sum >> FILTER_BITS; |


125 | if (sum < 0) |


sum = 0;
sum = 0;



127 | else if (sum > 255) |


sum = 255;
sum = 255;



dst[0] = sum;
dst[0] = sum;



130 | dst++; |


131 | s++; |


132 | } |


133 | } |




#ifdef HAVE_MMX
#ifdef HAVE_MMX
``` |



137 | #include "i386/mmx.h" |




/*
 * Filter one output pixel and leave the result in the low 32 bits of reg.
 *
 * Must be expanded where src, src_pos, src_incr and filters are visible,
 * together with the scratch variables s, phase and filter. mm7 must be
 * zero (it is the unpack source); mm6 is clobbered. src_pos is advanced
 * by src_incr.
 *
 * Steps: load the source bytes and widen the low four to 16 bits, multiply
 * pairwise with the four coefficients (pmaddwd yields two 32-bit partial
 * sums), add the two halves together and drop the FILTER_BITS scale.
 */
#define FILTER4(reg) \
{\
    s = src + (src_pos >> POS_FRAC_BITS);\
    phase = get_phase(src_pos);\
    filter = filters + phase * NB_TAPS;\
    movq_m2r(*s, reg);\
    punpcklbw_r2r(mm7, reg);\
    movq_m2r(*filter, mm6);\
    pmaddwd_r2r(reg, mm6);\
    movq_r2r(mm6, reg);\
    psrlq_i2r(32, reg);\
    paddd_r2r(mm6, reg);\
    psrad_i2r(FILTER_BITS, reg);\
    src_pos += src_incr;\
}

/*
 * Debug helper: print the 64-bit content of an MMX register in hex.
 * Needs an mmx_t named tmp in scope. Wrapped in braces so that both
 * statements stay together under an if/else, and printed with the
 * standard "ll" length modifier ("L" is only defined for floating-point
 * conversions).
 */
#define DUMP(reg) \
{\
    movq_r2m(reg, tmp);\
    printf(#reg "=%016llx\n", (unsigned long long)tmp.uq);\
}




157 | ```
/* XXX: do four pixels at a time */



158 | static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width, |


159 | int src_start, int src_incr, INT16 *filters) |


160 | { |


161 | ```
int src_pos, phase;



162 | UINT8 *s; |


163 | INT16 *filter; |


164 | mmx_t tmp; |




166 | src_pos = src_start; |


167 | pxor_r2r(mm7, mm7); |




169 | while (dst_width >= 4) { |




171 | FILTER4(mm0); |


172 | FILTER4(mm1); |


173 | FILTER4(mm2); |


174 | FILTER4(mm3); |




176 | packuswb_r2r(mm7, mm0); |


177 | packuswb_r2r(mm7, mm1); |


178 | packuswb_r2r(mm7, mm3); |


179 | packuswb_r2r(mm7, mm2); |


180 | movq_r2m(mm0, tmp); |


181 | dst[0] = tmp.ub[0]; |


182 | movq_r2m(mm1, tmp); |


183 | dst[1] = tmp.ub[0]; |


184 | movq_r2m(mm2, tmp); |


185 | dst[2] = tmp.ub[0]; |


186 | movq_r2m(mm3, tmp); |


187 | dst[3] = tmp.ub[0]; |


dst += 4;
dst += 4;



189 | ```
dst_width -= 4;



190 | } |


191 | while (dst_width > 0) { |


192 | FILTER4(mm0); |


193 | packuswb_r2r(mm7, mm0); |


194 | movq_r2m(mm0, tmp); |


195 | dst[0] = tmp.ub[0]; |


196 | dst++; |


197 | dst_width--; |


198 | } |


199 | emms(); |


200 | } |




202 | static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap, |


203 | INT16 *filter) |


204 | { |


205 | ```
int sum, i, v;



206 | UINT8 *s; |


207 | mmx_t tmp; |


208 | ```
mmx_t coefs[4];





210 | for(i=0;i<4;i++) { |


211 | v = filter[i]; |


212 | ```
coefs[i].uw[0] = v;



213 | ```
coefs[i].uw[1] = v;



214 | ```
coefs[i].uw[2] = v;



215 | ```
coefs[i].uw[3] = v;



216 | } |




218 | pxor_r2r(mm7, mm7); |


219 | s = src; |


220 | while (dst_width >= 4) { |


221 | ```
movq_m2r(s[0 * wrap], mm0);



222 | punpcklbw_r2r(mm7, mm0); |


223 | ```
movq_m2r(s[1 * wrap], mm1);



224 | punpcklbw_r2r(mm7, mm1); |


225 | ```
movq_m2r(s[2 * wrap], mm2);



226 | punpcklbw_r2r(mm7, mm2); |


227 | ```
movq_m2r(s[3 * wrap], mm3);



228 | punpcklbw_r2r(mm7, mm3); |




230 | ```
pmullw_m2r(coefs[0], mm0);



231 | ```
pmullw_m2r(coefs[1], mm1);



232 | ```
pmullw_m2r(coefs[2], mm2);



233 | ```
pmullw_m2r(coefs[3], mm3);

||

234 | |||

235 | paddw_r2r(mm1, mm0); |
||

236 | paddw_r2r(mm3, mm2); |
||

237 | paddw_r2r(mm2, mm0); |
||

238 | psraw_i2r(FILTER_BITS, mm0); |
||

239 | |||

240 | packuswb_r2r(mm7, mm0); |
||

241 | movq_r2m(mm0, tmp); |
||

242 | |||

243 | ```
*(UINT32 *)dst = tmp.ud[0];
``` |
||

244 | ```
dst += 4;
``` |
||

245 | ```
s += 4;
``` |
||

246 | ```
dst_width -= 4;
``` |
||

247 | } |
||

248 | while (dst_width > 0) { |
||

249 | sum = s[0 * wrap] * filter[0] + |
||

250 | s[1 * wrap] * filter[1] + |
||

251 | s[2 * wrap] * filter[2] + |
||

252 | s[3 * wrap] * filter[3]; |
||

253 | sum = sum >> FILTER_BITS; |
||

254 | if (sum < 0) |
||

255 | ```
sum = 0;
``` |
||

256 | else if (sum > 255) |
||

257 | ```
sum = 255;
``` |
||

258 | ```
dst[0] = sum;
``` |
||

259 | dst++; |
||

260 | s++; |
||

261 | dst_width--; |
||

262 | } |
||

263 | emms(); |
||

264 | } |
||

265 | ```
#endif
``` |
||

266 | |||

267 | 404d2241 | Brian Foley | ```
#ifdef HAVE_ALTIVEC
``` |

268 | typedef union { |
||

269 | vector unsigned char v; |
||

270 | unsigned char c[16]; |
||

271 | } vec_uc_t; |
||

272 | |||

273 | typedef union { |
||

274 | vector signed short v; |
||

275 | signed short s[8]; |
||

276 | } vec_ss_t; |
||

277 | |||

278 | void v_resample16_altivec(UINT8 *dst, int dst_width, UINT8 *src, int wrap, |
||

279 | INT16 *filter) |
||

280 | { |
||

281 | ```
int sum, i;
``` |
||

282 | uint8_t *s; |
||

283 | vector unsigned char *tv, tmp, dstv, zero; |
||

284 | vec_ss_t srchv[4], srclv[4], fv[4]; |
||

285 | vector signed short zeros, sumhv, sumlv; |
||

286 | s = src; |
||

287 | |||

288 | for(i=0;i<4;i++) |
||

289 | { |
||

290 | ```
/*
``` |
||

291 | ```
The vec_madds later on does an implicit >>15 on the result.
``` |
||

292 | ```
Since FILTER_BITS is 8, and we have 15 bits of magnitude in
``` |
||

293 | ```
a signed short, we have just enough bits to pre-shift our
``` |
||

294 | ```
filter constants <<7 to compensate for vec_madds.
``` |
||

295 | ```
*/
``` |
||

296 | fv[i].s[0] = filter[i] << (15-FILTER_BITS); |
||

297 | ```
fv[i].v = vec_splat(fv[i].v, 0);
``` |
||

298 | } |
||

299 | |||

300 | ```
zero = vec_splat_u8(0);
``` |
||

301 | ```
zeros = vec_splat_s16(0);
``` |
||

302 | |||

303 | |||

304 | ```
/*
``` |
||

305 | ```
When we're resampling, we'd ideally like both our input buffers,
``` |
||

306 | ```
and output buffers to be 16-byte aligned, so we can do both aligned
``` |
||

307 | ```
reads and writes. Sadly we can't always have this at the moment, so
``` |
||

308 | ```
we opt for aligned writes, as unaligned writes have a huge overhead.
``` |
||

309 | ```
To do this, do enough scalar resamples to get dst 16-byte aligned.
``` |
||

310 | ```
*/
``` |
||

311 | 9e4e1659 | Philip Gladstone | i = (-(int)dst) & 0xf; |

312 | 404d2241 | Brian Foley | while(i>0) { |

313 | sum = s[0 * wrap] * filter[0] + |
||

314 | s[1 * wrap] * filter[1] + |
||

315 | s[2 * wrap] * filter[2] + |
||

316 | s[3 * wrap] * filter[3]; |
||

317 | sum = sum >> FILTER_BITS; |
||

318 | if (sum<0) sum = 0; else if (sum>255) sum=255; |
||

319 | ```
dst[0] = sum;
``` |
||

320 | dst++; |
||

321 | s++; |
||

322 | dst_width--; |
||

323 | i--; |
||

324 | } |
||

325 | |||

326 | ```
/* Do our altivec resampling on 16 pixels at once. */
``` |
||

327 | while(dst_width>=16) { |
||

328 | ```
/*
``` |
||

329 | ```
Read 16 (potentially unaligned) bytes from each of
``` |
||

330 | ```
4 lines into 4 vectors, and split them into shorts.
``` |
||

331 | ```
Interleave the multipy/accumulate for the resample
``` |
||

332 | ```
filter with the loads to hide the 3 cycle latency
``` |
||

333 | ```
the vec_madds have.
``` |
||

334 | ```
*/
``` |
||

335 | tv = (vector unsigned char *) &s[0 * wrap]; |
||

336 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); |
||

337 | srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); |
||

338 | srclv[0].v = (vector signed short) vec_mergel(zero, tmp); |
||

339 | sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); |
||

340 | sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); |
||

341 | |||

342 | tv = (vector unsigned char *) &s[1 * wrap]; |
||

343 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); |
||

344 | srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); |
||

345 | srclv[1].v = (vector signed short) vec_mergel(zero, tmp); |
||

346 | sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); |
||

347 | sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); |
||

348 | |||

349 | tv = (vector unsigned char *) &s[2 * wrap]; |
||

350 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); |
||

351 | srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); |
||

352 | srclv[2].v = (vector signed short) vec_mergel(zero, tmp); |
||

353 | sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); |
||

354 | sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); |
||

355 | |||

356 | tv = (vector unsigned char *) &s[3 * wrap]; |
||

357 | tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); |
||

358 | srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); |
||

359 | srclv[3].v = (vector signed short) vec_mergel(zero, tmp); |
||

360 | sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); |
||

361 | sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); |
||

362 | |||

363 | ```
/*
``` |
||

364 | ```
Pack the results into our destination vector,
``` |
||

365 | ```
and do an aligned write of that back to memory.
``` |
||

366 | ```
*/
``` |
||

367 | dstv = vec_packsu(sumhv, sumlv) ; |
||

368 | vec_st(dstv, 0, (vector unsigned char *) dst); |
||

369 | |||

370 | ```
dst+=16;
``` |
||

371 | ```
s+=16;
``` |
||

372 | ```
dst_width-=16;
``` |
||

373 | } |
||

374 | |||

375 | ```
/*
``` |
||

376 | ```
If there are any leftover pixels, resample them
``` |
||

377 | ```
with the slow scalar method.
``` |
||

378 | ```
*/
``` |
||

379 | while(dst_width>0) { |
||

380 | sum = s[0 * wrap] * filter[0] + |
||

381 | s[1 * wrap] * filter[1] + |
||

382 | s[2 * wrap] * filter[2] + |
||

383 | s[3 * wrap] * filter[3]; |
||

384 | sum = sum >> FILTER_BITS; |
||

385 | if (sum<0) sum = 0; else if (sum>255) sum=255; |
||

386 | ```
dst[0] = sum;
``` |
||

387 | dst++; |
||

388 | s++; |
||

389 | dst_width--; |
||

390 | } |
||

391 | } |
||

392 | ```
#endif
``` |
||

393 | |||

394 | de6d9b64 | Fabrice Bellard | ```
/* slow version to handle limit cases. Does not need optimisation */
``` |

395 | static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width, |
||

396 | int src_start, int src_incr, INT16 *filters) |
||

397 | { |
||

398 | ```
int src_pos, phase, sum, j, v, i;
``` |
||

399 | UINT8 *s, *src_end; |
||

400 | INT16 *filter; |
||

401 | |||

402 | src_end = src + src_width; |
||

403 | src_pos = src_start; |
||

404 | for(i=0;i<dst_width;i++) { |
||

405 | s = src + (src_pos >> POS_FRAC_BITS); |
||

406 | phase = get_phase(src_pos); |
||

407 | filter = filters + phase * NB_TAPS; |
||

408 | ```
sum = 0;
``` |
||

409 | for(j=0;j<NB_TAPS;j++) { |
||

410 | ```
if (s < src)
``` |
||

411 | ```
v = src[0];
``` |
||

412 | else if (s >= src_end) |
||

413 | ```
v = src_end[-1];
``` |
||

414 | ```
else
``` |
||

415 | ```
v = s[0];
``` |
||

416 | sum += v * filter[j]; |
||

417 | s++; |
||

418 | } |
||

419 | sum = sum >> FILTER_BITS; |
||

420 | if (sum < 0) |
||

421 | ```
sum = 0;
``` |
||

422 | else if (sum > 255) |
||

423 | ```
sum = 255;
``` |
||

424 | ```
dst[0] = sum;
``` |
||

425 | src_pos += src_incr; |
||

426 | dst++; |
||

427 | } |
||

428 | } |
||

429 | |||

430 | static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width, |
||

431 | int src_start, int src_incr, INT16 *filters) |
||

432 | { |
||

433 | ```
int n, src_end;
``` |
||

434 | |||

435 | if (src_start < 0) { |
||

436 | n = (0 - src_start + src_incr - 1) / src_incr; |
||

437 | h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); |
||

438 | dst += n; |
||

439 | dst_width -= n; |
||

440 | src_start += n * src_incr; |
||

441 | } |
||

442 | src_end = src_start + dst_width * src_incr; |
||

443 | ```
if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
``` |
||

444 | n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / |
||

445 | src_incr; |
||

446 | ```
} else {
``` |
||

447 | n = dst_width; |
||

448 | } |
||

449 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

450 | de6d9b64 | Fabrice Bellard | if ((mm_flags & MM_MMX) && NB_TAPS == 4) |

451 | h_resample_fast4_mmx(dst, n, |
||

452 | src, src_width, src_start, src_incr, filters); |
||

453 | ```
else
``` |
||

454 | ```
#endif
``` |
||

455 | h_resample_fast(dst, n, |
||

456 | src, src_width, src_start, src_incr, filters); |
||

457 | ```
if (n < dst_width) {
``` |
||

458 | dst += n; |
||

459 | dst_width -= n; |
||

460 | src_start += n * src_incr; |
||

461 | h_resample_slow(dst, dst_width, |
||

462 | src, src_width, src_start, src_incr, filters); |
||

463 | } |
||

464 | } |
||

465 | |||

466 | static void component_resample(ImgReSampleContext *s, |
||

467 | UINT8 *output, int owrap, int owidth, int oheight, |
||

468 | UINT8 *input, int iwrap, int iwidth, int iheight) |
||

469 | { |
||

470 | ```
int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
``` |
||

471 | UINT8 *new_line, *src_line; |
||

472 | |||

473 | ```
last_src_y = - FCENTER - 1;
``` |
||

474 | ```
/* position of the bottom of the filter in the source image */
``` |
||

475 | src_y = (last_src_y + NB_TAPS) * POS_FRAC; |
||

476 | ```
ring_y = NB_TAPS; /* position in ring buffer */
``` |
||

477 | for(y=0;y<oheight;y++) { |
||

478 | ```
/* apply horizontal filter on new lines from input if needed */
``` |
||

479 | src_y1 = src_y >> POS_FRAC_BITS; |
||

480 | ```
while (last_src_y < src_y1) {
``` |
||

481 | ```
if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
``` |
||

482 | ring_y = NB_TAPS; |
||

483 | last_src_y++; |
||

484 | ab6d194a | Michael Niedermayer | ```
/* handle limit conditions : replicate line (slightly
``` |

485 | ```
inefficient because we filter multiple times) */
``` |
||

486 | de6d9b64 | Fabrice Bellard | y1 = last_src_y; |

487 | if (y1 < 0) { |
||

488 | ```
y1 = 0;
``` |
||

489 | } else if (y1 >= iheight) { |
||

490 | ```
y1 = iheight - 1;
``` |
||

491 | } |
||

492 | src_line = input + y1 * iwrap; |
||

493 | new_line = s->line_buf + ring_y * owidth; |
||

494 | ```
/* apply filter and handle limit cases correctly */
``` |
||

495 | h_resample(new_line, owidth, |
||

496 | src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, |
||

497 | &s->h_filters[0][0]); |
||

498 | ```
/* handle ring buffer wraping */
``` |
||

499 | ```
if (ring_y >= LINE_BUF_HEIGHT) {
``` |
||

500 | memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth, |
||

501 | new_line, owidth); |
||

502 | } |
||

503 | } |
||

504 | ```
/* apply vertical filter */
``` |
||

505 | phase_y = get_phase(src_y); |
||

506 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

507 | de6d9b64 | Fabrice Bellard | ```
/* desactivated MMX because loss of precision */
``` |

508 | if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0) |
||

509 | v_resample4_mmx(output, owidth, |
||

510 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

511 | ```
&s->v_filters[phase_y][0]);
``` |
||

512 | 404d2241 | Brian Foley | ```
else
``` |

513 | ```
#endif
``` |
||

514 | ```
#ifdef HAVE_ALTIVEC
``` |
||

515 | 00a7d8d6 | Dieter | if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6) |

516 | 404d2241 | Brian Foley | v_resample16_altivec(output, owidth, |

517 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

518 | ```
&s->v_filters[phase_y][0]);
``` |
||

519 | de6d9b64 | Fabrice Bellard | ```
else
``` |

520 | ```
#endif
``` |
||

521 | v_resample(output, owidth, |
||

522 | ```
s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
``` |
||

523 | ```
&s->v_filters[phase_y][0]);
``` |
||

524 | |||

525 | src_y += s->v_incr; |
||

526 | output += owrap; |
||

527 | } |
||

528 | } |
||

529 | |||

530 | ```
/* XXX: the following filter is quite naive, but it seems to suffice
``` |
||

531 | ```
for 4 taps */
``` |
||

532 | static void build_filter(INT16 *filter, float factor) |
||

533 | { |
||

534 | ```
int ph, i, v;
``` |
||

535 | ```
float x, y, tab[NB_TAPS], norm, mult;
``` |
||

536 | |||

537 | ```
/* if upsampling, only need to interpolate, no filter */
``` |
||

538 | if (factor > 1.0) |
||

539 | factor = 1.0; |
||

540 | |||

541 | for(ph=0;ph<NB_PHASES;ph++) { |
||

542 | ```
norm = 0;
``` |
||

543 | for(i=0;i<NB_TAPS;i++) { |
||

544 | |||

545 | x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor; |
||

546 | if (x == 0) |
||

547 | y = 1.0; |
||

548 | ```
else
``` |
||

549 | y = sin(x) / x; |
||

550 | tab[i] = y; |
||

551 | norm += y; |
||

552 | } |
||

553 | |||

554 | ```
/* normalize so that an uniform color remains the same */
``` |
||

555 | mult = (float)(1 << FILTER_BITS) / norm; |
||

556 | for(i=0;i<NB_TAPS;i++) { |
||

557 | ```
v = (int)(tab[i] * mult);
``` |
||

558 | filter[ph * NB_TAPS + i] = v; |
||

559 | } |
||

560 | } |
||

561 | } |
||

562 | |||

563 | ImgReSampleContext *img_resample_init(int owidth, int oheight, |
||

564 | int iwidth, int iheight) |
||

565 | { |
||

566 | ab6d194a | Michael Niedermayer | return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0); |

567 | } |
||

568 | |||

569 | ImgReSampleContext *img_resample_full_init(int owidth, int oheight, |
||

570 | int iwidth, int iheight, |
||

571 | int topBand, int bottomBand, |
||

572 | int leftBand, int rightBand) |
||

573 | { |
||

574 | de6d9b64 | Fabrice Bellard | ImgReSampleContext *s; |

575 | |||

576 | ```
s = av_mallocz(sizeof(ImgReSampleContext));
``` |
||

577 | ```
if (!s)
``` |
||

578 | return NULL; |
||

579 | s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); |
||

580 | ```
if (!s->line_buf)
``` |
||

581 | ```
goto fail;
``` |
||

582 | |||

583 | s->owidth = owidth; |
||

584 | s->oheight = oheight; |
||

585 | s->iwidth = iwidth; |
||

586 | s->iheight = iheight; |
||

587 | ab6d194a | Michael Niedermayer | s->topBand = topBand; |

588 | s->bottomBand = bottomBand; |
||

589 | s->leftBand = leftBand; |
||

590 | s->rightBand = rightBand; |
||

591 | de6d9b64 | Fabrice Bellard | |

592 | ab6d194a | Michael Niedermayer | s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth; |

593 | s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight; |
||

594 | de6d9b64 | Fabrice Bellard | |

595 | ab6d194a | Michael Niedermayer | build_filter(&s->h_filters[0][0], (float) owidth / (float) (iwidth - leftBand - rightBand)); |

596 | build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand)); |
||

597 | de6d9b64 | Fabrice Bellard | |

598 | ```
return s;
``` |
||

599 | ```
fail:
``` |
||

600 | 6000abfa | Fabrice Bellard | av_free(s); |

601 | de6d9b64 | Fabrice Bellard | return NULL; |

602 | } |
||

603 | |||

604 | ```
void img_resample(ImgReSampleContext *s,
``` |
||

605 | AVPicture *output, AVPicture *input) |
||

606 | { |
||

607 | ```
int i, shift;
``` |
||

608 | |||

609 | for(i=0;i<3;i++) { |
||

610 | shift = (i == 0) ? 0 : 1; |
||

611 | component_resample(s, output->data[i], output->linesize[i], |
||

612 | s->owidth >> shift, s->oheight >> shift, |
||

613 | ab6d194a | Michael Niedermayer | input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift), |

614 | input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift), |
||

615 | (s->iheight - s->topBand - s->bottomBand) >> shift); |
||

616 | de6d9b64 | Fabrice Bellard | } |

617 | } |
||

618 | |||

619 | ```
void img_resample_close(ImgReSampleContext *s)
``` |
||

620 | { |
||

621 | 6000abfa | Fabrice Bellard | av_free(s->line_buf); |

622 | av_free(s); |
||

623 | de6d9b64 | Fabrice Bellard | } |

624 | |||

625 | ```
#ifdef TEST
``` |
||

626 | |||

/* Test-build stand-in: allocate `size` zero-filled bytes.
   Returns NULL when size is negative or the allocation fails. */
void *av_mallocz(int size)
{
    if (size < 0)
        return NULL;
    /* calloc zero-fills and reports failure without touching memory */
    return calloc(1, (size_t)size);
}
||

634 | |||

/* Test-build stand-in: release memory obtained from av_mallocz().
   free() already ignores NULL, so no guard is needed. */
void av_free(void *ptr)
{
    free(ptr);
}
||

641 | |||

642 | de6d9b64 | Fabrice Bellard | ```
/* input */
``` |

643 | #define XSIZE 256 |
||

644 | #define YSIZE 256 |
||

645 | UINT8 img[XSIZE * YSIZE]; |
||

646 | |||

647 | ```
/* output */
``` |
||

648 | #define XSIZE1 512 |
||

649 | #define YSIZE1 512 |
||

650 | UINT8 img1[XSIZE1 * YSIZE1]; |
||

651 | UINT8 img2[XSIZE1 * YSIZE1]; |
||

652 | |||

653 | void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize) |
||

654 | { |
||

655 | FILE *f; |
||

656 | ```
f=fopen(filename,"w");
``` |
||

657 | fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255); |
||

658 | ```
fwrite(img,1, xsize * ysize,f);
``` |
||

659 | fclose(f); |
||

660 | } |
||

661 | |||

662 | static void dump_filter(INT16 *filter) |
||

663 | { |
||

664 | ```
int i, ph;
``` |
||

665 | |||

666 | for(ph=0;ph<NB_PHASES;ph++) { |
||

667 | ```
printf("%2d: ", ph);
``` |
||

668 | for(i=0;i<NB_TAPS;i++) { |
||

669 | printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); |
||

670 | } |
||

671 | ```
printf("\n");
``` |
||

672 | } |
||

673 | } |
||

674 | |||

/* The standalone test defines the CPU flag word itself. It is read by the
   resampler under HAVE_ALTIVEC as well as HAVE_MMX, so define it for both. */
#if defined(HAVE_MMX) || defined(HAVE_ALTIVEC)
int mm_flags;
#endif

678 | |||

679 | int main(int argc, char **argv) |
||

680 | { |
||

681 | ```
int x, y, v, i, xsize, ysize;
``` |
||

682 | ImgReSampleContext *s; |
||

683 | float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; |
||

684 | char buf[256]; |
||

685 | |||

686 | ```
/* build test image */
``` |
||

687 | for(y=0;y<YSIZE;y++) { |
||

688 | for(x=0;x<XSIZE;x++) { |
||

689 | if (x < XSIZE/2 && y < YSIZE/2) { |
||

690 | if (x < XSIZE/4 && y < YSIZE/4) { |
||

691 | if ((x % 10) <= 6 && |
||

692 | (y % 10) <= 6) |
||

693 | ```
v = 0xff;
``` |
||

694 | ```
else
``` |
||

695 | ```
v = 0x00;
``` |
||

696 | } else if (x < XSIZE/4) { |
||

697 | if (x & 1) |
||

698 | ```
v = 0xff;
``` |
||

699 | ```
else
``` |
||

700 | ```
v = 0;
``` |
||

701 | } else if (y < XSIZE/4) { |
||

702 | if (y & 1) |
||

703 | ```
v = 0xff;
``` |
||

704 | ```
else
``` |
||

705 | ```
v = 0;
``` |
||

706 | ```
} else {
``` |
||

707 | if (y < YSIZE*3/8) { |
||

708 | if ((y+x) & 1) |
||

709 | ```
v = 0xff;
``` |
||

710 | ```
else
``` |
||

711 | ```
v = 0;
``` |
||

712 | ```
} else {
``` |
||

713 | if (((x+3) % 4) <= 1 && |
||

714 | ((y+3) % 4) <= 1) |
||

715 | ```
v = 0xff;
``` |
||

716 | ```
else
``` |
||

717 | ```
v = 0x00;
``` |
||

718 | } |
||

719 | } |
||

720 | } else if (x < XSIZE/2) { |
||

721 | v = ((x - (XSIZE/2)) * 255) / (XSIZE/2); |
||

722 | } else if (y < XSIZE/2) { |
||

723 | v = ((y - (XSIZE/2)) * 255) / (XSIZE/2); |
||

724 | ```
} else {
``` |
||

725 | ```
v = ((x + y - XSIZE) * 255) / XSIZE;
``` |
||

726 | } |
||

727 | ab6d194a | Michael Niedermayer | img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v; |

728 | de6d9b64 | Fabrice Bellard | } |

729 | } |
||

730 | ```
save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
``` |
||

731 | for(i=0;i<sizeof(factors)/sizeof(float);i++) { |
||

732 | fact = factors[i]; |
||

733 | ```
xsize = (int)(XSIZE * fact);
``` |
||

734 | ab6d194a | Michael Niedermayer | ysize = (int)((YSIZE - 100) * fact); |

735 | 6acce86b | Michael Niedermayer | s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0); |

736 | de6d9b64 | Fabrice Bellard | ```
printf("Factor=%0.2f\n", fact);
``` |

737 | dump_filter(&s->h_filters[0][0]); |
||

738 | component_resample(s, img1, xsize, xsize, ysize, |
||

739 | ab6d194a | Michael Niedermayer | img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100); |

740 | de6d9b64 | Fabrice Bellard | img_resample_close(s); |

741 | |||

742 | ```
sprintf(buf, "/tmp/out%d.pgm", i);
``` |
||

743 | save_pgm(buf, img1, xsize, ysize); |
||

744 | } |
||

745 | |||

746 | ```
/* mmx test */
``` |
||

747 | 980fc7b8 | Fabrice Bellard | ```
#ifdef HAVE_MMX
``` |

748 | de6d9b64 | Fabrice Bellard | ```
printf("MMX test\n");
``` |

749 | fact = 0.72; |
||

750 | ```
xsize = (int)(XSIZE * fact);
``` |
||

751 | ```
ysize = (int)(YSIZE * fact);
``` |
||

752 | mm_flags = MM_MMX; |
||

753 | s = img_resample_init(xsize, ysize, XSIZE, YSIZE); |
||

754 | component_resample(s, img1, xsize, xsize, ysize, |
||

755 | img, XSIZE, XSIZE, YSIZE); |
||

756 | |||

757 | ```
mm_flags = 0;
``` |
||

758 | s = img_resample_init(xsize, ysize, XSIZE, YSIZE); |
||

759 | component_resample(s, img2, xsize, xsize, ysize, |
||

760 | img, XSIZE, XSIZE, YSIZE); |
||

761 | if (memcmp(img1, img2, xsize * ysize) != 0) { |
||

762 | ```
fprintf(stderr, "mmx error\n");
``` |
||

763 | ```
exit(1);
``` |
||

764 | } |
||

765 | ```
printf("MMX OK\n");
``` |
||

766 | ```
#endif
``` |
||

767 | return 0; |
||

768 | } |
||

769 | |||

770 | `#endif` |