ffmpeg / libavcodec / imgresample.c @ 983e3246
History  View  Annotate  Download (22.4 KB)
1 
/*


2 
* High quality image resampling with polyphase filters

3 
* Copyright (c) 2001 Fabrice Bellard.

4 
*

5 
* This library is free software; you can redistribute it and/or

6 
* modify it under the terms of the GNU Lesser General Public

7 
* License as published by the Free Software Foundation; either

8 
* version 2 of the License, or (at your option) any later version.

9 
*

10 
* This library is distributed in the hope that it will be useful,

11 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

12 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

13 
* Lesser General Public License for more details.

14 
*

15 
* You should have received a copy of the GNU Lesser General Public

16 
* License along with this library; if not, write to the Free Software

17 
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

18 
*/

19 

20 
/**

21 
* @file imgresample.c

22 
* High quality image resampling with polyphase filters.

23 
*/

24 

25 
#include "avcodec.h" 
26 
#include "dsputil.h" 
27  
28 
#ifdef USE_FASTMEMCPY

29 
#include "fastmemcpy.h" 
30 
#endif

31  
32 
/* Number of picture components handled (Y, Cb, Cr). */
#define NB_COMPONENTS 3

/* Polyphase filter bank: 2^PHASE_BITS sub-pixel phases of NB_TAPS taps each. */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1 /* index of the center of the filter */
//#define TEST 1 /* Test it */

/* Source positions are kept in fixed point with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

/* Height of the ring buffer of horizontally filtered lines. */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46  
47 
struct ImgReSampleContext {

48 
int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;

49 
int h_incr, v_incr;

50 
int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */

51 
int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */

52 
uint8_t *line_buf; 
53 
}; 
54  
55 
static inline int get_phase(int pos) 
56 
{ 
57 
return ((pos) >> (POS_FRAC_BITS  PHASE_BITS)) & ((1 << PHASE_BITS)  1); 
58 
} 
59  
60 
/* This function must be optimized */

61 
static void h_resample_fast(uint8_t *dst, int dst_width, uint8_t *src, int src_width, 
62 
int src_start, int src_incr, int16_t *filters) 
63 
{ 
64 
int src_pos, phase, sum, i;

65 
uint8_t *s; 
66 
int16_t *filter; 
67  
68 
src_pos = src_start; 
69 
for(i=0;i<dst_width;i++) { 
70 
#ifdef TEST

71 
/* test */

72 
if ((src_pos >> POS_FRAC_BITS) < 0  
73 
(src_pos >> POS_FRAC_BITS) > (src_width  NB_TAPS)) 
74 
av_abort(); 
75 
#endif

76 
s = src + (src_pos >> POS_FRAC_BITS); 
77 
phase = get_phase(src_pos); 
78 
filter = filters + phase * NB_TAPS; 
79 
#if NB_TAPS == 4 
80 
sum = s[0] * filter[0] + 
81 
s[1] * filter[1] + 
82 
s[2] * filter[2] + 
83 
s[3] * filter[3]; 
84 
#else

85 
{ 
86 
int j;

87 
sum = 0;

88 
for(j=0;j<NB_TAPS;j++) 
89 
sum += s[j] * filter[j]; 
90 
} 
91 
#endif

92 
sum = sum >> FILTER_BITS; 
93 
if (sum < 0) 
94 
sum = 0;

95 
else if (sum > 255) 
96 
sum = 255;

97 
dst[0] = sum;

98 
src_pos += src_incr; 
99 
dst++; 
100 
} 
101 
} 
102  
103 
/* This function must be optimized */

104 
static void v_resample(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
105 
int16_t *filter) 
106 
{ 
107 
int sum, i;

108 
uint8_t *s; 
109  
110 
s = src; 
111 
for(i=0;i<dst_width;i++) { 
112 
#if NB_TAPS == 4 
113 
sum = s[0 * wrap] * filter[0] + 
114 
s[1 * wrap] * filter[1] + 
115 
s[2 * wrap] * filter[2] + 
116 
s[3 * wrap] * filter[3]; 
117 
#else

118 
{ 
119 
int j;

120 
uint8_t *s1 = s; 
121  
122 
sum = 0;

123 
for(j=0;j<NB_TAPS;j++) { 
124 
sum += s1[0] * filter[j];

125 
s1 += wrap; 
126 
} 
127 
} 
128 
#endif

129 
sum = sum >> FILTER_BITS; 
130 
if (sum < 0) 
131 
sum = 0;

132 
else if (sum > 255) 
133 
sum = 255;

134 
dst[0] = sum;

135 
dst++; 
136 
s++; 
137 
} 
138 
} 
139  
140 
#ifdef HAVE_MMX

141  
142 
#include "i386/mmx.h" 
143  
144 
/* Compute one horizontally filtered sample into MMX register 'reg'
   (result in both 32 bit halves, shifted down by FILTER_BITS), then
   advance the fixed point source position. Uses mm6 as scratch and
   expects mm7 == 0. */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

/* Debug helper: dump an MMX register to stdout. */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
161  
162 
/* XXX: do four pixels at a time */

163 
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int src_width, 
164 
int src_start, int src_incr, int16_t *filters) 
165 
{ 
166 
int src_pos, phase;

167 
uint8_t *s; 
168 
int16_t *filter; 
169 
mmx_t tmp; 
170 

171 
src_pos = src_start; 
172 
pxor_r2r(mm7, mm7); 
173  
174 
while (dst_width >= 4) { 
175  
176 
FILTER4(mm0); 
177 
FILTER4(mm1); 
178 
FILTER4(mm2); 
179 
FILTER4(mm3); 
180  
181 
packuswb_r2r(mm7, mm0); 
182 
packuswb_r2r(mm7, mm1); 
183 
packuswb_r2r(mm7, mm3); 
184 
packuswb_r2r(mm7, mm2); 
185 
movq_r2m(mm0, tmp); 
186 
dst[0] = tmp.ub[0]; 
187 
movq_r2m(mm1, tmp); 
188 
dst[1] = tmp.ub[0]; 
189 
movq_r2m(mm2, tmp); 
190 
dst[2] = tmp.ub[0]; 
191 
movq_r2m(mm3, tmp); 
192 
dst[3] = tmp.ub[0]; 
193 
dst += 4;

194 
dst_width = 4;

195 
} 
196 
while (dst_width > 0) { 
197 
FILTER4(mm0); 
198 
packuswb_r2r(mm7, mm0); 
199 
movq_r2m(mm0, tmp); 
200 
dst[0] = tmp.ub[0]; 
201 
dst++; 
202 
dst_width; 
203 
} 
204 
emms(); 
205 
} 
206  
207 
static void v_resample4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
208 
int16_t *filter) 
209 
{ 
210 
int sum, i, v;

211 
uint8_t *s; 
212 
mmx_t tmp; 
213 
mmx_t coefs[4];

214 

215 
for(i=0;i<4;i++) { 
216 
v = filter[i]; 
217 
coefs[i].uw[0] = v;

218 
coefs[i].uw[1] = v;

219 
coefs[i].uw[2] = v;

220 
coefs[i].uw[3] = v;

221 
} 
222 

223 
pxor_r2r(mm7, mm7); 
224 
s = src; 
225 
while (dst_width >= 4) { 
226 
movq_m2r(s[0 * wrap], mm0);

227 
punpcklbw_r2r(mm7, mm0); 
228 
movq_m2r(s[1 * wrap], mm1);

229 
punpcklbw_r2r(mm7, mm1); 
230 
movq_m2r(s[2 * wrap], mm2);

231 
punpcklbw_r2r(mm7, mm2); 
232 
movq_m2r(s[3 * wrap], mm3);

233 
punpcklbw_r2r(mm7, mm3); 
234  
235 
pmullw_m2r(coefs[0], mm0);

236 
pmullw_m2r(coefs[1], mm1);

237 
pmullw_m2r(coefs[2], mm2);

238 
pmullw_m2r(coefs[3], mm3);

239  
240 
paddw_r2r(mm1, mm0); 
241 
paddw_r2r(mm3, mm2); 
242 
paddw_r2r(mm2, mm0); 
243 
psraw_i2r(FILTER_BITS, mm0); 
244 

245 
packuswb_r2r(mm7, mm0); 
246 
movq_r2m(mm0, tmp); 
247  
248 
*(uint32_t *)dst = tmp.ud[0];

249 
dst += 4;

250 
s += 4;

251 
dst_width = 4;

252 
} 
253 
while (dst_width > 0) { 
254 
sum = s[0 * wrap] * filter[0] + 
255 
s[1 * wrap] * filter[1] + 
256 
s[2 * wrap] * filter[2] + 
257 
s[3 * wrap] * filter[3]; 
258 
sum = sum >> FILTER_BITS; 
259 
if (sum < 0) 
260 
sum = 0;

261 
else if (sum > 255) 
262 
sum = 255;

263 
dst[0] = sum;

264 
dst++; 
265 
s++; 
266 
dst_width; 
267 
} 
268 
emms(); 
269 
} 
270 
#endif

271  
272 
#ifdef HAVE_ALTIVEC

273 
typedef union { 
274 
vector unsigned char v; 
275 
unsigned char c[16]; 
276 
} vec_uc_t; 
277  
278 
typedef union { 
279 
vector signed short v; 
280 
signed short s[8]; 
281 
} vec_ss_t; 
282  
283 
void v_resample16_altivec(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
284 
int16_t *filter) 
285 
{ 
286 
int sum, i;

287 
uint8_t *s; 
288 
vector unsigned char *tv, tmp, dstv, zero; 
289 
vec_ss_t srchv[4], srclv[4], fv[4]; 
290 
vector signed short zeros, sumhv, sumlv; 
291 
s = src; 
292  
293 
for(i=0;i<4;i++) 
294 
{ 
295 
/*

296 
The vec_madds later on does an implicit >>15 on the result.

297 
Since FILTER_BITS is 8, and we have 15 bits of magnitude in

298 
a signed short, we have just enough bits to preshift our

299 
filter constants <<7 to compensate for vec_madds.

300 
*/

301 
fv[i].s[0] = filter[i] << (15FILTER_BITS); 
302 
fv[i].v = vec_splat(fv[i].v, 0);

303 
} 
304 

305 
zero = vec_splat_u8(0);

306 
zeros = vec_splat_s16(0);

307  
308  
309 
/*

310 
When we're resampling, we'd ideally like both our input buffers,

311 
and output buffers to be 16byte aligned, so we can do both aligned

312 
reads and writes. Sadly we can't always have this at the moment, so

313 
we opt for aligned writes, as unaligned writes have a huge overhead.

314 
To do this, do enough scalar resamples to get dst 16byte aligned.

315 
*/

316 
i = ((int)dst) & 0xf; 
317 
while(i>0) { 
318 
sum = s[0 * wrap] * filter[0] + 
319 
s[1 * wrap] * filter[1] + 
320 
s[2 * wrap] * filter[2] + 
321 
s[3 * wrap] * filter[3]; 
322 
sum = sum >> FILTER_BITS; 
323 
if (sum<0) sum = 0; else if (sum>255) sum=255; 
324 
dst[0] = sum;

325 
dst++; 
326 
s++; 
327 
dst_width; 
328 
i; 
329 
} 
330 

331 
/* Do our altivec resampling on 16 pixels at once. */

332 
while(dst_width>=16) { 
333 
/*

334 
Read 16 (potentially unaligned) bytes from each of

335 
4 lines into 4 vectors, and split them into shorts.

336 
Interleave the multipy/accumulate for the resample

337 
filter with the loads to hide the 3 cycle latency

338 
the vec_madds have.

339 
*/

340 
tv = (vector unsigned char *) &s[0 * wrap]; 
341 
tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); 
342 
srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); 
343 
srclv[0].v = (vector signed short) vec_mergel(zero, tmp); 
344 
sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); 
345 
sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); 
346  
347 
tv = (vector unsigned char *) &s[1 * wrap]; 
348 
tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); 
349 
srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); 
350 
srclv[1].v = (vector signed short) vec_mergel(zero, tmp); 
351 
sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); 
352 
sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); 
353  
354 
tv = (vector unsigned char *) &s[2 * wrap]; 
355 
tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); 
356 
srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); 
357 
srclv[2].v = (vector signed short) vec_mergel(zero, tmp); 
358 
sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); 
359 
sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); 
360  
361 
tv = (vector unsigned char *) &s[3 * wrap]; 
362 
tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); 
363 
srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); 
364 
srclv[3].v = (vector signed short) vec_mergel(zero, tmp); 
365 
sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); 
366 
sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); 
367 

368 
/*

369 
Pack the results into our destination vector,

370 
and do an aligned write of that back to memory.

371 
*/

372 
dstv = vec_packsu(sumhv, sumlv) ; 
373 
vec_st(dstv, 0, (vector unsigned char *) dst); 
374 

375 
dst+=16;

376 
s+=16;

377 
dst_width=16;

378 
} 
379  
380 
/*

381 
If there are any leftover pixels, resample them

382 
with the slow scalar method.

383 
*/

384 
while(dst_width>0) { 
385 
sum = s[0 * wrap] * filter[0] + 
386 
s[1 * wrap] * filter[1] + 
387 
s[2 * wrap] * filter[2] + 
388 
s[3 * wrap] * filter[3]; 
389 
sum = sum >> FILTER_BITS; 
390 
if (sum<0) sum = 0; else if (sum>255) sum=255; 
391 
dst[0] = sum;

392 
dst++; 
393 
s++; 
394 
dst_width; 
395 
} 
396 
} 
397 
#endif

398  
399 
/* slow version to handle limit cases. Does not need optimisation */

400 
static void h_resample_slow(uint8_t *dst, int dst_width, uint8_t *src, int src_width, 
401 
int src_start, int src_incr, int16_t *filters) 
402 
{ 
403 
int src_pos, phase, sum, j, v, i;

404 
uint8_t *s, *src_end; 
405 
int16_t *filter; 
406  
407 
src_end = src + src_width; 
408 
src_pos = src_start; 
409 
for(i=0;i<dst_width;i++) { 
410 
s = src + (src_pos >> POS_FRAC_BITS); 
411 
phase = get_phase(src_pos); 
412 
filter = filters + phase * NB_TAPS; 
413 
sum = 0;

414 
for(j=0;j<NB_TAPS;j++) { 
415 
if (s < src)

416 
v = src[0];

417 
else if (s >= src_end) 
418 
v = src_end[1];

419 
else

420 
v = s[0];

421 
sum += v * filter[j]; 
422 
s++; 
423 
} 
424 
sum = sum >> FILTER_BITS; 
425 
if (sum < 0) 
426 
sum = 0;

427 
else if (sum > 255) 
428 
sum = 255;

429 
dst[0] = sum;

430 
src_pos += src_incr; 
431 
dst++; 
432 
} 
433 
} 
434  
435 
static void h_resample(uint8_t *dst, int dst_width, uint8_t *src, int src_width, 
436 
int src_start, int src_incr, int16_t *filters) 
437 
{ 
438 
int n, src_end;

439  
440 
if (src_start < 0) { 
441 
n = (0  src_start + src_incr  1) / src_incr; 
442 
h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters); 
443 
dst += n; 
444 
dst_width = n; 
445 
src_start += n * src_incr; 
446 
} 
447 
src_end = src_start + dst_width * src_incr; 
448 
if (src_end > ((src_width  NB_TAPS) << POS_FRAC_BITS)) {

449 
n = (((src_width  NB_TAPS + 1) << POS_FRAC_BITS)  1  src_start) / 
450 
src_incr; 
451 
} else {

452 
n = dst_width; 
453 
} 
454 
#ifdef HAVE_MMX

455 
if ((mm_flags & MM_MMX) && NB_TAPS == 4) 
456 
h_resample_fast4_mmx(dst, n, 
457 
src, src_width, src_start, src_incr, filters); 
458 
else

459 
#endif

460 
h_resample_fast(dst, n, 
461 
src, src_width, src_start, src_incr, filters); 
462 
if (n < dst_width) {

463 
dst += n; 
464 
dst_width = n; 
465 
src_start += n * src_incr; 
466 
h_resample_slow(dst, dst_width, 
467 
src, src_width, src_start, src_incr, filters); 
468 
} 
469 
} 
470  
471 
/* Resample one image component: horizontally filter source lines on
   demand into the ring buffer, then vertically filter NB_TAPS buffered
   lines per output row. */
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wraping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* desactivated MMX because loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                     s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                     &s->v_filters[phase_y][0]);
            else
#endif
                v_resample(output, owidth,
                           s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                           &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}
534  
535 
/* XXX: the following filter is quite naive, but it seems to suffice

536 
for 4 taps */

537 
static void build_filter(int16_t *filter, float factor) 
538 
{ 
539 
int ph, i, v;

540 
float x, y, tab[NB_TAPS], norm, mult;

541  
542 
/* if upsampling, only need to interpolate, no filter */

543 
if (factor > 1.0) 
544 
factor = 1.0; 
545  
546 
for(ph=0;ph<NB_PHASES;ph++) { 
547 
norm = 0;

548 
for(i=0;i<NB_TAPS;i++) { 
549 

550 
x = M_PI * ((float)(i  FCENTER)  (float)ph / NB_PHASES) * factor; 
551 
if (x == 0) 
552 
y = 1.0; 
553 
else

554 
y = sin(x) / x; 
555 
tab[i] = y; 
556 
norm += y; 
557 
} 
558  
559 
/* normalize so that an uniform color remains the same */

560 
mult = (float)(1 << FILTER_BITS) / norm; 
561 
for(i=0;i<NB_TAPS;i++) { 
562 
v = (int)(tab[i] * mult);

563 
filter[ph * NB_TAPS + i] = v; 
564 
} 
565 
} 
566 
} 
567  
568 
/* Create a resampling context with no source cropping bands. */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}
573  
574 
ImgReSampleContext *img_resample_full_init(int owidth, int oheight, 
575 
int iwidth, int iheight, 
576 
int topBand, int bottomBand, 
577 
int leftBand, int rightBand) 
578 
{ 
579 
ImgReSampleContext *s; 
580  
581 
s = av_mallocz(sizeof(ImgReSampleContext));

582 
if (!s)

583 
return NULL; 
584 
s>line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS)); 
585 
if (!s>line_buf)

586 
goto fail;

587 

588 
s>owidth = owidth; 
589 
s>oheight = oheight; 
590 
s>iwidth = iwidth; 
591 
s>iheight = iheight; 
592 
s>topBand = topBand; 
593 
s>bottomBand = bottomBand; 
594 
s>leftBand = leftBand; 
595 
s>rightBand = rightBand; 
596 

597 
s>h_incr = ((iwidth  leftBand  rightBand) * POS_FRAC) / owidth; 
598 
s>v_incr = ((iheight  topBand  bottomBand) * POS_FRAC) / oheight; 
599 

600 
build_filter(&s>h_filters[0][0], (float) owidth / (float) (iwidth  leftBand  rightBand)); 
601 
build_filter(&s>v_filters[0][0], (float) oheight / (float) (iheight  topBand  bottomBand)); 
602  
603 
return s;

604 
fail:

605 
av_free(s); 
606 
return NULL; 
607 
} 
608  
609 
/* Resample all three planes of a YUV 4:2:0 picture; the chroma planes
   are half size in both dimensions (shift == 1). */
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, AVPicture *input)
{
    int i, shift;

    for(i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1; /* luma full size, chroma subsampled */
        component_resample(s, output->data[i], output->linesize[i],
                           s->owidth >> shift, s->oheight >> shift,
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
623  
624 
/* Free a resampling context and its line ring buffer. */
void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}
629  
630 
#ifdef TEST

631  
632 
/* Test-build replacement for av_mallocz(): zeroed allocation.
   Returns NULL if malloc fails (the original memset NULL — UB). */
void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    if (!ptr)
        return NULL;
    memset(ptr, 0, size);
    return ptr;
}
639  
640 
/* Test-build replacement for av_free(). free(NULL) is a no-op per
   ISO C, so no guard is needed. */
void av_free(void *ptr)
{
    free(ptr);
}
646  
647 
/* input test image */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output buffers (large enough for the 2.0 upscale factor) */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
657  
658 
/* Write an 8 bit grayscale image as a binary PGM (P5) file.
   Silently does nothing if the file cannot be created (the original
   dereferenced a NULL FILE* on fopen failure). "wb" is used because
   the payload is binary. */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    f = fopen(filename, "wb");
    if (!f)
        return;
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}
666  
667 
static void dump_filter(int16_t *filter) 
668 
{ 
669 
int i, ph;

670  
671 
for(ph=0;ph<NB_PHASES;ph++) { 
672 
printf("%2d: ", ph);

673 
for(i=0;i<NB_TAPS;i++) { 
674 
printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0); 
675 
} 
676 
printf("\n");

677 
} 
678 
} 
679  
680 
#ifdef HAVE_MMX

681 
int mm_flags;

682 
#endif

683  
684 
int main(int argc, char **argv) 
685 
{ 
686 
int x, y, v, i, xsize, ysize;

687 
ImgReSampleContext *s; 
688 
float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 }; 
689 
char buf[256]; 
690  
691 
/* build test image */

692 
for(y=0;y<YSIZE;y++) { 
693 
for(x=0;x<XSIZE;x++) { 
694 
if (x < XSIZE/2 && y < YSIZE/2) { 
695 
if (x < XSIZE/4 && y < YSIZE/4) { 
696 
if ((x % 10) <= 6 && 
697 
(y % 10) <= 6) 
698 
v = 0xff;

699 
else

700 
v = 0x00;

701 
} else if (x < XSIZE/4) { 
702 
if (x & 1) 
703 
v = 0xff;

704 
else

705 
v = 0;

706 
} else if (y < XSIZE/4) { 
707 
if (y & 1) 
708 
v = 0xff;

709 
else

710 
v = 0;

711 
} else {

712 
if (y < YSIZE*3/8) { 
713 
if ((y+x) & 1) 
714 
v = 0xff;

715 
else

716 
v = 0;

717 
} else {

718 
if (((x+3) % 4) <= 1 && 
719 
((y+3) % 4) <= 1) 
720 
v = 0xff;

721 
else

722 
v = 0x00;

723 
} 
724 
} 
725 
} else if (x < XSIZE/2) { 
726 
v = ((x  (XSIZE/2)) * 255) / (XSIZE/2); 
727 
} else if (y < XSIZE/2) { 
728 
v = ((y  (XSIZE/2)) * 255) / (XSIZE/2); 
729 
} else {

730 
v = ((x + y  XSIZE) * 255) / XSIZE;

731 
} 
732 
img[(YSIZE  y) * XSIZE + (XSIZE  x)] = v; 
733 
} 
734 
} 
735 
save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);

736 
for(i=0;i<sizeof(factors)/sizeof(float);i++) { 
737 
fact = factors[i]; 
738 
xsize = (int)(XSIZE * fact);

739 
ysize = (int)((YSIZE  100) * fact); 
740 
s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0); 
741 
printf("Factor=%0.2f\n", fact);

742 
dump_filter(&s>h_filters[0][0]); 
743 
component_resample(s, img1, xsize, xsize, ysize, 
744 
img + 50 * XSIZE, XSIZE, XSIZE, YSIZE  100); 
745 
img_resample_close(s); 
746  
747 
sprintf(buf, "/tmp/out%d.pgm", i);

748 
save_pgm(buf, img1, xsize, ysize); 
749 
} 
750  
751 
/* mmx test */

752 
#ifdef HAVE_MMX

753 
printf("MMX test\n");

754 
fact = 0.72; 
755 
xsize = (int)(XSIZE * fact);

756 
ysize = (int)(YSIZE * fact);

757 
mm_flags = MM_MMX; 
758 
s = img_resample_init(xsize, ysize, XSIZE, YSIZE); 
759 
component_resample(s, img1, xsize, xsize, ysize, 
760 
img, XSIZE, XSIZE, YSIZE); 
761  
762 
mm_flags = 0;

763 
s = img_resample_init(xsize, ysize, XSIZE, YSIZE); 
764 
component_resample(s, img2, xsize, xsize, ysize, 
765 
img, XSIZE, XSIZE, YSIZE); 
766 
if (memcmp(img1, img2, xsize * ysize) != 0) { 
767 
fprintf(stderr, "mmx error\n");

768 
exit(1);

769 
} 
770 
printf("MMX OK\n");

771 
#endif

772 
return 0; 
773 
} 
774  
775 
#endif
