/* ffmpeg / libavcodec / imgresample.c @ 5509bffa */
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1 /* index of the center of the filter */
//#define TEST     1 /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

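/*
 * Source positions are tracked in 16.16 fixed point (POS_FRAC_BITS
 * fractional bits). The integer part selects the first source sample of
 * the filter window and the top PHASE_BITS of the fraction select one of
 * the NB_PHASES precomputed filter phases. For example, the position
 * 0x28000 (2.5) filters src[2..5] with phase (0x28000 >> 12) & 0xf = 8.
 */
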
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight;
    int topBand, bottomBand, leftBand, rightBand;
    int padtop, padbottom, padleft, padright;
    int pad_owidth, pad_oheight;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};

void av_build_filter(int16_t *filter, double factor, int tap_count, int phase_count, int scale, int type);

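/*
 * av_build_filter() is implemented elsewhere in libavcodec; as called
 * below, `factor` is the output/input size ratio (< 1.0 when
 * downscaling), `scale` is 1 << FILTER_BITS so the taps of each phase
 * sum to roughly that value, and `type` 0 selects the default kernel.
 */
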
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* consistency check: the fast path must never read outside the line */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
              s[1] * filter[1] +
              s[2] * filter[2] +
              s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

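/*
 * With NB_TAPS = 4 and FILTER_BITS = 8, each output pixel above is
 * clip((s[0]*f[0] + s[1]*f[1] + s[2]*f[2] + s[3]*f[3]) >> 8, 0, 255),
 * and src_pos advances by src_incr, the input/output width ratio in
 * 16.16 fixed point (0x20000 when downscaling by exactly 2).
 */
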
/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
#else
        {
            int j;
            const uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

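/*
 * FILTER4 computes one output pixel: the four source bytes are widened
 * to 16 bits (punpcklbw against the zeroed mm7), pmaddwd forms the two
 * 32-bit partial dot products s0*f0+s1*f1 and s2*f2+s3*f3, and the
 * shift/add pair folds them into the low dword before the final
 * arithmetic shift by FILTER_BITS.
 */
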
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

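/*
 * Note on the vertical MMX path above: pmullw keeps only the low 16
 * bits of each product and paddw wraps on overflow, so a large
 * coefficient times a bright pixel can lose high bits before the
 * shift. This is presumably the precision loss for which this function
 * is left disabled at its call site in component_resample().
 */
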
#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our AltiVec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif

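/*
 * Arithmetic behind the <<(15-FILTER_BITS) pre-shift above:
 * vec_madds(a, b, c) computes roughly ((a*b) >> 15) + c with rounding
 * and saturation, so multiplying by filter[i] << 7 yields
 * ((s * filter[i]) << 7) >> 15 = (s * filter[i]) >> 8, which matches
 * the scalar >> FILTER_BITS path.
 */
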
/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            /* replicate the edge pixel when a tap falls outside the line */
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

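/*
 * h_resample() splits each line into up to three segments: leading
 * pixels whose filter window starts before src[0] (slow path with edge
 * replication), the middle run where all NB_TAPS reads stay inside the
 * line (fast path), and trailing pixels whose window runs past the end
 * (slow path again). ((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1
 * is the last position the fast path can be given without reading past
 * the line.
 */
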
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* deactivated MMX because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth,
                                 s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                 &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;

        output += owrap;
    }
}

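/*
 * The line buffer used above is a ring of LINE_BUF_HEIGHT rows plus
 * NB_TAPS spare rows at the start: rows written near the top of the
 * ring (ring_y >= LINE_BUF_HEIGHT) are mirrored to the low indices, so
 * the vertical filter can always read its NB_TAPS rows contiguously,
 * even right after ring_y wraps back to NB_TAPS.
 */
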
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  0, 0, 0, 0, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                           int iwidth, int iheight,
                                           int topBand, int bottomBand,
                                           int leftBand, int rightBand,
                                           int padtop, int padbottom,
                                           int padleft, int padright)
{
    ImgReSampleContext *s;

    if (!owidth || !oheight || !iwidth || !iheight)
        return NULL;
    /* check for overflow before allocating so nothing is leaked on failure */
    if ((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
        return NULL;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;

    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->padtop = padtop;
    s->padbottom = padbottom;
    s->padleft = padleft;
    s->padright = padright;

    s->pad_owidth = owidth - (padleft + padright);
    s->pad_oheight = oheight - (padtop + padbottom);

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;

    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth /
                    (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
                    (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);

    return s;
fail:
    av_free(s);
    return NULL;
}

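/*
 * Worked example for the increments above: scaling a 720-pixel-wide
 * input down to 360 output pixels (no bands, no padding) gives
 * h_incr = (720 << 16) / 360 = 0x20000, i.e. a source step of exactly
 * 2.0 per output pixel in 16.16 fixed point.
 */
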
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;
    uint8_t* optr;

    for (i=0;i<3;i++) {
        /* plane 0 is luma; the chroma planes are subsampled by 2 (4:2:0) */
        shift = (i == 0) ? 0 : 1;

        optr = output->data[i] + (((output->linesize[i] *
                        s->padtop) + s->padleft) >> shift);

        component_resample(s, optr, output->linesize[i],
                           s->pad_owidth >> shift, s->pad_oheight >> shift,
                           input->data[i] + (input->linesize[i] *
                                             (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand -
                                                 s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}

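/*
 * Minimal usage sketch of the public API; allocating the pictures with
 * avpicture_alloc() is an assumption, any correctly filled AVPicture
 * pair works:
 *
 *     AVPicture in, out;
 *     avpicture_alloc(&in, PIX_FMT_YUV420P, 720, 576);
 *     avpicture_alloc(&out, PIX_FMT_YUV420P, 360, 288);
 *     // ... fill `in' with source image data ...
 *     ImgReSampleContext *ctx = img_resample_init(360, 288, 720, 576);
 *     if (ctx) {
 *         img_resample(ctx, &out, &in);
 *         img_resample_close(ctx);
 *     }
 */
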
#ifdef TEST
#include <stdio.h>
#include <stdlib.h>

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
#undef fprintf
    FILE *f;
    f=fopen(filename,"w");
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
#define fprintf please_use_av_log
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        av_log(NULL, AV_LOG_INFO, "%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            /* coefficients are stored scaled by 1 << FILTER_BITS */
            av_log(NULL, AV_LOG_INFO, " %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        av_log(NULL, AV_LOG_INFO, "\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < YSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < YSIZE/2) {
                v = ((y - (YSIZE/2)) * 255) / (YSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            /* mirror the pattern; the -1 keeps the store in bounds at x==0/y==0 */
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0, 0, 0, 0, 0);
        av_log(NULL, AV_LOG_INFO, "Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        snprintf(buf, sizeof(buf), "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* MMX test */
#ifdef HAVE_MMX
    av_log(NULL, AV_LOG_INFO, "MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        av_log(NULL, AV_LOG_ERROR, "mmx error\n");
        exit(1);
    }
    av_log(NULL, AV_LOG_INFO, "MMX OK\n");
#endif
    return 0;
}

#endif