ffmpeg / libavcodec / imgresample.c @ 8dbcc9f2

/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/**
 * @file imgresample.c
 * High quality image resampling with polyphase filters.
 */

#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
    int h_incr, v_incr;
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;
};
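
/* Source positions and increments are kept in 16.16 fixed point
   (POS_FRAC == 1 << POS_FRAC_BITS); get_phase() extracts the top PHASE_BITS
   bits of the fractional part to select one of the NB_PHASES polyphase
   filters for that sub-pixel position. */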
static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}
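
/* Horizontal resampling of one line: for each output pixel, convolve the
   NB_TAPS source pixels starting at the integer part of src_pos with the
   polyphase filter selected by its fractional part, shift the sum right by
   FILTER_BITS and clamp it to 0..255. All taps are assumed to stay inside
   the source line; the edge cases are handled by h_resample_slow(). */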
/* This function must be optimized */
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
                            int src_width, int src_start, int src_incr,
                            int16_t *filters)
{
    int src_pos, phase, sum, i;
    const uint8_t *s;
    int16_t *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
            s[1] * filter[1] +
            s[2] * filter[2] +
            s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
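
/* Vertical resampling: same FILTER_BITS scaling and clamping as above, but
   the NB_TAPS input samples come from successive lines, 'wrap' bytes apart,
   at the same horizontal position. */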
/* This function must be optimized */
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
#else
        {
            int j;
            const uint8_t *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"
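
/* FILTER4 computes one output pixel with MMX: it loads 8 source bytes at the
   current position, widens the low NB_TAPS of them to words, multiplies them
   by the selected phase's taps with pmaddwd, folds the two partial sums into
   the low dword of 'reg', shifts right by FILTER_BITS and advances src_pos. */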
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
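
/* MMX vertical resampling: 4 output pixels per iteration. Each filter tap is
   broadcast to a vector of four words, the bytes from the corresponding
   source line are widened to words and multiplied with pmullw, the four line
   products are summed, shifted right by FILTER_BITS and packed back to bytes. */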
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif
#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;
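
/* AltiVec vertical resampling: scalar code first brings dst up to 16-byte
   alignment, then 16 output pixels are produced per iteration with vec_madds,
   and any leftover pixels fall back to the scalar loop. */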
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
              s[1 * wrap] * filter[1] +
              s[2 * wrap] * filter[2] +
              s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif
/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(uint8_t *dst, int dst_width,
                            const uint8_t *src, int src_width,
                            int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase, sum, j, v, i;
    const uint8_t *s, *src_end;
    int16_t *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}
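
/* Dispatch horizontal resampling: output pixels whose filter taps would fall
   off the left or right edge of the source line are computed with
   h_resample_slow(), the rest with the fast (or MMX) version. */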
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}
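
/* Resample one plane: source lines are first resampled horizontally into a
   ring buffer of LINE_BUF_HEIGHT + NB_TAPS line slots, so that each input
   line normally needs only one horizontal pass; for every output line the
   vertical filter then combines NB_TAPS consecutive buffered lines selected
   by the current vertical position. */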
static void component_resample(ImgReSampleContext *s,
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}
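
/* build_filter(): for each of the NB_PHASES sub-pixel offsets, sample a sinc
   low-pass (widened by 'factor' when downsampling) at the NB_TAPS tap
   positions and scale the taps so they sum to 1 << FILTER_BITS, which keeps
   flat areas unchanged. With factor 1.0, phase 0 works out to the identity
   tap set {0, 256, 0, 0}. */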
/* XXX: the following filter is quite naive, but it seems to suffice
   for 4 taps */
static void build_filter(int16_t *filter, float factor)
{
    int ph, i, v;
    float x, y, tab[NB_TAPS], norm, mult;

    /* if upsampling, only need to interpolate, no filter */
    if (factor > 1.0)
        factor = 1.0;

    for(ph=0;ph<NB_PHASES;ph++) {
        norm = 0;
        for(i=0;i<NB_TAPS;i++) {
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
            if (x == 0)
                y = 1.0;
            else
                y = sin(x) / x;
            tab[i] = y;
            norm += y;
        }

        /* normalize so that a uniform color remains the same */
        mult = (float)(1 << FILTER_BITS) / norm;
        for(i=0;i<NB_TAPS;i++) {
            v = (int)(tab[i] * mult);
            filter[ph * NB_TAPS + i] = v;
        }
    }
}
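
/* Public constructors. img_resample_init() scales the whole input frame;
   img_resample_full_init() additionally crops topBand/bottomBand/leftBand/
   rightBand pixels from the input edges before scaling, and the increments
   and filters are built for the cropped input size. */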
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                      int iwidth, int iheight,
                                      int topBand, int bottomBand,
                                      int leftBand, int rightBand)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;
    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;

    build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
    build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));

    return s;
 fail:
    av_free(s);
    return NULL;
}
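
/* Resample the three planes of a picture. The >> 1 applied to the sizes and
   band offsets of planes 1 and 2 assumes 4:2:0 chroma subsampling (the chroma
   planes are half size in both directions). */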
void img_resample(ImgReSampleContext *s,
                  AVPicture *output, const AVPicture *input)
{
    int i, shift;

    for(i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;
        component_resample(s, output->data[i], output->linesize[i],
                           s->owidth >> shift, s->oheight >> shift,
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}
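
/*
 * Illustrative usage sketch (not part of the original file; names such as
 * out_width, in_pic and out_pic are placeholders, and the pictures are
 * assumed to be YUV420P AVPictures already allocated at the right sizes):
 *
 *     ImgReSampleContext *ctx;
 *
 *     ctx = img_resample_init(out_width, out_height, in_width, in_height);
 *     if (ctx) {
 *         img_resample(ctx, &out_pic, &in_pic);
 *         img_resample_close(ctx);
 *     }
 */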
#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}

void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}

/* input */
#define XSIZE 256
#define YSIZE 256
uint8_t img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    f=fopen(filename,"w");
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
}

static void dump_filter(int16_t *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}
#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            /* mirror the pattern; the -1 keeps the index inside img[] */
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif