Statistics
| Branch: | Revision:

ffmpeg / libavcodec / imgresample.c @ cb231c48

History | View | Annotate | Download (22.4 KB)

1 de6d9b64 Fabrice Bellard
/*
2
 * High quality image resampling with polyphase filters 
3 ff4ec49e Fabrice Bellard
 * Copyright (c) 2001 Fabrice Bellard.
4 de6d9b64 Fabrice Bellard
 *
5 ff4ec49e Fabrice Bellard
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9 de6d9b64 Fabrice Bellard
 *
10 ff4ec49e Fabrice Bellard
 * This library is distributed in the hope that it will be useful,
11 de6d9b64 Fabrice Bellard
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ff4ec49e Fabrice Bellard
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14 de6d9b64 Fabrice Bellard
 *
15 ff4ec49e Fabrice Bellard
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 de6d9b64 Fabrice Bellard
 */
19 983e3246 Michael Niedermayer
 
20
/**
21
 * @file imgresample.c
22
 * High quality image resampling with polyphase filters .
23
 */
24
 
25 de6d9b64 Fabrice Bellard
#include "avcodec.h"
26 6000abfa Fabrice Bellard
#include "dsputil.h"
27 de6d9b64 Fabrice Bellard
28 54329dd5 Nick Kurshev
#ifdef USE_FASTMEMCPY
29
#include "fastmemcpy.h"
30
#endif
31
32 de6d9b64 Fabrice Bellard
#define NB_COMPONENTS 3
33
34
#define PHASE_BITS 4
35
#define NB_PHASES  (1 << PHASE_BITS)
36
#define NB_TAPS    4
37
#define FCENTER    1  /* index of the center of the filter */
38 ab6d194a Michael Niedermayer
//#define TEST    1  /* Test it */
39 de6d9b64 Fabrice Bellard
40
#define POS_FRAC_BITS 16
41
#define POS_FRAC      (1 << POS_FRAC_BITS)
42
/* 6 bits precision is needed for MMX */
43
#define FILTER_BITS   8
44
45
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46
47
struct ImgReSampleContext {
48 ab6d194a Michael Niedermayer
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
49 de6d9b64 Fabrice Bellard
    int h_incr, v_incr;
50 0c1a9eda Zdenek Kabelac
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
51
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
52
    uint8_t *line_buf;
53 de6d9b64 Fabrice Bellard
};
54
55
static inline int get_phase(int pos)
56
{
57
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
58
}
59
60
/* This function must be optimized */
61 0c1a9eda Zdenek Kabelac
static void h_resample_fast(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
62
                            int src_start, int src_incr, int16_t *filters)
63 de6d9b64 Fabrice Bellard
{
64
    int src_pos, phase, sum, i;
65 0c1a9eda Zdenek Kabelac
    uint8_t *s;
66
    int16_t *filter;
67 de6d9b64 Fabrice Bellard
68
    src_pos = src_start;
69
    for(i=0;i<dst_width;i++) {
70
#ifdef TEST
71
        /* test */
72
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
73
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
74 02ac3136 Philip Gladstone
            av_abort();
75 de6d9b64 Fabrice Bellard
#endif
76
        s = src + (src_pos >> POS_FRAC_BITS);
77
        phase = get_phase(src_pos);
78
        filter = filters + phase * NB_TAPS;
79
#if NB_TAPS == 4
80
        sum = s[0] * filter[0] +
81
            s[1] * filter[1] +
82
            s[2] * filter[2] +
83
            s[3] * filter[3];
84
#else
85
        {
86
            int j;
87
            sum = 0;
88
            for(j=0;j<NB_TAPS;j++)
89
                sum += s[j] * filter[j];
90
        }
91
#endif
92
        sum = sum >> FILTER_BITS;
93
        if (sum < 0)
94
            sum = 0;
95
        else if (sum > 255)
96
            sum = 255;
97
        dst[0] = sum;
98
        src_pos += src_incr;
99
        dst++;
100
    }
101
}
102
103
/* This function must be optimized */
104 0c1a9eda Zdenek Kabelac
static void v_resample(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
105
                       int16_t *filter)
106 de6d9b64 Fabrice Bellard
{
107
    int sum, i;
108 0c1a9eda Zdenek Kabelac
    uint8_t *s;
109 de6d9b64 Fabrice Bellard
110
    s = src;
111
    for(i=0;i<dst_width;i++) {
112
#if NB_TAPS == 4
113
        sum = s[0 * wrap] * filter[0] +
114
            s[1 * wrap] * filter[1] +
115
            s[2 * wrap] * filter[2] +
116
            s[3 * wrap] * filter[3];
117
#else
118
        {
119
            int j;
120 0c1a9eda Zdenek Kabelac
            uint8_t *s1 = s;
121 de6d9b64 Fabrice Bellard
122
            sum = 0;
123
            for(j=0;j<NB_TAPS;j++) {
124
                sum += s1[0] * filter[j];
125
                s1 += wrap;
126
            }
127
        }
128
#endif
129
        sum = sum >> FILTER_BITS;
130
        if (sum < 0)
131
            sum = 0;
132
        else if (sum > 255)
133
            sum = 255;
134
        dst[0] = sum;
135
        dst++;
136
        s++;
137
    }
138
}
139
140 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
141 de6d9b64 Fabrice Bellard
142
#include "i386/mmx.h"
143
144
#define FILTER4(reg) \
145
{\
146
        s = src + (src_pos >> POS_FRAC_BITS);\
147
        phase = get_phase(src_pos);\
148
        filter = filters + phase * NB_TAPS;\
149
        movq_m2r(*s, reg);\
150
        punpcklbw_r2r(mm7, reg);\
151
        movq_m2r(*filter, mm6);\
152
        pmaddwd_r2r(reg, mm6);\
153
        movq_r2r(mm6, reg);\
154
        psrlq_i2r(32, reg);\
155
        paddd_r2r(mm6, reg);\
156
        psrad_i2r(FILTER_BITS, reg);\
157
        src_pos += src_incr;\
158
}
159
160
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
161
162
/* XXX: do four pixels at a time */
163 0c1a9eda Zdenek Kabelac
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
164
                                 int src_start, int src_incr, int16_t *filters)
165 de6d9b64 Fabrice Bellard
{
166
    int src_pos, phase;
167 0c1a9eda Zdenek Kabelac
    uint8_t *s;
168
    int16_t *filter;
169 de6d9b64 Fabrice Bellard
    mmx_t tmp;
170
    
171
    src_pos = src_start;
172
    pxor_r2r(mm7, mm7);
173
174
    while (dst_width >= 4) {
175
176
        FILTER4(mm0);
177
        FILTER4(mm1);
178
        FILTER4(mm2);
179
        FILTER4(mm3);
180
181
        packuswb_r2r(mm7, mm0);
182
        packuswb_r2r(mm7, mm1);
183
        packuswb_r2r(mm7, mm3);
184
        packuswb_r2r(mm7, mm2);
185
        movq_r2m(mm0, tmp);
186
        dst[0] = tmp.ub[0];
187
        movq_r2m(mm1, tmp);
188
        dst[1] = tmp.ub[0];
189
        movq_r2m(mm2, tmp);
190
        dst[2] = tmp.ub[0];
191
        movq_r2m(mm3, tmp);
192
        dst[3] = tmp.ub[0];
193
        dst += 4;
194
        dst_width -= 4;
195
    }
196
    while (dst_width > 0) {
197
        FILTER4(mm0);
198
        packuswb_r2r(mm7, mm0);
199
        movq_r2m(mm0, tmp);
200
        dst[0] = tmp.ub[0];
201
        dst++;
202
        dst_width--;
203
    }
204
    emms();
205
}
206
207 0c1a9eda Zdenek Kabelac
static void v_resample4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
208
                            int16_t *filter)
209 de6d9b64 Fabrice Bellard
{
210
    int sum, i, v;
211 0c1a9eda Zdenek Kabelac
    uint8_t *s;
212 de6d9b64 Fabrice Bellard
    mmx_t tmp;
213
    mmx_t coefs[4];
214
    
215
    for(i=0;i<4;i++) {
216
        v = filter[i];
217
        coefs[i].uw[0] = v;
218
        coefs[i].uw[1] = v;
219
        coefs[i].uw[2] = v;
220
        coefs[i].uw[3] = v;
221
    }
222
    
223
    pxor_r2r(mm7, mm7);
224
    s = src;
225
    while (dst_width >= 4) {
226
        movq_m2r(s[0 * wrap], mm0);
227
        punpcklbw_r2r(mm7, mm0);
228
        movq_m2r(s[1 * wrap], mm1);
229
        punpcklbw_r2r(mm7, mm1);
230
        movq_m2r(s[2 * wrap], mm2);
231
        punpcklbw_r2r(mm7, mm2);
232
        movq_m2r(s[3 * wrap], mm3);
233
        punpcklbw_r2r(mm7, mm3);
234
235
        pmullw_m2r(coefs[0], mm0);
236
        pmullw_m2r(coefs[1], mm1);
237
        pmullw_m2r(coefs[2], mm2);
238
        pmullw_m2r(coefs[3], mm3);
239
240
        paddw_r2r(mm1, mm0);
241
        paddw_r2r(mm3, mm2);
242
        paddw_r2r(mm2, mm0);
243
        psraw_i2r(FILTER_BITS, mm0);
244
        
245
        packuswb_r2r(mm7, mm0);
246
        movq_r2m(mm0, tmp);
247
248 0c1a9eda Zdenek Kabelac
        *(uint32_t *)dst = tmp.ud[0];
249 de6d9b64 Fabrice Bellard
        dst += 4;
250
        s += 4;
251
        dst_width -= 4;
252
    }
253
    while (dst_width > 0) {
254
        sum = s[0 * wrap] * filter[0] +
255
            s[1 * wrap] * filter[1] +
256
            s[2 * wrap] * filter[2] +
257
            s[3 * wrap] * filter[3];
258
        sum = sum >> FILTER_BITS;
259
        if (sum < 0)
260
            sum = 0;
261
        else if (sum > 255)
262
            sum = 255;
263
        dst[0] = sum;
264
        dst++;
265
        s++;
266
        dst_width--;
267
    }
268
    emms();
269
}
270
#endif
271
272 404d2241 Brian Foley
#ifdef HAVE_ALTIVEC
273
typedef        union {
274
    vector unsigned char v;
275
    unsigned char c[16];
276
} vec_uc_t;
277
278
typedef        union {
279
    vector signed short v;
280
    signed short s[8];
281
} vec_ss_t;
282
283 0c1a9eda Zdenek Kabelac
void v_resample16_altivec(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
284
                            int16_t *filter)
285 404d2241 Brian Foley
{
286
    int sum, i;
287
    uint8_t *s;
288
    vector unsigned char *tv, tmp, dstv, zero;
289
    vec_ss_t srchv[4], srclv[4], fv[4];
290
    vector signed short zeros, sumhv, sumlv;    
291
    s = src;
292
293
    for(i=0;i<4;i++)
294
    {
295
        /*
296
           The vec_madds later on does an implicit >>15 on the result.
297
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
298
           a signed short, we have just enough bits to pre-shift our
299
           filter constants <<7 to compensate for vec_madds.
300
        */
301
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
302
        fv[i].v = vec_splat(fv[i].v, 0);
303
    }
304
    
305
    zero = vec_splat_u8(0);
306
    zeros = vec_splat_s16(0);
307
308
309
    /*
310
       When we're resampling, we'd ideally like both our input buffers,
311
       and output buffers to be 16-byte aligned, so we can do both aligned
312
       reads and writes. Sadly we can't always have this at the moment, so
313
       we opt for aligned writes, as unaligned writes have a huge overhead.
314
       To do this, do enough scalar resamples to get dst 16-byte aligned.
315
    */
316 9e4e1659 Philip Gladstone
    i = (-(int)dst) & 0xf;
317 404d2241 Brian Foley
    while(i>0) {
318
        sum = s[0 * wrap] * filter[0] +
319
        s[1 * wrap] * filter[1] +
320
        s[2 * wrap] * filter[2] +
321
        s[3 * wrap] * filter[3];
322
        sum = sum >> FILTER_BITS;
323
        if (sum<0) sum = 0; else if (sum>255) sum=255;
324
        dst[0] = sum;
325
        dst++;
326
        s++;
327
        dst_width--;
328
        i--;
329
    }
330
    
331
    /* Do our altivec resampling on 16 pixels at once. */
332
    while(dst_width>=16) {
333
        /*
334
           Read 16 (potentially unaligned) bytes from each of
335
           4 lines into 4 vectors, and split them into shorts.
336
           Interleave the multipy/accumulate for the resample
337
           filter with the loads to hide the 3 cycle latency
338
           the vec_madds have.
339
        */
340
        tv = (vector unsigned char *) &s[0 * wrap];
341
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
342
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
343
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
344
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
345
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
346
347
        tv = (vector unsigned char *) &s[1 * wrap];
348
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
349
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
350
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
351
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
352
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
353
354
        tv = (vector unsigned char *) &s[2 * wrap];
355
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
356
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
357
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
358
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
359
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
360
361
        tv = (vector unsigned char *) &s[3 * wrap];
362
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
363
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
364
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
365
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
366
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
367
    
368
        /*
369
           Pack the results into our destination vector,
370
           and do an aligned write of that back to memory.
371
        */
372
        dstv = vec_packsu(sumhv, sumlv) ;
373
        vec_st(dstv, 0, (vector unsigned char *) dst);
374
        
375
        dst+=16;
376
        s+=16;
377
        dst_width-=16;
378
    }
379
380
    /*
381
       If there are any leftover pixels, resample them
382
       with the slow scalar method.
383
    */
384
    while(dst_width>0) {
385
        sum = s[0 * wrap] * filter[0] +
386
        s[1 * wrap] * filter[1] +
387
        s[2 * wrap] * filter[2] +
388
        s[3 * wrap] * filter[3];
389
        sum = sum >> FILTER_BITS;
390
        if (sum<0) sum = 0; else if (sum>255) sum=255;
391
        dst[0] = sum;
392
        dst++;
393
        s++;
394
        dst_width--;
395
    }
396
}
397
#endif
398
399 de6d9b64 Fabrice Bellard
/* slow version to handle limit cases. Does not need optimisation */
400 0c1a9eda Zdenek Kabelac
static void h_resample_slow(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
401
                            int src_start, int src_incr, int16_t *filters)
402 de6d9b64 Fabrice Bellard
{
403
    int src_pos, phase, sum, j, v, i;
404 0c1a9eda Zdenek Kabelac
    uint8_t *s, *src_end;
405
    int16_t *filter;
406 de6d9b64 Fabrice Bellard
407
    src_end = src + src_width;
408
    src_pos = src_start;
409
    for(i=0;i<dst_width;i++) {
410
        s = src + (src_pos >> POS_FRAC_BITS);
411
        phase = get_phase(src_pos);
412
        filter = filters + phase * NB_TAPS;
413
        sum = 0;
414
        for(j=0;j<NB_TAPS;j++) {
415
            if (s < src)
416
                v = src[0];
417
            else if (s >= src_end)
418
                v = src_end[-1];
419
            else
420
                v = s[0];
421
            sum += v * filter[j];
422
            s++;
423
        }
424
        sum = sum >> FILTER_BITS;
425
        if (sum < 0)
426
            sum = 0;
427
        else if (sum > 255)
428
            sum = 255;
429
        dst[0] = sum;
430
        src_pos += src_incr;
431
        dst++;
432
    }
433
}
434
435 0c1a9eda Zdenek Kabelac
static void h_resample(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
436
                       int src_start, int src_incr, int16_t *filters)
437 de6d9b64 Fabrice Bellard
{
438
    int n, src_end;
439
440
    if (src_start < 0) {
441
        n = (0 - src_start + src_incr - 1) / src_incr;
442
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
443
        dst += n;
444
        dst_width -= n;
445
        src_start += n * src_incr;
446
    }
447
    src_end = src_start + dst_width * src_incr;
448
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
449
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / 
450
            src_incr;
451
    } else {
452
        n = dst_width;
453
    }
454 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
455 de6d9b64 Fabrice Bellard
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
456
        h_resample_fast4_mmx(dst, n, 
457
                             src, src_width, src_start, src_incr, filters);
458
    else
459
#endif
460
        h_resample_fast(dst, n, 
461
                        src, src_width, src_start, src_incr, filters);
462
    if (n < dst_width) {
463
        dst += n;
464
        dst_width -= n;
465
        src_start += n * src_incr;
466
        h_resample_slow(dst, dst_width, 
467
                        src, src_width, src_start, src_incr, filters);
468
    }
469
}
470
471
static void component_resample(ImgReSampleContext *s, 
472 0c1a9eda Zdenek Kabelac
                               uint8_t *output, int owrap, int owidth, int oheight,
473
                               uint8_t *input, int iwrap, int iwidth, int iheight)
474 de6d9b64 Fabrice Bellard
{
475
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
476 0c1a9eda Zdenek Kabelac
    uint8_t *new_line, *src_line;
477 de6d9b64 Fabrice Bellard
478
    last_src_y = - FCENTER - 1;
479
    /* position of the bottom of the filter in the source image */
480
    src_y = (last_src_y + NB_TAPS) * POS_FRAC; 
481
    ring_y = NB_TAPS; /* position in ring buffer */
482
    for(y=0;y<oheight;y++) {
483
        /* apply horizontal filter on new lines from input if needed */
484
        src_y1 = src_y >> POS_FRAC_BITS;
485
        while (last_src_y < src_y1) {
486
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
487
                ring_y = NB_TAPS;
488
            last_src_y++;
489 ab6d194a Michael Niedermayer
            /* handle limit conditions : replicate line (slightly
490
               inefficient because we filter multiple times) */
491 de6d9b64 Fabrice Bellard
            y1 = last_src_y;
492
            if (y1 < 0) {
493
                y1 = 0;
494
            } else if (y1 >= iheight) {
495
                y1 = iheight - 1;
496
            }
497
            src_line = input + y1 * iwrap;
498
            new_line = s->line_buf + ring_y * owidth;
499
            /* apply filter and handle limit cases correctly */
500
            h_resample(new_line, owidth, 
501
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, 
502
                       &s->h_filters[0][0]);
503
            /* handle ring buffer wraping */
504
            if (ring_y >= LINE_BUF_HEIGHT) {
505
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
506
                       new_line, owidth);
507
            }
508
        }
509
        /* apply vertical filter */
510
        phase_y = get_phase(src_y);
511 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
512 de6d9b64 Fabrice Bellard
        /* desactivated MMX because loss of precision */
513
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
514
            v_resample4_mmx(output, owidth, 
515
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
516
                            &s->v_filters[phase_y][0]);
517 404d2241 Brian Foley
        else
518
#endif
519
#ifdef HAVE_ALTIVEC
520 00a7d8d6 Dieter
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
521 404d2241 Brian Foley
                v_resample16_altivec(output, owidth,
522
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
523
                                &s->v_filters[phase_y][0]);
524 de6d9b64 Fabrice Bellard
        else
525
#endif
526
            v_resample(output, owidth, 
527
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
528
                       &s->v_filters[phase_y][0]);
529
            
530
        src_y += s->v_incr;
531
        output += owrap;
532
    }
533
}
534
535
/* XXX: the following filter is quite naive, but it seems to suffice
536
   for 4 taps */
537 0c1a9eda Zdenek Kabelac
static void build_filter(int16_t *filter, float factor)
538 de6d9b64 Fabrice Bellard
{
539
    int ph, i, v;
540
    float x, y, tab[NB_TAPS], norm, mult;
541
542
    /* if upsampling, only need to interpolate, no filter */
543
    if (factor > 1.0)
544
        factor = 1.0;
545
546
    for(ph=0;ph<NB_PHASES;ph++) {
547
        norm = 0;
548
        for(i=0;i<NB_TAPS;i++) {
549
            
550
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
551
            if (x == 0)
552
                y = 1.0;
553
            else
554
                y = sin(x) / x;
555
            tab[i] = y;
556
            norm += y;
557
        }
558
559
        /* normalize so that an uniform color remains the same */
560
        mult = (float)(1 << FILTER_BITS) / norm;
561
        for(i=0;i<NB_TAPS;i++) {
562
            v = (int)(tab[i] * mult);
563
            filter[ph * NB_TAPS + i] = v;
564
        }
565
    }
566
}
567
568
ImgReSampleContext *img_resample_init(int owidth, int oheight,
569
                                      int iwidth, int iheight)
570
{
571 ab6d194a Michael Niedermayer
        return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
572
}
573
574
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
575
                                      int iwidth, int iheight,
576
                                      int topBand, int bottomBand,
577
                                      int leftBand, int rightBand)
578
{
579 de6d9b64 Fabrice Bellard
    ImgReSampleContext *s;
580
581
    s = av_mallocz(sizeof(ImgReSampleContext));
582
    if (!s)
583
        return NULL;
584
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
585
    if (!s->line_buf) 
586
        goto fail;
587
    
588
    s->owidth = owidth;
589
    s->oheight = oheight;
590
    s->iwidth = iwidth;
591
    s->iheight = iheight;
592 ab6d194a Michael Niedermayer
    s->topBand = topBand;
593
    s->bottomBand = bottomBand;
594
    s->leftBand = leftBand;
595
    s->rightBand = rightBand;
596 de6d9b64 Fabrice Bellard
    
597 ab6d194a Michael Niedermayer
    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
598
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
599 de6d9b64 Fabrice Bellard
    
600 ab6d194a Michael Niedermayer
    build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
601
    build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
602 de6d9b64 Fabrice Bellard
603
    return s;
604
 fail:
605 6000abfa Fabrice Bellard
    av_free(s);
606 de6d9b64 Fabrice Bellard
    return NULL;
607
}
608
609
void img_resample(ImgReSampleContext *s, 
610
                  AVPicture *output, AVPicture *input)
611
{
612
    int i, shift;
613
614
    for(i=0;i<3;i++) {
615
        shift = (i == 0) ? 0 : 1;
616
        component_resample(s, output->data[i], output->linesize[i], 
617
                           s->owidth >> shift, s->oheight >> shift,
618 ab6d194a Michael Niedermayer
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
619
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
620
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
621 de6d9b64 Fabrice Bellard
    }
622
}
623
624
void img_resample_close(ImgReSampleContext *s)
625
{
626 6000abfa Fabrice Bellard
    av_free(s->line_buf);
627
    av_free(s);
628 de6d9b64 Fabrice Bellard
}
629
630
#ifdef TEST
631
632
void *av_mallocz(int size)
633
{
634
    void *ptr;
635
    ptr = malloc(size);
636
    memset(ptr, 0, size);
637
    return ptr;
638
}
639
640 ab6d194a Michael Niedermayer
void av_free(void *ptr)
641
{
642
    /* XXX: this test should not be needed on most libcs */
643
    if (ptr)
644
        free(ptr);
645
}
646
647 de6d9b64 Fabrice Bellard
/* input */
648
#define XSIZE 256
649
#define YSIZE 256
650 0c1a9eda Zdenek Kabelac
uint8_t img[XSIZE * YSIZE];
651 de6d9b64 Fabrice Bellard
652
/* output */
653
#define XSIZE1 512
654
#define YSIZE1 512
655 0c1a9eda Zdenek Kabelac
uint8_t img1[XSIZE1 * YSIZE1];
656
uint8_t img2[XSIZE1 * YSIZE1];
657 de6d9b64 Fabrice Bellard
658 0c1a9eda Zdenek Kabelac
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
659 de6d9b64 Fabrice Bellard
{
660
    FILE *f;
661
    f=fopen(filename,"w");
662
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
663
    fwrite(img,1, xsize * ysize,f);
664
    fclose(f);
665
}
666
667 0c1a9eda Zdenek Kabelac
static void dump_filter(int16_t *filter)
668 de6d9b64 Fabrice Bellard
{
669
    int i, ph;
670
671
    for(ph=0;ph<NB_PHASES;ph++) {
672
        printf("%2d: ", ph);
673
        for(i=0;i<NB_TAPS;i++) {
674
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
675
        }
676
        printf("\n");
677
    }
678
}
679
680 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
681 6acce86b Michael Niedermayer
int mm_flags;
682 de6d9b64 Fabrice Bellard
#endif
683
684
int main(int argc, char **argv)
685
{
686
    int x, y, v, i, xsize, ysize;
687
    ImgReSampleContext *s;
688
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
689
    char buf[256];
690
691
    /* build test image */
692
    for(y=0;y<YSIZE;y++) {
693
        for(x=0;x<XSIZE;x++) {
694
            if (x < XSIZE/2 && y < YSIZE/2) {
695
                if (x < XSIZE/4 && y < YSIZE/4) {
696
                    if ((x % 10) <= 6 &&
697
                        (y % 10) <= 6)
698
                        v = 0xff;
699
                    else
700
                        v = 0x00;
701
                } else if (x < XSIZE/4) {
702
                    if (x & 1) 
703
                        v = 0xff;
704
                    else 
705
                        v = 0;
706
                } else if (y < XSIZE/4) {
707
                    if (y & 1) 
708
                        v = 0xff;
709
                    else 
710
                        v = 0;
711
                } else {
712
                    if (y < YSIZE*3/8) {
713
                        if ((y+x) & 1) 
714
                            v = 0xff;
715
                        else 
716
                            v = 0;
717
                    } else {
718
                        if (((x+3) % 4) <= 1 &&
719
                            ((y+3) % 4) <= 1)
720
                            v = 0xff;
721
                        else
722
                            v = 0x00;
723
                    }
724
                }
725
            } else if (x < XSIZE/2) {
726
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
727
            } else if (y < XSIZE/2) {
728
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
729
            } else {
730
                v = ((x + y - XSIZE) * 255) / XSIZE;
731
            }
732 ab6d194a Michael Niedermayer
            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
733 de6d9b64 Fabrice Bellard
        }
734
    }
735
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
736
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
737
        fact = factors[i];
738
        xsize = (int)(XSIZE * fact);
739 ab6d194a Michael Niedermayer
        ysize = (int)((YSIZE - 100) * fact);
740 6acce86b Michael Niedermayer
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
741 de6d9b64 Fabrice Bellard
        printf("Factor=%0.2f\n", fact);
742
        dump_filter(&s->h_filters[0][0]);
743
        component_resample(s, img1, xsize, xsize, ysize,
744 ab6d194a Michael Niedermayer
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
745 de6d9b64 Fabrice Bellard
        img_resample_close(s);
746
747
        sprintf(buf, "/tmp/out%d.pgm", i);
748
        save_pgm(buf, img1, xsize, ysize);
749
    }
750
751
    /* mmx test */
752 980fc7b8 Fabrice Bellard
#ifdef HAVE_MMX
753 de6d9b64 Fabrice Bellard
    printf("MMX test\n");
754
    fact = 0.72;
755
    xsize = (int)(XSIZE * fact);
756
    ysize = (int)(YSIZE * fact);
757
    mm_flags = MM_MMX;
758
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
759
    component_resample(s, img1, xsize, xsize, ysize,
760
                       img, XSIZE, XSIZE, YSIZE);
761
762
    mm_flags = 0;
763
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
764
    component_resample(s, img2, xsize, xsize, ysize,
765
                       img, XSIZE, XSIZE, YSIZE);
766
    if (memcmp(img1, img2, xsize * ysize) != 0) {
767
        fprintf(stderr, "mmx error\n");
768
        exit(1);
769
    }
770
    printf("MMX OK\n");
771
#endif
772
    return 0;
773
}
774
775
#endif