ffmpeg / libavcodec / imgresample.c @ ea937d01
/*
 * High quality image resampling with polyphase filters
 * Copyright (c) 2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
#include "avcodec.h"
#include "dsputil.h"

#ifdef USE_FASTMEMCPY
#include "fastmemcpy.h"
#endif
extern int mm_flags;

#define NB_COMPONENTS 3

#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)
/* 6 bits precision is needed for MMX */
#define FILTER_BITS   8

#define LINE_BUF_HEIGHT (NB_TAPS * 4)
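
/*
 * Worked example of the fixed-point position format: source positions are
 * kept in 16.16 fixed point (POS_FRAC_BITS fractional bits), and the top
 * PHASE_BITS bits of the fraction select one of the NB_PHASES filter phases.
 * A position of 2.5 pixels is 2.5 * POS_FRAC = 0x28000; get_phase() below
 * returns (0x28000 >> 12) & 0xf = 8, i.e. the half-pixel phase.
 */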

struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
    int h_incr, v_incr;
    INT16 h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    INT16 v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    UINT8 *line_buf;
};

static inline int get_phase(int pos)
{
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
}

/* This function must be optimized */
static void h_resample_fast(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                            int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase, sum, i;
    UINT8 *s;
    INT16 *filter;

    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
#ifdef TEST
        /* test */
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
            av_abort();
#endif
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
#if NB_TAPS == 4
        sum = s[0] * filter[0] +
            s[1] * filter[1] +
            s[2] * filter[2] +
            s[3] * filter[3];
#else
        {
            int j;
            sum = 0;
            for(j=0;j<NB_TAPS;j++)
                sum += s[j] * filter[j];
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

/* This function must be optimized */
static void v_resample(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                       INT16 *filter)
{
    int sum, i;
    UINT8 *s;

    s = src;
    for(i=0;i<dst_width;i++) {
#if NB_TAPS == 4
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
#else
        {
            int j;
            UINT8 *s1 = s;

            sum = 0;
            for(j=0;j<NB_TAPS;j++) {
                sum += s1[0] * filter[j];
                s1 += wrap;
            }
        }
#endif
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
    }
}

#ifdef HAVE_MMX

#include "i386/mmx.h"

#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}
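
/*
 * Note on FILTER4: the four source bytes are zero-extended to 16-bit words,
 * pmaddwd multiplies them by the four filter taps and produces two 32-bit
 * partial sums (s0*f0 + s1*f1 and s2*f2 + s3*f3), the psrlq/paddd pair folds
 * the high partial sum into the low doubleword, and psrad rescales the total
 * by FILTER_BITS.  The result is packed to a clamped byte later with packuswb.
 */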

#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* XXX: do four pixels at a time */
static void h_resample_fast4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                                 int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase;
    UINT8 *s;
    INT16 *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);

    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}

static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                            INT16 *filter)
{
    int sum, i, v;
    UINT8 *s;
    mmx_t tmp;
    mmx_t coefs[4];

    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);
    s = src;
    while (dst_width >= 4) {
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);

        *(UINT32 *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif

#ifdef HAVE_ALTIVEC
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

void v_resample16_altivec(UINT8 *dst, int dst_width, UINT8 *src, int wrap,
                            INT16 *filter)
{
    int sum, i;
    uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }
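
    /*
     * Worked example of the pre-shift: a filter tap of 64 (0.25 on the
     * 1 << FILTER_BITS scale) becomes 64 << 7 = 8192, and vec_madds then
     * computes (pixel * 8192) >> 15 = pixel / 4, the same weighting the
     * scalar code gets from its final >> FILTER_BITS.
     */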

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input and output
       buffers to be 16-byte aligned, so we can do both aligned reads and
       writes. Sadly we can't always have this at the moment, so we opt for
       aligned writes, as unaligned writes have a huge overhead. To do this,
       do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
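    /*
     * (-(int)dst) & 0xf is the number of bytes from dst up to the next
     * 16-byte boundary; e.g. an address ending in 0x9 gives (-9) & 0xf = 7,
     * so seven pixels are done by the scalar loop below before the vector
     * loop takes over.
     */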
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multiply/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif

/* slow version to handle limit cases. Does not need optimisation */
static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                            int src_start, int src_incr, INT16 *filters)
{
    int src_pos, phase, sum, j, v, i;
    UINT8 *s, *src_end;
    INT16 *filter;

    src_end = src + src_width;
    src_pos = src_start;
    for(i=0;i<dst_width;i++) {
        s = src + (src_pos >> POS_FRAC_BITS);
        phase = get_phase(src_pos);
        filter = filters + phase * NB_TAPS;
        sum = 0;
        for(j=0;j<NB_TAPS;j++) {
            if (s < src)
                v = src[0];
            else if (s >= src_end)
                v = src_end[-1];
            else
                v = s[0];
            sum += v * filter[j];
            s++;
        }
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        src_pos += src_incr;
        dst++;
    }
}

static void h_resample(UINT8 *dst, int dst_width, UINT8 *src, int src_width,
                       int src_start, int src_incr, INT16 *filters)
{
    int n, src_end;
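    /*
     * Output pixels whose 4-tap footprint would read before src[0] or past
     * src[src_width - 1] are delegated to h_resample_slow(), which replicates
     * the edge samples; n counts the pixels the fast path can handle safely.
     */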

    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n,
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n,
                        src, src_width, src_start, src_incr, filters);
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width,
                        src, src_width, src_start, src_incr, filters);
    }
}

static void component_resample(ImgReSampleContext *s,
                               UINT8 *output, int owrap, int owidth, int oheight,
                               UINT8 *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    UINT8 *new_line, *src_line;

    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    ring_y = NB_TAPS; /* position in ring buffer */
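    /*
     * line_buf is a ring of LINE_BUF_HEIGHT + NB_TAPS horizontally filtered
     * lines of owidth bytes.  Lines stored at index LINE_BUF_HEIGHT or above
     * are also copied down to index ring_y - LINE_BUF_HEIGHT, so that after
     * ring_y wraps back to NB_TAPS the vertical filter can still read
     * NB_TAPS consecutive lines.
     */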
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions: replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth,
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);
            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX deactivated because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth,
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth,
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}

/* XXX: the following filter is quite naive, but it seems to suffice
   for 4 taps */
static void build_filter(INT16 *filter, float factor)
{
    int ph, i, v;
    float x, y, tab[NB_TAPS], norm, mult;

    /* if upsampling, only need to interpolate, no filter */
    if (factor > 1.0)
        factor = 1.0;

    for(ph=0;ph<NB_PHASES;ph++) {
        norm = 0;
        for(i=0;i<NB_TAPS;i++) {
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
            if (x == 0)
                y = 1.0;
            else
                y = sin(x) / x;
            tab[i] = y;
            norm += y;
        }

        /* normalize so that a uniform color remains the same */
        mult = (float)(1 << FILTER_BITS) / norm;
        for(i=0;i<NB_TAPS;i++) {
            v = (int)(tab[i] * mult);
            filter[ph * NB_TAPS + i] = v;
        }
    }
}
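
/*
 * Worked example: each tap is sin(x)/x with
 * x = M_PI * ((i - FCENTER) - ph / NB_PHASES) * factor, normalized so the
 * taps sum to 1 << FILTER_BITS.  For factor 1.0, phase 0, the taps fall on
 * integer multiples of pi and the filter degenerates to the identity
 * [0, 256, 0, 0]; the other phases interpolate between neighbouring pixels.
 */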

ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
        return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}

ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                      int iwidth, int iheight,
                                      int topBand, int bottomBand,
                                      int leftBand, int rightBand)
{
    ImgReSampleContext *s;

    s = av_mallocz(sizeof(ImgReSampleContext));
    if (!s)
        return NULL;
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
    if (!s->line_buf)
        goto fail;

    s->owidth = owidth;
    s->oheight = oheight;
    s->iwidth = iwidth;
    s->iheight = iheight;
    s->topBand = topBand;
    s->bottomBand = bottomBand;
    s->leftBand = leftBand;
    s->rightBand = rightBand;

    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
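    /* e.g. scaling a 352-pixel-wide input (no bands) to 176 pixels gives
       h_incr = 352 * POS_FRAC / 176 = 2 << POS_FRAC_BITS, a step of exactly
       two source pixels per output pixel */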

    build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
    build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));

    return s;
 fail:
    av_free(s);
    return NULL;
}

void img_resample(ImgReSampleContext *s,
                  AVPicture *output, AVPicture *input)
{
    int i, shift;

    for(i=0;i<3;i++) {
        shift = (i == 0) ? 0 : 1;
        component_resample(s, output->data[i], output->linesize[i],
                           s->owidth >> shift, s->oheight >> shift,
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}

void img_resample_close(ImgReSampleContext *s)
{
    av_free(s->line_buf);
    av_free(s);
}
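
/*
 * Minimal usage sketch (illustrative only; assumes the caller has already
 * allocated YUV 4:2:0 AVPictures in_pict and out_pict of the right sizes):
 *
 *     ImgReSampleContext *ctx = img_resample_init(out_w, out_h, in_w, in_h);
 *     if (ctx) {
 *         img_resample(ctx, &out_pict, &in_pict);
 *         img_resample_close(ctx);
 *     }
 */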

#ifdef TEST

void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    memset(ptr, 0, size);
    return ptr;
}

void av_free(void *ptr)
{
    /* XXX: this test should not be needed on most libcs */
    if (ptr)
        free(ptr);
}

/* input */
#define XSIZE 256
#define YSIZE 256
UINT8 img[XSIZE * YSIZE];

/* output */
#define XSIZE1 512
#define YSIZE1 512
UINT8 img1[XSIZE1 * YSIZE1];
UINT8 img2[XSIZE1 * YSIZE1];

void save_pgm(const char *filename, UINT8 *img, int xsize, int ysize)
{
    FILE *f;
    f=fopen(filename,"w");
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img,1, xsize * ysize,f);
    fclose(f);
}

static void dump_filter(INT16 *filter)
{
    int i, ph;

    for(ph=0;ph<NB_PHASES;ph++) {
        printf("%2d: ", ph);
        for(i=0;i<NB_TAPS;i++) {
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        }
        printf("\n");
    }
}

#ifdef HAVE_MMX
int mm_flags;
#endif

int main(int argc, char **argv)
{
    int x, y, v, i, xsize, ysize;
    ImgReSampleContext *s;
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
    char buf[256];

    /* build test image */
    for(y=0;y<YSIZE;y++) {
        for(x=0;x<XSIZE;x++) {
            if (x < XSIZE/2 && y < YSIZE/2) {
                if (x < XSIZE/4 && y < YSIZE/4) {
                    if ((x % 10) <= 6 &&
                        (y % 10) <= 6)
                        v = 0xff;
                    else
                        v = 0x00;
                } else if (x < XSIZE/4) {
                    if (x & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else if (y < XSIZE/4) {
                    if (y & 1)
                        v = 0xff;
                    else
                        v = 0;
                } else {
                    if (y < YSIZE*3/8) {
                        if ((y+x) & 1)
                            v = 0xff;
                        else
                            v = 0;
                    } else {
                        if (((x+3) % 4) <= 1 &&
                            ((y+3) % 4) <= 1)
                            v = 0xff;
                        else
                            v = 0x00;
                    }
                }
            } else if (x < XSIZE/2) {
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
            } else if (y < XSIZE/2) {
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
            } else {
                v = ((x + y - XSIZE) * 255) / XSIZE;
            }
            /* mirror the pattern; the "- 1" keeps the index inside img[]
               when x == 0 or y == 0 */
            img[(YSIZE - 1 - y) * XSIZE + (XSIZE - 1 - x)] = v;
        }
    }
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
        fact = factors[i];
        xsize = (int)(XSIZE * fact);
        ysize = (int)((YSIZE - 100) * fact);
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50, 50, 0, 0);
        printf("Factor=%0.2f\n", fact);
        dump_filter(&s->h_filters[0][0]);
        component_resample(s, img1, xsize, xsize, ysize,
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
        img_resample_close(s);

        sprintf(buf, "/tmp/out%d.pgm", i);
        save_pgm(buf, img1, xsize, ysize);
    }

    /* mmx test */
#ifdef HAVE_MMX
    printf("MMX test\n");
    fact = 0.72;
    xsize = (int)(XSIZE * fact);
    ysize = (int)(YSIZE * fact);
    mm_flags = MM_MMX;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img1, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);

    mm_flags = 0;
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
    component_resample(s, img2, xsize, xsize, ysize,
                       img, XSIZE, XSIZE, YSIZE);
    if (memcmp(img1, img2, xsize * ysize) != 0) {
        fprintf(stderr, "mmx error\n");
        exit(1);
    }
    printf("MMX OK\n");
#endif
    return 0;
}

#endif