Statistics
| Branch: | Revision:

ffmpeg / libavcodec / imgresample.c @ 2b647ac8

History | View | Annotate | Download (23.4 KB)

1
/*
2
 * High quality image resampling with polyphase filters 
3
 * Copyright (c) 2001 Fabrice Bellard.
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
 
20
/**
21
 * @file imgresample.c
22
 * High quality image resampling with polyphase filters.
23
 */
24
 
25
#include "avcodec.h"
26
#include "dsputil.h"
27

    
28
#ifdef USE_FASTMEMCPY
29
#include "fastmemcpy.h"
30
#endif
31

    
32
/* Number of picture components (planes). */
#define NB_COMPONENTS 3

/*
 * Filter bank geometry: NB_PHASES sub-sample phases, each made of
 * NB_TAPS coefficients.
 */
#define PHASE_BITS 4
#define NB_PHASES  (1 << PHASE_BITS)
#define NB_TAPS    4
#define FCENTER    1  /* index of the center of the filter */
//#define TEST    1  /* Test it */

/* Source positions are fixed point values with POS_FRAC_BITS fractional bits. */
#define POS_FRAC_BITS 16
#define POS_FRAC      (1 << POS_FRAC_BITS)

/*
 * Filter coefficients are scaled so that each phase sums to 1 << FILTER_BITS.
 * 6 bits precision is needed for MMX
 */
#define FILTER_BITS   8

/*
 * Height of the ring buffer of horizontally filtered lines; the allocation
 * adds NB_TAPS more lines on top of this.
 */
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46

    
47
struct ImgReSampleContext {
48
    int iwidth, iheight, owidth, oheight;
49
    int topBand, bottomBand, leftBand, rightBand;
50
    int padtop, padbottom, padleft, padright;
51
    int pad_owidth, pad_oheight;
52
    int h_incr, v_incr;
53
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
54
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
55
    uint8_t *line_buf;
56
};
57

    
58
static inline int get_phase(int pos)
59
{
60
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
61
}
62

    
63
/* This function must be optimized */
64
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
65
                            int src_width, int src_start, int src_incr,
66
                            int16_t *filters)
67
{
68
    int src_pos, phase, sum, i;
69
    const uint8_t *s;
70
    int16_t *filter;
71

    
72
    src_pos = src_start;
73
    for(i=0;i<dst_width;i++) {
74
#ifdef TEST
75
        /* test */
76
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
77
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
78
            av_abort();
79
#endif
80
        s = src + (src_pos >> POS_FRAC_BITS);
81
        phase = get_phase(src_pos);
82
        filter = filters + phase * NB_TAPS;
83
#if NB_TAPS == 4
84
        sum = s[0] * filter[0] +
85
            s[1] * filter[1] +
86
            s[2] * filter[2] +
87
            s[3] * filter[3];
88
#else
89
        {
90
            int j;
91
            sum = 0;
92
            for(j=0;j<NB_TAPS;j++)
93
                sum += s[j] * filter[j];
94
        }
95
#endif
96
        sum = sum >> FILTER_BITS;
97
        if (sum < 0)
98
            sum = 0;
99
        else if (sum > 255)
100
            sum = 255;
101
        dst[0] = sum;
102
        src_pos += src_incr;
103
        dst++;
104
    }
105
}
106

    
107
/* This function must be optimized */
108
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
109
                       int wrap, int16_t *filter)
110
{
111
    int sum, i;
112
    const uint8_t *s;
113

    
114
    s = src;
115
    for(i=0;i<dst_width;i++) {
116
#if NB_TAPS == 4
117
        sum = s[0 * wrap] * filter[0] +
118
            s[1 * wrap] * filter[1] +
119
            s[2 * wrap] * filter[2] +
120
            s[3 * wrap] * filter[3];
121
#else
122
        {
123
            int j;
124
            uint8_t *s1 = s;
125

    
126
            sum = 0;
127
            for(j=0;j<NB_TAPS;j++) {
128
                sum += s1[0] * filter[j];
129
                s1 += wrap;
130
            }
131
        }
132
#endif
133
        sum = sum >> FILTER_BITS;
134
        if (sum < 0)
135
            sum = 0;
136
        else if (sum > 255)
137
            sum = 255;
138
        dst[0] = sum;
139
        dst++;
140
        s++;
141
    }
142
}
143

    
144
#ifdef HAVE_MMX
145

    
146
#include "i386/mmx.h"
147

    
148
/*
 * Filter one output sample with MMX and leave it in the low dword of reg.
 *
 * Uses the caller's locals s, phase, filter, src, src_pos, src_incr and
 * filters, expects mm7 to hold zero and clobbers mm6.
 * NOTE(review): operand order is assumed to be (source, destination), as
 * the _m2r/_r2r macro names suggest - confirm against i386/mmx.h.
 */
#define FILTER4(reg) \
do {\
        /* source window and coefficients for the current position */\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        /* load the source bytes and widen the low four to words */\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        /* multiply by the four coefficients, adding adjacent pairs */\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        /* add the two partial sums together */\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        /* drop the coefficient scaling */\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
} while (0)
163

    
164
/*
 * Debug helper: print the content of an MMX register in hexadecimal.
 * Needs a local "mmx_t tmp" in the calling function.
 *
 * The two statements are braced so that the macro stays a single statement
 * inside an unbraced if/else, and the value is printed with the standard
 * "ll" length modifier ("L" is only defined for floating point conversions).
 */
#define DUMP(reg) \
{\
        movq_r2m(reg, tmp);\
        printf(#reg "=%016llx\n", (unsigned long long)tmp.uq);\
}
165

    
166
/* XXX: do four pixels at a time */
167
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
168
                                 const uint8_t *src, int src_width,
169
                                 int src_start, int src_incr, int16_t *filters)
170
{
171
    int src_pos, phase;
172
    const uint8_t *s;
173
    int16_t *filter;
174
    mmx_t tmp;
175
    
176
    src_pos = src_start;
177
    pxor_r2r(mm7, mm7);
178

    
179
    while (dst_width >= 4) {
180

    
181
        FILTER4(mm0);
182
        FILTER4(mm1);
183
        FILTER4(mm2);
184
        FILTER4(mm3);
185

    
186
        packuswb_r2r(mm7, mm0);
187
        packuswb_r2r(mm7, mm1);
188
        packuswb_r2r(mm7, mm3);
189
        packuswb_r2r(mm7, mm2);
190
        movq_r2m(mm0, tmp);
191
        dst[0] = tmp.ub[0];
192
        movq_r2m(mm1, tmp);
193
        dst[1] = tmp.ub[0];
194
        movq_r2m(mm2, tmp);
195
        dst[2] = tmp.ub[0];
196
        movq_r2m(mm3, tmp);
197
        dst[3] = tmp.ub[0];
198
        dst += 4;
199
        dst_width -= 4;
200
    }
201
    while (dst_width > 0) {
202
        FILTER4(mm0);
203
        packuswb_r2r(mm7, mm0);
204
        movq_r2m(mm0, tmp);
205
        dst[0] = tmp.ub[0];
206
        dst++;
207
        dst_width--;
208
    }
209
    emms();
210
}
211

    
212
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
213
                            int wrap, int16_t *filter)
214
{
215
    int sum, i, v;
216
    const uint8_t *s;
217
    mmx_t tmp;
218
    mmx_t coefs[4];
219
    
220
    for(i=0;i<4;i++) {
221
        v = filter[i];
222
        coefs[i].uw[0] = v;
223
        coefs[i].uw[1] = v;
224
        coefs[i].uw[2] = v;
225
        coefs[i].uw[3] = v;
226
    }
227
    
228
    pxor_r2r(mm7, mm7);
229
    s = src;
230
    while (dst_width >= 4) {
231
        movq_m2r(s[0 * wrap], mm0);
232
        punpcklbw_r2r(mm7, mm0);
233
        movq_m2r(s[1 * wrap], mm1);
234
        punpcklbw_r2r(mm7, mm1);
235
        movq_m2r(s[2 * wrap], mm2);
236
        punpcklbw_r2r(mm7, mm2);
237
        movq_m2r(s[3 * wrap], mm3);
238
        punpcklbw_r2r(mm7, mm3);
239

    
240
        pmullw_m2r(coefs[0], mm0);
241
        pmullw_m2r(coefs[1], mm1);
242
        pmullw_m2r(coefs[2], mm2);
243
        pmullw_m2r(coefs[3], mm3);
244

    
245
        paddw_r2r(mm1, mm0);
246
        paddw_r2r(mm3, mm2);
247
        paddw_r2r(mm2, mm0);
248
        psraw_i2r(FILTER_BITS, mm0);
249
        
250
        packuswb_r2r(mm7, mm0);
251
        movq_r2m(mm0, tmp);
252

    
253
        *(uint32_t *)dst = tmp.ud[0];
254
        dst += 4;
255
        s += 4;
256
        dst_width -= 4;
257
    }
258
    while (dst_width > 0) {
259
        sum = s[0 * wrap] * filter[0] +
260
            s[1 * wrap] * filter[1] +
261
            s[2 * wrap] * filter[2] +
262
            s[3 * wrap] * filter[3];
263
        sum = sum >> FILTER_BITS;
264
        if (sum < 0)
265
            sum = 0;
266
        else if (sum > 255)
267
            sum = 255;
268
        dst[0] = sum;
269
        dst++;
270
        s++;
271
        dst_width--;
272
    }
273
    emms();
274
}
275
#endif
276

    
277
#ifdef HAVE_ALTIVEC
278
typedef        union {
279
    vector unsigned char v;
280
    unsigned char c[16];
281
} vec_uc_t;
282

    
283
typedef        union {
284
    vector signed short v;
285
    signed short s[8];
286
} vec_ss_t;
287

    
288
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
289
                          int wrap, int16_t *filter)
290
{
291
    int sum, i;
292
    const uint8_t *s;
293
    vector unsigned char *tv, tmp, dstv, zero;
294
    vec_ss_t srchv[4], srclv[4], fv[4];
295
    vector signed short zeros, sumhv, sumlv;    
296
    s = src;
297

    
298
    for(i=0;i<4;i++)
299
    {
300
        /*
301
           The vec_madds later on does an implicit >>15 on the result.
302
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
303
           a signed short, we have just enough bits to pre-shift our
304
           filter constants <<7 to compensate for vec_madds.
305
        */
306
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
307
        fv[i].v = vec_splat(fv[i].v, 0);
308
    }
309
    
310
    zero = vec_splat_u8(0);
311
    zeros = vec_splat_s16(0);
312

    
313

    
314
    /*
315
       When we're resampling, we'd ideally like both our input buffers,
316
       and output buffers to be 16-byte aligned, so we can do both aligned
317
       reads and writes. Sadly we can't always have this at the moment, so
318
       we opt for aligned writes, as unaligned writes have a huge overhead.
319
       To do this, do enough scalar resamples to get dst 16-byte aligned.
320
    */
321
    i = (-(int)dst) & 0xf;
322
    while(i>0) {
323
        sum = s[0 * wrap] * filter[0] +
324
        s[1 * wrap] * filter[1] +
325
        s[2 * wrap] * filter[2] +
326
        s[3 * wrap] * filter[3];
327
        sum = sum >> FILTER_BITS;
328
        if (sum<0) sum = 0; else if (sum>255) sum=255;
329
        dst[0] = sum;
330
        dst++;
331
        s++;
332
        dst_width--;
333
        i--;
334
    }
335
    
336
    /* Do our altivec resampling on 16 pixels at once. */
337
    while(dst_width>=16) {
338
        /*
339
           Read 16 (potentially unaligned) bytes from each of
340
           4 lines into 4 vectors, and split them into shorts.
341
           Interleave the multipy/accumulate for the resample
342
           filter with the loads to hide the 3 cycle latency
343
           the vec_madds have.
344
        */
345
        tv = (vector unsigned char *) &s[0 * wrap];
346
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
347
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
348
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
349
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
350
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
351

    
352
        tv = (vector unsigned char *) &s[1 * wrap];
353
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
354
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
355
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
356
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
357
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
358

    
359
        tv = (vector unsigned char *) &s[2 * wrap];
360
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
361
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
362
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
363
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
364
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
365

    
366
        tv = (vector unsigned char *) &s[3 * wrap];
367
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
368
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
369
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
370
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
371
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
372
    
373
        /*
374
           Pack the results into our destination vector,
375
           and do an aligned write of that back to memory.
376
        */
377
        dstv = vec_packsu(sumhv, sumlv) ;
378
        vec_st(dstv, 0, (vector unsigned char *) dst);
379
        
380
        dst+=16;
381
        s+=16;
382
        dst_width-=16;
383
    }
384

    
385
    /*
386
       If there are any leftover pixels, resample them
387
       with the slow scalar method.
388
    */
389
    while(dst_width>0) {
390
        sum = s[0 * wrap] * filter[0] +
391
        s[1 * wrap] * filter[1] +
392
        s[2 * wrap] * filter[2] +
393
        s[3 * wrap] * filter[3];
394
        sum = sum >> FILTER_BITS;
395
        if (sum<0) sum = 0; else if (sum>255) sum=255;
396
        dst[0] = sum;
397
        dst++;
398
        s++;
399
        dst_width--;
400
    }
401
}
402
#endif
403

    
404
/* slow version to handle limit cases. Does not need optimisation */
405
static void h_resample_slow(uint8_t *dst, int dst_width,
406
                            const uint8_t *src, int src_width,
407
                            int src_start, int src_incr, int16_t *filters)
408
{
409
    int src_pos, phase, sum, j, v, i;
410
    const uint8_t *s, *src_end;
411
    int16_t *filter;
412

    
413
    src_end = src + src_width;
414
    src_pos = src_start;
415
    for(i=0;i<dst_width;i++) {
416
        s = src + (src_pos >> POS_FRAC_BITS);
417
        phase = get_phase(src_pos);
418
        filter = filters + phase * NB_TAPS;
419
        sum = 0;
420
        for(j=0;j<NB_TAPS;j++) {
421
            if (s < src)
422
                v = src[0];
423
            else if (s >= src_end)
424
                v = src_end[-1];
425
            else
426
                v = s[0];
427
            sum += v * filter[j];
428
            s++;
429
        }
430
        sum = sum >> FILTER_BITS;
431
        if (sum < 0)
432
            sum = 0;
433
        else if (sum > 255)
434
            sum = 255;
435
        dst[0] = sum;
436
        src_pos += src_incr;
437
        dst++;
438
    }
439
}
440

    
441
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
442
                       int src_width, int src_start, int src_incr,
443
                       int16_t *filters)
444
{
445
    int n, src_end;
446

    
447
    if (src_start < 0) {
448
        n = (0 - src_start + src_incr - 1) / src_incr;
449
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
450
        dst += n;
451
        dst_width -= n;
452
        src_start += n * src_incr;
453
    }
454
    src_end = src_start + dst_width * src_incr;
455
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
456
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / 
457
            src_incr;
458
    } else {
459
        n = dst_width;
460
    }
461
#ifdef HAVE_MMX
462
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
463
        h_resample_fast4_mmx(dst, n, 
464
                             src, src_width, src_start, src_incr, filters);
465
    else
466
#endif
467
        h_resample_fast(dst, n, 
468
                        src, src_width, src_start, src_incr, filters);
469
    if (n < dst_width) {
470
        dst += n;
471
        dst_width -= n;
472
        src_start += n * src_incr;
473
        h_resample_slow(dst, dst_width, 
474
                        src, src_width, src_start, src_incr, filters);
475
    }
476
}
477

    
478
/**
 * Resample one picture component.
 *
 * Source lines are filtered horizontally on demand and stored in the
 * line_buf ring buffer, one line of owidth bytes per slot. The vertical
 * filter then combines the NB_TAPS most recent lines into each output line.
 *
 * Slots NB_TAPS .. LINE_BUF_HEIGHT + NB_TAPS - 1 are written in turn. A line
 * written to a slot >= LINE_BUF_HEIGHT is also copied LINE_BUF_HEIGHT slots
 * lower, so the NB_TAPS lines ending at the current slot are always
 * contiguous in memory.
 *
 * @param s       resampler context (filters, increments and line buffer)
 * @param output  first output line
 * @param owrap   distance in bytes between two output lines
 * @param owidth  output width
 * @param oheight output height
 * @param input   first input line
 * @param iwrap   distance in bytes between two input lines
 * @param iwidth  input width
 * @param iheight input height
 */
static void component_resample(ImgReSampleContext *s, 
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, needed_y, last_src_y, ring_y, clamped_y, row;
    uint8_t *filtered, *window;
    int16_t *vfilter;

    last_src_y = -FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
    /* position in ring buffer */
    ring_y = NB_TAPS;

    for (row = 0; row < oheight; row++, output += owrap, src_y += s->v_incr) {
        /* apply horizontal filter on new lines from input if needed */
        needed_y = src_y >> POS_FRAC_BITS;
        while (last_src_y < needed_y) {
            ring_y++;
            if (ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;

            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            clamped_y = last_src_y;
            if (clamped_y < 0)
                clamped_y = 0;
            else if (clamped_y >= iheight)
                clamped_y = iheight - 1;

            /* apply filter and handle limit cases correctly */
            filtered = s->line_buf + ring_y * owidth;
            h_resample(filtered, owidth,
                       input + clamped_y * iwrap, iwidth,
                       -FCENTER * POS_FRAC, s->h_incr,
                       &s->h_filters[0][0]);

            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT)
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       filtered, owidth);
        }

        /* apply vertical filter on the NB_TAPS lines ending at ring_y */
        window  = s->line_buf + (ring_y - NB_TAPS + 1) * owidth;
        vfilter = &s->v_filters[get_phase(src_y)][0];
#ifdef HAVE_MMX
        /* deactivated MMX because loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth, window, owidth, vfilter);
        else
#endif
#ifdef HAVE_ALTIVEC
        if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
            v_resample16_altivec(output, owidth, window, owidth, vfilter);
        else
#endif
            v_resample(output, owidth, window, owidth, vfilter);
    }
}
542

    
543
/* XXX: the following filter is quite naive, but it seems to suffice
544
   for 4 taps */
545
static void build_filter(int16_t *filter, float factor)
546
{
547
    int ph, i, v;
548
    float x, y, tab[NB_TAPS], norm, mult, target;
549

    
550
    /* if upsampling, only need to interpolate, no filter */
551
    if (factor > 1.0)
552
        factor = 1.0;
553

    
554
    for(ph=0;ph<NB_PHASES;ph++) {
555
        norm = 0;
556
        for(i=0;i<NB_TAPS;i++) {
557
#if 1
558
            const float d= -0.5; //first order derivative = -0.5
559
            x = fabs(((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor);
560
            if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
561
            else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
562
#else
563
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
564
            if (x == 0)
565
                y = 1.0;
566
            else
567
                y = sin(x) / x;
568
#endif
569
            tab[i] = y;
570
            norm += y;
571
        }
572

    
573
        /* normalize so that an uniform color remains the same */
574
        target= 1 << FILTER_BITS;
575
        for(i=0;i<NB_TAPS;i++) {
576
            mult = target / norm;
577
            v = lrintf(tab[i] * mult);
578
            filter[ph * NB_TAPS + i] = v;
579
            norm -= tab[i];
580
            target -= v;
581
        }
582
    }
583
}
584

    
585
/**
 * Create a resampler without any cropping or padding.
 *
 * @return what img_resample_full_init() returns when all bands and pads
 *         are zero
 */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
    const int no_band = 0;
    const int no_pad  = 0;

    return img_resample_full_init(owidth, oheight, iwidth, iheight,
                                  no_band, no_band, no_band, no_band,
                                  no_pad, no_pad, no_pad, no_pad);
}
591

    
592
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
593
                                      int iwidth, int iheight,
594
                                      int topBand, int bottomBand,
595
        int leftBand, int rightBand,
596
        int padtop, int padbottom,
597
        int padleft, int padright)
598
{
599
    ImgReSampleContext *s;
600

    
601
    s = av_mallocz(sizeof(ImgReSampleContext));
602
    if (!s)
603
        return NULL;
604
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
605
    if (!s->line_buf) 
606
        goto fail;
607
    
608
    s->owidth = owidth;
609
    s->oheight = oheight;
610
    s->iwidth = iwidth;
611
    s->iheight = iheight;
612
  
613
    s->topBand = topBand;
614
    s->bottomBand = bottomBand;
615
    s->leftBand = leftBand;
616
    s->rightBand = rightBand;
617
    
618
    s->padtop = padtop;
619
    s->padbottom = padbottom;
620
    s->padleft = padleft;
621
    s->padright = padright;
622

    
623
    s->pad_owidth = owidth - (padleft + padright);
624
    s->pad_oheight = oheight - (padtop + padbottom);
625

    
626
    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
627
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; 
628

    
629
    build_filter(&s->h_filters[0][0], (float) s->pad_owidth  / 
630
            (float) (iwidth - leftBand - rightBand));
631
    build_filter(&s->v_filters[0][0], (float) s->pad_oheight / 
632
            (float) (iheight - topBand - bottomBand));
633

    
634
    return s;
635
fail:
636
    av_free(s);
637
    return NULL;
638
}
639

    
640
/**
 * Resample the three components of a picture.
 *
 * Component 0 is processed at full size; for components 1 and 2 every
 * size and offset is shifted right by one.
 *
 * @param s      resampler context
 * @param output destination picture
 * @param input  source picture
 */
void img_resample(ImgReSampleContext *s, 
                  AVPicture *output, const AVPicture *input)
{
    int plane;

    for (plane = 0; plane < 3; plane++) {
        const int shift      = plane ? 1 : 0;
        const int out_stride = output->linesize[plane];
        const int in_stride  = input->linesize[plane];
        /* skip the top and left padding of the output */
        uint8_t *out_start = output->data[plane] +
            ((out_stride * s->padtop + s->padleft) >> shift);
        /* skip the top and left bands of the input */
        uint8_t *in_start = input->data[plane] +
            in_stride * (s->topBand >> shift) + (s->leftBand >> shift);

        component_resample(s, out_start, out_stride,
                           s->pad_owidth >> shift, s->pad_oheight >> shift,
                           in_start, in_stride,
                           (s->iwidth - s->leftBand - s->rightBand) >> shift,
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
661

    
662
/**
 * Free a resampler context and its line buffer.
 *
 * @param s context to free; NULL is accepted and ignored
 */
void img_resample_close(ImgReSampleContext *s)
{
    if (!s)
        return;
    av_free(s->line_buf);
    av_free(s);
}
667

    
668
#ifdef TEST
669

    
670
/**
 * Test build replacement: allocate a zero filled block.
 *
 * @param size number of bytes to allocate
 * @return the block, or NULL if size is negative or the allocation fails
 */
void *av_mallocz(int size)
{
    void *ptr;

    if (size < 0)
        return NULL;
    ptr = malloc(size);
    /* only clear the block if the allocation succeeded */
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
677

    
678
/**
 * Test build replacement: release a block returned by av_mallocz().
 *
 * @param ptr block to release; NULL is allowed since free(NULL) does nothing
 */
void av_free(void *ptr)
{
    free(ptr);
}
684

    
685
/* input */
686
/* size of the generated test picture */
#define XSIZE 256
#define YSIZE 256
/* the generated test picture, one byte per pixel */
uint8_t img[XSIZE * YSIZE];

/* output */
/* largest size a resampled picture is stored in */
#define XSIZE1 512
#define YSIZE1 512
/* result buffers; the MMX test compares img1 against img2 */
uint8_t img1[XSIZE1 * YSIZE1];
uint8_t img2[XSIZE1 * YSIZE1];
695

    
696
/**
 * Write an 8 bit grayscale picture as a binary PGM (P5) file.
 *
 * A file that cannot be opened or written is reported on stderr and
 * otherwise ignored.
 *
 * @param filename path of the file to create
 * @param img      xsize * ysize pixels, one byte each
 * @param xsize    picture width
 * @param ysize    picture height
 */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;
    size_t count = (size_t)xsize * ysize;

    /* binary mode: the pixel data must not go through newline translation */
    f = fopen(filename, "wb");
    if (!f) {
        perror(filename);
        return;
    }
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
    if (fwrite(img, 1, count, f) != count)
        perror(filename);
    if (fclose(f) != 0)
        perror(filename);
}
704

    
705
static void dump_filter(int16_t *filter)
706
{
707
    int i, ph;
708

    
709
    for(ph=0;ph<NB_PHASES;ph++) {
710
        printf("%2d: ", ph);
711
        for(i=0;i<NB_TAPS;i++) {
712
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
713
        }
714
        printf("\n");
715
    }
716
}
717

    
718
#ifdef HAVE_MMX
/* definition of the CPU flags tested by the resampler, for the test build */
int mm_flags;
#endif
721

    
722
int main(int argc, char **argv)
723
{
724
    int x, y, v, i, xsize, ysize;
725
    ImgReSampleContext *s;
726
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
727
    char buf[256];
728

    
729
    /* build test image */
730
    for(y=0;y<YSIZE;y++) {
731
        for(x=0;x<XSIZE;x++) {
732
            if (x < XSIZE/2 && y < YSIZE/2) {
733
                if (x < XSIZE/4 && y < YSIZE/4) {
734
                    if ((x % 10) <= 6 &&
735
                        (y % 10) <= 6)
736
                        v = 0xff;
737
                    else
738
                        v = 0x00;
739
                } else if (x < XSIZE/4) {
740
                    if (x & 1) 
741
                        v = 0xff;
742
                    else 
743
                        v = 0;
744
                } else if (y < XSIZE/4) {
745
                    if (y & 1) 
746
                        v = 0xff;
747
                    else 
748
                        v = 0;
749
                } else {
750
                    if (y < YSIZE*3/8) {
751
                        if ((y+x) & 1) 
752
                            v = 0xff;
753
                        else 
754
                            v = 0;
755
                    } else {
756
                        if (((x+3) % 4) <= 1 &&
757
                            ((y+3) % 4) <= 1)
758
                            v = 0xff;
759
                        else
760
                            v = 0x00;
761
                    }
762
                }
763
            } else if (x < XSIZE/2) {
764
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
765
            } else if (y < XSIZE/2) {
766
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
767
            } else {
768
                v = ((x + y - XSIZE) * 255) / XSIZE;
769
            }
770
            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
771
        }
772
    }
773
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
774
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
775
        fact = factors[i];
776
        xsize = (int)(XSIZE * fact);
777
        ysize = (int)((YSIZE - 100) * fact);
778
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
779
        printf("Factor=%0.2f\n", fact);
780
        dump_filter(&s->h_filters[0][0]);
781
        component_resample(s, img1, xsize, xsize, ysize,
782
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
783
        img_resample_close(s);
784

    
785
        sprintf(buf, "/tmp/out%d.pgm", i);
786
        save_pgm(buf, img1, xsize, ysize);
787
    }
788

    
789
    /* mmx test */
790
#ifdef HAVE_MMX
791
    printf("MMX test\n");
792
    fact = 0.72;
793
    xsize = (int)(XSIZE * fact);
794
    ysize = (int)(YSIZE * fact);
795
    mm_flags = MM_MMX;
796
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
797
    component_resample(s, img1, xsize, xsize, ysize,
798
                       img, XSIZE, XSIZE, YSIZE);
799

    
800
    mm_flags = 0;
801
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
802
    component_resample(s, img2, xsize, xsize, ysize,
803
                       img, XSIZE, XSIZE, YSIZE);
804
    if (memcmp(img1, img2, xsize * ysize) != 0) {
805
        fprintf(stderr, "mmx error\n");
806
        exit(1);
807
    }
808
    printf("MMX OK\n");
809
#endif
810
    return 0;
811
}
812

    
813
#endif