Statistics
| Branch: | Revision:

ffmpeg / libavcodec / imgresample.c @ 8dbcc9f2

History | View | Annotate | Download (22.4 KB)

1
/*
2
 * High quality image resampling with polyphase filters 
3
 * Copyright (c) 2001 Fabrice Bellard.
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
 
20
/**
21
 * @file imgresample.c
22
 * High quality image resampling with polyphase filters .
23
 */
24
 
25
#include "avcodec.h"
26
#include "dsputil.h"
27

    
28
#ifdef USE_FASTMEMCPY
29
#include "fastmemcpy.h"
30
#endif
31

    
32
#define NB_COMPONENTS 3
33

    
34
#define PHASE_BITS 4
35
#define NB_PHASES  (1 << PHASE_BITS)
36
#define NB_TAPS    4
37
#define FCENTER    1  /* index of the center of the filter */
38
//#define TEST    1  /* Test it */
39

    
40
#define POS_FRAC_BITS 16
41
#define POS_FRAC      (1 << POS_FRAC_BITS)
42
/* 6 bits precision is needed for MMX */
43
#define FILTER_BITS   8
44

    
45
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46

    
47
/* State for polyphase image resampling: input/output geometry and
   cropping bands, the fixed-point per-output-sample source increments,
   the per-phase filter banks, and the line ring buffer used by
   component_resample(). */
struct ImgReSampleContext {
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
    int h_incr, v_incr; /* source step per output sample, in POS_FRAC units */
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
    uint8_t *line_buf;  /* ring buffer of horizontally resampled lines */
};
54

    
55
static inline int get_phase(int pos)
56
{
57
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
58
}
59

    
60
/* This function must be optimized */
61
static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
62
                            int src_width, int src_start, int src_incr,
63
                            int16_t *filters)
64
{
65
    int src_pos, phase, sum, i;
66
    const uint8_t *s;
67
    int16_t *filter;
68

    
69
    src_pos = src_start;
70
    for(i=0;i<dst_width;i++) {
71
#ifdef TEST
72
        /* test */
73
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
74
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
75
            av_abort();
76
#endif
77
        s = src + (src_pos >> POS_FRAC_BITS);
78
        phase = get_phase(src_pos);
79
        filter = filters + phase * NB_TAPS;
80
#if NB_TAPS == 4
81
        sum = s[0] * filter[0] +
82
            s[1] * filter[1] +
83
            s[2] * filter[2] +
84
            s[3] * filter[3];
85
#else
86
        {
87
            int j;
88
            sum = 0;
89
            for(j=0;j<NB_TAPS;j++)
90
                sum += s[j] * filter[j];
91
        }
92
#endif
93
        sum = sum >> FILTER_BITS;
94
        if (sum < 0)
95
            sum = 0;
96
        else if (sum > 255)
97
            sum = 255;
98
        dst[0] = sum;
99
        src_pos += src_incr;
100
        dst++;
101
    }
102
}
103

    
104
/* This function must be optimized */
105
static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
106
                       int wrap, int16_t *filter)
107
{
108
    int sum, i;
109
    const uint8_t *s;
110

    
111
    s = src;
112
    for(i=0;i<dst_width;i++) {
113
#if NB_TAPS == 4
114
        sum = s[0 * wrap] * filter[0] +
115
            s[1 * wrap] * filter[1] +
116
            s[2 * wrap] * filter[2] +
117
            s[3 * wrap] * filter[3];
118
#else
119
        {
120
            int j;
121
            uint8_t *s1 = s;
122

    
123
            sum = 0;
124
            for(j=0;j<NB_TAPS;j++) {
125
                sum += s1[0] * filter[j];
126
                s1 += wrap;
127
            }
128
        }
129
#endif
130
        sum = sum >> FILTER_BITS;
131
        if (sum < 0)
132
            sum = 0;
133
        else if (sum > 255)
134
            sum = 255;
135
        dst[0] = sum;
136
        dst++;
137
        s++;
138
    }
139
}
140

    
141
#ifdef HAVE_MMX

#include "i386/mmx.h"

/* Compute one horizontally filtered sample into the low 32 bits of
   `reg`: load source bytes at the current position, widen to words,
   multiply-accumulate with the 4 filter taps (pmaddwd gives two 32-bit
   partial sums), fold the partial sums together and shift down by
   FILTER_BITS.  Advances src_pos by src_incr.
   NOTE(review): movq_m2r(*s, reg) reads 8 bytes though only 4 are
   used — assumes the source line is readable 4 bytes past the last
   tap; confirm against callers. */
#define FILTER4(reg) \
{\
        s = src + (src_pos >> POS_FRAC_BITS);\
        phase = get_phase(src_pos);\
        filter = filters + phase * NB_TAPS;\
        movq_m2r(*s, reg);\
        punpcklbw_r2r(mm7, reg);\
        movq_m2r(*filter, mm6);\
        pmaddwd_r2r(reg, mm6);\
        movq_r2r(mm6, reg);\
        psrlq_i2r(32, reg);\
        paddd_r2r(mm6, reg);\
        psrad_i2r(FILTER_BITS, reg);\
        src_pos += src_incr;\
}

/* Debug helper: print an MMX register as a 64-bit hex value. */
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);

/* MMX version of h_resample_fast (NB_TAPS == 4 only).
   XXX: do four pixels at a time */
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
                                 const uint8_t *src, int src_width,
                                 int src_start, int src_incr, int16_t *filters)
{
    int src_pos, phase;
    const uint8_t *s;
    int16_t *filter;
    mmx_t tmp;

    src_pos = src_start;
    pxor_r2r(mm7, mm7);             /* mm7 = 0, used for unpack and pack */

    /* main loop: 4 output pixels per iteration */
    while (dst_width >= 4) {

        FILTER4(mm0);
        FILTER4(mm1);
        FILTER4(mm2);
        FILTER4(mm3);

        /* saturate each word result to an unsigned byte */
        packuswb_r2r(mm7, mm0);
        packuswb_r2r(mm7, mm1);
        packuswb_r2r(mm7, mm3);
        packuswb_r2r(mm7, mm2);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        movq_r2m(mm1, tmp);
        dst[1] = tmp.ub[0];
        movq_r2m(mm2, tmp);
        dst[2] = tmp.ub[0];
        movq_r2m(mm3, tmp);
        dst[3] = tmp.ub[0];
        dst += 4;
        dst_width -= 4;
    }
    /* tail: remaining 1..3 pixels, one at a time */
    while (dst_width > 0) {
        FILTER4(mm0);
        packuswb_r2r(mm7, mm0);
        movq_r2m(mm0, tmp);
        dst[0] = tmp.ub[0];
        dst++;
        dst_width--;
    }
    emms();
}
208

    
209
/* MMX version of v_resample (NB_TAPS == 4 only): processes 4 output
   pixels per iteration with each filter tap splatted across 4 words. */
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
                            int wrap, int16_t *filter)
{
    int sum, i, v;
    const uint8_t *s;
    mmx_t tmp;
    mmx_t coefs[4];

    /* splat each of the 4 taps across a 4x16-bit MMX operand */
    for(i=0;i<4;i++) {
        v = filter[i];
        coefs[i].uw[0] = v;
        coefs[i].uw[1] = v;
        coefs[i].uw[2] = v;
        coefs[i].uw[3] = v;
    }

    pxor_r2r(mm7, mm7);             /* mm7 = 0, used for unpack and pack */
    s = src;
    while (dst_width >= 4) {
        /* load 4 bytes from each of the 4 source lines, widen to words */
        movq_m2r(s[0 * wrap], mm0);
        punpcklbw_r2r(mm7, mm0);
        movq_m2r(s[1 * wrap], mm1);
        punpcklbw_r2r(mm7, mm1);
        movq_m2r(s[2 * wrap], mm2);
        punpcklbw_r2r(mm7, mm2);
        movq_m2r(s[3 * wrap], mm3);
        punpcklbw_r2r(mm7, mm3);

        /* multiply by the splatted taps */
        pmullw_m2r(coefs[0], mm0);
        pmullw_m2r(coefs[1], mm1);
        pmullw_m2r(coefs[2], mm2);
        pmullw_m2r(coefs[3], mm3);

        /* accumulate and scale down */
        paddw_r2r(mm1, mm0);
        paddw_r2r(mm3, mm2);
        paddw_r2r(mm2, mm0);
        psraw_i2r(FILTER_BITS, mm0);

        packuswb_r2r(mm7, mm0);     /* saturate to unsigned bytes */
        movq_r2m(mm0, tmp);

        *(uint32_t *)dst = tmp.ud[0];
        dst += 4;
        s += 4;
        dst_width -= 4;
    }
    /* scalar tail for the remaining 1..3 pixels */
    while (dst_width > 0) {
        sum = s[0 * wrap] * filter[0] +
            s[1 * wrap] * filter[1] +
            s[2 * wrap] * filter[2] +
            s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum < 0)
            sum = 0;
        else if (sum > 255)
            sum = 255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
    emms();
}
#endif
273

    
274
#ifdef HAVE_ALTIVEC
/* Overlay of an AltiVec byte vector with scalar bytes. */
typedef union {
    vector unsigned char v;
    unsigned char c[16];
} vec_uc_t;

/* Overlay of an AltiVec signed-short vector with scalar shorts. */
typedef union {
    vector signed short v;
    signed short s[8];
} vec_ss_t;

/* AltiVec version of v_resample (NB_TAPS == 4, FILTER_BITS <= 6):
   combines 4 source lines (spaced by `wrap` bytes) into one output
   line, 16 pixels per vector iteration, with scalar head/tail loops
   for alignment and leftovers. */
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
                          int wrap, int16_t *filter)
{
    int sum, i;
    const uint8_t *s;
    vector unsigned char *tv, tmp, dstv, zero;
    vec_ss_t srchv[4], srclv[4], fv[4];
    vector signed short zeros, sumhv, sumlv;
    s = src;

    for(i=0;i<4;i++)
    {
        /*
           The vec_madds later on does an implicit >>15 on the result.
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
           a signed short, we have just enough bits to pre-shift our
           filter constants <<7 to compensate for vec_madds.
        */
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
        fv[i].v = vec_splat(fv[i].v, 0);
    }

    zero = vec_splat_u8(0);
    zeros = vec_splat_s16(0);


    /*
       When we're resampling, we'd ideally like both our input buffers,
       and output buffers to be 16-byte aligned, so we can do both aligned
       reads and writes. Sadly we can't always have this at the moment, so
       we opt for aligned writes, as unaligned writes have a huge overhead.
       To do this, do enough scalar resamples to get dst 16-byte aligned.
    */
    i = (-(int)dst) & 0xf;
    while(i>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
        i--;
    }

    /* Do our altivec resampling on 16 pixels at once. */
    while(dst_width>=16) {
        /*
           Read 16 (potentially unaligned) bytes from each of
           4 lines into 4 vectors, and split them into shorts.
           Interleave the multipy/accumulate for the resample
           filter with the loads to hide the 3 cycle latency
           the vec_madds have.
        */
        tv = (vector unsigned char *) &s[0 * wrap];
        /* fix: was vec_lvsl(0, &s[i * wrap]) — i happens to be 0 here,
           but the permute must be derived from the same address as the
           load, matching the 1/2/3-wrap cases below */
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[0 * wrap]));
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);

        tv = (vector unsigned char *) &s[1 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);

        tv = (vector unsigned char *) &s[2 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);

        tv = (vector unsigned char *) &s[3 * wrap];
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);

        /*
           Pack the results into our destination vector,
           and do an aligned write of that back to memory.
        */
        dstv = vec_packsu(sumhv, sumlv);
        vec_st(dstv, 0, (vector unsigned char *) dst);

        dst+=16;
        s+=16;
        dst_width-=16;
    }

    /*
       If there are any leftover pixels, resample them
       with the slow scalar method.
    */
    while(dst_width>0) {
        sum = s[0 * wrap] * filter[0] +
        s[1 * wrap] * filter[1] +
        s[2 * wrap] * filter[2] +
        s[3 * wrap] * filter[3];
        sum = sum >> FILTER_BITS;
        if (sum<0) sum = 0; else if (sum>255) sum=255;
        dst[0] = sum;
        dst++;
        s++;
        dst_width--;
    }
}
#endif
400

    
401
/* slow version to handle limit cases. Does not need optimisation */
402
static void h_resample_slow(uint8_t *dst, int dst_width,
403
                            const uint8_t *src, int src_width,
404
                            int src_start, int src_incr, int16_t *filters)
405
{
406
    int src_pos, phase, sum, j, v, i;
407
    const uint8_t *s, *src_end;
408
    int16_t *filter;
409

    
410
    src_end = src + src_width;
411
    src_pos = src_start;
412
    for(i=0;i<dst_width;i++) {
413
        s = src + (src_pos >> POS_FRAC_BITS);
414
        phase = get_phase(src_pos);
415
        filter = filters + phase * NB_TAPS;
416
        sum = 0;
417
        for(j=0;j<NB_TAPS;j++) {
418
            if (s < src)
419
                v = src[0];
420
            else if (s >= src_end)
421
                v = src_end[-1];
422
            else
423
                v = s[0];
424
            sum += v * filter[j];
425
            s++;
426
        }
427
        sum = sum >> FILTER_BITS;
428
        if (sum < 0)
429
            sum = 0;
430
        else if (sum > 255)
431
            sum = 255;
432
        dst[0] = sum;
433
        src_pos += src_incr;
434
        dst++;
435
    }
436
}
437

    
438
/* Horizontally resample one full line.  The output is split into up to
   three segments: the slow edge-replicating resampler handles samples
   whose taps would read before the start or past the end of the source
   line, and the fast (unchecked) resampler handles everything in
   between. */
static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
                       int src_width, int src_start, int src_incr,
                       int16_t *filters)
{
    int n, src_end;

    /* leading samples whose taps start before the source line */
    if (src_start < 0) {
        n = (0 - src_start + src_incr - 1) / src_incr;
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
    }
    /* how many samples the fast path can produce without reading past
       src_width - NB_TAPS */
    src_end = src_start + dst_width * src_incr;
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / 
            src_incr;
    } else {
        n = dst_width;
    }
#ifdef HAVE_MMX
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
        h_resample_fast4_mmx(dst, n, 
                             src, src_width, src_start, src_incr, filters);
    else
#endif
        h_resample_fast(dst, n, 
                        src, src_width, src_start, src_incr, filters);
    /* trailing samples whose taps run past the end of the source line */
    if (n < dst_width) {
        dst += n;
        dst_width -= n;
        src_start += n * src_incr;
        h_resample_slow(dst, dst_width, 
                        src, src_width, src_start, src_incr, filters);
    }
}
474

    
475
/* Resample one image component (plane).  Input lines are horizontally
   resampled on demand into a ring buffer of LINE_BUF_HEIGHT lines (the
   first NB_TAPS slots duplicate the wrap-around region so the vertical
   filter can always read NB_TAPS consecutive lines); then the vertical
   filter is applied for each output line. */
static void component_resample(ImgReSampleContext *s, 
                               uint8_t *output, int owrap, int owidth, int oheight,
                               uint8_t *input, int iwrap, int iwidth, int iheight)
{
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
    uint8_t *new_line, *src_line;

    /* start above the image so the first taps replicate line 0 */
    last_src_y = - FCENTER - 1;
    /* position of the bottom of the filter in the source image */
    src_y = (last_src_y + NB_TAPS) * POS_FRAC; 
    ring_y = NB_TAPS; /* position in ring buffer */
    for(y=0;y<oheight;y++) {
        /* apply horizontal filter on new lines from input if needed */
        src_y1 = src_y >> POS_FRAC_BITS;
        while (last_src_y < src_y1) {
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
                ring_y = NB_TAPS;
            last_src_y++;
            /* handle limit conditions : replicate line (slightly
               inefficient because we filter multiple times) */
            y1 = last_src_y;
            if (y1 < 0) {
                y1 = 0;
            } else if (y1 >= iheight) {
                y1 = iheight - 1;
            }
            src_line = input + y1 * iwrap;
            new_line = s->line_buf + ring_y * owidth;
            /* apply filter and handle limit cases correctly */
            h_resample(new_line, owidth, 
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, 
                       &s->h_filters[0][0]);
            /* handle ring buffer wraping: mirror the line into the
               duplicate region at the top of the buffer */
            if (ring_y >= LINE_BUF_HEIGHT) {
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                       new_line, owidth);
            }
        }
        /* apply vertical filter */
        phase_y = get_phase(src_y);
#ifdef HAVE_MMX
        /* MMX version deactivated (&& 0) because of loss of precision */
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
            v_resample4_mmx(output, owidth, 
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
                            &s->v_filters[phase_y][0]);
        else
#endif
#ifdef HAVE_ALTIVEC
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
                v_resample16_altivec(output, owidth,
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                                &s->v_filters[phase_y][0]);
        else
#endif
            v_resample(output, owidth, 
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
                       &s->v_filters[phase_y][0]);

        src_y += s->v_incr;
        output += owrap;
    }
}
538

    
539
/* XXX: the following filter is quite naive, but it seems to suffice
540
   for 4 taps */
541
static void build_filter(int16_t *filter, float factor)
542
{
543
    int ph, i, v;
544
    float x, y, tab[NB_TAPS], norm, mult;
545

    
546
    /* if upsampling, only need to interpolate, no filter */
547
    if (factor > 1.0)
548
        factor = 1.0;
549

    
550
    for(ph=0;ph<NB_PHASES;ph++) {
551
        norm = 0;
552
        for(i=0;i<NB_TAPS;i++) {
553
            
554
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
555
            if (x == 0)
556
                y = 1.0;
557
            else
558
                y = sin(x) / x;
559
            tab[i] = y;
560
            norm += y;
561
        }
562

    
563
        /* normalize so that an uniform color remains the same */
564
        mult = (float)(1 << FILTER_BITS) / norm;
565
        for(i=0;i<NB_TAPS;i++) {
566
            v = (int)(tab[i] * mult);
567
            filter[ph * NB_TAPS + i] = v;
568
        }
569
    }
570
}
571

    
572
/* Allocate a resampling context from (iwidth x iheight) to
   (owidth x oheight) with no cropping bands.  Returns NULL on
   allocation failure. */
ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                      int iwidth, int iheight)
{
        return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
}
577

    
578
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
579
                                      int iwidth, int iheight,
580
                                      int topBand, int bottomBand,
581
                                      int leftBand, int rightBand)
582
{
583
    ImgReSampleContext *s;
584

    
585
    s = av_mallocz(sizeof(ImgReSampleContext));
586
    if (!s)
587
        return NULL;
588
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
589
    if (!s->line_buf) 
590
        goto fail;
591
    
592
    s->owidth = owidth;
593
    s->oheight = oheight;
594
    s->iwidth = iwidth;
595
    s->iheight = iheight;
596
    s->topBand = topBand;
597
    s->bottomBand = bottomBand;
598
    s->leftBand = leftBand;
599
    s->rightBand = rightBand;
600
    
601
    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
602
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
603
    
604
    build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
605
    build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
606

    
607
    return s;
608
 fail:
609
    av_free(s);
610
    return NULL;
611
}
612

    
613
/* Resample the three planes of a picture; plane 0 is full size and
   planes 1 and 2 are subsampled by 2 in each direction (YUV420-style
   layout, as implied by the per-plane shift). */
void img_resample(ImgReSampleContext *s, 
                  AVPicture *output, const AVPicture *input)
{
    int plane;

    for (plane = 0; plane < 3; plane++) {
        int shift = plane ? 1 : 0;
        /* skip the cropping bands in the input plane */
        uint8_t *in = input->data[plane]
            + (input->linesize[plane] * (s->topBand >> shift))
            + (s->leftBand >> shift);

        component_resample(s, output->data[plane], output->linesize[plane],
                           s->owidth >> shift, s->oheight >> shift,
                           in, input->linesize[plane],
                           (s->iwidth - s->leftBand - s->rightBand) >> shift,
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
    }
}
627

    
628
/* Free a resampling context and its line buffer.  A NULL context is
   accepted and ignored, matching the usual *_close convention. */
void img_resample_close(ImgReSampleContext *s)
{
    if (!s)
        return;
    av_free(s->line_buf);
    av_free(s);
}
633

    
634
#ifdef TEST
635

    
636
/* Allocate `size` bytes initialized to zero.  Returns NULL if the
   underlying malloc fails (the original called memset on an unchecked
   pointer, which is undefined behavior on allocation failure). */
void *av_mallocz(int size)
{
    void *ptr;
    ptr = malloc(size);
    if (ptr)
        memset(ptr, 0, size);
    return ptr;
}
643

    
644
/* Free memory obtained from av_mallocz.  free(NULL) is defined as a
   no-op by the C standard (C99 7.20.3.2), so no guard is needed. */
void av_free(void *ptr)
{
    free(ptr);
}
650

    
651
/* input */
652
#define XSIZE 256
653
#define YSIZE 256
654
uint8_t img[XSIZE * YSIZE];
655

    
656
/* output */
657
#define XSIZE1 512
658
#define YSIZE1 512
659
uint8_t img1[XSIZE1 * YSIZE1];
660
uint8_t img2[XSIZE1 * YSIZE1];
661

    
662
/* Write an 8-bit grayscale image as a binary PGM (P5) file.
   Prints a warning and returns if the file cannot be created (the
   original dereferenced the unchecked fopen result).  Opens in "wb":
   PGM raster data is binary, and text mode would corrupt it on
   platforms that translate line endings. */
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
{
    FILE *f;

    f = fopen(filename, "wb");
    if (!f) {
        fprintf(stderr, "could not open '%s' for writing\n", filename);
        return;
    }
    fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
    fwrite(img, 1, xsize * ysize, f);
    fclose(f);
}
670

    
671
/* Print every phase of a filter bank, each tap shown as its
   fractional value (coefficients are scaled by 1 << FILTER_BITS). */
static void dump_filter(int16_t *filter)
{
    int ph, i;

    for (ph = 0; ph < NB_PHASES; ph++) {
        printf("%2d: ", ph);
        for (i = 0; i < NB_TAPS; i++)
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
        printf("\n");
    }
}
683

    
684
#ifdef HAVE_MMX
685
int mm_flags;
686
#endif
687

    
688
int main(int argc, char **argv)
689
{
690
    int x, y, v, i, xsize, ysize;
691
    ImgReSampleContext *s;
692
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
693
    char buf[256];
694

    
695
    /* build test image */
696
    for(y=0;y<YSIZE;y++) {
697
        for(x=0;x<XSIZE;x++) {
698
            if (x < XSIZE/2 && y < YSIZE/2) {
699
                if (x < XSIZE/4 && y < YSIZE/4) {
700
                    if ((x % 10) <= 6 &&
701
                        (y % 10) <= 6)
702
                        v = 0xff;
703
                    else
704
                        v = 0x00;
705
                } else if (x < XSIZE/4) {
706
                    if (x & 1) 
707
                        v = 0xff;
708
                    else 
709
                        v = 0;
710
                } else if (y < XSIZE/4) {
711
                    if (y & 1) 
712
                        v = 0xff;
713
                    else 
714
                        v = 0;
715
                } else {
716
                    if (y < YSIZE*3/8) {
717
                        if ((y+x) & 1) 
718
                            v = 0xff;
719
                        else 
720
                            v = 0;
721
                    } else {
722
                        if (((x+3) % 4) <= 1 &&
723
                            ((y+3) % 4) <= 1)
724
                            v = 0xff;
725
                        else
726
                            v = 0x00;
727
                    }
728
                }
729
            } else if (x < XSIZE/2) {
730
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
731
            } else if (y < XSIZE/2) {
732
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
733
            } else {
734
                v = ((x + y - XSIZE) * 255) / XSIZE;
735
            }
736
            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
737
        }
738
    }
739
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
740
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
741
        fact = factors[i];
742
        xsize = (int)(XSIZE * fact);
743
        ysize = (int)((YSIZE - 100) * fact);
744
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
745
        printf("Factor=%0.2f\n", fact);
746
        dump_filter(&s->h_filters[0][0]);
747
        component_resample(s, img1, xsize, xsize, ysize,
748
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
749
        img_resample_close(s);
750

    
751
        sprintf(buf, "/tmp/out%d.pgm", i);
752
        save_pgm(buf, img1, xsize, ysize);
753
    }
754

    
755
    /* mmx test */
756
#ifdef HAVE_MMX
757
    printf("MMX test\n");
758
    fact = 0.72;
759
    xsize = (int)(XSIZE * fact);
760
    ysize = (int)(YSIZE * fact);
761
    mm_flags = MM_MMX;
762
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
763
    component_resample(s, img1, xsize, xsize, ysize,
764
                       img, XSIZE, XSIZE, YSIZE);
765

    
766
    mm_flags = 0;
767
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
768
    component_resample(s, img2, xsize, xsize, ysize,
769
                       img, XSIZE, XSIZE, YSIZE);
770
    if (memcmp(img1, img2, xsize * ysize) != 0) {
771
        fprintf(stderr, "mmx error\n");
772
        exit(1);
773
    }
774
    printf("MMX OK\n");
775
#endif
776
    return 0;
777
}
778

    
779
#endif