Statistics
| Branch: | Revision:

ffmpeg / libavcodec / imgresample.c @ 983e3246

History | View | Annotate | Download (22.4 KB)

1
/*
2
 * High quality image resampling with polyphase filters 
3
 * Copyright (c) 2001 Fabrice Bellard.
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
 
20
/**
21
 * @file imgresample.c
22
 * High quality image resampling with polyphase filters .
23
 */
24
 
25
#include "avcodec.h"
26
#include "dsputil.h"
27

    
28
#ifdef USE_FASTMEMCPY
29
#include "fastmemcpy.h"
30
#endif
31

    
32
#define NB_COMPONENTS 3
33

    
34
#define PHASE_BITS 4
35
#define NB_PHASES  (1 << PHASE_BITS)
36
#define NB_TAPS    4
37
#define FCENTER    1  /* index of the center of the filter */
38
//#define TEST    1  /* Test it */
39

    
40
#define POS_FRAC_BITS 16
41
#define POS_FRAC      (1 << POS_FRAC_BITS)
42
/* 6 bits precision is needed for MMX */
43
#define FILTER_BITS   8
44

    
45
#define LINE_BUF_HEIGHT (NB_TAPS * 4)
46

    
47
struct ImgReSampleContext {
48
    int iwidth, iheight, owidth, oheight, topBand, bottomBand, leftBand, rightBand;
49
    int h_incr, v_incr;
50
    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
51
    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
52
    uint8_t *line_buf;
53
};
54

    
55
static inline int get_phase(int pos)
56
{
57
    return ((pos) >> (POS_FRAC_BITS - PHASE_BITS)) & ((1 << PHASE_BITS) - 1);
58
}
59

    
60
/* This function must be optimized */
61
static void h_resample_fast(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
62
                            int src_start, int src_incr, int16_t *filters)
63
{
64
    int src_pos, phase, sum, i;
65
    uint8_t *s;
66
    int16_t *filter;
67

    
68
    src_pos = src_start;
69
    for(i=0;i<dst_width;i++) {
70
#ifdef TEST
71
        /* test */
72
        if ((src_pos >> POS_FRAC_BITS) < 0 ||
73
            (src_pos >> POS_FRAC_BITS) > (src_width - NB_TAPS))
74
            av_abort();
75
#endif
76
        s = src + (src_pos >> POS_FRAC_BITS);
77
        phase = get_phase(src_pos);
78
        filter = filters + phase * NB_TAPS;
79
#if NB_TAPS == 4
80
        sum = s[0] * filter[0] +
81
            s[1] * filter[1] +
82
            s[2] * filter[2] +
83
            s[3] * filter[3];
84
#else
85
        {
86
            int j;
87
            sum = 0;
88
            for(j=0;j<NB_TAPS;j++)
89
                sum += s[j] * filter[j];
90
        }
91
#endif
92
        sum = sum >> FILTER_BITS;
93
        if (sum < 0)
94
            sum = 0;
95
        else if (sum > 255)
96
            sum = 255;
97
        dst[0] = sum;
98
        src_pos += src_incr;
99
        dst++;
100
    }
101
}
102

    
103
/* This function must be optimized */
104
static void v_resample(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
105
                       int16_t *filter)
106
{
107
    int sum, i;
108
    uint8_t *s;
109

    
110
    s = src;
111
    for(i=0;i<dst_width;i++) {
112
#if NB_TAPS == 4
113
        sum = s[0 * wrap] * filter[0] +
114
            s[1 * wrap] * filter[1] +
115
            s[2 * wrap] * filter[2] +
116
            s[3 * wrap] * filter[3];
117
#else
118
        {
119
            int j;
120
            uint8_t *s1 = s;
121

    
122
            sum = 0;
123
            for(j=0;j<NB_TAPS;j++) {
124
                sum += s1[0] * filter[j];
125
                s1 += wrap;
126
            }
127
        }
128
#endif
129
        sum = sum >> FILTER_BITS;
130
        if (sum < 0)
131
            sum = 0;
132
        else if (sum > 255)
133
            sum = 255;
134
        dst[0] = sum;
135
        dst++;
136
        s++;
137
    }
138
}
139

    
140
#ifdef HAVE_MMX
141

    
142
#include "i386/mmx.h"
143

    
144
#define FILTER4(reg) \
145
{\
146
        s = src + (src_pos >> POS_FRAC_BITS);\
147
        phase = get_phase(src_pos);\
148
        filter = filters + phase * NB_TAPS;\
149
        movq_m2r(*s, reg);\
150
        punpcklbw_r2r(mm7, reg);\
151
        movq_m2r(*filter, mm6);\
152
        pmaddwd_r2r(reg, mm6);\
153
        movq_r2r(mm6, reg);\
154
        psrlq_i2r(32, reg);\
155
        paddd_r2r(mm6, reg);\
156
        psrad_i2r(FILTER_BITS, reg);\
157
        src_pos += src_incr;\
158
}
159

    
160
#define DUMP(reg) movq_r2m(reg, tmp); printf(#reg "=%016Lx\n", tmp.uq);
161

    
162
/* XXX: do four pixels at a time */
163
static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
164
                                 int src_start, int src_incr, int16_t *filters)
165
{
166
    int src_pos, phase;
167
    uint8_t *s;
168
    int16_t *filter;
169
    mmx_t tmp;
170
    
171
    src_pos = src_start;
172
    pxor_r2r(mm7, mm7);
173

    
174
    while (dst_width >= 4) {
175

    
176
        FILTER4(mm0);
177
        FILTER4(mm1);
178
        FILTER4(mm2);
179
        FILTER4(mm3);
180

    
181
        packuswb_r2r(mm7, mm0);
182
        packuswb_r2r(mm7, mm1);
183
        packuswb_r2r(mm7, mm3);
184
        packuswb_r2r(mm7, mm2);
185
        movq_r2m(mm0, tmp);
186
        dst[0] = tmp.ub[0];
187
        movq_r2m(mm1, tmp);
188
        dst[1] = tmp.ub[0];
189
        movq_r2m(mm2, tmp);
190
        dst[2] = tmp.ub[0];
191
        movq_r2m(mm3, tmp);
192
        dst[3] = tmp.ub[0];
193
        dst += 4;
194
        dst_width -= 4;
195
    }
196
    while (dst_width > 0) {
197
        FILTER4(mm0);
198
        packuswb_r2r(mm7, mm0);
199
        movq_r2m(mm0, tmp);
200
        dst[0] = tmp.ub[0];
201
        dst++;
202
        dst_width--;
203
    }
204
    emms();
205
}
206

    
207
static void v_resample4_mmx(uint8_t *dst, int dst_width, uint8_t *src, int wrap, 
208
                            int16_t *filter)
209
{
210
    int sum, i, v;
211
    uint8_t *s;
212
    mmx_t tmp;
213
    mmx_t coefs[4];
214
    
215
    for(i=0;i<4;i++) {
216
        v = filter[i];
217
        coefs[i].uw[0] = v;
218
        coefs[i].uw[1] = v;
219
        coefs[i].uw[2] = v;
220
        coefs[i].uw[3] = v;
221
    }
222
    
223
    pxor_r2r(mm7, mm7);
224
    s = src;
225
    while (dst_width >= 4) {
226
        movq_m2r(s[0 * wrap], mm0);
227
        punpcklbw_r2r(mm7, mm0);
228
        movq_m2r(s[1 * wrap], mm1);
229
        punpcklbw_r2r(mm7, mm1);
230
        movq_m2r(s[2 * wrap], mm2);
231
        punpcklbw_r2r(mm7, mm2);
232
        movq_m2r(s[3 * wrap], mm3);
233
        punpcklbw_r2r(mm7, mm3);
234

    
235
        pmullw_m2r(coefs[0], mm0);
236
        pmullw_m2r(coefs[1], mm1);
237
        pmullw_m2r(coefs[2], mm2);
238
        pmullw_m2r(coefs[3], mm3);
239

    
240
        paddw_r2r(mm1, mm0);
241
        paddw_r2r(mm3, mm2);
242
        paddw_r2r(mm2, mm0);
243
        psraw_i2r(FILTER_BITS, mm0);
244
        
245
        packuswb_r2r(mm7, mm0);
246
        movq_r2m(mm0, tmp);
247

    
248
        *(uint32_t *)dst = tmp.ud[0];
249
        dst += 4;
250
        s += 4;
251
        dst_width -= 4;
252
    }
253
    while (dst_width > 0) {
254
        sum = s[0 * wrap] * filter[0] +
255
            s[1 * wrap] * filter[1] +
256
            s[2 * wrap] * filter[2] +
257
            s[3 * wrap] * filter[3];
258
        sum = sum >> FILTER_BITS;
259
        if (sum < 0)
260
            sum = 0;
261
        else if (sum > 255)
262
            sum = 255;
263
        dst[0] = sum;
264
        dst++;
265
        s++;
266
        dst_width--;
267
    }
268
    emms();
269
}
270
#endif
271

    
272
#ifdef HAVE_ALTIVEC
273
typedef        union {
274
    vector unsigned char v;
275
    unsigned char c[16];
276
} vec_uc_t;
277

    
278
typedef        union {
279
    vector signed short v;
280
    signed short s[8];
281
} vec_ss_t;
282

    
283
void v_resample16_altivec(uint8_t *dst, int dst_width, uint8_t *src, int wrap,
284
                            int16_t *filter)
285
{
286
    int sum, i;
287
    uint8_t *s;
288
    vector unsigned char *tv, tmp, dstv, zero;
289
    vec_ss_t srchv[4], srclv[4], fv[4];
290
    vector signed short zeros, sumhv, sumlv;    
291
    s = src;
292

    
293
    for(i=0;i<4;i++)
294
    {
295
        /*
296
           The vec_madds later on does an implicit >>15 on the result.
297
           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
298
           a signed short, we have just enough bits to pre-shift our
299
           filter constants <<7 to compensate for vec_madds.
300
        */
301
        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
302
        fv[i].v = vec_splat(fv[i].v, 0);
303
    }
304
    
305
    zero = vec_splat_u8(0);
306
    zeros = vec_splat_s16(0);
307

    
308

    
309
    /*
310
       When we're resampling, we'd ideally like both our input buffers,
311
       and output buffers to be 16-byte aligned, so we can do both aligned
312
       reads and writes. Sadly we can't always have this at the moment, so
313
       we opt for aligned writes, as unaligned writes have a huge overhead.
314
       To do this, do enough scalar resamples to get dst 16-byte aligned.
315
    */
316
    i = (-(int)dst) & 0xf;
317
    while(i>0) {
318
        sum = s[0 * wrap] * filter[0] +
319
        s[1 * wrap] * filter[1] +
320
        s[2 * wrap] * filter[2] +
321
        s[3 * wrap] * filter[3];
322
        sum = sum >> FILTER_BITS;
323
        if (sum<0) sum = 0; else if (sum>255) sum=255;
324
        dst[0] = sum;
325
        dst++;
326
        s++;
327
        dst_width--;
328
        i--;
329
    }
330
    
331
    /* Do our altivec resampling on 16 pixels at once. */
332
    while(dst_width>=16) {
333
        /*
334
           Read 16 (potentially unaligned) bytes from each of
335
           4 lines into 4 vectors, and split them into shorts.
336
           Interleave the multipy/accumulate for the resample
337
           filter with the loads to hide the 3 cycle latency
338
           the vec_madds have.
339
        */
340
        tv = (vector unsigned char *) &s[0 * wrap];
341
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
342
        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
343
        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
344
        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
345
        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
346

    
347
        tv = (vector unsigned char *) &s[1 * wrap];
348
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
349
        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
350
        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
351
        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
352
        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
353

    
354
        tv = (vector unsigned char *) &s[2 * wrap];
355
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
356
        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
357
        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
358
        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
359
        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
360

    
361
        tv = (vector unsigned char *) &s[3 * wrap];
362
        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
363
        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
364
        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
365
        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
366
        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
367
    
368
        /*
369
           Pack the results into our destination vector,
370
           and do an aligned write of that back to memory.
371
        */
372
        dstv = vec_packsu(sumhv, sumlv) ;
373
        vec_st(dstv, 0, (vector unsigned char *) dst);
374
        
375
        dst+=16;
376
        s+=16;
377
        dst_width-=16;
378
    }
379

    
380
    /*
381
       If there are any leftover pixels, resample them
382
       with the slow scalar method.
383
    */
384
    while(dst_width>0) {
385
        sum = s[0 * wrap] * filter[0] +
386
        s[1 * wrap] * filter[1] +
387
        s[2 * wrap] * filter[2] +
388
        s[3 * wrap] * filter[3];
389
        sum = sum >> FILTER_BITS;
390
        if (sum<0) sum = 0; else if (sum>255) sum=255;
391
        dst[0] = sum;
392
        dst++;
393
        s++;
394
        dst_width--;
395
    }
396
}
397
#endif
398

    
399
/* slow version to handle limit cases. Does not need optimisation */
400
static void h_resample_slow(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
401
                            int src_start, int src_incr, int16_t *filters)
402
{
403
    int src_pos, phase, sum, j, v, i;
404
    uint8_t *s, *src_end;
405
    int16_t *filter;
406

    
407
    src_end = src + src_width;
408
    src_pos = src_start;
409
    for(i=0;i<dst_width;i++) {
410
        s = src + (src_pos >> POS_FRAC_BITS);
411
        phase = get_phase(src_pos);
412
        filter = filters + phase * NB_TAPS;
413
        sum = 0;
414
        for(j=0;j<NB_TAPS;j++) {
415
            if (s < src)
416
                v = src[0];
417
            else if (s >= src_end)
418
                v = src_end[-1];
419
            else
420
                v = s[0];
421
            sum += v * filter[j];
422
            s++;
423
        }
424
        sum = sum >> FILTER_BITS;
425
        if (sum < 0)
426
            sum = 0;
427
        else if (sum > 255)
428
            sum = 255;
429
        dst[0] = sum;
430
        src_pos += src_incr;
431
        dst++;
432
    }
433
}
434

    
435
static void h_resample(uint8_t *dst, int dst_width, uint8_t *src, int src_width,
436
                       int src_start, int src_incr, int16_t *filters)
437
{
438
    int n, src_end;
439

    
440
    if (src_start < 0) {
441
        n = (0 - src_start + src_incr - 1) / src_incr;
442
        h_resample_slow(dst, n, src, src_width, src_start, src_incr, filters);
443
        dst += n;
444
        dst_width -= n;
445
        src_start += n * src_incr;
446
    }
447
    src_end = src_start + dst_width * src_incr;
448
    if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
449
        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / 
450
            src_incr;
451
    } else {
452
        n = dst_width;
453
    }
454
#ifdef HAVE_MMX
455
    if ((mm_flags & MM_MMX) && NB_TAPS == 4)
456
        h_resample_fast4_mmx(dst, n, 
457
                             src, src_width, src_start, src_incr, filters);
458
    else
459
#endif
460
        h_resample_fast(dst, n, 
461
                        src, src_width, src_start, src_incr, filters);
462
    if (n < dst_width) {
463
        dst += n;
464
        dst_width -= n;
465
        src_start += n * src_incr;
466
        h_resample_slow(dst, dst_width, 
467
                        src, src_width, src_start, src_incr, filters);
468
    }
469
}
470

    
471
static void component_resample(ImgReSampleContext *s, 
472
                               uint8_t *output, int owrap, int owidth, int oheight,
473
                               uint8_t *input, int iwrap, int iwidth, int iheight)
474
{
475
    int src_y, src_y1, last_src_y, ring_y, phase_y, y1, y;
476
    uint8_t *new_line, *src_line;
477

    
478
    last_src_y = - FCENTER - 1;
479
    /* position of the bottom of the filter in the source image */
480
    src_y = (last_src_y + NB_TAPS) * POS_FRAC; 
481
    ring_y = NB_TAPS; /* position in ring buffer */
482
    for(y=0;y<oheight;y++) {
483
        /* apply horizontal filter on new lines from input if needed */
484
        src_y1 = src_y >> POS_FRAC_BITS;
485
        while (last_src_y < src_y1) {
486
            if (++ring_y >= LINE_BUF_HEIGHT + NB_TAPS)
487
                ring_y = NB_TAPS;
488
            last_src_y++;
489
            /* handle limit conditions : replicate line (slightly
490
               inefficient because we filter multiple times) */
491
            y1 = last_src_y;
492
            if (y1 < 0) {
493
                y1 = 0;
494
            } else if (y1 >= iheight) {
495
                y1 = iheight - 1;
496
            }
497
            src_line = input + y1 * iwrap;
498
            new_line = s->line_buf + ring_y * owidth;
499
            /* apply filter and handle limit cases correctly */
500
            h_resample(new_line, owidth, 
501
                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, 
502
                       &s->h_filters[0][0]);
503
            /* handle ring buffer wraping */
504
            if (ring_y >= LINE_BUF_HEIGHT) {
505
                memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
506
                       new_line, owidth);
507
            }
508
        }
509
        /* apply vertical filter */
510
        phase_y = get_phase(src_y);
511
#ifdef HAVE_MMX
512
        /* desactivated MMX because loss of precision */
513
        if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
514
            v_resample4_mmx(output, owidth, 
515
                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
516
                            &s->v_filters[phase_y][0]);
517
        else
518
#endif
519
#ifdef HAVE_ALTIVEC
520
            if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS <= 6)
521
                v_resample16_altivec(output, owidth,
522
                                s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
523
                                &s->v_filters[phase_y][0]);
524
        else
525
#endif
526
            v_resample(output, owidth, 
527
                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
528
                       &s->v_filters[phase_y][0]);
529
            
530
        src_y += s->v_incr;
531
        output += owrap;
532
    }
533
}
534

    
535
/* XXX: the following filter is quite naive, but it seems to suffice
536
   for 4 taps */
537
static void build_filter(int16_t *filter, float factor)
538
{
539
    int ph, i, v;
540
    float x, y, tab[NB_TAPS], norm, mult;
541

    
542
    /* if upsampling, only need to interpolate, no filter */
543
    if (factor > 1.0)
544
        factor = 1.0;
545

    
546
    for(ph=0;ph<NB_PHASES;ph++) {
547
        norm = 0;
548
        for(i=0;i<NB_TAPS;i++) {
549
            
550
            x = M_PI * ((float)(i - FCENTER) - (float)ph / NB_PHASES) * factor;
551
            if (x == 0)
552
                y = 1.0;
553
            else
554
                y = sin(x) / x;
555
            tab[i] = y;
556
            norm += y;
557
        }
558

    
559
        /* normalize so that an uniform color remains the same */
560
        mult = (float)(1 << FILTER_BITS) / norm;
561
        for(i=0;i<NB_TAPS;i++) {
562
            v = (int)(tab[i] * mult);
563
            filter[ph * NB_TAPS + i] = v;
564
        }
565
    }
566
}
567

    
568
ImgReSampleContext *img_resample_init(int owidth, int oheight,
569
                                      int iwidth, int iheight)
570
{
571
        return img_resample_full_init(owidth, oheight, iwidth, iheight, 0, 0, 0, 0);
572
}
573

    
574
ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
575
                                      int iwidth, int iheight,
576
                                      int topBand, int bottomBand,
577
                                      int leftBand, int rightBand)
578
{
579
    ImgReSampleContext *s;
580

    
581
    s = av_mallocz(sizeof(ImgReSampleContext));
582
    if (!s)
583
        return NULL;
584
    s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
585
    if (!s->line_buf) 
586
        goto fail;
587
    
588
    s->owidth = owidth;
589
    s->oheight = oheight;
590
    s->iwidth = iwidth;
591
    s->iheight = iheight;
592
    s->topBand = topBand;
593
    s->bottomBand = bottomBand;
594
    s->leftBand = leftBand;
595
    s->rightBand = rightBand;
596
    
597
    s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / owidth;
598
    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / oheight;
599
    
600
    build_filter(&s->h_filters[0][0], (float) owidth  / (float) (iwidth - leftBand - rightBand));
601
    build_filter(&s->v_filters[0][0], (float) oheight / (float) (iheight - topBand - bottomBand));
602

    
603
    return s;
604
 fail:
605
    av_free(s);
606
    return NULL;
607
}
608

    
609
void img_resample(ImgReSampleContext *s, 
610
                  AVPicture *output, AVPicture *input)
611
{
612
    int i, shift;
613

    
614
    for(i=0;i<3;i++) {
615
        shift = (i == 0) ? 0 : 1;
616
        component_resample(s, output->data[i], output->linesize[i], 
617
                           s->owidth >> shift, s->oheight >> shift,
618
                           input->data[i] + (input->linesize[i] * (s->topBand >> shift)) + (s->leftBand >> shift),
619
                           input->linesize[i], ((s->iwidth - s->leftBand - s->rightBand) >> shift),
620
                           (s->iheight - s->topBand - s->bottomBand) >> shift);
621
    }
622
}
623

    
624
void img_resample_close(ImgReSampleContext *s)
625
{
626
    av_free(s->line_buf);
627
    av_free(s);
628
}
629

    
630
#ifdef TEST
631

    
632
void *av_mallocz(int size)
633
{
634
    void *ptr;
635
    ptr = malloc(size);
636
    memset(ptr, 0, size);
637
    return ptr;
638
}
639

    
640
void av_free(void *ptr)
641
{
642
    /* XXX: this test should not be needed on most libcs */
643
    if (ptr)
644
        free(ptr);
645
}
646

    
647
/* input */
648
#define XSIZE 256
649
#define YSIZE 256
650
uint8_t img[XSIZE * YSIZE];
651

    
652
/* output */
653
#define XSIZE1 512
654
#define YSIZE1 512
655
uint8_t img1[XSIZE1 * YSIZE1];
656
uint8_t img2[XSIZE1 * YSIZE1];
657

    
658
void save_pgm(const char *filename, uint8_t *img, int xsize, int ysize)
659
{
660
    FILE *f;
661
    f=fopen(filename,"w");
662
    fprintf(f,"P5\n%d %d\n%d\n", xsize, ysize, 255);
663
    fwrite(img,1, xsize * ysize,f);
664
    fclose(f);
665
}
666

    
667
static void dump_filter(int16_t *filter)
668
{
669
    int i, ph;
670

    
671
    for(ph=0;ph<NB_PHASES;ph++) {
672
        printf("%2d: ", ph);
673
        for(i=0;i<NB_TAPS;i++) {
674
            printf(" %5.2f", filter[ph * NB_TAPS + i] / 256.0);
675
        }
676
        printf("\n");
677
    }
678
}
679

    
680
#ifdef HAVE_MMX
681
int mm_flags;
682
#endif
683

    
684
int main(int argc, char **argv)
685
{
686
    int x, y, v, i, xsize, ysize;
687
    ImgReSampleContext *s;
688
    float fact, factors[] = { 1/2.0, 3.0/4.0, 1.0, 4.0/3.0, 16.0/9.0, 2.0 };
689
    char buf[256];
690

    
691
    /* build test image */
692
    for(y=0;y<YSIZE;y++) {
693
        for(x=0;x<XSIZE;x++) {
694
            if (x < XSIZE/2 && y < YSIZE/2) {
695
                if (x < XSIZE/4 && y < YSIZE/4) {
696
                    if ((x % 10) <= 6 &&
697
                        (y % 10) <= 6)
698
                        v = 0xff;
699
                    else
700
                        v = 0x00;
701
                } else if (x < XSIZE/4) {
702
                    if (x & 1) 
703
                        v = 0xff;
704
                    else 
705
                        v = 0;
706
                } else if (y < XSIZE/4) {
707
                    if (y & 1) 
708
                        v = 0xff;
709
                    else 
710
                        v = 0;
711
                } else {
712
                    if (y < YSIZE*3/8) {
713
                        if ((y+x) & 1) 
714
                            v = 0xff;
715
                        else 
716
                            v = 0;
717
                    } else {
718
                        if (((x+3) % 4) <= 1 &&
719
                            ((y+3) % 4) <= 1)
720
                            v = 0xff;
721
                        else
722
                            v = 0x00;
723
                    }
724
                }
725
            } else if (x < XSIZE/2) {
726
                v = ((x - (XSIZE/2)) * 255) / (XSIZE/2);
727
            } else if (y < XSIZE/2) {
728
                v = ((y - (XSIZE/2)) * 255) / (XSIZE/2);
729
            } else {
730
                v = ((x + y - XSIZE) * 255) / XSIZE;
731
            }
732
            img[(YSIZE - y) * XSIZE + (XSIZE - x)] = v;
733
        }
734
    }
735
    save_pgm("/tmp/in.pgm", img, XSIZE, YSIZE);
736
    for(i=0;i<sizeof(factors)/sizeof(float);i++) {
737
        fact = factors[i];
738
        xsize = (int)(XSIZE * fact);
739
        ysize = (int)((YSIZE - 100) * fact);
740
        s = img_resample_full_init(xsize, ysize, XSIZE, YSIZE, 50 ,50, 0, 0);
741
        printf("Factor=%0.2f\n", fact);
742
        dump_filter(&s->h_filters[0][0]);
743
        component_resample(s, img1, xsize, xsize, ysize,
744
                           img + 50 * XSIZE, XSIZE, XSIZE, YSIZE - 100);
745
        img_resample_close(s);
746

    
747
        sprintf(buf, "/tmp/out%d.pgm", i);
748
        save_pgm(buf, img1, xsize, ysize);
749
    }
750

    
751
    /* mmx test */
752
#ifdef HAVE_MMX
753
    printf("MMX test\n");
754
    fact = 0.72;
755
    xsize = (int)(XSIZE * fact);
756
    ysize = (int)(YSIZE * fact);
757
    mm_flags = MM_MMX;
758
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
759
    component_resample(s, img1, xsize, xsize, ysize,
760
                       img, XSIZE, XSIZE, YSIZE);
761

    
762
    mm_flags = 0;
763
    s = img_resample_init(xsize, ysize, XSIZE, YSIZE);
764
    component_resample(s, img2, xsize, xsize, ysize,
765
                       img, XSIZE, XSIZE, YSIZE);
766
    if (memcmp(img1, img2, xsize * ysize) != 0) {
767
        fprintf(stderr, "mmx error\n");
768
        exit(1);
769
    }
770
    printf("MMX OK\n");
771
#endif
772
    return 0;
773
}
774

    
775
#endif