Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ cc61f96f

History | View | Annotate | Download (15.5 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file dct-test.c
24
 * DCT test. (c) 2001 Fabrice Bellard.
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "dsputil.h"
36

    
37
#include "simple_idct.h"
38
#include "faandct.h"
39

    
40
#ifndef MAX
41
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
42
#endif
43

    
44
#undef printf
45
#undef random
46

    
47
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
48

    
49
/* reference fdct/idct */
50
extern void fdct(DCTELEM *block);
51
extern void idct(DCTELEM *block);
52
extern void ff_idct_xvid_mmx(DCTELEM *block);
53
extern void ff_idct_xvid_mmx2(DCTELEM *block);
54
extern void init_fdct();
55

    
56
extern void ff_mmx_idct(DCTELEM *data);
57
extern void ff_mmxext_idct(DCTELEM *data);
58

    
59
extern void odivx_idct_c (short *block);
60

    
61
// BFIN
62
extern void ff_bfin_idct (DCTELEM *block)  ;
63
extern void ff_bfin_fdct (DCTELEM *block) ;
64

    
65
// ALTIVEC
66
extern void fdct_altivec (DCTELEM *block);
67
//extern void idct_altivec (DCTELEM *block);?? no routine
68

    
69

    
70
struct algo {
71
  char *name;
72
  enum { FDCT, IDCT } is_idct;
73
  void (* func) (DCTELEM *block);
74
  void (* ref)  (DCTELEM *block);
75
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
76
};
77

    
78
#ifndef FAAN_POSTSCALE
79
#define FAAN_SCALE SCALE_PERM
80
#else
81
#define FAAN_SCALE NO_PERM
82
#endif
83

    
84
#define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
85

    
86

    
87
struct algo algos[] = {
88
  DCT_ERROR( "REF-DBL",        0, fdct,               fdct, NO_PERM),
89
  DCT_ERROR("FAAN",            0, ff_faandct,         fdct, FAAN_SCALE),
90
  DCT_ERROR("IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM),
91
  DCT_ERROR("IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM),
92
  DCT_ERROR("REF-DBL",         1, idct,               idct, NO_PERM),
93
  DCT_ERROR("INT",             1, j_rev_dct,          idct, MMX_PERM),
94
  DCT_ERROR("SIMPLE-C",        1, simple_idct,        idct, NO_PERM),
95

    
96
#ifdef HAVE_MMX
97
  DCT_ERROR("MMX",             0, ff_fdct_mmx,        fdct, NO_PERM),
98
#ifdef HAVE_MMX2
99
  DCT_ERROR("MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM),
100
#endif
101

    
102
#ifdef CONFIG_GPL
103
  DCT_ERROR("LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM),
104
  DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM),
105
#endif
106
  DCT_ERROR("SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
107
  DCT_ERROR("XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM),
108
  DCT_ERROR("XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM),
109
#endif
110

    
111
#ifdef HAVE_ALTIVEC
112
  DCT_ERROR("altivecfdct",     0, fdct_altivec,       fdct, NO_PERM),
113
#endif
114

    
115
#ifdef ARCH_BFIN
116
  DCT_ERROR("BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM),
117
  DCT_ERROR("BFINidct",        1, ff_bfin_idct,       idct, NO_PERM),
118
#endif
119

    
120
  { 0 }
121
};
122

    
123
#define AANSCALE_BITS 12
124
static const unsigned short aanscales[64] = {
125
    /* precomputed values scaled up by 14 bits */
126
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
127
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
128
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
129
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
130
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
131
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
132
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
133
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
134
};
135

    
136
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
137

    
138
int64_t gettime(void)
139
{
140
    struct timeval tv;
141
    gettimeofday(&tv,NULL);
142
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
143
}
144

    
145
#define NB_ITS 20000
146
#define NB_ITS_SPEED 50000
147

    
148
static short idct_mmx_perm[64];
149

    
150
static short idct_simple_mmx_perm[64]={
151
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
152
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
153
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
154
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
155
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
156
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
157
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
158
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
159
};
160

    
161
void idct_mmx_init(void)
162
{
163
    int i;
164

    
165
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
166
    for (i = 0; i < 64; i++) {
167
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
168
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
169
    }
170
}
171

    
172
static DCTELEM block[64] __attribute__ ((aligned (8)));
173
static DCTELEM block1[64] __attribute__ ((aligned (8)));
174
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
175

    
176
void dct_error(const char *name, int is_idct,
177
               void (*fdct_func)(DCTELEM *block),
178
               void (*fdct_ref)(DCTELEM *block), int form, int test)
179
{
180
    int it, i, scale;
181
    int err_inf, v;
182
    int64_t err2, ti, ti1, it1;
183
    int64_t sysErr[64], sysErrMax=0;
184
    int maxout=0;
185
    int blockSumErrMax=0, blockSumErr;
186

    
187
    srandom(0);
188

    
189
    err_inf = 0;
190
    err2 = 0;
191
    for(i=0; i<64; i++) sysErr[i]=0;
192
    for(it=0;it<NB_ITS;it++) {
193
        for(i=0;i<64;i++)
194
            block1[i] = 0;
195
        switch(test){
196
        case 0:
197
            for(i=0;i<64;i++)
198
                block1[i] = (random() % 512) -256;
199
            if (is_idct){
200
                fdct(block1);
201

    
202
                for(i=0;i<64;i++)
203
                    block1[i]>>=3;
204
            }
205
        break;
206
        case 1:{
207
            int num= (random()%10)+1;
208
            for(i=0;i<num;i++)
209
                block1[random()%64] = (random() % 512) -256;
210
        }break;
211
        case 2:
212
            block1[0]= (random()%4096)-2048;
213
            block1[63]= (block1[0]&1)^1;
214
        break;
215
        }
216

    
217
#if 0 // simulate mismatch control
218
{ int sum=0;
219
        for(i=0;i<64;i++)
220
           sum+=block1[i];
221

222
        if((sum&1)==0) block1[63]^=1;
223
}
224
#endif
225

    
226
        for(i=0; i<64; i++)
227
            block_org[i]= block1[i];
228

    
229
        if (form == MMX_PERM) {
230
            for(i=0;i<64;i++)
231
                block[idct_mmx_perm[i]] = block1[i];
232
            } else if (form == MMX_SIMPLE_PERM) {
233
            for(i=0;i<64;i++)
234
                block[idct_simple_mmx_perm[i]] = block1[i];
235

    
236
        } else {
237
            for(i=0; i<64; i++)
238
                block[i]= block1[i];
239
        }
240
#if 0 // simulate mismatch control for tested IDCT but not the ref
241
{ int sum=0;
242
        for(i=0;i<64;i++)
243
           sum+=block[i];
244

245
        if((sum&1)==0) block[63]^=1;
246
}
247
#endif
248

    
249
        fdct_func(block);
250
        emms_c(); /* for ff_mmx_idct */
251

    
252
        if (form == SCALE_PERM) {
253
            for(i=0; i<64; i++) {
254
                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
255
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
256
            }
257
        }
258

    
259
        fdct_ref(block1);
260

    
261
        blockSumErr=0;
262
        for(i=0;i<64;i++) {
263
            v = abs(block[i] - block1[i]);
264
            if (v > err_inf)
265
                err_inf = v;
266
            err2 += v * v;
267
            sysErr[i] += block[i] - block1[i];
268
            blockSumErr += v;
269
            if( abs(block[i])>maxout) maxout=abs(block[i]);
270
        }
271
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
272
#if 0 // print different matrix pairs
273
        if(blockSumErr){
274
            printf("\n");
275
            for(i=0; i<64; i++){
276
                if((i&7)==0) printf("\n");
277
                printf("%4d ", block_org[i]);
278
            }
279
            for(i=0; i<64; i++){
280
                if((i&7)==0) printf("\n");
281
                printf("%4d ", block[i] - block1[i]);
282
            }
283
        }
284
#endif
285
    }
286
    for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
287

    
288
#if 1 // dump systematic errors
289
    for(i=0; i<64; i++){
290
        if(i%8==0) printf("\n");
291
        printf("%5d ", (int)sysErr[i]);
292
    }
293
    printf("\n");
294
#endif
295

    
296
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
297
           is_idct ? "IDCT" : "DCT",
298
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
299
#if 1 //Speed test
300
    /* speed test */
301
    for(i=0;i<64;i++)
302
        block1[i] = 0;
303
    switch(test){
304
    case 0:
305
        for(i=0;i<64;i++)
306
            block1[i] = (random() % 512) -256;
307
        if (is_idct){
308
            fdct(block1);
309

    
310
            for(i=0;i<64;i++)
311
                block1[i]>>=3;
312
        }
313
    break;
314
    case 1:{
315
    case 2:
316
        block1[0] = (random() % 512) -256;
317
        block1[1] = (random() % 512) -256;
318
        block1[2] = (random() % 512) -256;
319
        block1[3] = (random() % 512) -256;
320
    }break;
321
    }
322

    
323
    if (form == MMX_PERM) {
324
        for(i=0;i<64;i++)
325
            block[idct_mmx_perm[i]] = block1[i];
326
    } else if(form == MMX_SIMPLE_PERM) {
327
        for(i=0;i<64;i++)
328
            block[idct_simple_mmx_perm[i]] = block1[i];
329
    } else {
330
        for(i=0; i<64; i++)
331
            block[i]= block1[i];
332
    }
333

    
334
    ti = gettime();
335
    it1 = 0;
336
    do {
337
        for(it=0;it<NB_ITS_SPEED;it++) {
338
            for(i=0; i<64; i++)
339
                block[i]= block1[i];
340
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
341
// do not memcpy especially not fastmemcpy because it does movntq !!!
342
            fdct_func(block);
343
        }
344
        it1 += NB_ITS_SPEED;
345
        ti1 = gettime() - ti;
346
    } while (ti1 < 1000000);
347
    emms_c();
348

    
349
    printf("%s %s: %0.1f kdct/s\n",
350
           is_idct ? "IDCT" : "DCT",
351
           name, (double)it1 * 1000.0 / (double)ti1);
352
#endif
353
}
354

    
355
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
356
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
357

    
358
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
359
{
360
    static int init;
361
    static double c8[8][8];
362
    static double c4[4][4];
363
    double block1[64], block2[64], block3[64];
364
    double s, sum, v;
365
    int i, j, k;
366

    
367
    if (!init) {
368
        init = 1;
369

    
370
        for(i=0;i<8;i++) {
371
            sum = 0;
372
            for(j=0;j<8;j++) {
373
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
374
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
375
                sum += c8[i][j] * c8[i][j];
376
            }
377
        }
378

    
379
        for(i=0;i<4;i++) {
380
            sum = 0;
381
            for(j=0;j<4;j++) {
382
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
383
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
384
                sum += c4[i][j] * c4[i][j];
385
            }
386
        }
387
    }
388

    
389
    /* butterfly */
390
    s = 0.5 * sqrt(2.0);
391
    for(i=0;i<4;i++) {
392
        for(j=0;j<8;j++) {
393
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
394
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
395
        }
396
    }
397

    
398
    /* idct8 on lines */
399
    for(i=0;i<8;i++) {
400
        for(j=0;j<8;j++) {
401
            sum = 0;
402
            for(k=0;k<8;k++)
403
                sum += c8[k][j] * block1[8*i+k];
404
            block2[8*i+j] = sum;
405
        }
406
    }
407

    
408
    /* idct4 */
409
    for(i=0;i<8;i++) {
410
        for(j=0;j<4;j++) {
411
            /* top */
412
            sum = 0;
413
            for(k=0;k<4;k++)
414
                sum += c4[k][j] * block2[8*(2*k)+i];
415
            block3[8*(2*j)+i] = sum;
416

    
417
            /* bottom */
418
            sum = 0;
419
            for(k=0;k<4;k++)
420
                sum += c4[k][j] * block2[8*(2*k+1)+i];
421
            block3[8*(2*j+1)+i] = sum;
422
        }
423
    }
424

    
425
    /* clamp and store the result */
426
    for(i=0;i<8;i++) {
427
        for(j=0;j<8;j++) {
428
            v = block3[8*i+j];
429
            if (v < 0)
430
                v = 0;
431
            else if (v > 255)
432
                v = 255;
433
            dest[i * linesize + j] = (int)rint(v);
434
        }
435
    }
436
}
437

    
438
void idct248_error(const char *name,
439
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
440
{
441
    int it, i, it1, ti, ti1, err_max, v;
442

    
443
    srandom(0);
444

    
445
    /* just one test to see if code is correct (precision is less
446
       important here) */
447
    err_max = 0;
448
    for(it=0;it<NB_ITS;it++) {
449

    
450
        /* XXX: use forward transform to generate values */
451
        for(i=0;i<64;i++)
452
            block1[i] = (random() % 256) - 128;
453
        block1[0] += 1024;
454

    
455
        for(i=0; i<64; i++)
456
            block[i]= block1[i];
457
        idct248_ref(img_dest1, 8, block);
458

    
459
        for(i=0; i<64; i++)
460
            block[i]= block1[i];
461
        idct248_put(img_dest, 8, block);
462

    
463
        for(i=0;i<64;i++) {
464
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
465
            if (v == 255)
466
                printf("%d %d\n", img_dest[i], img_dest1[i]);
467
            if (v > err_max)
468
                err_max = v;
469
        }
470
#if 0
471
        printf("ref=\n");
472
        for(i=0;i<8;i++) {
473
            int j;
474
            for(j=0;j<8;j++) {
475
                printf(" %3d", img_dest1[i*8+j]);
476
            }
477
            printf("\n");
478
        }
479

480
        printf("out=\n");
481
        for(i=0;i<8;i++) {
482
            int j;
483
            for(j=0;j<8;j++) {
484
                printf(" %3d", img_dest[i*8+j]);
485
            }
486
            printf("\n");
487
        }
488
#endif
489
    }
490
    printf("%s %s: err_inf=%d\n",
491
           1 ? "IDCT248" : "DCT248",
492
           name, err_max);
493

    
494
    ti = gettime();
495
    it1 = 0;
496
    do {
497
        for(it=0;it<NB_ITS_SPEED;it++) {
498
            for(i=0; i<64; i++)
499
                block[i]= block1[i];
500
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
501
// do not memcpy especially not fastmemcpy because it does movntq !!!
502
            idct248_put(img_dest, 8, block);
503
        }
504
        it1 += NB_ITS_SPEED;
505
        ti1 = gettime() - ti;
506
    } while (ti1 < 1000000);
507
    emms_c();
508

    
509
    printf("%s %s: %0.1f kdct/s\n",
510
           1 ? "IDCT248" : "DCT248",
511
           name, (double)it1 * 1000.0 / (double)ti1);
512
}
513

    
514
void help(void)
515
{
516
    printf("dct-test [-i] [<test-number>]\n"
517
           "test-number 0 -> test with random matrixes\n"
518
           "            1 -> test with random sparse matrixes\n"
519
           "            2 -> do 3. test from mpeg4 std\n"
520
           "-i          test IDCT implementations\n"
521
           "-4          test IDCT248 implementations\n");
522
}
523

    
524
int main(int argc, char **argv)
525
{
526
    int test_idct = 0, test_248_dct = 0;
527
    int c,i;
528
    int test=1;
529

    
530
    init_fdct();
531
    idct_mmx_init();
532

    
533
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
534
    for(i=0;i<MAX_NEG_CROP;i++) {
535
        cropTbl[i] = 0;
536
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
537
    }
538

    
539
    for(;;) {
540
        c = getopt(argc, argv, "ih4");
541
        if (c == -1)
542
            break;
543
        switch(c) {
544
        case 'i':
545
            test_idct = 1;
546
            break;
547
        case '4':
548
            test_248_dct = 1;
549
            break;
550
        default :
551
        case 'h':
552
            help();
553
            return 0;
554
        }
555
    }
556

    
557
    if(optind <argc) test= atoi(argv[optind]);
558

    
559
    printf("ffmpeg DCT/IDCT test\n");
560

    
561
    if (test_248_dct) {
562
        idct248_error("SIMPLE-C", simple_idct248_put);
563
    } else {
564
      for (i=0;algos[i].name;i++)
565
        if (algos[i].is_idct == test_idct) {
566
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
567
        }
568
    }
569
    return 0;
570
}