Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ 755bfeab

History | View | Annotate | Download (15.4 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file dct-test.c
24
 * DCT test. (c) 2001 Fabrice Bellard.
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "dsputil.h"
36

    
37
#include "simple_idct.h"
38
#include "faandct.h"
39

    
40
#ifndef MAX
41
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
42
#endif
43

    
44
#undef printf
45

    
46
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47

    
48
/* reference fdct/idct */
49
extern void fdct(DCTELEM *block);
50
extern void idct(DCTELEM *block);
51
extern void ff_idct_xvid_mmx(DCTELEM *block);
52
extern void ff_idct_xvid_mmx2(DCTELEM *block);
53
extern void init_fdct();
54

    
55
extern void ff_mmx_idct(DCTELEM *data);
56
extern void ff_mmxext_idct(DCTELEM *data);
57

    
58
extern void odivx_idct_c (short *block);
59

    
60
// BFIN
61
extern void ff_bfin_idct (DCTELEM *block)  ;
62
extern void ff_bfin_fdct (DCTELEM *block) ;
63

    
64
// ALTIVEC
65
extern void fdct_altivec (DCTELEM *block);
66
//extern void idct_altivec (DCTELEM *block);?? no routine
67

    
68

    
69
struct algo {
70
  char *name;
71
  enum { FDCT, IDCT } is_idct;
72
  void (* func) (DCTELEM *block);
73
  void (* ref)  (DCTELEM *block);
74
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
75
};
76

    
77
#ifndef FAAN_POSTSCALE
78
#define FAAN_SCALE SCALE_PERM
79
#else
80
#define FAAN_SCALE NO_PERM
81
#endif
82

    
83
#define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
84

    
85

    
86
struct algo algos[] = {
87
  DCT_ERROR( "REF-DBL",        0, fdct,               fdct, NO_PERM),
88
  DCT_ERROR("FAAN",            0, ff_faandct,         fdct, FAAN_SCALE),
89
  DCT_ERROR("IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM),
90
  DCT_ERROR("IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM),
91
  DCT_ERROR("REF-DBL",         1, idct,               idct, NO_PERM),
92
  DCT_ERROR("INT",             1, j_rev_dct,          idct, MMX_PERM),
93
  DCT_ERROR("SIMPLE-C",        1, simple_idct,        idct, NO_PERM),
94

    
95
#ifdef HAVE_MMX
96
  DCT_ERROR("MMX",             0, ff_fdct_mmx,        fdct, NO_PERM),
97
#ifdef HAVE_MMX2
98
  DCT_ERROR("MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM),
99
#endif
100

    
101
#ifdef CONFIG_GPL
102
  DCT_ERROR("LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM),
103
  DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM),
104
#endif
105
  DCT_ERROR("SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
106
  DCT_ERROR("XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM),
107
  DCT_ERROR("XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM),
108
#endif
109

    
110
#ifdef HAVE_ALTIVEC
111
  DCT_ERROR("altivecfdct",     0, fdct_altivec,       fdct, NO_PERM),
112
#endif
113

    
114
#ifdef ARCH_BFIN
115
  DCT_ERROR("BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM),
116
  DCT_ERROR("BFINidct",        1, ff_bfin_idct,       idct, NO_PERM),
117
#endif
118

    
119
  { 0 }
120
};
121

    
122
#define AANSCALE_BITS 12
123
static const unsigned short aanscales[64] = {
124
    /* precomputed values scaled up by 14 bits */
125
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
126
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
127
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
128
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
129
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
130
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
131
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
132
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
133
};
134

    
135
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
136

    
137
int64_t gettime(void)
138
{
139
    struct timeval tv;
140
    gettimeofday(&tv,NULL);
141
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
142
}
143

    
144
#define NB_ITS 20000
145
#define NB_ITS_SPEED 50000
146

    
147
static short idct_mmx_perm[64];
148

    
149
static short idct_simple_mmx_perm[64]={
150
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
151
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
152
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
153
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
154
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
155
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
156
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
157
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
158
};
159

    
160
void idct_mmx_init(void)
161
{
162
    int i;
163

    
164
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
165
    for (i = 0; i < 64; i++) {
166
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
167
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
168
    }
169
}
170

    
171
static DCTELEM block[64] __attribute__ ((aligned (8)));
172
static DCTELEM block1[64] __attribute__ ((aligned (8)));
173
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
174

    
175
void dct_error(const char *name, int is_idct,
176
               void (*fdct_func)(DCTELEM *block),
177
               void (*fdct_ref)(DCTELEM *block), int form, int test)
178
{
179
    int it, i, scale;
180
    int err_inf, v;
181
    int64_t err2, ti, ti1, it1;
182
    int64_t sysErr[64], sysErrMax=0;
183
    int maxout=0;
184
    int blockSumErrMax=0, blockSumErr;
185

    
186
    srandom(0);
187

    
188
    err_inf = 0;
189
    err2 = 0;
190
    for(i=0; i<64; i++) sysErr[i]=0;
191
    for(it=0;it<NB_ITS;it++) {
192
        for(i=0;i<64;i++)
193
            block1[i] = 0;
194
        switch(test){
195
        case 0:
196
            for(i=0;i<64;i++)
197
                block1[i] = (random() % 512) -256;
198
            if (is_idct){
199
                fdct(block1);
200

    
201
                for(i=0;i<64;i++)
202
                    block1[i]>>=3;
203
            }
204
        break;
205
        case 1:{
206
            int num= (random()%10)+1;
207
            for(i=0;i<num;i++)
208
                block1[random()%64] = (random() % 512) -256;
209
        }break;
210
        case 2:
211
            block1[0]= (random()%4096)-2048;
212
            block1[63]= (block1[0]&1)^1;
213
        break;
214
        }
215

    
216
#if 0 // simulate mismatch control
217
{ int sum=0;
218
        for(i=0;i<64;i++)
219
           sum+=block1[i];
220

221
        if((sum&1)==0) block1[63]^=1;
222
}
223
#endif
224

    
225
        for(i=0; i<64; i++)
226
            block_org[i]= block1[i];
227

    
228
        if (form == MMX_PERM) {
229
            for(i=0;i<64;i++)
230
                block[idct_mmx_perm[i]] = block1[i];
231
            } else if (form == MMX_SIMPLE_PERM) {
232
            for(i=0;i<64;i++)
233
                block[idct_simple_mmx_perm[i]] = block1[i];
234

    
235
        } else {
236
            for(i=0; i<64; i++)
237
                block[i]= block1[i];
238
        }
239
#if 0 // simulate mismatch control for tested IDCT but not the ref
240
{ int sum=0;
241
        for(i=0;i<64;i++)
242
           sum+=block[i];
243

244
        if((sum&1)==0) block[63]^=1;
245
}
246
#endif
247

    
248
        fdct_func(block);
249
        emms_c(); /* for ff_mmx_idct */
250

    
251
        if (form == SCALE_PERM) {
252
            for(i=0; i<64; i++) {
253
                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
254
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
255
            }
256
        }
257

    
258
        fdct_ref(block1);
259

    
260
        blockSumErr=0;
261
        for(i=0;i<64;i++) {
262
            v = abs(block[i] - block1[i]);
263
            if (v > err_inf)
264
                err_inf = v;
265
            err2 += v * v;
266
            sysErr[i] += block[i] - block1[i];
267
            blockSumErr += v;
268
            if( abs(block[i])>maxout) maxout=abs(block[i]);
269
        }
270
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
271
#if 0 // print different matrix pairs
272
        if(blockSumErr){
273
            printf("\n");
274
            for(i=0; i<64; i++){
275
                if((i&7)==0) printf("\n");
276
                printf("%4d ", block_org[i]);
277
            }
278
            for(i=0; i<64; i++){
279
                if((i&7)==0) printf("\n");
280
                printf("%4d ", block[i] - block1[i]);
281
            }
282
        }
283
#endif
284
    }
285
    for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
286

    
287
#if 1 // dump systematic errors
288
    for(i=0; i<64; i++){
289
        if(i%8==0) printf("\n");
290
        printf("%5d ", (int)sysErr[i]);
291
    }
292
    printf("\n");
293
#endif
294

    
295
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
296
           is_idct ? "IDCT" : "DCT",
297
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
298
#if 1 //Speed test
299
    /* speed test */
300
    for(i=0;i<64;i++)
301
        block1[i] = 0;
302
    switch(test){
303
    case 0:
304
        for(i=0;i<64;i++)
305
            block1[i] = (random() % 512) -256;
306
        if (is_idct){
307
            fdct(block1);
308

    
309
            for(i=0;i<64;i++)
310
                block1[i]>>=3;
311
        }
312
    break;
313
    case 1:{
314
    case 2:
315
        block1[0] = (random() % 512) -256;
316
        block1[1] = (random() % 512) -256;
317
        block1[2] = (random() % 512) -256;
318
        block1[3] = (random() % 512) -256;
319
    }break;
320
    }
321

    
322
    if (form == MMX_PERM) {
323
        for(i=0;i<64;i++)
324
            block[idct_mmx_perm[i]] = block1[i];
325
    } else if(form == MMX_SIMPLE_PERM) {
326
        for(i=0;i<64;i++)
327
            block[idct_simple_mmx_perm[i]] = block1[i];
328
    } else {
329
        for(i=0; i<64; i++)
330
            block[i]= block1[i];
331
    }
332

    
333
    ti = gettime();
334
    it1 = 0;
335
    do {
336
        for(it=0;it<NB_ITS_SPEED;it++) {
337
            for(i=0; i<64; i++)
338
                block[i]= block1[i];
339
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
340
// do not memcpy especially not fastmemcpy because it does movntq !!!
341
            fdct_func(block);
342
        }
343
        it1 += NB_ITS_SPEED;
344
        ti1 = gettime() - ti;
345
    } while (ti1 < 1000000);
346
    emms_c();
347

    
348
    printf("%s %s: %0.1f kdct/s\n",
349
           is_idct ? "IDCT" : "DCT",
350
           name, (double)it1 * 1000.0 / (double)ti1);
351
#endif
352
}
353

    
354
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
355
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
356

    
357
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
358
{
359
    static int init;
360
    static double c8[8][8];
361
    static double c4[4][4];
362
    double block1[64], block2[64], block3[64];
363
    double s, sum, v;
364
    int i, j, k;
365

    
366
    if (!init) {
367
        init = 1;
368

    
369
        for(i=0;i<8;i++) {
370
            sum = 0;
371
            for(j=0;j<8;j++) {
372
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
373
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
374
                sum += c8[i][j] * c8[i][j];
375
            }
376
        }
377

    
378
        for(i=0;i<4;i++) {
379
            sum = 0;
380
            for(j=0;j<4;j++) {
381
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
382
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
383
                sum += c4[i][j] * c4[i][j];
384
            }
385
        }
386
    }
387

    
388
    /* butterfly */
389
    s = 0.5 * sqrt(2.0);
390
    for(i=0;i<4;i++) {
391
        for(j=0;j<8;j++) {
392
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
393
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
394
        }
395
    }
396

    
397
    /* idct8 on lines */
398
    for(i=0;i<8;i++) {
399
        for(j=0;j<8;j++) {
400
            sum = 0;
401
            for(k=0;k<8;k++)
402
                sum += c8[k][j] * block1[8*i+k];
403
            block2[8*i+j] = sum;
404
        }
405
    }
406

    
407
    /* idct4 */
408
    for(i=0;i<8;i++) {
409
        for(j=0;j<4;j++) {
410
            /* top */
411
            sum = 0;
412
            for(k=0;k<4;k++)
413
                sum += c4[k][j] * block2[8*(2*k)+i];
414
            block3[8*(2*j)+i] = sum;
415

    
416
            /* bottom */
417
            sum = 0;
418
            for(k=0;k<4;k++)
419
                sum += c4[k][j] * block2[8*(2*k+1)+i];
420
            block3[8*(2*j+1)+i] = sum;
421
        }
422
    }
423

    
424
    /* clamp and store the result */
425
    for(i=0;i<8;i++) {
426
        for(j=0;j<8;j++) {
427
            v = block3[8*i+j];
428
            if (v < 0)
429
                v = 0;
430
            else if (v > 255)
431
                v = 255;
432
            dest[i * linesize + j] = (int)rint(v);
433
        }
434
    }
435
}
436

    
437
void idct248_error(const char *name,
438
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
439
{
440
    int it, i, it1, ti, ti1, err_max, v;
441

    
442
    srandom(0);
443

    
444
    /* just one test to see if code is correct (precision is less
445
       important here) */
446
    err_max = 0;
447
    for(it=0;it<NB_ITS;it++) {
448

    
449
        /* XXX: use forward transform to generate values */
450
        for(i=0;i<64;i++)
451
            block1[i] = (random() % 256) - 128;
452
        block1[0] += 1024;
453

    
454
        for(i=0; i<64; i++)
455
            block[i]= block1[i];
456
        idct248_ref(img_dest1, 8, block);
457

    
458
        for(i=0; i<64; i++)
459
            block[i]= block1[i];
460
        idct248_put(img_dest, 8, block);
461

    
462
        for(i=0;i<64;i++) {
463
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
464
            if (v == 255)
465
                printf("%d %d\n", img_dest[i], img_dest1[i]);
466
            if (v > err_max)
467
                err_max = v;
468
        }
469
#if 0
470
        printf("ref=\n");
471
        for(i=0;i<8;i++) {
472
            int j;
473
            for(j=0;j<8;j++) {
474
                printf(" %3d", img_dest1[i*8+j]);
475
            }
476
            printf("\n");
477
        }
478

479
        printf("out=\n");
480
        for(i=0;i<8;i++) {
481
            int j;
482
            for(j=0;j<8;j++) {
483
                printf(" %3d", img_dest[i*8+j]);
484
            }
485
            printf("\n");
486
        }
487
#endif
488
    }
489
    printf("%s %s: err_inf=%d\n",
490
           1 ? "IDCT248" : "DCT248",
491
           name, err_max);
492

    
493
    ti = gettime();
494
    it1 = 0;
495
    do {
496
        for(it=0;it<NB_ITS_SPEED;it++) {
497
            for(i=0; i<64; i++)
498
                block[i]= block1[i];
499
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
500
// do not memcpy especially not fastmemcpy because it does movntq !!!
501
            idct248_put(img_dest, 8, block);
502
        }
503
        it1 += NB_ITS_SPEED;
504
        ti1 = gettime() - ti;
505
    } while (ti1 < 1000000);
506
    emms_c();
507

    
508
    printf("%s %s: %0.1f kdct/s\n",
509
           1 ? "IDCT248" : "DCT248",
510
           name, (double)it1 * 1000.0 / (double)ti1);
511
}
512

    
513
void help(void)
514
{
515
    printf("dct-test [-i] [<test-number>]\n"
516
           "test-number 0 -> test with random matrixes\n"
517
           "            1 -> test with random sparse matrixes\n"
518
           "            2 -> do 3. test from mpeg4 std\n"
519
           "-i          test IDCT implementations\n"
520
           "-4          test IDCT248 implementations\n");
521
}
522

    
523
int main(int argc, char **argv)
524
{
525
    int test_idct = 0, test_248_dct = 0;
526
    int c,i;
527
    int test=1;
528

    
529
    init_fdct();
530
    idct_mmx_init();
531

    
532
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
533
    for(i=0;i<MAX_NEG_CROP;i++) {
534
        cropTbl[i] = 0;
535
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
536
    }
537

    
538
    for(;;) {
539
        c = getopt(argc, argv, "ih4");
540
        if (c == -1)
541
            break;
542
        switch(c) {
543
        case 'i':
544
            test_idct = 1;
545
            break;
546
        case '4':
547
            test_248_dct = 1;
548
            break;
549
        default :
550
        case 'h':
551
            help();
552
            return 0;
553
        }
554
    }
555

    
556
    if(optind <argc) test= atoi(argv[optind]);
557

    
558
    printf("ffmpeg DCT/IDCT test\n");
559

    
560
    if (test_248_dct) {
561
        idct248_error("SIMPLE-C", simple_idct248_put);
562
    } else {
563
      for (i=0;algos[i].name;i++)
564
        if (algos[i].is_idct == test_idct) {
565
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
566
        }
567
    }
568
    return 0;
569
}