Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ ad1ea1f1

History | View | Annotate | Download (15.4 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file dct-test.c
24
 * DCT test. (c) 2001 Fabrice Bellard.
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33

    
34
#include "dsputil.h"
35

    
36
#include "simple_idct.h"
37
#include "faandct.h"
38

    
39
#ifndef MAX
40
#define MAX(a, b)  (((a) > (b)) ? (a) : (b))
41
#endif
42

    
43
#undef printf
44

    
45
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
46

    
47
/* reference fdct/idct */
48
extern void fdct(DCTELEM *block);
49
extern void idct(DCTELEM *block);
50
extern void ff_idct_xvid_mmx(DCTELEM *block);
51
extern void ff_idct_xvid_mmx2(DCTELEM *block);
52
extern void init_fdct();
53

    
54
extern void j_rev_dct(DCTELEM *data);
55
extern void ff_mmx_idct(DCTELEM *data);
56
extern void ff_mmxext_idct(DCTELEM *data);
57

    
58
extern void odivx_idct_c (short *block);
59

    
60
// BFIN
61
extern void ff_bfin_idct (DCTELEM *block)  ;
62
extern void ff_bfin_fdct (DCTELEM *block) ;
63

    
64
// ALTIVEC
65
extern void fdct_altivec (DCTELEM *block);
66
//extern void idct_altivec (DCTELEM *block);?? no routine
67

    
68

    
69
struct algo {
70
  char *name;
71
  enum { FDCT, IDCT } is_idct;
72
  void (* func) (DCTELEM *block);
73
  void (* ref)  (DCTELEM *block);
74
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM } format;
75
};
76

    
77
#ifndef FAAN_POSTSCALE
78
#define FAAN_SCALE SCALE_PERM
79
#else
80
#define FAAN_SCALE NO_PERM
81
#endif
82

    
83
#define DCT_ERROR(name,is_idct,func,ref,form) {name,is_idct,func,ref,form}
84

    
85

    
86
struct algo algos[] = {
87
  DCT_ERROR( "REF-DBL",        0, fdct,               fdct, NO_PERM),
88
  DCT_ERROR("IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM),
89
  DCT_ERROR("IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM),
90
  DCT_ERROR("REF-DBL",         1, idct,               idct, NO_PERM),
91
  DCT_ERROR("INT",             1, j_rev_dct,          idct, MMX_PERM),
92
  DCT_ERROR("SIMPLE-C",        1, simple_idct,        idct, NO_PERM),
93

    
94
#ifdef ARCH_X86
95
  DCT_ERROR("MMX",             0, ff_fdct_mmx,        fdct, NO_PERM),
96
  DCT_ERROR("MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM),
97
  DCT_ERROR("FAAN",            0, ff_faandct,         fdct, FAAN_SCALE),
98

    
99
  DCT_ERROR("LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM),
100
  DCT_ERROR("LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM),
101
  DCT_ERROR("SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM),
102
  DCT_ERROR("XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM),
103
  DCT_ERROR("XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM),
104
#endif
105

    
106
#ifdef HAVE_ALTIVEC
107
  DCT_ERROR("altivecfdct",     0, fdct_altivec,       fdct, NO_PERM),
108
#endif
109

    
110
#ifdef ARCH_BFIN
111
  DCT_ERROR("BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM),
112
  DCT_ERROR("BFINidct",        1, ff_bfin_idct,       idct, NO_PERM),
113
#endif
114

    
115
  { 0 }
116
};
117

    
118
#define AANSCALE_BITS 12
119
static const unsigned short aanscales[64] = {
120
    /* precomputed values scaled up by 14 bits */
121
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
122
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
123
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
124
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
125
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
126
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
127
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
128
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
129
};
130

    
131
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
132

    
133
int64_t gettime(void)
134
{
135
    struct timeval tv;
136
    gettimeofday(&tv,NULL);
137
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
138
}
139

    
140
#define NB_ITS 20000
141
#define NB_ITS_SPEED 50000
142

    
143
static short idct_mmx_perm[64];
144

    
145
static short idct_simple_mmx_perm[64]={
146
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
147
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
148
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
149
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
150
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
151
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
152
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
153
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
154
};
155

    
156
void idct_mmx_init(void)
157
{
158
    int i;
159

    
160
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
161
    for (i = 0; i < 64; i++) {
162
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
163
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
164
    }
165
}
166

    
167
static DCTELEM block[64] __attribute__ ((aligned (8)));
168
static DCTELEM block1[64] __attribute__ ((aligned (8)));
169
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
170

    
171
void dct_error(const char *name, int is_idct,
172
               void (*fdct_func)(DCTELEM *block),
173
               void (*fdct_ref)(DCTELEM *block), int form, int test)
174
{
175
    int it, i, scale;
176
    int err_inf, v;
177
    int64_t err2, ti, ti1, it1;
178
    int64_t sysErr[64], sysErrMax=0;
179
    int maxout=0;
180
    int blockSumErrMax=0, blockSumErr;
181

    
182
    srandom(0);
183

    
184
    err_inf = 0;
185
    err2 = 0;
186
    for(i=0; i<64; i++) sysErr[i]=0;
187
    for(it=0;it<NB_ITS;it++) {
188
        for(i=0;i<64;i++)
189
            block1[i] = 0;
190
        switch(test){
191
        case 0:
192
            for(i=0;i<64;i++)
193
                block1[i] = (random() % 512) -256;
194
            if (is_idct){
195
                fdct(block1);
196

    
197
                for(i=0;i<64;i++)
198
                    block1[i]>>=3;
199
            }
200
        break;
201
        case 1:{
202
            int num= (random()%10)+1;
203
            for(i=0;i<num;i++)
204
                block1[random()%64] = (random() % 512) -256;
205
        }break;
206
        case 2:
207
            block1[0]= (random()%4096)-2048;
208
            block1[63]= (block1[0]&1)^1;
209
        break;
210
        }
211

    
212
#if 0 // simulate mismatch control
213
{ int sum=0;
214
        for(i=0;i<64;i++)
215
           sum+=block1[i];
216

217
        if((sum&1)==0) block1[63]^=1;
218
}
219
#endif
220

    
221
        for(i=0; i<64; i++)
222
            block_org[i]= block1[i];
223

    
224
        if (form == MMX_PERM) {
225
            for(i=0;i<64;i++)
226
                block[idct_mmx_perm[i]] = block1[i];
227
            } else if (form == MMX_SIMPLE_PERM) {
228
            for(i=0;i<64;i++)
229
                block[idct_simple_mmx_perm[i]] = block1[i];
230

    
231
        } else {
232
            for(i=0; i<64; i++)
233
                block[i]= block1[i];
234
        }
235
#if 0 // simulate mismatch control for tested IDCT but not the ref
236
{ int sum=0;
237
        for(i=0;i<64;i++)
238
           sum+=block[i];
239

240
        if((sum&1)==0) block[63]^=1;
241
}
242
#endif
243

    
244
        fdct_func(block);
245
        emms_c(); /* for ff_mmx_idct */
246

    
247
        if (form == SCALE_PERM) {
248
            for(i=0; i<64; i++) {
249
                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
250
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
251
            }
252
        }
253

    
254
        fdct_ref(block1);
255

    
256
        blockSumErr=0;
257
        for(i=0;i<64;i++) {
258
            v = abs(block[i] - block1[i]);
259
            if (v > err_inf)
260
                err_inf = v;
261
            err2 += v * v;
262
            sysErr[i] += block[i] - block1[i];
263
            blockSumErr += v;
264
            if( abs(block[i])>maxout) maxout=abs(block[i]);
265
        }
266
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
267
#if 0 // print different matrix pairs
268
        if(blockSumErr){
269
            printf("\n");
270
            for(i=0; i<64; i++){
271
                if((i&7)==0) printf("\n");
272
                printf("%4d ", block_org[i]);
273
            }
274
            for(i=0; i<64; i++){
275
                if((i&7)==0) printf("\n");
276
                printf("%4d ", block[i] - block1[i]);
277
            }
278
        }
279
#endif
280
    }
281
    for(i=0; i<64; i++) sysErrMax= MAX(sysErrMax, FFABS(sysErr[i]));
282

    
283
#if 1 // dump systematic errors
284
    for(i=0; i<64; i++){
285
        if(i%8==0) printf("\n");
286
        printf("%5d ", (int)sysErr[i]);
287
    }
288
    printf("\n");
289
#endif
290

    
291
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
292
           is_idct ? "IDCT" : "DCT",
293
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
294
#if 1 //Speed test
295
    /* speed test */
296
    for(i=0;i<64;i++)
297
        block1[i] = 0;
298
    switch(test){
299
    case 0:
300
        for(i=0;i<64;i++)
301
            block1[i] = (random() % 512) -256;
302
        if (is_idct){
303
            fdct(block1);
304

    
305
            for(i=0;i<64;i++)
306
                block1[i]>>=3;
307
        }
308
    break;
309
    case 1:{
310
    case 2:
311
        block1[0] = (random() % 512) -256;
312
        block1[1] = (random() % 512) -256;
313
        block1[2] = (random() % 512) -256;
314
        block1[3] = (random() % 512) -256;
315
    }break;
316
    }
317

    
318
    if (form == MMX_PERM) {
319
        for(i=0;i<64;i++)
320
            block[idct_mmx_perm[i]] = block1[i];
321
    } else if(form == MMX_SIMPLE_PERM) {
322
        for(i=0;i<64;i++)
323
            block[idct_simple_mmx_perm[i]] = block1[i];
324
    } else {
325
        for(i=0; i<64; i++)
326
            block[i]= block1[i];
327
    }
328

    
329
    ti = gettime();
330
    it1 = 0;
331
    do {
332
        for(it=0;it<NB_ITS_SPEED;it++) {
333
            for(i=0; i<64; i++)
334
                block[i]= block1[i];
335
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
336
// dont memcpy especially not fastmemcpy because it does movntq !!!
337
            fdct_func(block);
338
        }
339
        it1 += NB_ITS_SPEED;
340
        ti1 = gettime() - ti;
341
    } while (ti1 < 1000000);
342
    emms_c();
343

    
344
    printf("%s %s: %0.1f kdct/s\n",
345
           is_idct ? "IDCT" : "DCT",
346
           name, (double)it1 * 1000.0 / (double)ti1);
347
#endif
348
}
349

    
350
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
351
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
352

    
353
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
354
{
355
    static int init;
356
    static double c8[8][8];
357
    static double c4[4][4];
358
    double block1[64], block2[64], block3[64];
359
    double s, sum, v;
360
    int i, j, k;
361

    
362
    if (!init) {
363
        init = 1;
364

    
365
        for(i=0;i<8;i++) {
366
            sum = 0;
367
            for(j=0;j<8;j++) {
368
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
369
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
370
                sum += c8[i][j] * c8[i][j];
371
            }
372
        }
373

    
374
        for(i=0;i<4;i++) {
375
            sum = 0;
376
            for(j=0;j<4;j++) {
377
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
378
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
379
                sum += c4[i][j] * c4[i][j];
380
            }
381
        }
382
    }
383

    
384
    /* butterfly */
385
    s = 0.5 * sqrt(2.0);
386
    for(i=0;i<4;i++) {
387
        for(j=0;j<8;j++) {
388
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
389
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
390
        }
391
    }
392

    
393
    /* idct8 on lines */
394
    for(i=0;i<8;i++) {
395
        for(j=0;j<8;j++) {
396
            sum = 0;
397
            for(k=0;k<8;k++)
398
                sum += c8[k][j] * block1[8*i+k];
399
            block2[8*i+j] = sum;
400
        }
401
    }
402

    
403
    /* idct4 */
404
    for(i=0;i<8;i++) {
405
        for(j=0;j<4;j++) {
406
            /* top */
407
            sum = 0;
408
            for(k=0;k<4;k++)
409
                sum += c4[k][j] * block2[8*(2*k)+i];
410
            block3[8*(2*j)+i] = sum;
411

    
412
            /* bottom */
413
            sum = 0;
414
            for(k=0;k<4;k++)
415
                sum += c4[k][j] * block2[8*(2*k+1)+i];
416
            block3[8*(2*j+1)+i] = sum;
417
        }
418
    }
419

    
420
    /* clamp and store the result */
421
    for(i=0;i<8;i++) {
422
        for(j=0;j<8;j++) {
423
            v = block3[8*i+j];
424
            if (v < 0)
425
                v = 0;
426
            else if (v > 255)
427
                v = 255;
428
            dest[i * linesize + j] = (int)rint(v);
429
        }
430
    }
431
}
432

    
433
void idct248_error(const char *name,
434
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
435
{
436
    int it, i, it1, ti, ti1, err_max, v;
437

    
438
    srandom(0);
439

    
440
    /* just one test to see if code is correct (precision is less
441
       important here) */
442
    err_max = 0;
443
    for(it=0;it<NB_ITS;it++) {
444

    
445
        /* XXX: use forward transform to generate values */
446
        for(i=0;i<64;i++)
447
            block1[i] = (random() % 256) - 128;
448
        block1[0] += 1024;
449

    
450
        for(i=0; i<64; i++)
451
            block[i]= block1[i];
452
        idct248_ref(img_dest1, 8, block);
453

    
454
        for(i=0; i<64; i++)
455
            block[i]= block1[i];
456
        idct248_put(img_dest, 8, block);
457

    
458
        for(i=0;i<64;i++) {
459
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
460
            if (v == 255)
461
                printf("%d %d\n", img_dest[i], img_dest1[i]);
462
            if (v > err_max)
463
                err_max = v;
464
        }
465
#if 0
466
        printf("ref=\n");
467
        for(i=0;i<8;i++) {
468
            int j;
469
            for(j=0;j<8;j++) {
470
                printf(" %3d", img_dest1[i*8+j]);
471
            }
472
            printf("\n");
473
        }
474

475
        printf("out=\n");
476
        for(i=0;i<8;i++) {
477
            int j;
478
            for(j=0;j<8;j++) {
479
                printf(" %3d", img_dest[i*8+j]);
480
            }
481
            printf("\n");
482
        }
483
#endif
484
    }
485
    printf("%s %s: err_inf=%d\n",
486
           1 ? "IDCT248" : "DCT248",
487
           name, err_max);
488

    
489
    ti = gettime();
490
    it1 = 0;
491
    do {
492
        for(it=0;it<NB_ITS_SPEED;it++) {
493
            for(i=0; i<64; i++)
494
                block[i]= block1[i];
495
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
496
// dont memcpy especially not fastmemcpy because it does movntq !!!
497
            idct248_put(img_dest, 8, block);
498
        }
499
        it1 += NB_ITS_SPEED;
500
        ti1 = gettime() - ti;
501
    } while (ti1 < 1000000);
502
    emms_c();
503

    
504
    printf("%s %s: %0.1f kdct/s\n",
505
           1 ? "IDCT248" : "DCT248",
506
           name, (double)it1 * 1000.0 / (double)ti1);
507
}
508

    
509
void help(void)
510
{
511
    printf("dct-test [-i] [<test-number>]\n"
512
           "test-number 0 -> test with random matrixes\n"
513
           "            1 -> test with random sparse matrixes\n"
514
           "            2 -> do 3. test from mpeg4 std\n"
515
           "-i          test IDCT implementations\n"
516
           "-4          test IDCT248 implementations\n");
517
}
518

    
519
int main(int argc, char **argv)
520
{
521
    int test_idct = 0, test_248_dct = 0;
522
    int c,i;
523
    int test=1;
524

    
525
    init_fdct();
526
    idct_mmx_init();
527

    
528
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
529
    for(i=0;i<MAX_NEG_CROP;i++) {
530
        cropTbl[i] = 0;
531
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
532
    }
533

    
534
    for(;;) {
535
        c = getopt(argc, argv, "ih4");
536
        if (c == -1)
537
            break;
538
        switch(c) {
539
        case 'i':
540
            test_idct = 1;
541
            break;
542
        case '4':
543
            test_248_dct = 1;
544
            break;
545
        default :
546
        case 'h':
547
            help();
548
            return 0;
549
        }
550
    }
551

    
552
    if(optind <argc) test= atoi(argv[optind]);
553

    
554
    printf("ffmpeg DCT/IDCT test\n");
555

    
556
    if (test_248_dct) {
557
        idct248_error("SIMPLE-C", simple_idct248_put);
558
    } else {
559
      for (i=0;algos[i].name;i++)
560
        if (algos[i].is_idct == test_idct) {
561
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
562
        }
563
    }
564
    return 0;
565
}