Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ be449fca

History | View | Annotate | Download (15.8 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file dct-test.c
24
 * DCT test. (c) 2001 Fabrice Bellard.
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "libavutil/common.h"
36

    
37
#include "simple_idct.h"
38
#include "faandct.h"
39
#include "faanidct.h"
40
#include "i386/idct_xvid.h"
41

    
42
#undef printf
43
#undef random
44

    
45
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
46

    
47
/* reference fdct/idct */
48
extern void fdct(DCTELEM *block);
49
extern void idct(DCTELEM *block);
50
extern void init_fdct();
51

    
52
extern void ff_mmx_idct(DCTELEM *data);
53
extern void ff_mmxext_idct(DCTELEM *data);
54

    
55
extern void odivx_idct_c (short *block);
56

    
57
// BFIN
58
extern void ff_bfin_idct (DCTELEM *block)  ;
59
extern void ff_bfin_fdct (DCTELEM *block) ;
60

    
61
// ALTIVEC
62
extern void fdct_altivec (DCTELEM *block);
63
//extern void idct_altivec (DCTELEM *block);?? no routine
64

    
65

    
66
struct algo {
67
  const char *name;
68
  enum { FDCT, IDCT } is_idct;
69
  void (* func) (DCTELEM *block);
70
  void (* ref)  (DCTELEM *block);
71
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
72
  int  mm_support;
73
};
74

    
75
#ifndef FAAN_POSTSCALE
76
#define FAAN_SCALE SCALE_PERM
77
#else
78
#define FAAN_SCALE NO_PERM
79
#endif
80

    
81
static int cpu_flags;
82

    
83
struct algo algos[] = {
84
  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
85
  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
86
  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
87
  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
88
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
89
  {"REF-DBL",         1, idct,               idct, NO_PERM},
90
  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
91
  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
92

    
93
#ifdef HAVE_MMX
94
  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, MM_MMX},
95
#ifdef HAVE_MMX2
96
  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, MM_MMXEXT},
97
#endif
98

    
99
#ifdef CONFIG_GPL
100
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, MM_MMX},
101
  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, MM_MMXEXT},
102
#endif
103
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, MM_MMX},
104
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, MM_MMX},
105
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, MM_MMXEXT},
106
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, MM_SSE2},
107
#endif
108

    
109
#ifdef HAVE_ALTIVEC
110
  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, MM_ALTIVEC},
111
#endif
112

    
113
#ifdef ARCH_BFIN
114
  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
115
  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
116
#endif
117

    
118
  { 0 }
119
};
120

    
121
#define AANSCALE_BITS 12
122
static const unsigned short aanscales[64] = {
123
    /* precomputed values scaled up by 14 bits */
124
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
125
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
126
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
127
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
128
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
129
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
130
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
131
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
132
};
133

    
134
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
135

    
136
int64_t gettime(void)
137
{
138
    struct timeval tv;
139
    gettimeofday(&tv,NULL);
140
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
141
}
142

    
143
#define NB_ITS 20000
144
#define NB_ITS_SPEED 50000
145

    
146
static short idct_mmx_perm[64];
147

    
148
static short idct_simple_mmx_perm[64]={
149
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
150
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
151
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
152
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
153
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
154
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
155
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
156
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
157
};
158

    
159
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
160

    
161
void idct_mmx_init(void)
162
{
163
    int i;
164

    
165
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
166
    for (i = 0; i < 64; i++) {
167
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
168
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
169
    }
170
}
171

    
172
static DCTELEM block[64] __attribute__ ((aligned (16)));
173
static DCTELEM block1[64] __attribute__ ((aligned (8)));
174
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
175

    
176
static inline void mmx_emms(void)
177
{
178
#ifdef HAVE_MMX
179
    if (cpu_flags & MM_MMX)
180
        __asm__ volatile ("emms\n\t");
181
#endif
182
}
183

    
184
void dct_error(const char *name, int is_idct,
185
               void (*fdct_func)(DCTELEM *block),
186
               void (*fdct_ref)(DCTELEM *block), int form, int test)
187
{
188
    int it, i, scale;
189
    int err_inf, v;
190
    int64_t err2, ti, ti1, it1;
191
    int64_t sysErr[64], sysErrMax=0;
192
    int maxout=0;
193
    int blockSumErrMax=0, blockSumErr;
194

    
195
    srandom(0);
196

    
197
    err_inf = 0;
198
    err2 = 0;
199
    for(i=0; i<64; i++) sysErr[i]=0;
200
    for(it=0;it<NB_ITS;it++) {
201
        for(i=0;i<64;i++)
202
            block1[i] = 0;
203
        switch(test){
204
        case 0:
205
            for(i=0;i<64;i++)
206
                block1[i] = (random() % 512) -256;
207
            if (is_idct){
208
                fdct(block1);
209

    
210
                for(i=0;i<64;i++)
211
                    block1[i]>>=3;
212
            }
213
        break;
214
        case 1:{
215
            int num= (random()%10)+1;
216
            for(i=0;i<num;i++)
217
                block1[random()%64] = (random() % 512) -256;
218
        }break;
219
        case 2:
220
            block1[0]= (random()%4096)-2048;
221
            block1[63]= (block1[0]&1)^1;
222
        break;
223
        }
224

    
225
#if 0 // simulate mismatch control
226
{ int sum=0;
227
        for(i=0;i<64;i++)
228
           sum+=block1[i];
229

230
        if((sum&1)==0) block1[63]^=1;
231
}
232
#endif
233

    
234
        for(i=0; i<64; i++)
235
            block_org[i]= block1[i];
236

    
237
        if (form == MMX_PERM) {
238
            for(i=0;i<64;i++)
239
                block[idct_mmx_perm[i]] = block1[i];
240
            } else if (form == MMX_SIMPLE_PERM) {
241
            for(i=0;i<64;i++)
242
                block[idct_simple_mmx_perm[i]] = block1[i];
243

    
244
        } else if (form == SSE2_PERM) {
245
            for(i=0; i<64; i++)
246
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
247
        } else {
248
            for(i=0; i<64; i++)
249
                block[i]= block1[i];
250
        }
251
#if 0 // simulate mismatch control for tested IDCT but not the ref
252
{ int sum=0;
253
        for(i=0;i<64;i++)
254
           sum+=block[i];
255

256
        if((sum&1)==0) block[63]^=1;
257
}
258
#endif
259

    
260
        fdct_func(block);
261
        mmx_emms();
262

    
263
        if (form == SCALE_PERM) {
264
            for(i=0; i<64; i++) {
265
                scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
266
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
267
            }
268
        }
269

    
270
        fdct_ref(block1);
271

    
272
        blockSumErr=0;
273
        for(i=0;i<64;i++) {
274
            v = abs(block[i] - block1[i]);
275
            if (v > err_inf)
276
                err_inf = v;
277
            err2 += v * v;
278
            sysErr[i] += block[i] - block1[i];
279
            blockSumErr += v;
280
            if( abs(block[i])>maxout) maxout=abs(block[i]);
281
        }
282
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
283
#if 0 // print different matrix pairs
284
        if(blockSumErr){
285
            printf("\n");
286
            for(i=0; i<64; i++){
287
                if((i&7)==0) printf("\n");
288
                printf("%4d ", block_org[i]);
289
            }
290
            for(i=0; i<64; i++){
291
                if((i&7)==0) printf("\n");
292
                printf("%4d ", block[i] - block1[i]);
293
            }
294
        }
295
#endif
296
    }
297
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
298

    
299
#if 1 // dump systematic errors
300
    for(i=0; i<64; i++){
301
        if(i%8==0) printf("\n");
302
        printf("%5d ", (int)sysErr[i]);
303
    }
304
    printf("\n");
305
#endif
306

    
307
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
308
           is_idct ? "IDCT" : "DCT",
309
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
310
#if 1 //Speed test
311
    /* speed test */
312
    for(i=0;i<64;i++)
313
        block1[i] = 0;
314
    switch(test){
315
    case 0:
316
        for(i=0;i<64;i++)
317
            block1[i] = (random() % 512) -256;
318
        if (is_idct){
319
            fdct(block1);
320

    
321
            for(i=0;i<64;i++)
322
                block1[i]>>=3;
323
        }
324
    break;
325
    case 1:{
326
    case 2:
327
        block1[0] = (random() % 512) -256;
328
        block1[1] = (random() % 512) -256;
329
        block1[2] = (random() % 512) -256;
330
        block1[3] = (random() % 512) -256;
331
    }break;
332
    }
333

    
334
    if (form == MMX_PERM) {
335
        for(i=0;i<64;i++)
336
            block[idct_mmx_perm[i]] = block1[i];
337
    } else if(form == MMX_SIMPLE_PERM) {
338
        for(i=0;i<64;i++)
339
            block[idct_simple_mmx_perm[i]] = block1[i];
340
    } else {
341
        for(i=0; i<64; i++)
342
            block[i]= block1[i];
343
    }
344

    
345
    ti = gettime();
346
    it1 = 0;
347
    do {
348
        for(it=0;it<NB_ITS_SPEED;it++) {
349
            for(i=0; i<64; i++)
350
                block[i]= block1[i];
351
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
352
// do not memcpy especially not fastmemcpy because it does movntq !!!
353
            fdct_func(block);
354
        }
355
        it1 += NB_ITS_SPEED;
356
        ti1 = gettime() - ti;
357
    } while (ti1 < 1000000);
358
    mmx_emms();
359

    
360
    printf("%s %s: %0.1f kdct/s\n",
361
           is_idct ? "IDCT" : "DCT",
362
           name, (double)it1 * 1000.0 / (double)ti1);
363
#endif
364
}
365

    
366
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
367
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
368

    
369
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
370
{
371
    static int init;
372
    static double c8[8][8];
373
    static double c4[4][4];
374
    double block1[64], block2[64], block3[64];
375
    double s, sum, v;
376
    int i, j, k;
377

    
378
    if (!init) {
379
        init = 1;
380

    
381
        for(i=0;i<8;i++) {
382
            sum = 0;
383
            for(j=0;j<8;j++) {
384
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
385
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
386
                sum += c8[i][j] * c8[i][j];
387
            }
388
        }
389

    
390
        for(i=0;i<4;i++) {
391
            sum = 0;
392
            for(j=0;j<4;j++) {
393
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
394
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
395
                sum += c4[i][j] * c4[i][j];
396
            }
397
        }
398
    }
399

    
400
    /* butterfly */
401
    s = 0.5 * sqrt(2.0);
402
    for(i=0;i<4;i++) {
403
        for(j=0;j<8;j++) {
404
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
405
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
406
        }
407
    }
408

    
409
    /* idct8 on lines */
410
    for(i=0;i<8;i++) {
411
        for(j=0;j<8;j++) {
412
            sum = 0;
413
            for(k=0;k<8;k++)
414
                sum += c8[k][j] * block1[8*i+k];
415
            block2[8*i+j] = sum;
416
        }
417
    }
418

    
419
    /* idct4 */
420
    for(i=0;i<8;i++) {
421
        for(j=0;j<4;j++) {
422
            /* top */
423
            sum = 0;
424
            for(k=0;k<4;k++)
425
                sum += c4[k][j] * block2[8*(2*k)+i];
426
            block3[8*(2*j)+i] = sum;
427

    
428
            /* bottom */
429
            sum = 0;
430
            for(k=0;k<4;k++)
431
                sum += c4[k][j] * block2[8*(2*k+1)+i];
432
            block3[8*(2*j+1)+i] = sum;
433
        }
434
    }
435

    
436
    /* clamp and store the result */
437
    for(i=0;i<8;i++) {
438
        for(j=0;j<8;j++) {
439
            v = block3[8*i+j];
440
            if (v < 0)
441
                v = 0;
442
            else if (v > 255)
443
                v = 255;
444
            dest[i * linesize + j] = (int)rint(v);
445
        }
446
    }
447
}
448

    
449
void idct248_error(const char *name,
450
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
451
{
452
    int it, i, it1, ti, ti1, err_max, v;
453

    
454
    srandom(0);
455

    
456
    /* just one test to see if code is correct (precision is less
457
       important here) */
458
    err_max = 0;
459
    for(it=0;it<NB_ITS;it++) {
460

    
461
        /* XXX: use forward transform to generate values */
462
        for(i=0;i<64;i++)
463
            block1[i] = (random() % 256) - 128;
464
        block1[0] += 1024;
465

    
466
        for(i=0; i<64; i++)
467
            block[i]= block1[i];
468
        idct248_ref(img_dest1, 8, block);
469

    
470
        for(i=0; i<64; i++)
471
            block[i]= block1[i];
472
        idct248_put(img_dest, 8, block);
473

    
474
        for(i=0;i<64;i++) {
475
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
476
            if (v == 255)
477
                printf("%d %d\n", img_dest[i], img_dest1[i]);
478
            if (v > err_max)
479
                err_max = v;
480
        }
481
#if 0
482
        printf("ref=\n");
483
        for(i=0;i<8;i++) {
484
            int j;
485
            for(j=0;j<8;j++) {
486
                printf(" %3d", img_dest1[i*8+j]);
487
            }
488
            printf("\n");
489
        }
490

491
        printf("out=\n");
492
        for(i=0;i<8;i++) {
493
            int j;
494
            for(j=0;j<8;j++) {
495
                printf(" %3d", img_dest[i*8+j]);
496
            }
497
            printf("\n");
498
        }
499
#endif
500
    }
501
    printf("%s %s: err_inf=%d\n",
502
           1 ? "IDCT248" : "DCT248",
503
           name, err_max);
504

    
505
    ti = gettime();
506
    it1 = 0;
507
    do {
508
        for(it=0;it<NB_ITS_SPEED;it++) {
509
            for(i=0; i<64; i++)
510
                block[i]= block1[i];
511
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
512
// do not memcpy especially not fastmemcpy because it does movntq !!!
513
            idct248_put(img_dest, 8, block);
514
        }
515
        it1 += NB_ITS_SPEED;
516
        ti1 = gettime() - ti;
517
    } while (ti1 < 1000000);
518
    mmx_emms();
519

    
520
    printf("%s %s: %0.1f kdct/s\n",
521
           1 ? "IDCT248" : "DCT248",
522
           name, (double)it1 * 1000.0 / (double)ti1);
523
}
524

    
525
void help(void)
526
{
527
    printf("dct-test [-i] [<test-number>]\n"
528
           "test-number 0 -> test with random matrixes\n"
529
           "            1 -> test with random sparse matrixes\n"
530
           "            2 -> do 3. test from mpeg4 std\n"
531
           "-i          test IDCT implementations\n"
532
           "-4          test IDCT248 implementations\n");
533
}
534

    
535
int main(int argc, char **argv)
536
{
537
    int test_idct = 0, test_248_dct = 0;
538
    int c,i;
539
    int test=1;
540
    cpu_flags = mm_support();
541

    
542
    init_fdct();
543
    idct_mmx_init();
544

    
545
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
546
    for(i=0;i<MAX_NEG_CROP;i++) {
547
        cropTbl[i] = 0;
548
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
549
    }
550

    
551
    for(;;) {
552
        c = getopt(argc, argv, "ih4");
553
        if (c == -1)
554
            break;
555
        switch(c) {
556
        case 'i':
557
            test_idct = 1;
558
            break;
559
        case '4':
560
            test_248_dct = 1;
561
            break;
562
        default :
563
        case 'h':
564
            help();
565
            return 0;
566
        }
567
    }
568

    
569
    if(optind <argc) test= atoi(argv[optind]);
570

    
571
    printf("ffmpeg DCT/IDCT test\n");
572

    
573
    if (test_248_dct) {
574
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
575
    } else {
576
      for (i=0;algos[i].name;i++)
577
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
578
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
579
        }
580
    }
581
    return 0;
582
}