Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ a2fc0f6a

History | View | Annotate | Download (16.1 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file dct-test.c
24
 * DCT test. (c) 2001 Fabrice Bellard.
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "libavutil/common.h"
36

    
37
#include "simple_idct.h"
38
#include "aandcttab.h"
39
#include "faandct.h"
40
#include "faanidct.h"
41
#include "i386/idct_xvid.h"
42

    
43
#undef printf
44
#undef random
45

    
46
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
47

    
48
/* reference fdct/idct */
49
void fdct(DCTELEM *block);
50
void idct(DCTELEM *block);
51
void init_fdct();
52

    
53
void ff_mmx_idct(DCTELEM *data);
54
void ff_mmxext_idct(DCTELEM *data);
55

    
56
void odivx_idct_c(short *block);
57

    
58
// BFIN
59
void ff_bfin_idct(DCTELEM *block);
60
void ff_bfin_fdct(DCTELEM *block);
61

    
62
// ALTIVEC
63
void fdct_altivec(DCTELEM *block);
64
//void idct_altivec(DCTELEM *block);?? no routine
65

    
66
// ARM
67
void j_rev_dct_ARM(DCTELEM *data);
68
void simple_idct_ARM(DCTELEM *data);
69
void simple_idct_armv5te(DCTELEM *data);
70
void ff_simple_idct_armv6(DCTELEM *data);
71
void ff_simple_idct_neon(DCTELEM *data);
72

    
73
struct algo {
74
  const char *name;
75
  enum { FDCT, IDCT } is_idct;
76
  void (* func) (DCTELEM *block);
77
  void (* ref)  (DCTELEM *block);
78
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
79
  int  mm_support;
80
};
81

    
82
#ifndef FAAN_POSTSCALE
83
#define FAAN_SCALE SCALE_PERM
84
#else
85
#define FAAN_SCALE NO_PERM
86
#endif
87

    
88
static int cpu_flags;
89

    
90
struct algo algos[] = {
91
  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
92
  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
93
  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
94
  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
95
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
96
  {"REF-DBL",         1, idct,               idct, NO_PERM},
97
  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
98
  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
99

    
100
#ifdef HAVE_MMX
101
  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, FF_MM_MMX},
102
#ifdef HAVE_MMX2
103
  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, FF_MM_MMXEXT},
104
  {"SSE2",            0, ff_fdct_sse2,       fdct, NO_PERM, FF_MM_SSE2},
105
#endif
106

    
107
#ifdef CONFIG_GPL
108
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, FF_MM_MMX},
109
  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, FF_MM_MMXEXT},
110
#endif
111
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
112
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, FF_MM_MMX},
113
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, FF_MM_MMXEXT},
114
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, FF_MM_SSE2},
115
#endif
116

    
117
#ifdef HAVE_ALTIVEC
118
  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, FF_MM_ALTIVEC},
119
#endif
120

    
121
#ifdef ARCH_BFIN
122
  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
123
  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
124
#endif
125

    
126
#ifdef ARCH_ARM
127
  {"SIMPLE-ARM",      1, simple_idct_ARM,    idct, NO_PERM },
128
  {"INT-ARM",         1, j_rev_dct_ARM,      idct, MMX_PERM },
129
#ifdef HAVE_ARMV5TE
130
  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, idct, NO_PERM },
131
#endif
132
#ifdef HAVE_ARMV6
133
  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, idct, MMX_PERM },
134
#endif
135
#ifdef HAVE_NEON
136
  {"SIMPLE-NEON",     1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
137
#endif
138
#endif /* ARCH_ARM */
139

    
140
  { 0 }
141
};
142

    
143
#define AANSCALE_BITS 12
144

    
145
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
146

    
147
int64_t gettime(void)
148
{
149
    struct timeval tv;
150
    gettimeofday(&tv,NULL);
151
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
152
}
153

    
154
#define NB_ITS 20000
155
#define NB_ITS_SPEED 50000
156

    
157
static short idct_mmx_perm[64];
158

    
159
static short idct_simple_mmx_perm[64]={
160
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
161
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
162
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
163
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
164
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
165
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
166
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
167
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
168
};
169

    
170
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
171

    
172
void idct_mmx_init(void)
173
{
174
    int i;
175

    
176
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
177
    for (i = 0; i < 64; i++) {
178
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
179
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
180
    }
181
}
182

    
183
static DCTELEM block[64] __attribute__ ((aligned (16)));
184
static DCTELEM block1[64] __attribute__ ((aligned (8)));
185
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
186

    
187
static inline void mmx_emms(void)
188
{
189
#ifdef HAVE_MMX
190
    if (cpu_flags & FF_MM_MMX)
191
        __asm__ volatile ("emms\n\t");
192
#endif
193
}
194

    
195
void dct_error(const char *name, int is_idct,
196
               void (*fdct_func)(DCTELEM *block),
197
               void (*fdct_ref)(DCTELEM *block), int form, int test)
198
{
199
    int it, i, scale;
200
    int err_inf, v;
201
    int64_t err2, ti, ti1, it1;
202
    int64_t sysErr[64], sysErrMax=0;
203
    int maxout=0;
204
    int blockSumErrMax=0, blockSumErr;
205

    
206
    srandom(0);
207

    
208
    err_inf = 0;
209
    err2 = 0;
210
    for(i=0; i<64; i++) sysErr[i]=0;
211
    for(it=0;it<NB_ITS;it++) {
212
        for(i=0;i<64;i++)
213
            block1[i] = 0;
214
        switch(test){
215
        case 0:
216
            for(i=0;i<64;i++)
217
                block1[i] = (random() % 512) -256;
218
            if (is_idct){
219
                fdct(block1);
220

    
221
                for(i=0;i<64;i++)
222
                    block1[i]>>=3;
223
            }
224
        break;
225
        case 1:{
226
            int num= (random()%10)+1;
227
            for(i=0;i<num;i++)
228
                block1[random()%64] = (random() % 512) -256;
229
        }break;
230
        case 2:
231
            block1[0]= (random()%4096)-2048;
232
            block1[63]= (block1[0]&1)^1;
233
        break;
234
        }
235

    
236
#if 0 // simulate mismatch control
237
{ int sum=0;
238
        for(i=0;i<64;i++)
239
           sum+=block1[i];
240

241
        if((sum&1)==0) block1[63]^=1;
242
}
243
#endif
244

    
245
        for(i=0; i<64; i++)
246
            block_org[i]= block1[i];
247

    
248
        if (form == MMX_PERM) {
249
            for(i=0;i<64;i++)
250
                block[idct_mmx_perm[i]] = block1[i];
251
            } else if (form == MMX_SIMPLE_PERM) {
252
            for(i=0;i<64;i++)
253
                block[idct_simple_mmx_perm[i]] = block1[i];
254

    
255
        } else if (form == SSE2_PERM) {
256
            for(i=0; i<64; i++)
257
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
258
        } else if (form == PARTTRANS_PERM) {
259
            for(i=0; i<64; i++)
260
                block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
261
        } else {
262
            for(i=0; i<64; i++)
263
                block[i]= block1[i];
264
        }
265
#if 0 // simulate mismatch control for tested IDCT but not the ref
266
{ int sum=0;
267
        for(i=0;i<64;i++)
268
           sum+=block[i];
269

270
        if((sum&1)==0) block[63]^=1;
271
}
272
#endif
273

    
274
        fdct_func(block);
275
        mmx_emms();
276

    
277
        if (form == SCALE_PERM) {
278
            for(i=0; i<64; i++) {
279
                scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
280
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
281
            }
282
        }
283

    
284
        fdct_ref(block1);
285

    
286
        blockSumErr=0;
287
        for(i=0;i<64;i++) {
288
            v = abs(block[i] - block1[i]);
289
            if (v > err_inf)
290
                err_inf = v;
291
            err2 += v * v;
292
            sysErr[i] += block[i] - block1[i];
293
            blockSumErr += v;
294
            if( abs(block[i])>maxout) maxout=abs(block[i]);
295
        }
296
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
297
#if 0 // print different matrix pairs
298
        if(blockSumErr){
299
            printf("\n");
300
            for(i=0; i<64; i++){
301
                if((i&7)==0) printf("\n");
302
                printf("%4d ", block_org[i]);
303
            }
304
            for(i=0; i<64; i++){
305
                if((i&7)==0) printf("\n");
306
                printf("%4d ", block[i] - block1[i]);
307
            }
308
        }
309
#endif
310
    }
311
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
312

    
313
#if 1 // dump systematic errors
314
    for(i=0; i<64; i++){
315
        if(i%8==0) printf("\n");
316
        printf("%5d ", (int)sysErr[i]);
317
    }
318
    printf("\n");
319
#endif
320

    
321
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
322
           is_idct ? "IDCT" : "DCT",
323
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
324
#if 1 //Speed test
325
    /* speed test */
326
    for(i=0;i<64;i++)
327
        block1[i] = 0;
328
    switch(test){
329
    case 0:
330
        for(i=0;i<64;i++)
331
            block1[i] = (random() % 512) -256;
332
        if (is_idct){
333
            fdct(block1);
334

    
335
            for(i=0;i<64;i++)
336
                block1[i]>>=3;
337
        }
338
    break;
339
    case 1:{
340
    case 2:
341
        block1[0] = (random() % 512) -256;
342
        block1[1] = (random() % 512) -256;
343
        block1[2] = (random() % 512) -256;
344
        block1[3] = (random() % 512) -256;
345
    }break;
346
    }
347

    
348
    if (form == MMX_PERM) {
349
        for(i=0;i<64;i++)
350
            block[idct_mmx_perm[i]] = block1[i];
351
    } else if(form == MMX_SIMPLE_PERM) {
352
        for(i=0;i<64;i++)
353
            block[idct_simple_mmx_perm[i]] = block1[i];
354
    } else {
355
        for(i=0; i<64; i++)
356
            block[i]= block1[i];
357
    }
358

    
359
    ti = gettime();
360
    it1 = 0;
361
    do {
362
        for(it=0;it<NB_ITS_SPEED;it++) {
363
            for(i=0; i<64; i++)
364
                block[i]= block1[i];
365
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
366
// do not memcpy especially not fastmemcpy because it does movntq !!!
367
            fdct_func(block);
368
        }
369
        it1 += NB_ITS_SPEED;
370
        ti1 = gettime() - ti;
371
    } while (ti1 < 1000000);
372
    mmx_emms();
373

    
374
    printf("%s %s: %0.1f kdct/s\n",
375
           is_idct ? "IDCT" : "DCT",
376
           name, (double)it1 * 1000.0 / (double)ti1);
377
#endif
378
}
379

    
380
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
381
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
382

    
383
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
384
{
385
    static int init;
386
    static double c8[8][8];
387
    static double c4[4][4];
388
    double block1[64], block2[64], block3[64];
389
    double s, sum, v;
390
    int i, j, k;
391

    
392
    if (!init) {
393
        init = 1;
394

    
395
        for(i=0;i<8;i++) {
396
            sum = 0;
397
            for(j=0;j<8;j++) {
398
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
399
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
400
                sum += c8[i][j] * c8[i][j];
401
            }
402
        }
403

    
404
        for(i=0;i<4;i++) {
405
            sum = 0;
406
            for(j=0;j<4;j++) {
407
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
408
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
409
                sum += c4[i][j] * c4[i][j];
410
            }
411
        }
412
    }
413

    
414
    /* butterfly */
415
    s = 0.5 * sqrt(2.0);
416
    for(i=0;i<4;i++) {
417
        for(j=0;j<8;j++) {
418
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
419
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
420
        }
421
    }
422

    
423
    /* idct8 on lines */
424
    for(i=0;i<8;i++) {
425
        for(j=0;j<8;j++) {
426
            sum = 0;
427
            for(k=0;k<8;k++)
428
                sum += c8[k][j] * block1[8*i+k];
429
            block2[8*i+j] = sum;
430
        }
431
    }
432

    
433
    /* idct4 */
434
    for(i=0;i<8;i++) {
435
        for(j=0;j<4;j++) {
436
            /* top */
437
            sum = 0;
438
            for(k=0;k<4;k++)
439
                sum += c4[k][j] * block2[8*(2*k)+i];
440
            block3[8*(2*j)+i] = sum;
441

    
442
            /* bottom */
443
            sum = 0;
444
            for(k=0;k<4;k++)
445
                sum += c4[k][j] * block2[8*(2*k+1)+i];
446
            block3[8*(2*j+1)+i] = sum;
447
        }
448
    }
449

    
450
    /* clamp and store the result */
451
    for(i=0;i<8;i++) {
452
        for(j=0;j<8;j++) {
453
            v = block3[8*i+j];
454
            if (v < 0)
455
                v = 0;
456
            else if (v > 255)
457
                v = 255;
458
            dest[i * linesize + j] = (int)rint(v);
459
        }
460
    }
461
}
462

    
463
void idct248_error(const char *name,
464
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
465
{
466
    int it, i, it1, ti, ti1, err_max, v;
467

    
468
    srandom(0);
469

    
470
    /* just one test to see if code is correct (precision is less
471
       important here) */
472
    err_max = 0;
473
    for(it=0;it<NB_ITS;it++) {
474

    
475
        /* XXX: use forward transform to generate values */
476
        for(i=0;i<64;i++)
477
            block1[i] = (random() % 256) - 128;
478
        block1[0] += 1024;
479

    
480
        for(i=0; i<64; i++)
481
            block[i]= block1[i];
482
        idct248_ref(img_dest1, 8, block);
483

    
484
        for(i=0; i<64; i++)
485
            block[i]= block1[i];
486
        idct248_put(img_dest, 8, block);
487

    
488
        for(i=0;i<64;i++) {
489
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
490
            if (v == 255)
491
                printf("%d %d\n", img_dest[i], img_dest1[i]);
492
            if (v > err_max)
493
                err_max = v;
494
        }
495
#if 0
496
        printf("ref=\n");
497
        for(i=0;i<8;i++) {
498
            int j;
499
            for(j=0;j<8;j++) {
500
                printf(" %3d", img_dest1[i*8+j]);
501
            }
502
            printf("\n");
503
        }
504

505
        printf("out=\n");
506
        for(i=0;i<8;i++) {
507
            int j;
508
            for(j=0;j<8;j++) {
509
                printf(" %3d", img_dest[i*8+j]);
510
            }
511
            printf("\n");
512
        }
513
#endif
514
    }
515
    printf("%s %s: err_inf=%d\n",
516
           1 ? "IDCT248" : "DCT248",
517
           name, err_max);
518

    
519
    ti = gettime();
520
    it1 = 0;
521
    do {
522
        for(it=0;it<NB_ITS_SPEED;it++) {
523
            for(i=0; i<64; i++)
524
                block[i]= block1[i];
525
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
526
// do not memcpy especially not fastmemcpy because it does movntq !!!
527
            idct248_put(img_dest, 8, block);
528
        }
529
        it1 += NB_ITS_SPEED;
530
        ti1 = gettime() - ti;
531
    } while (ti1 < 1000000);
532
    mmx_emms();
533

    
534
    printf("%s %s: %0.1f kdct/s\n",
535
           1 ? "IDCT248" : "DCT248",
536
           name, (double)it1 * 1000.0 / (double)ti1);
537
}
538

    
539
void help(void)
540
{
541
    printf("dct-test [-i] [<test-number>]\n"
542
           "test-number 0 -> test with random matrixes\n"
543
           "            1 -> test with random sparse matrixes\n"
544
           "            2 -> do 3. test from mpeg4 std\n"
545
           "-i          test IDCT implementations\n"
546
           "-4          test IDCT248 implementations\n");
547
}
548

    
549
int main(int argc, char **argv)
550
{
551
    int test_idct = 0, test_248_dct = 0;
552
    int c,i;
553
    int test=1;
554
    cpu_flags = mm_support();
555

    
556
    init_fdct();
557
    idct_mmx_init();
558

    
559
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
560
    for(i=0;i<MAX_NEG_CROP;i++) {
561
        cropTbl[i] = 0;
562
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
563
    }
564

    
565
    for(;;) {
566
        c = getopt(argc, argv, "ih4");
567
        if (c == -1)
568
            break;
569
        switch(c) {
570
        case 'i':
571
            test_idct = 1;
572
            break;
573
        case '4':
574
            test_248_dct = 1;
575
            break;
576
        default :
577
        case 'h':
578
            help();
579
            return 0;
580
        }
581
    }
582

    
583
    if(optind <argc) test= atoi(argv[optind]);
584

    
585
    printf("ffmpeg DCT/IDCT test\n");
586

    
587
    if (test_248_dct) {
588
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
589
    } else {
590
      for (i=0;algos[i].name;i++)
591
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
592
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
593
        }
594
    }
595
    return 0;
596
}