Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ 74a841af

History | View | Annotate | Download (16.5 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file libavcodec/dct-test.c
24
 * DCT test (c) 2001 Fabrice Bellard
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "libavutil/common.h"
36
#include "libavutil/lfg.h"
37

    
38
#include "simple_idct.h"
39
#include "aandcttab.h"
40
#include "faandct.h"
41
#include "faanidct.h"
42
#include "x86/idct_xvid.h"
43

    
44
#undef printf
45

    
46
/**
 * Stand-in for fast_memcpy() that simply forwards to the C library memcpy().
 *
 * NOTE(review): presumably provided so objects linked into this test that
 * reference fast_memcpy() resolve without the optimized version -- confirm.
 *
 * @param a destination buffer
 * @param b source buffer (must not overlap the destination)
 * @param c number of bytes to copy
 * @return  the destination pointer, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47

    
48
/* reference fdct/idct */
49
void ff_ref_fdct(DCTELEM *block);
50
void ff_ref_idct(DCTELEM *block);
51
void ff_ref_dct_init(void);
52

    
53
void ff_mmx_idct(DCTELEM *data);
54
void ff_mmxext_idct(DCTELEM *data);
55

    
56
void odivx_idct_c(short *block);
57

    
58
// BFIN
59
void ff_bfin_idct(DCTELEM *block);
60
void ff_bfin_fdct(DCTELEM *block);
61

    
62
// ALTIVEC
63
void fdct_altivec(DCTELEM *block);
64
//void idct_altivec(DCTELEM *block);?? no routine
65

    
66
// ARM
67
void j_rev_dct_ARM(DCTELEM *data);
68
void simple_idct_ARM(DCTELEM *data);
69
void simple_idct_armv5te(DCTELEM *data);
70
void ff_simple_idct_armv6(DCTELEM *data);
71
void ff_simple_idct_neon(DCTELEM *data);
72

    
73
void ff_simple_idct_axp(DCTELEM *data);
74

    
75
struct algo {
76
  const char *name;
77
  enum { FDCT, IDCT } is_idct;
78
  void (* func) (DCTELEM *block);
79
  void (* ref)  (DCTELEM *block);
80
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
81
  int  mm_support;
82
};
83

    
84
#ifndef FAAN_POSTSCALE
85
#define FAAN_SCALE SCALE_PERM
86
#else
87
#define FAAN_SCALE NO_PERM
88
#endif
89

    
90
static int cpu_flags;
91

    
92
struct algo algos[] = {
93
  {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
94
  {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
95
  {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
96
  {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
97
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
98
  {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
99
  {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
100
  {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},
101

    
102
#if HAVE_MMX
103
  {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
104
#if HAVE_MMX2
105
  {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMX2},
106
  {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
107
#endif
108

    
109
#if CONFIG_GPL
110
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
111
  {"LIBMPEG2-MMX2",   1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMX2},
112
#endif
113
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
114
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
115
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMX2},
116
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
117
#endif
118

    
119
#if HAVE_ALTIVEC
120
  {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
121
#endif
122

    
123
#if ARCH_BFIN
124
  {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
125
  {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
126
#endif
127

    
128
#if ARCH_ARM
129
  {"SIMPLE-ARM",      1, simple_idct_ARM,    ff_ref_idct, NO_PERM },
130
  {"INT-ARM",         1, j_rev_dct_ARM,      ff_ref_idct, MMX_PERM },
131
#if HAVE_ARMV5TE
132
  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
133
#endif
134
#if HAVE_ARMV6
135
  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
136
#endif
137
#if HAVE_NEON
138
  {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
139
#endif
140
#endif /* ARCH_ARM */
141

    
142
#if ARCH_ALPHA
143
  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
144
#endif
145

    
146
  { 0 }
147
};
148

    
149
#define AANSCALE_BITS 12
150

    
151
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
152

    
153
/**
 * Read the wall clock via gettimeofday().
 *
 * @return the current time in microseconds
 */
static int64_t gettime(void)
{
    struct timeval now;

    gettimeofday(&now, NULL);
    /* Widen the seconds before multiplying so the product is computed in 64 bits. */
    return (int64_t)now.tv_sec * 1000000 + now.tv_usec;
}
159

    
160
/* Number of blocks transformed per accuracy measurement. */
#define NB_ITS 20000
/* Number of transforms per batch between clock reads in the speed loops. */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM implementations; filled by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM implementations. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column order within each row for SSE2_PERM implementations. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

/**
 * Fill idct_mmx_perm[], the reordered input layout used by the mmx/mmxext
 * idct.  The row bits (0x38) are kept; within a row, bits 1-2 move down one
 * position and bit 0 moves up to bit 2.
 */
static void idct_mmx_init(void)
{
    int idx;

    for (idx = 0; idx < 64; idx++) {
        int row      = idx & 0x38;
        int col_high = (idx & 6) >> 1;
        int col_low  = (idx & 1) << 2;

        idct_mmx_perm[idx] = row | col_high | col_low;
    }
}
188

    
189
DECLARE_ALIGNED(16, static DCTELEM, block[64]);
190
DECLARE_ALIGNED(8, static DCTELEM, block1[64]);
191
DECLARE_ALIGNED(8, static DCTELEM, block_org[64]);
192

    
193
/**
 * Execute the x86 "emms" instruction when the build has MMX support and the
 * running CPU reports FF_MM_MMX in cpu_flags; otherwise do nothing.
 *
 * Called after running a transform and before the results are examined or
 * printed.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (cpu_flags & FF_MM_MMX) {
        __asm__ volatile ("emms\n\t");
    }
#endif
}
200

    
201
static void dct_error(const char *name, int is_idct,
202
               void (*fdct_func)(DCTELEM *block),
203
               void (*fdct_ref)(DCTELEM *block), int form, int test)
204
{
205
    int it, i, scale;
206
    int err_inf, v;
207
    int64_t err2, ti, ti1, it1;
208
    int64_t sysErr[64], sysErrMax=0;
209
    int maxout=0;
210
    int blockSumErrMax=0, blockSumErr;
211
    AVLFG prng;
212

    
213
    av_lfg_init(&prng, 1);
214

    
215
    err_inf = 0;
216
    err2 = 0;
217
    for(i=0; i<64; i++) sysErr[i]=0;
218
    for(it=0;it<NB_ITS;it++) {
219
        for(i=0;i<64;i++)
220
            block1[i] = 0;
221
        switch(test){
222
        case 0:
223
            for(i=0;i<64;i++)
224
                block1[i] = (av_lfg_get(&prng) % 512) -256;
225
            if (is_idct){
226
                ff_ref_fdct(block1);
227

    
228
                for(i=0;i<64;i++)
229
                    block1[i]>>=3;
230
            }
231
        break;
232
        case 1:{
233
            int num = av_lfg_get(&prng) % 10 + 1;
234
            for(i=0;i<num;i++)
235
                block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
236
        }break;
237
        case 2:
238
            block1[0] = av_lfg_get(&prng) % 4096 - 2048;
239
            block1[63]= (block1[0]&1)^1;
240
        break;
241
        }
242

    
243
#if 0 // simulate mismatch control
244
{ int sum=0;
245
        for(i=0;i<64;i++)
246
           sum+=block1[i];
247

248
        if((sum&1)==0) block1[63]^=1;
249
}
250
#endif
251

    
252
        for(i=0; i<64; i++)
253
            block_org[i]= block1[i];
254

    
255
        if (form == MMX_PERM) {
256
            for(i=0;i<64;i++)
257
                block[idct_mmx_perm[i]] = block1[i];
258
            } else if (form == MMX_SIMPLE_PERM) {
259
            for(i=0;i<64;i++)
260
                block[idct_simple_mmx_perm[i]] = block1[i];
261

    
262
        } else if (form == SSE2_PERM) {
263
            for(i=0; i<64; i++)
264
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
265
        } else if (form == PARTTRANS_PERM) {
266
            for(i=0; i<64; i++)
267
                block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
268
        } else {
269
            for(i=0; i<64; i++)
270
                block[i]= block1[i];
271
        }
272
#if 0 // simulate mismatch control for tested IDCT but not the ref
273
{ int sum=0;
274
        for(i=0;i<64;i++)
275
           sum+=block[i];
276

277
        if((sum&1)==0) block[63]^=1;
278
}
279
#endif
280

    
281
        fdct_func(block);
282
        mmx_emms();
283

    
284
        if (form == SCALE_PERM) {
285
            for(i=0; i<64; i++) {
286
                scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
287
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
288
            }
289
        }
290

    
291
        fdct_ref(block1);
292

    
293
        blockSumErr=0;
294
        for(i=0;i<64;i++) {
295
            v = abs(block[i] - block1[i]);
296
            if (v > err_inf)
297
                err_inf = v;
298
            err2 += v * v;
299
            sysErr[i] += block[i] - block1[i];
300
            blockSumErr += v;
301
            if( abs(block[i])>maxout) maxout=abs(block[i]);
302
        }
303
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
304
#if 0 // print different matrix pairs
305
        if(blockSumErr){
306
            printf("\n");
307
            for(i=0; i<64; i++){
308
                if((i&7)==0) printf("\n");
309
                printf("%4d ", block_org[i]);
310
            }
311
            for(i=0; i<64; i++){
312
                if((i&7)==0) printf("\n");
313
                printf("%4d ", block[i] - block1[i]);
314
            }
315
        }
316
#endif
317
    }
318
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
319

    
320
#if 1 // dump systematic errors
321
    for(i=0; i<64; i++){
322
        if(i%8==0) printf("\n");
323
        printf("%7d ", (int)sysErr[i]);
324
    }
325
    printf("\n");
326
#endif
327

    
328
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
329
           is_idct ? "IDCT" : "DCT",
330
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
331
#if 1 //Speed test
332
    /* speed test */
333
    for(i=0;i<64;i++)
334
        block1[i] = 0;
335
    switch(test){
336
    case 0:
337
        for(i=0;i<64;i++)
338
            block1[i] = av_lfg_get(&prng) % 512 -256;
339
        if (is_idct){
340
            ff_ref_fdct(block1);
341

    
342
            for(i=0;i<64;i++)
343
                block1[i]>>=3;
344
        }
345
    break;
346
    case 1:{
347
    case 2:
348
        block1[0] = av_lfg_get(&prng) % 512 -256;
349
        block1[1] = av_lfg_get(&prng) % 512 -256;
350
        block1[2] = av_lfg_get(&prng) % 512 -256;
351
        block1[3] = av_lfg_get(&prng) % 512 -256;
352
    }break;
353
    }
354

    
355
    if (form == MMX_PERM) {
356
        for(i=0;i<64;i++)
357
            block[idct_mmx_perm[i]] = block1[i];
358
    } else if(form == MMX_SIMPLE_PERM) {
359
        for(i=0;i<64;i++)
360
            block[idct_simple_mmx_perm[i]] = block1[i];
361
    } else {
362
        for(i=0; i<64; i++)
363
            block[i]= block1[i];
364
    }
365

    
366
    ti = gettime();
367
    it1 = 0;
368
    do {
369
        for(it=0;it<NB_ITS_SPEED;it++) {
370
            for(i=0; i<64; i++)
371
                block[i]= block1[i];
372
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
373
// do not memcpy especially not fastmemcpy because it does movntq !!!
374
            fdct_func(block);
375
        }
376
        it1 += NB_ITS_SPEED;
377
        ti1 = gettime() - ti;
378
    } while (ti1 < 1000000);
379
    mmx_emms();
380

    
381
    printf("%s %s: %0.1f kdct/s\n",
382
           is_idct ? "IDCT" : "DCT",
383
           name, (double)it1 * 1000.0 / (double)ti1);
384
#endif
385
}
386

    
387
DECLARE_ALIGNED(8, static uint8_t, img_dest[64]);
388
DECLARE_ALIGNED(8, static uint8_t, img_dest1[64]);
389

    
390
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
391
{
392
    static int init;
393
    static double c8[8][8];
394
    static double c4[4][4];
395
    double block1[64], block2[64], block3[64];
396
    double s, sum, v;
397
    int i, j, k;
398

    
399
    if (!init) {
400
        init = 1;
401

    
402
        for(i=0;i<8;i++) {
403
            sum = 0;
404
            for(j=0;j<8;j++) {
405
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
406
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
407
                sum += c8[i][j] * c8[i][j];
408
            }
409
        }
410

    
411
        for(i=0;i<4;i++) {
412
            sum = 0;
413
            for(j=0;j<4;j++) {
414
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
415
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
416
                sum += c4[i][j] * c4[i][j];
417
            }
418
        }
419
    }
420

    
421
    /* butterfly */
422
    s = 0.5 * sqrt(2.0);
423
    for(i=0;i<4;i++) {
424
        for(j=0;j<8;j++) {
425
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
426
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
427
        }
428
    }
429

    
430
    /* idct8 on lines */
431
    for(i=0;i<8;i++) {
432
        for(j=0;j<8;j++) {
433
            sum = 0;
434
            for(k=0;k<8;k++)
435
                sum += c8[k][j] * block1[8*i+k];
436
            block2[8*i+j] = sum;
437
        }
438
    }
439

    
440
    /* idct4 */
441
    for(i=0;i<8;i++) {
442
        for(j=0;j<4;j++) {
443
            /* top */
444
            sum = 0;
445
            for(k=0;k<4;k++)
446
                sum += c4[k][j] * block2[8*(2*k)+i];
447
            block3[8*(2*j)+i] = sum;
448

    
449
            /* bottom */
450
            sum = 0;
451
            for(k=0;k<4;k++)
452
                sum += c4[k][j] * block2[8*(2*k+1)+i];
453
            block3[8*(2*j+1)+i] = sum;
454
        }
455
    }
456

    
457
    /* clamp and store the result */
458
    for(i=0;i<8;i++) {
459
        for(j=0;j<8;j++) {
460
            v = block3[8*i+j];
461
            if (v < 0)
462
                v = 0;
463
            else if (v > 255)
464
                v = 255;
465
            dest[i * linesize + j] = (int)rint(v);
466
        }
467
    }
468
}
469

    
470
static void idct248_error(const char *name,
471
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
472
{
473
    int it, i, it1, ti, ti1, err_max, v;
474

    
475
    AVLFG prng;
476

    
477
    av_lfg_init(&prng, 1);
478

    
479
    /* just one test to see if code is correct (precision is less
480
       important here) */
481
    err_max = 0;
482
    for(it=0;it<NB_ITS;it++) {
483

    
484
        /* XXX: use forward transform to generate values */
485
        for(i=0;i<64;i++)
486
            block1[i] = av_lfg_get(&prng) % 256 - 128;
487
        block1[0] += 1024;
488

    
489
        for(i=0; i<64; i++)
490
            block[i]= block1[i];
491
        idct248_ref(img_dest1, 8, block);
492

    
493
        for(i=0; i<64; i++)
494
            block[i]= block1[i];
495
        idct248_put(img_dest, 8, block);
496

    
497
        for(i=0;i<64;i++) {
498
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
499
            if (v == 255)
500
                printf("%d %d\n", img_dest[i], img_dest1[i]);
501
            if (v > err_max)
502
                err_max = v;
503
        }
504
#if 0
505
        printf("ref=\n");
506
        for(i=0;i<8;i++) {
507
            int j;
508
            for(j=0;j<8;j++) {
509
                printf(" %3d", img_dest1[i*8+j]);
510
            }
511
            printf("\n");
512
        }
513

514
        printf("out=\n");
515
        for(i=0;i<8;i++) {
516
            int j;
517
            for(j=0;j<8;j++) {
518
                printf(" %3d", img_dest[i*8+j]);
519
            }
520
            printf("\n");
521
        }
522
#endif
523
    }
524
    printf("%s %s: err_inf=%d\n",
525
           1 ? "IDCT248" : "DCT248",
526
           name, err_max);
527

    
528
    ti = gettime();
529
    it1 = 0;
530
    do {
531
        for(it=0;it<NB_ITS_SPEED;it++) {
532
            for(i=0; i<64; i++)
533
                block[i]= block1[i];
534
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
535
// do not memcpy especially not fastmemcpy because it does movntq !!!
536
            idct248_put(img_dest, 8, block);
537
        }
538
        it1 += NB_ITS_SPEED;
539
        ti1 = gettime() - ti;
540
    } while (ti1 < 1000000);
541
    mmx_emms();
542

    
543
    printf("%s %s: %0.1f kdct/s\n",
544
           1 ? "IDCT248" : "DCT248",
545
           name, (double)it1 * 1000.0 / (double)ti1);
546
}
547

    
548
/**
 * Print the command-line usage summary to standard output.
 */
static void help(void)
{
    printf("dct-test [-i] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n");
}
557

    
558
int main(int argc, char **argv)
559
{
560
    int test_idct = 0, test_248_dct = 0;
561
    int c,i;
562
    int test=1;
563
    cpu_flags = mm_support();
564

    
565
    ff_ref_dct_init();
566
    idct_mmx_init();
567

    
568
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
569
    for(i=0;i<MAX_NEG_CROP;i++) {
570
        cropTbl[i] = 0;
571
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
572
    }
573

    
574
    for(;;) {
575
        c = getopt(argc, argv, "ih4");
576
        if (c == -1)
577
            break;
578
        switch(c) {
579
        case 'i':
580
            test_idct = 1;
581
            break;
582
        case '4':
583
            test_248_dct = 1;
584
            break;
585
        default :
586
        case 'h':
587
            help();
588
            return 0;
589
        }
590
    }
591

    
592
    if(optind <argc) test= atoi(argv[optind]);
593

    
594
    printf("ffmpeg DCT/IDCT test\n");
595

    
596
    if (test_248_dct) {
597
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
598
    } else {
599
      for (i=0;algos[i].name;i++)
600
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
601
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
602
        }
603
    }
604
    return 0;
605
}