Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ 6a813295

History | View | Annotate | Download (16.4 KB)

1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file libavcodec/dct-test.c
24
 * DCT test (c) 2001 Fabrice Bellard
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27

    
28
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33
#include <math.h>
34

    
35
#include "libavutil/common.h"
36
#include "libavutil/lfg.h"
37

    
38
#include "simple_idct.h"
39
#include "aandcttab.h"
40
#include "faandct.h"
41
#include "faanidct.h"
42
#include "x86/idct_xvid.h"
43
#include "dctref.h"
44

    
45
#undef printf
46

    
47
/**
 * Plain definition of fast_memcpy() that forwards straight to memcpy().
 * NOTE(review): presumably provided so that objects referencing
 * fast_memcpy link into this standalone test program -- confirm.
 *
 * @param a destination buffer
 * @param b source buffer
 * @param c number of bytes to copy
 * @return a, as returned by memcpy()
 */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
48

    
49
/* Prototypes for transforms that are referenced by the table of tested
 * algorithms but are not declared by the headers included above. */

/* x86: IDCTs listed as LIBMPEG2-MMX / LIBMPEG2-MMX2 */
void ff_mmx_idct(DCTELEM *data);
void ff_mmxext_idct(DCTELEM *data);

void odivx_idct_c(short *block);

/* Blackfin */
void ff_bfin_idct(DCTELEM *block);
void ff_bfin_fdct(DCTELEM *block);

/* AltiVec: forward DCT only, there is no idct_altivec routine */
void fdct_altivec(DCTELEM *block);

/* ARM */
void j_rev_dct_ARM(DCTELEM *data);
void simple_idct_ARM(DCTELEM *data);
void simple_idct_armv5te(DCTELEM *data);
void ff_simple_idct_armv6(DCTELEM *data);
void ff_simple_idct_neon(DCTELEM *data);

/* Alpha */
void ff_simple_idct_axp(DCTELEM *data);
70

    
71
struct algo {
72
  const char *name;
73
  enum { FDCT, IDCT } is_idct;
74
  void (* func) (DCTELEM *block);
75
  void (* ref)  (DCTELEM *block);
76
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
77
  int  mm_support;
78
};
79

    
80
/* Format tag used for the FAAN forward DCT entry: SCALE_PERM unless
 * FAAN_POSTSCALE is defined, in which case no rescaling is applied. */
#ifdef FAAN_POSTSCALE
#define FAAN_SCALE NO_PERM
#else
#define FAAN_SCALE SCALE_PERM
#endif

/* CPU feature flags, assigned from mm_support() at the start of main(). */
static int cpu_flags;
87

    
88
struct algo algos[] = {
89
  {"REF-DBL",         0, ff_ref_fdct,        ff_ref_fdct, NO_PERM},
90
  {"FAAN",            0, ff_faandct,         ff_ref_fdct, FAAN_SCALE},
91
  {"FAANI",           1, ff_faanidct,        ff_ref_idct, NO_PERM},
92
  {"IJG-AAN-INT",     0, fdct_ifast,         ff_ref_fdct, SCALE_PERM},
93
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, ff_ref_fdct, NO_PERM},
94
  {"REF-DBL",         1, ff_ref_idct,        ff_ref_idct, NO_PERM},
95
  {"INT",             1, j_rev_dct,          ff_ref_idct, MMX_PERM},
96
  {"SIMPLE-C",        1, ff_simple_idct,     ff_ref_idct, NO_PERM},
97

    
98
#if HAVE_MMX
99
  {"MMX",             0, ff_fdct_mmx,        ff_ref_fdct, NO_PERM, FF_MM_MMX},
100
#if HAVE_MMX2
101
  {"MMX2",            0, ff_fdct_mmx2,       ff_ref_fdct, NO_PERM, FF_MM_MMX2},
102
  {"SSE2",            0, ff_fdct_sse2,       ff_ref_fdct, NO_PERM, FF_MM_SSE2},
103
#endif
104

    
105
#if CONFIG_GPL
106
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        ff_ref_idct, MMX_PERM, FF_MM_MMX},
107
  {"LIBMPEG2-MMX2",   1, ff_mmxext_idct,     ff_ref_idct, MMX_PERM, FF_MM_MMX2},
108
#endif
109
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, ff_ref_idct, MMX_SIMPLE_PERM, FF_MM_MMX},
110
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   ff_ref_idct, NO_PERM, FF_MM_MMX},
111
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  ff_ref_idct, NO_PERM, FF_MM_MMX2},
112
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  ff_ref_idct, SSE2_PERM, FF_MM_SSE2},
113
#endif
114

    
115
#if HAVE_ALTIVEC
116
  {"altivecfdct",     0, fdct_altivec,       ff_ref_fdct, NO_PERM, FF_MM_ALTIVEC},
117
#endif
118

    
119
#if ARCH_BFIN
120
  {"BFINfdct",        0, ff_bfin_fdct,       ff_ref_fdct, NO_PERM},
121
  {"BFINidct",        1, ff_bfin_idct,       ff_ref_idct, NO_PERM},
122
#endif
123

    
124
#if ARCH_ARM
125
  {"SIMPLE-ARM",      1, simple_idct_ARM,    ff_ref_idct, NO_PERM },
126
  {"INT-ARM",         1, j_rev_dct_ARM,      ff_ref_idct, MMX_PERM },
127
#if HAVE_ARMV5TE
128
  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, ff_ref_idct, NO_PERM },
129
#endif
130
#if HAVE_ARMV6
131
  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, ff_ref_idct, MMX_PERM },
132
#endif
133
#if HAVE_NEON
134
  {"SIMPLE-NEON",     1, ff_simple_idct_neon, ff_ref_idct, PARTTRANS_PERM },
135
#endif
136
#endif /* ARCH_ARM */
137

    
138
#if ARCH_ALPHA
139
  {"SIMPLE-ALPHA",    1, ff_simple_idct_axp,  ff_ref_idct, NO_PERM },
140
#endif
141

    
142
  { 0 }
143
};
144

    
145
#define AANSCALE_BITS 12
146

    
147
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
148

    
149
/**
 * Read the current time with gettimeofday().
 *
 * @return seconds and microseconds combined into one microsecond count
 */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);
    usec  = (int64_t)now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
155

    
156
/* Number of blocks processed by the accuracy test. */
#define NB_ITS 20000
/* Number of transforms per batch between two clock reads in the speed test. */
#define NB_ITS_SPEED 50000

/* Input permutation for MMX_PERM transforms; computed by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Input permutation for MMX_SIMPLE_PERM transforms: coefficient i of the
 * natural-order block is stored at index idct_simple_mmx_perm[i]. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Column reordering applied within each row for SSE2_PERM transforms. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
173

    
174
static void idct_mmx_init(void)
175
{
176
    int i;
177

    
178
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
179
    for (i = 0; i < 64; i++) {
180
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
181
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
182
    }
183
}
184

    
185
DECLARE_ALIGNED(16, static DCTELEM, block)[64];
186
DECLARE_ALIGNED(8, static DCTELEM, block1)[64];
187
DECLARE_ALIGNED(8, static DCTELEM, block_org)[64];
188

    
189
/**
 * Issue the x86 `emms` instruction, but only when the program was built
 * with MMX support and cpu_flags reports an MMX-capable CPU.  Does nothing
 * otherwise.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & FF_MM_MMX))
        return;
    __asm__ volatile ("emms\n\t");
#endif
}
196

    
197
static void dct_error(const char *name, int is_idct,
198
               void (*fdct_func)(DCTELEM *block),
199
               void (*fdct_ref)(DCTELEM *block), int form, int test)
200
{
201
    int it, i, scale;
202
    int err_inf, v;
203
    int64_t err2, ti, ti1, it1;
204
    int64_t sysErr[64], sysErrMax=0;
205
    int maxout=0;
206
    int blockSumErrMax=0, blockSumErr;
207
    AVLFG prng;
208

    
209
    av_lfg_init(&prng, 1);
210

    
211
    err_inf = 0;
212
    err2 = 0;
213
    for(i=0; i<64; i++) sysErr[i]=0;
214
    for(it=0;it<NB_ITS;it++) {
215
        for(i=0;i<64;i++)
216
            block1[i] = 0;
217
        switch(test){
218
        case 0:
219
            for(i=0;i<64;i++)
220
                block1[i] = (av_lfg_get(&prng) % 512) -256;
221
            if (is_idct){
222
                ff_ref_fdct(block1);
223

    
224
                for(i=0;i<64;i++)
225
                    block1[i]>>=3;
226
            }
227
        break;
228
        case 1:{
229
            int num = av_lfg_get(&prng) % 10 + 1;
230
            for(i=0;i<num;i++)
231
                block1[av_lfg_get(&prng) % 64] = av_lfg_get(&prng) % 512 -256;
232
        }break;
233
        case 2:
234
            block1[0] = av_lfg_get(&prng) % 4096 - 2048;
235
            block1[63]= (block1[0]&1)^1;
236
        break;
237
        }
238

    
239
#if 0 // simulate mismatch control
240
{ int sum=0;
241
        for(i=0;i<64;i++)
242
           sum+=block1[i];
243

244
        if((sum&1)==0) block1[63]^=1;
245
}
246
#endif
247

    
248
        for(i=0; i<64; i++)
249
            block_org[i]= block1[i];
250

    
251
        if (form == MMX_PERM) {
252
            for(i=0;i<64;i++)
253
                block[idct_mmx_perm[i]] = block1[i];
254
            } else if (form == MMX_SIMPLE_PERM) {
255
            for(i=0;i<64;i++)
256
                block[idct_simple_mmx_perm[i]] = block1[i];
257

    
258
        } else if (form == SSE2_PERM) {
259
            for(i=0; i<64; i++)
260
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
261
        } else if (form == PARTTRANS_PERM) {
262
            for(i=0; i<64; i++)
263
                block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
264
        } else {
265
            for(i=0; i<64; i++)
266
                block[i]= block1[i];
267
        }
268
#if 0 // simulate mismatch control for tested IDCT but not the ref
269
{ int sum=0;
270
        for(i=0;i<64;i++)
271
           sum+=block[i];
272

273
        if((sum&1)==0) block[63]^=1;
274
}
275
#endif
276

    
277
        fdct_func(block);
278
        mmx_emms();
279

    
280
        if (form == SCALE_PERM) {
281
            for(i=0; i<64; i++) {
282
                scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
283
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
284
            }
285
        }
286

    
287
        fdct_ref(block1);
288

    
289
        blockSumErr=0;
290
        for(i=0;i<64;i++) {
291
            v = abs(block[i] - block1[i]);
292
            if (v > err_inf)
293
                err_inf = v;
294
            err2 += v * v;
295
            sysErr[i] += block[i] - block1[i];
296
            blockSumErr += v;
297
            if( abs(block[i])>maxout) maxout=abs(block[i]);
298
        }
299
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
300
#if 0 // print different matrix pairs
301
        if(blockSumErr){
302
            printf("\n");
303
            for(i=0; i<64; i++){
304
                if((i&7)==0) printf("\n");
305
                printf("%4d ", block_org[i]);
306
            }
307
            for(i=0; i<64; i++){
308
                if((i&7)==0) printf("\n");
309
                printf("%4d ", block[i] - block1[i]);
310
            }
311
        }
312
#endif
313
    }
314
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
315

    
316
#if 1 // dump systematic errors
317
    for(i=0; i<64; i++){
318
        if(i%8==0) printf("\n");
319
        printf("%7d ", (int)sysErr[i]);
320
    }
321
    printf("\n");
322
#endif
323

    
324
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
325
           is_idct ? "IDCT" : "DCT",
326
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
327
#if 1 //Speed test
328
    /* speed test */
329
    for(i=0;i<64;i++)
330
        block1[i] = 0;
331
    switch(test){
332
    case 0:
333
        for(i=0;i<64;i++)
334
            block1[i] = av_lfg_get(&prng) % 512 -256;
335
        if (is_idct){
336
            ff_ref_fdct(block1);
337

    
338
            for(i=0;i<64;i++)
339
                block1[i]>>=3;
340
        }
341
    break;
342
    case 1:{
343
    case 2:
344
        block1[0] = av_lfg_get(&prng) % 512 -256;
345
        block1[1] = av_lfg_get(&prng) % 512 -256;
346
        block1[2] = av_lfg_get(&prng) % 512 -256;
347
        block1[3] = av_lfg_get(&prng) % 512 -256;
348
    }break;
349
    }
350

    
351
    if (form == MMX_PERM) {
352
        for(i=0;i<64;i++)
353
            block[idct_mmx_perm[i]] = block1[i];
354
    } else if(form == MMX_SIMPLE_PERM) {
355
        for(i=0;i<64;i++)
356
            block[idct_simple_mmx_perm[i]] = block1[i];
357
    } else {
358
        for(i=0; i<64; i++)
359
            block[i]= block1[i];
360
    }
361

    
362
    ti = gettime();
363
    it1 = 0;
364
    do {
365
        for(it=0;it<NB_ITS_SPEED;it++) {
366
            for(i=0; i<64; i++)
367
                block[i]= block1[i];
368
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
369
// do not memcpy especially not fastmemcpy because it does movntq !!!
370
            fdct_func(block);
371
        }
372
        it1 += NB_ITS_SPEED;
373
        ti1 = gettime() - ti;
374
    } while (ti1 < 1000000);
375
    mmx_emms();
376

    
377
    printf("%s %s: %0.1f kdct/s\n",
378
           is_idct ? "IDCT" : "DCT",
379
           name, (double)it1 * 1000.0 / (double)ti1);
380
#endif
381
}
382

    
383
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
384
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
385

    
386
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
387
{
388
    static int init;
389
    static double c8[8][8];
390
    static double c4[4][4];
391
    double block1[64], block2[64], block3[64];
392
    double s, sum, v;
393
    int i, j, k;
394

    
395
    if (!init) {
396
        init = 1;
397

    
398
        for(i=0;i<8;i++) {
399
            sum = 0;
400
            for(j=0;j<8;j++) {
401
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
402
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
403
                sum += c8[i][j] * c8[i][j];
404
            }
405
        }
406

    
407
        for(i=0;i<4;i++) {
408
            sum = 0;
409
            for(j=0;j<4;j++) {
410
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
411
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
412
                sum += c4[i][j] * c4[i][j];
413
            }
414
        }
415
    }
416

    
417
    /* butterfly */
418
    s = 0.5 * sqrt(2.0);
419
    for(i=0;i<4;i++) {
420
        for(j=0;j<8;j++) {
421
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
422
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
423
        }
424
    }
425

    
426
    /* idct8 on lines */
427
    for(i=0;i<8;i++) {
428
        for(j=0;j<8;j++) {
429
            sum = 0;
430
            for(k=0;k<8;k++)
431
                sum += c8[k][j] * block1[8*i+k];
432
            block2[8*i+j] = sum;
433
        }
434
    }
435

    
436
    /* idct4 */
437
    for(i=0;i<8;i++) {
438
        for(j=0;j<4;j++) {
439
            /* top */
440
            sum = 0;
441
            for(k=0;k<4;k++)
442
                sum += c4[k][j] * block2[8*(2*k)+i];
443
            block3[8*(2*j)+i] = sum;
444

    
445
            /* bottom */
446
            sum = 0;
447
            for(k=0;k<4;k++)
448
                sum += c4[k][j] * block2[8*(2*k+1)+i];
449
            block3[8*(2*j+1)+i] = sum;
450
        }
451
    }
452

    
453
    /* clamp and store the result */
454
    for(i=0;i<8;i++) {
455
        for(j=0;j<8;j++) {
456
            v = block3[8*i+j];
457
            if (v < 0)
458
                v = 0;
459
            else if (v > 255)
460
                v = 255;
461
            dest[i * linesize + j] = (int)rint(v);
462
        }
463
    }
464
}
465

    
466
static void idct248_error(const char *name,
467
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
468
{
469
    int it, i, it1, ti, ti1, err_max, v;
470

    
471
    AVLFG prng;
472

    
473
    av_lfg_init(&prng, 1);
474

    
475
    /* just one test to see if code is correct (precision is less
476
       important here) */
477
    err_max = 0;
478
    for(it=0;it<NB_ITS;it++) {
479

    
480
        /* XXX: use forward transform to generate values */
481
        for(i=0;i<64;i++)
482
            block1[i] = av_lfg_get(&prng) % 256 - 128;
483
        block1[0] += 1024;
484

    
485
        for(i=0; i<64; i++)
486
            block[i]= block1[i];
487
        idct248_ref(img_dest1, 8, block);
488

    
489
        for(i=0; i<64; i++)
490
            block[i]= block1[i];
491
        idct248_put(img_dest, 8, block);
492

    
493
        for(i=0;i<64;i++) {
494
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
495
            if (v == 255)
496
                printf("%d %d\n", img_dest[i], img_dest1[i]);
497
            if (v > err_max)
498
                err_max = v;
499
        }
500
#if 0
501
        printf("ref=\n");
502
        for(i=0;i<8;i++) {
503
            int j;
504
            for(j=0;j<8;j++) {
505
                printf(" %3d", img_dest1[i*8+j]);
506
            }
507
            printf("\n");
508
        }
509

510
        printf("out=\n");
511
        for(i=0;i<8;i++) {
512
            int j;
513
            for(j=0;j<8;j++) {
514
                printf(" %3d", img_dest[i*8+j]);
515
            }
516
            printf("\n");
517
        }
518
#endif
519
    }
520
    printf("%s %s: err_inf=%d\n",
521
           1 ? "IDCT248" : "DCT248",
522
           name, err_max);
523

    
524
    ti = gettime();
525
    it1 = 0;
526
    do {
527
        for(it=0;it<NB_ITS_SPEED;it++) {
528
            for(i=0; i<64; i++)
529
                block[i]= block1[i];
530
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
531
// do not memcpy especially not fastmemcpy because it does movntq !!!
532
            idct248_put(img_dest, 8, block);
533
        }
534
        it1 += NB_ITS_SPEED;
535
        ti1 = gettime() - ti;
536
    } while (ti1 < 1000000);
537
    mmx_emms();
538

    
539
    printf("%s %s: %0.1f kdct/s\n",
540
           1 ? "IDCT248" : "DCT248",
541
           name, (double)it1 * 1000.0 / (double)ti1);
542
}
543

    
544
/**
 * Print the command line usage to stdout.  The synopsis lists every option
 * that main() passes to getopt(): -i, -4 and -h.
 */
static void help(void)
{
    printf("dct-test [-i] [-4] [-h] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-h          print this help\n");
}
553

    
554
int main(int argc, char **argv)
555
{
556
    int test_idct = 0, test_248_dct = 0;
557
    int c,i;
558
    int test=1;
559
    cpu_flags = mm_support();
560

    
561
    ff_ref_dct_init();
562
    idct_mmx_init();
563

    
564
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
565
    for(i=0;i<MAX_NEG_CROP;i++) {
566
        cropTbl[i] = 0;
567
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
568
    }
569

    
570
    for(;;) {
571
        c = getopt(argc, argv, "ih4");
572
        if (c == -1)
573
            break;
574
        switch(c) {
575
        case 'i':
576
            test_idct = 1;
577
            break;
578
        case '4':
579
            test_248_dct = 1;
580
            break;
581
        default :
582
        case 'h':
583
            help();
584
            return 0;
585
        }
586
    }
587

    
588
    if(optind <argc) test= atoi(argv[optind]);
589

    
590
    printf("ffmpeg DCT/IDCT test\n");
591

    
592
    if (test_248_dct) {
593
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
594
    } else {
595
      for (i=0;algos[i].name;i++)
596
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
597
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
598
        }
599
    }
600
    return 0;
601
}