Statistics
| Branch: | Revision:

ffmpeg / libavcodec / dct-test.c @ a2fc0f6a

History | View | Annotate | Download (16.1 KB)

1 04d7f601 Diego Biurrun
/*
 * (c) 2001 Fabrice Bellard
 *     2007 Marc Hoffman <marc.hoffman@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21
22 983e3246 Michael Niedermayer
/**
 * @file dct-test.c
 * DCT test. (c) 2001 Fabrice Bellard.
 * Started from sample code by Juan J. Sierralta P.
 */
27
28 de6d9b64 Fabrice Bellard
#include <stdlib.h>
29
#include <stdio.h>
30
#include <string.h>
31
#include <sys/time.h>
32
#include <unistd.h>
33 12807c8d Ronald S. Bultje
#include <math.h>
34 de6d9b64 Fabrice Bellard
35 ae32e509 Diego Biurrun
#include "libavutil/common.h"
36 de6d9b64 Fabrice Bellard
37 86748dbc Michael Niedermayer
#include "simple_idct.h"
38 10ac3618 Diego Biurrun
#include "aandcttab.h"
39 65e4c8c9 Michael Niedermayer
#include "faandct.h"
40 6f08c541 Michael Niedermayer
#include "faanidct.h"
41 54a0b6e5 Alexander Strange
#include "i386/idct_xvid.h"
42 9e1586fc Fabrice Bellard
43 434df899 Michael Niedermayer
#undef printf
44 cc61f96f Panagiotis Issaris
#undef random
45 434df899 Michael Niedermayer
46
/* Definition of the fast_memcpy symbol for this test program; it simply
 * forwards to the C library memcpy() and returns its result. */
void *fast_memcpy(void *a, const void *b, size_t c)
{
    return memcpy(a, b, c);
}
47
48 9e1586fc Fabrice Bellard
/* reference fdct/idct */
49 9686df2b Diego Biurrun
void fdct(DCTELEM *block);
50
void idct(DCTELEM *block);
51
void init_fdct();
52 de6d9b64 Fabrice Bellard
53 9686df2b Diego Biurrun
void ff_mmx_idct(DCTELEM *data);
54
void ff_mmxext_idct(DCTELEM *data);
55 9e1586fc Fabrice Bellard
56 9686df2b Diego Biurrun
void odivx_idct_c(short *block);
57 86748dbc Michael Niedermayer
58 3ac35bdb Marc Hoffman
// BFIN
59 9686df2b Diego Biurrun
void ff_bfin_idct(DCTELEM *block);
60
void ff_bfin_fdct(DCTELEM *block);
61 3ac35bdb Marc Hoffman
62
// ALTIVEC
63 9686df2b Diego Biurrun
void fdct_altivec(DCTELEM *block);
64
//void idct_altivec(DCTELEM *block);?? no routine
65 3ac35bdb Marc Hoffman
66 479044ce Måns Rullgård
// ARM
67
void j_rev_dct_ARM(DCTELEM *data);
68
void simple_idct_ARM(DCTELEM *data);
69
void simple_idct_armv5te(DCTELEM *data);
70
void ff_simple_idct_armv6(DCTELEM *data);
71
void ff_simple_idct_neon(DCTELEM *data);
72 3ac35bdb Marc Hoffman
73
struct algo {
74 f5b67781 Laurent Desnogues
  const char *name;
75 3ac35bdb Marc Hoffman
  enum { FDCT, IDCT } is_idct;
76
  void (* func) (DCTELEM *block);
77
  void (* ref)  (DCTELEM *block);
78 875f3125 Måns Rullgård
  enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
79 3794b928 Alexander Strange
  int  mm_support;
80 3ac35bdb Marc Hoffman
};
81
82
#ifndef FAAN_POSTSCALE
83
#define FAAN_SCALE SCALE_PERM
84
#else
85
#define FAAN_SCALE NO_PERM
86
#endif
87
88 aadd27cd Michael Niedermayer
static int cpu_flags;
89
90 3ac35bdb Marc Hoffman
struct algo algos[] = {
91 1a7ee380 Alexander Strange
  {"REF-DBL",         0, fdct,               fdct, NO_PERM},
92
  {"FAAN",            0, ff_faandct,         fdct, FAAN_SCALE},
93
  {"FAANI",           1, ff_faanidct,        idct, NO_PERM},
94
  {"IJG-AAN-INT",     0, fdct_ifast,         fdct, SCALE_PERM},
95
  {"IJG-LLM-INT",     0, ff_jpeg_fdct_islow, fdct, NO_PERM},
96
  {"REF-DBL",         1, idct,               idct, NO_PERM},
97
  {"INT",             1, j_rev_dct,          idct, MMX_PERM},
98
  {"SIMPLE-C",        1, ff_simple_idct,     idct, NO_PERM},
99 3ac35bdb Marc Hoffman
100 ee3035f3 Ronald S. Bultje
#ifdef HAVE_MMX
101 82d1605f Dominik Mierzejewski
  {"MMX",             0, ff_fdct_mmx,        fdct, NO_PERM, FF_MM_MMX},
102 94254fc0 Diego Biurrun
#ifdef HAVE_MMX2
103 82d1605f Dominik Mierzejewski
  {"MMX2",            0, ff_fdct_mmx2,       fdct, NO_PERM, FF_MM_MMXEXT},
104 83534418 Baptiste Coudurier
  {"SSE2",            0, ff_fdct_sse2,       fdct, NO_PERM, FF_MM_SSE2},
105 94254fc0 Diego Biurrun
#endif
106 3ac35bdb Marc Hoffman
107 b9702de5 Diego Biurrun
#ifdef CONFIG_GPL
108 82d1605f Dominik Mierzejewski
  {"LIBMPEG2-MMX",    1, ff_mmx_idct,        idct, MMX_PERM, FF_MM_MMX},
109
  {"LIBMPEG2-MMXEXT", 1, ff_mmxext_idct,     idct, MMX_PERM, FF_MM_MMXEXT},
110 b9702de5 Diego Biurrun
#endif
111 82d1605f Dominik Mierzejewski
  {"SIMPLE-MMX",      1, ff_simple_idct_mmx, idct, MMX_SIMPLE_PERM, FF_MM_MMX},
112
  {"XVID-MMX",        1, ff_idct_xvid_mmx,   idct, NO_PERM, FF_MM_MMX},
113
  {"XVID-MMX2",       1, ff_idct_xvid_mmx2,  idct, NO_PERM, FF_MM_MMXEXT},
114
  {"XVID-SSE2",       1, ff_idct_xvid_sse2,  idct, SSE2_PERM, FF_MM_SSE2},
115 3ac35bdb Marc Hoffman
#endif
116
117
#ifdef HAVE_ALTIVEC
118 82d1605f Dominik Mierzejewski
  {"altivecfdct",     0, fdct_altivec,       fdct, NO_PERM, FF_MM_ALTIVEC},
119 3ac35bdb Marc Hoffman
#endif
120
121
#ifdef ARCH_BFIN
122 1a7ee380 Alexander Strange
  {"BFINfdct",        0, ff_bfin_fdct,       fdct, NO_PERM},
123
  {"BFINidct",        1, ff_bfin_idct,       idct, NO_PERM},
124 3ac35bdb Marc Hoffman
#endif
125
126 a2fc0f6a Måns Rullgård
#ifdef ARCH_ARM
127 479044ce Måns Rullgård
  {"SIMPLE-ARM",      1, simple_idct_ARM,    idct, NO_PERM },
128
  {"INT-ARM",         1, j_rev_dct_ARM,      idct, MMX_PERM },
129
#ifdef HAVE_ARMV5TE
130
  {"SIMPLE-ARMV5TE",  1, simple_idct_armv5te, idct, NO_PERM },
131
#endif
132
#ifdef HAVE_ARMV6
133
  {"SIMPLE-ARMV6",    1, ff_simple_idct_armv6, idct, MMX_PERM },
134
#endif
135
#ifdef HAVE_NEON
136
  {"SIMPLE-NEON",     1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
137
#endif
138 a2fc0f6a Måns Rullgård
#endif /* ARCH_ARM */
139 479044ce Måns Rullgård
140 3ac35bdb Marc Hoffman
  { 0 }
141
};
142
143 de6d9b64 Fabrice Bellard
#define AANSCALE_BITS 12
144
145 486497e0 Måns Rullgård
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
146 86748dbc Michael Niedermayer
147 0c1a9eda Zdenek Kabelac
int64_t gettime(void)
148 de6d9b64 Fabrice Bellard
{
149
    struct timeval tv;
150
    gettimeofday(&tv,NULL);
151 0c1a9eda Zdenek Kabelac
    return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
152 de6d9b64 Fabrice Bellard
}
153
154
#define NB_ITS 20000
155
#define NB_ITS_SPEED 50000
156
157 9e1586fc Fabrice Bellard
static short idct_mmx_perm[64];
158
159 86748dbc Michael Niedermayer
/* Input permutation used for MMX_SIMPLE_PERM: coefficient i of the natural
 * order is stored at index idct_simple_mmx_perm[i]. Read-only lookup table. */
static const short idct_simple_mmx_perm[64]={
        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
169
170 ad246860 Alexander Strange
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
171
172 9e1586fc Fabrice Bellard
void idct_mmx_init(void)
173
{
174
    int i;
175
176
    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
177
    for (i = 0; i < 64; i++) {
178 bb270c08 Diego Biurrun
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
179
//        idct_simple_mmx_perm[i] = simple_block_permute_op(i);
180 9e1586fc Fabrice Bellard
    }
181
}
182
183 55727a83 Alexander Strange
static DCTELEM block[64] __attribute__ ((aligned (16)));
184 9e1586fc Fabrice Bellard
static DCTELEM block1[64] __attribute__ ((aligned (8)));
185 86748dbc Michael Niedermayer
static DCTELEM block_org[64] __attribute__ ((aligned (8)));
186 9e1586fc Fabrice Bellard
187 aadd27cd Michael Niedermayer
/* Execute the x86 "emms" instruction when built with HAVE_MMX and the
 * FF_MM_MMX bit is set in cpu_flags; otherwise do nothing. */
static inline void mmx_emms(void)
{
#ifdef HAVE_MMX
    if (cpu_flags & FF_MM_MMX)
        __asm__ volatile ("emms\n\t");
#endif
}
194
195 9e1586fc Fabrice Bellard
void dct_error(const char *name, int is_idct,
196
               void (*fdct_func)(DCTELEM *block),
197 3ac35bdb Marc Hoffman
               void (*fdct_ref)(DCTELEM *block), int form, int test)
198 de6d9b64 Fabrice Bellard
{
199
    int it, i, scale;
200
    int err_inf, v;
201 0c1a9eda Zdenek Kabelac
    int64_t err2, ti, ti1, it1;
202
    int64_t sysErr[64], sysErrMax=0;
203 86748dbc Michael Niedermayer
    int maxout=0;
204
    int blockSumErrMax=0, blockSumErr;
205 de6d9b64 Fabrice Bellard
206
    srandom(0);
207
208
    err_inf = 0;
209
    err2 = 0;
210 86748dbc Michael Niedermayer
    for(i=0; i<64; i++) sysErr[i]=0;
211 de6d9b64 Fabrice Bellard
    for(it=0;it<NB_ITS;it++) {
212 86748dbc Michael Niedermayer
        for(i=0;i<64;i++)
213
            block1[i] = 0;
214
        switch(test){
215 115329f1 Diego Biurrun
        case 0:
216 86748dbc Michael Niedermayer
            for(i=0;i<64;i++)
217
                block1[i] = (random() % 512) -256;
218 ad324c93 Michael Niedermayer
            if (is_idct){
219 86748dbc Michael Niedermayer
                fdct(block1);
220 ad324c93 Michael Niedermayer
221
                for(i=0;i<64;i++)
222
                    block1[i]>>=3;
223
            }
224 86748dbc Michael Niedermayer
        break;
225
        case 1:{
226
            int num= (random()%10)+1;
227
            for(i=0;i<num;i++)
228
                block1[random()%64] = (random() % 512) -256;
229
        }break;
230
        case 2:
231
            block1[0]= (random()%4096)-2048;
232
            block1[63]= (block1[0]&1)^1;
233
        break;
234
        }
235 9e1586fc Fabrice Bellard
236 86748dbc Michael Niedermayer
#if 0 // simulate mismatch control
237
{ int sum=0;
238
        for(i=0;i<64;i++)
239
           sum+=block1[i];
240

241 115329f1 Diego Biurrun
        if((sum&1)==0) block1[63]^=1;
242 86748dbc Michael Niedermayer
}
243
#endif
244
245
        for(i=0; i<64; i++)
246
            block_org[i]= block1[i];
247 9e1586fc Fabrice Bellard
248 3ac35bdb Marc Hoffman
        if (form == MMX_PERM) {
249 86748dbc Michael Niedermayer
            for(i=0;i<64;i++)
250 9e1586fc Fabrice Bellard
                block[idct_mmx_perm[i]] = block1[i];
251 3ac35bdb Marc Hoffman
            } else if (form == MMX_SIMPLE_PERM) {
252 86748dbc Michael Niedermayer
            for(i=0;i<64;i++)
253
                block[idct_simple_mmx_perm[i]] = block1[i];
254
255 ad246860 Alexander Strange
        } else if (form == SSE2_PERM) {
256
            for(i=0; i<64; i++)
257
                block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
258 875f3125 Måns Rullgård
        } else if (form == PARTTRANS_PERM) {
259
            for(i=0; i<64; i++)
260
                block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
261 bb270c08 Diego Biurrun
        } else {
262 86748dbc Michael Niedermayer
            for(i=0; i<64; i++)
263
                block[i]= block1[i];
264 9e1586fc Fabrice Bellard
        }
265 86748dbc Michael Niedermayer
#if 0 // simulate mismatch control for tested IDCT but not the ref
266
{ int sum=0;
267
        for(i=0;i<64;i++)
268
           sum+=block[i];
269

270 115329f1 Diego Biurrun
        if((sum&1)==0) block[63]^=1;
271 86748dbc Michael Niedermayer
}
272
#endif
273 9e1586fc Fabrice Bellard
274 de6d9b64 Fabrice Bellard
        fdct_func(block);
275 aadd27cd Michael Niedermayer
        mmx_emms();
276 9e1586fc Fabrice Bellard
277 3ac35bdb Marc Hoffman
        if (form == SCALE_PERM) {
278 de6d9b64 Fabrice Bellard
            for(i=0; i<64; i++) {
279 10ac3618 Diego Biurrun
                scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
280 86748dbc Michael Niedermayer
                block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
281
            }
282
        }
283
284 9e1586fc Fabrice Bellard
        fdct_ref(block1);
285 de6d9b64 Fabrice Bellard
286 86748dbc Michael Niedermayer
        blockSumErr=0;
287 de6d9b64 Fabrice Bellard
        for(i=0;i<64;i++) {
288
            v = abs(block[i] - block1[i]);
289
            if (v > err_inf)
290
                err_inf = v;
291
            err2 += v * v;
292 bb270c08 Diego Biurrun
            sysErr[i] += block[i] - block1[i];
293
            blockSumErr += v;
294
            if( abs(block[i])>maxout) maxout=abs(block[i]);
295 de6d9b64 Fabrice Bellard
        }
296 86748dbc Michael Niedermayer
        if(blockSumErrMax < blockSumErr) blockSumErrMax= blockSumErr;
297
#if 0 // print different matrix pairs
298
        if(blockSumErr){
299
            printf("\n");
300
            for(i=0; i<64; i++){
301
                if((i&7)==0) printf("\n");
302
                printf("%4d ", block_org[i]);
303
            }
304
            for(i=0; i<64; i++){
305
                if((i&7)==0) printf("\n");
306
                printf("%4d ", block[i] - block1[i]);
307
            }
308
        }
309
#endif
310
    }
311 ae32e509 Diego Biurrun
    for(i=0; i<64; i++) sysErrMax= FFMAX(sysErrMax, FFABS(sysErr[i]));
312 115329f1 Diego Biurrun
313 86748dbc Michael Niedermayer
#if 1 // dump systematic errors
314
    for(i=0; i<64; i++){
315 bb270c08 Diego Biurrun
        if(i%8==0) printf("\n");
316 86748dbc Michael Niedermayer
        printf("%5d ", (int)sysErr[i]);
317 de6d9b64 Fabrice Bellard
    }
318 86748dbc Michael Niedermayer
    printf("\n");
319
#endif
320 115329f1 Diego Biurrun
321 86748dbc Michael Niedermayer
    printf("%s %s: err_inf=%d err2=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
322 9e1586fc Fabrice Bellard
           is_idct ? "IDCT" : "DCT",
323 86748dbc Michael Niedermayer
           name, err_inf, (double)err2 / NB_ITS / 64.0, (double)sysErrMax / NB_ITS, maxout, blockSumErrMax);
324
#if 1 //Speed test
325 de6d9b64 Fabrice Bellard
    /* speed test */
326 86748dbc Michael Niedermayer
    for(i=0;i<64;i++)
327
        block1[i] = 0;
328
    switch(test){
329 115329f1 Diego Biurrun
    case 0:
330 86748dbc Michael Niedermayer
        for(i=0;i<64;i++)
331
            block1[i] = (random() % 512) -256;
332 ad324c93 Michael Niedermayer
        if (is_idct){
333 86748dbc Michael Niedermayer
            fdct(block1);
334 ad324c93 Michael Niedermayer
335
            for(i=0;i<64;i++)
336
                block1[i]>>=3;
337
        }
338 86748dbc Michael Niedermayer
    break;
339
    case 1:{
340
    case 2:
341
        block1[0] = (random() % 512) -256;
342
        block1[1] = (random() % 512) -256;
343
        block1[2] = (random() % 512) -256;
344
        block1[3] = (random() % 512) -256;
345
    }break;
346
    }
347 de6d9b64 Fabrice Bellard
348 3ac35bdb Marc Hoffman
    if (form == MMX_PERM) {
349 86748dbc Michael Niedermayer
        for(i=0;i<64;i++)
350 9e1586fc Fabrice Bellard
            block[idct_mmx_perm[i]] = block1[i];
351 3ac35bdb Marc Hoffman
    } else if(form == MMX_SIMPLE_PERM) {
352 86748dbc Michael Niedermayer
        for(i=0;i<64;i++)
353
            block[idct_simple_mmx_perm[i]] = block1[i];
354
    } else {
355
        for(i=0; i<64; i++)
356
            block[i]= block1[i];
357 9e1586fc Fabrice Bellard
    }
358
359 de6d9b64 Fabrice Bellard
    ti = gettime();
360
    it1 = 0;
361
    do {
362
        for(it=0;it<NB_ITS_SPEED;it++) {
363 86748dbc Michael Niedermayer
            for(i=0; i<64; i++)
364
                block[i]= block1[i];
365
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
366 755bfeab Diego Biurrun
// do not memcpy especially not fastmemcpy because it does movntq !!!
367 de6d9b64 Fabrice Bellard
            fdct_func(block);
368
        }
369
        it1 += NB_ITS_SPEED;
370
        ti1 = gettime() - ti;
371
    } while (ti1 < 1000000);
372 aadd27cd Michael Niedermayer
    mmx_emms();
373 de6d9b64 Fabrice Bellard
374 86748dbc Michael Niedermayer
    printf("%s %s: %0.1f kdct/s\n",
375 9e1586fc Fabrice Bellard
           is_idct ? "IDCT" : "DCT",
376 de6d9b64 Fabrice Bellard
           name, (double)it1 * 1000.0 / (double)ti1);
377 86748dbc Michael Niedermayer
#endif
378 de6d9b64 Fabrice Bellard
}
379
380 0c1a9eda Zdenek Kabelac
static uint8_t img_dest[64] __attribute__ ((aligned (8)));
381
static uint8_t img_dest1[64] __attribute__ ((aligned (8)));
382 a46a3ce4 Fabrice Bellard
383 0c1a9eda Zdenek Kabelac
void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
384 a46a3ce4 Fabrice Bellard
{
385
    static int init;
386
    static double c8[8][8];
387
    static double c4[4][4];
388
    double block1[64], block2[64], block3[64];
389
    double s, sum, v;
390
    int i, j, k;
391
392
    if (!init) {
393
        init = 1;
394
395
        for(i=0;i<8;i++) {
396
            sum = 0;
397
            for(j=0;j<8;j++) {
398
                s = (i==0) ? sqrt(1.0/8.0) : sqrt(1.0/4.0);
399
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
400
                sum += c8[i][j] * c8[i][j];
401
            }
402
        }
403 115329f1 Diego Biurrun
404 a46a3ce4 Fabrice Bellard
        for(i=0;i<4;i++) {
405
            sum = 0;
406
            for(j=0;j<4;j++) {
407
                s = (i==0) ? sqrt(1.0/4.0) : sqrt(1.0/2.0);
408
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
409
                sum += c4[i][j] * c4[i][j];
410
            }
411
        }
412
    }
413
414
    /* butterfly */
415 652f0197 Fabrice Bellard
    s = 0.5 * sqrt(2.0);
416 a46a3ce4 Fabrice Bellard
    for(i=0;i<4;i++) {
417
        for(j=0;j<8;j++) {
418 652f0197 Fabrice Bellard
            block1[8*(2*i)+j] = (block[8*(2*i)+j] + block[8*(2*i+1)+j]) * s;
419
            block1[8*(2*i+1)+j] = (block[8*(2*i)+j] - block[8*(2*i+1)+j]) * s;
420 a46a3ce4 Fabrice Bellard
        }
421
    }
422
423
    /* idct8 on lines */
424
    for(i=0;i<8;i++) {
425
        for(j=0;j<8;j++) {
426
            sum = 0;
427
            for(k=0;k<8;k++)
428
                sum += c8[k][j] * block1[8*i+k];
429
            block2[8*i+j] = sum;
430
        }
431
    }
432
433
    /* idct4 */
434
    for(i=0;i<8;i++) {
435
        for(j=0;j<4;j++) {
436
            /* top */
437
            sum = 0;
438
            for(k=0;k<4;k++)
439
                sum += c4[k][j] * block2[8*(2*k)+i];
440
            block3[8*(2*j)+i] = sum;
441
442
            /* bottom */
443
            sum = 0;
444
            for(k=0;k<4;k++)
445
                sum += c4[k][j] * block2[8*(2*k+1)+i];
446
            block3[8*(2*j+1)+i] = sum;
447
        }
448
    }
449
450
    /* clamp and store the result */
451
    for(i=0;i<8;i++) {
452
        for(j=0;j<8;j++) {
453 652f0197 Fabrice Bellard
            v = block3[8*i+j];
454 a46a3ce4 Fabrice Bellard
            if (v < 0)
455
                v = 0;
456
            else if (v > 255)
457
                v = 255;
458
            dest[i * linesize + j] = (int)rint(v);
459
        }
460
    }
461
}
462
463 115329f1 Diego Biurrun
void idct248_error(const char *name,
464 0c1a9eda Zdenek Kabelac
                    void (*idct248_put)(uint8_t *dest, int line_size, int16_t *block))
465 a46a3ce4 Fabrice Bellard
{
466
    int it, i, it1, ti, ti1, err_max, v;
467
468
    srandom(0);
469 115329f1 Diego Biurrun
470 a46a3ce4 Fabrice Bellard
    /* just one test to see if code is correct (precision is less
471
       important here) */
472
    err_max = 0;
473
    for(it=0;it<NB_ITS;it++) {
474 115329f1 Diego Biurrun
475 652f0197 Fabrice Bellard
        /* XXX: use forward transform to generate values */
476
        for(i=0;i<64;i++)
477
            block1[i] = (random() % 256) - 128;
478
        block1[0] += 1024;
479
480 a46a3ce4 Fabrice Bellard
        for(i=0; i<64; i++)
481
            block[i]= block1[i];
482
        idct248_ref(img_dest1, 8, block);
483 115329f1 Diego Biurrun
484 652f0197 Fabrice Bellard
        for(i=0; i<64; i++)
485
            block[i]= block1[i];
486
        idct248_put(img_dest, 8, block);
487 115329f1 Diego Biurrun
488 652f0197 Fabrice Bellard
        for(i=0;i<64;i++) {
489
            v = abs((int)img_dest[i] - (int)img_dest1[i]);
490
            if (v == 255)
491
                printf("%d %d\n", img_dest[i], img_dest1[i]);
492
            if (v > err_max)
493
                err_max = v;
494
        }
495 a46a3ce4 Fabrice Bellard
#if 0
496
        printf("ref=\n");
497
        for(i=0;i<8;i++) {
498
            int j;
499
            for(j=0;j<8;j++) {
500
                printf(" %3d", img_dest1[i*8+j]);
501
            }
502
            printf("\n");
503
        }
504 115329f1 Diego Biurrun

505 a46a3ce4 Fabrice Bellard
        printf("out=\n");
506
        for(i=0;i<8;i++) {
507
            int j;
508
            for(j=0;j<8;j++) {
509
                printf(" %3d", img_dest[i*8+j]);
510
            }
511
            printf("\n");
512
        }
513
#endif
514
    }
515
    printf("%s %s: err_inf=%d\n",
516
           1 ? "IDCT248" : "DCT248",
517
           name, err_max);
518
519
    ti = gettime();
520
    it1 = 0;
521
    do {
522
        for(it=0;it<NB_ITS_SPEED;it++) {
523
            for(i=0; i<64; i++)
524
                block[i]= block1[i];
525
//            memcpy(block, block1, sizeof(DCTELEM) * 64);
526 755bfeab Diego Biurrun
// do not memcpy especially not fastmemcpy because it does movntq !!!
527 a46a3ce4 Fabrice Bellard
            idct248_put(img_dest, 8, block);
528
        }
529
        it1 += NB_ITS_SPEED;
530
        ti1 = gettime() - ti;
531
    } while (ti1 < 1000000);
532 aadd27cd Michael Niedermayer
    mmx_emms();
533 a46a3ce4 Fabrice Bellard
534
    printf("%s %s: %0.1f kdct/s\n",
535
           1 ? "IDCT248" : "DCT248",
536
           name, (double)it1 * 1000.0 / (double)ti1);
537
}
538
539 9e1586fc Fabrice Bellard
/* Print the command-line usage summary to stdout. */
void help(void)
{
    printf("dct-test [-i] [-4] [-h] [<test-number>]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-h          print this help\n");
}
548
549 de6d9b64 Fabrice Bellard
int main(int argc, char **argv)
550
{
551 a46a3ce4 Fabrice Bellard
    int test_idct = 0, test_248_dct = 0;
552 86748dbc Michael Niedermayer
    int c,i;
553
    int test=1;
554 aadd27cd Michael Niedermayer
    cpu_flags = mm_support();
555 9e1586fc Fabrice Bellard
556 de6d9b64 Fabrice Bellard
    init_fdct();
557 9e1586fc Fabrice Bellard
    idct_mmx_init();
558 f67a10cd Alexander Strange
559 486497e0 Måns Rullgård
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
560 86748dbc Michael Niedermayer
    for(i=0;i<MAX_NEG_CROP;i++) {
561 486497e0 Måns Rullgård
        cropTbl[i] = 0;
562
        cropTbl[i + MAX_NEG_CROP + 256] = 255;
563 86748dbc Michael Niedermayer
    }
564 115329f1 Diego Biurrun
565 9e1586fc Fabrice Bellard
    for(;;) {
566 a46a3ce4 Fabrice Bellard
        c = getopt(argc, argv, "ih4");
567 9e1586fc Fabrice Bellard
        if (c == -1)
568
            break;
569
        switch(c) {
570
        case 'i':
571
            test_idct = 1;
572
            break;
573 a46a3ce4 Fabrice Bellard
        case '4':
574
            test_248_dct = 1;
575
            break;
576 86748dbc Michael Niedermayer
        default :
577 9e1586fc Fabrice Bellard
        case 'h':
578
            help();
579 c6bdc908 Marc Hoffman
            return 0;
580 9e1586fc Fabrice Bellard
        }
581
    }
582 115329f1 Diego Biurrun
583 86748dbc Michael Niedermayer
    if(optind <argc) test= atoi(argv[optind]);
584 115329f1 Diego Biurrun
585 9e1586fc Fabrice Bellard
    printf("ffmpeg DCT/IDCT test\n");
586
587 a46a3ce4 Fabrice Bellard
    if (test_248_dct) {
588 59e6f60a Aurelien Jacobs
        idct248_error("SIMPLE-C", ff_simple_idct248_put);
589 9e1586fc Fabrice Bellard
    } else {
590 3ac35bdb Marc Hoffman
      for (i=0;algos[i].name;i++)
591 dafe8824 Alexander Strange
        if (algos[i].is_idct == test_idct && !(~cpu_flags & algos[i].mm_support)) {
592 3ac35bdb Marc Hoffman
          dct_error (algos[i].name, algos[i].is_idct, algos[i].func, algos[i].ref, algos[i].format, test);
593 a46a3ce4 Fabrice Bellard
        }
594 9e1586fc Fabrice Bellard
    }
595 de6d9b64 Fabrice Bellard
    return 0;
596
}