Statistics
| Branch: | Revision:

ffmpeg / postproc / postprocess.c @ b01be121

History | View | Annotate | Download (23.5 KB)

1
/*
2
    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
                        C        MMX        MMX2        3DNow
21
isVertDC                Ec        Ec
22
isVertMinMaxOk                Ec        Ec
23
doVertLowPass                E                e        e
24
doVertDefFilter                Ec        Ec        e        e
25
isHorizDC                Ec        Ec
26
isHorizMinMaxOk                a        E
27
doHorizLowPass                E                e        e
28
doHorizDefFilter        Ec        Ec        e        e
29
deRing                        E                e        e*
30
Vertical RKAlgo1        E                a        a
31
Horizontal RKAlgo1                        a        a
32
Vertical X1#                a                E        E
33
Horizontal X1#                a                E        E
34
LinIpolDeinterlace        e                E        E*
35
CubicIpolDeinterlace        a                e        e*
36
LinBlendDeinterlace        e                E        E*
37
MedianDeinterlace#                 Ec        Ec
38
TempDeNoiser#                E                e        e
39

40
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
41
# more or less self-invented filters, so the exactness isn't too meaningful
42
E = Exact implementation
43
e = almost exact implementation (slightly different rounding, ...)
44
a = alternative / approximate impl
45
c = checked against the other implementations (-vo md5)
46
*/
47

    
48
/*
49
TODO:
50
reduce the time wasted on the mem transfer
51
unroll stuff if instructions depend too much on the prior one
52
move YScale thing to the end instead of fixing QP
53
write a faster and higher quality deblocking filter :)
54
make the mainloop more flexible (variable number of blocks at once
55
        (the if/else stuff per block is slowing things down)
56
compare the quality & speed of all filters
57
split this huge file
58
optimize c versions
59
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
60
...
61
*/
62

    
63
//Changelog: use the CVS log
64

    
65
#include "../config.h"
66
#include <inttypes.h>
67
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70
#ifdef HAVE_MALLOC_H
71
#include <malloc.h>
72
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78
#include "../libvo/fastmemcpy.h"
79
#include "postprocess.h"
80
#include "postprocess_internal.h"
81
#include "../mangle.h"
82

    
83
/*
 * Small arithmetic helper macros.
 * Each argument may be evaluated more than once, so never pass an
 * expression with side effects (e.g. i++).
 */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : -(a))
/* Note: SIGN() yields -1 for zero as well as for negative values. */
#define SIGN(a) ((a) > 0 ? 1 : -1)
87

    
88
// Size of the scratch buffer in which the mode string is copied, expanded and tokenized
#define GET_MODE_BUFFER_SIZE 500
// Capacity of the per-filter options[] array in the mode parser (including its NULL terminator)
#define OPTIONS_ARRAY_SIZE 10
// Loop bound used by the block filters in this file (lines / columns handled per block)
#define BLOCK_SIZE 8
// NOTE(review): not referenced in the visible part of this file; presumably the stride of the
// temporary block buffers used by the template code - confirm
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
93

    
94
#ifdef ARCH_X86
95
static uint64_t __attribute__((aligned(8))) w05=                0x0005000500050005LL;
96
static uint64_t __attribute__((aligned(8))) w20=                0x0020002000200020LL;
97
static uint64_t __attribute__((aligned(8))) b00=                 0x0000000000000000LL;
98
static uint64_t __attribute__((aligned(8))) b01=                 0x0101010101010101LL;
99
static uint64_t __attribute__((aligned(8))) b02=                 0x0202020202020202LL;
100
static uint64_t __attribute__((aligned(8))) b08=                 0x0808080808080808LL;
101
static uint64_t __attribute__((aligned(8))) b80=                 0x8080808080808080LL;
102
#endif
103

    
104
// Verbosity level; values above 1 make the mode parser print what it is parsing
static int verbose= 0;

// NOTE(review): not referenced in the visible part of this file; presumably the threshold
// of the dering filter in the template code - confirm
static const int deringThreshold= 20;
107

    
108

    
109
/**
 * Table of all filters known to the mode parser, terminated by an all-NULL/0 entry.
 * The parser matches a filter name against both strings of an entry and then sets or
 * clears the entry's mask in lumMode / chromMode.
 * NOTE(review): the columns are presumably shortName, longName, chromDefault,
 * minLumQuality, minChromQuality, mask - confirm against struct PPFilter.
 */
static struct PPFilter filters[]=
{
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
        {"dr", "dering",                 1, 5, 6, DERING},
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
        {NULL, NULL,0,0,0,0} //End Marker
};
128

    
129
/**
 * Alias table used by the mode parser: entries come in pairs (alias, expansion).
 * When a filter name equals an alias, the expansion string is spliced into the
 * mode string being parsed. The table ends with a single NULL.
 */
static char *replaceTable[]=
{
        "default",         "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",                 "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",                 "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL //End Marker
};
137

    
138
#ifdef ARCH_X86
139
static inline void unusedVariableWarningFixer()
140
{
141
        if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
142
}
143
#endif
144

    
145

    
146
#ifdef ARCH_X86
147
/**
 * Emit the x86 "prefetchnta" instruction for the address held in p.
 */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__(
                "prefetchnta (%0)\n\t"
                :
                : "r" (p)
        );
}
153

    
154
/**
 * Emit the x86 "prefetcht0" instruction for the address held in p.
 */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__(
                "prefetcht0 (%0)\n\t"
                :
                : "r" (p)
        );
}
160

    
161
/**
 * Emit the x86 "prefetcht1" instruction for the address held in p.
 */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__(
                "prefetcht1 (%0)\n\t"
                :
                : "r" (p)
        );
}
167

    
168
/**
 * Emit the x86 "prefetcht2" instruction for the address held in p.
 */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__(
                "prefetcht2 (%0)\n\t"
                :
                : "r" (p)
        );
}
174
#endif
175

    
176
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
177

    
178
/**
179
 * Check if the given 8x8 Block is mostly "flat"
180
 */
181
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
182
{
183
        int numEq= 0;
184
        int y;
185
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
186
        const int dcThreshold= dcOffset*2 + 1;
187
        for(y=0; y<BLOCK_SIZE; y++)
188
        {
189
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
190
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
191
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
192
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
193
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
194
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
195
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
196
                src+= stride;
197
        }
198
        return numEq > c->ppMode.flatnessThreshold;
199
}
200

    
201
/**
202
 * Check if the middle 8x8 Block in the given 8x16 block is flat
203
 */
204
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
205
        int numEq= 0;
206
        int y;
207
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
208
        const int dcThreshold= dcOffset*2 + 1;
209
        src+= stride*4; // src points to begin of the 8x8 Block
210
        for(y=0; y<BLOCK_SIZE-1; y++)
211
        {
212
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
213
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
214
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
215
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
216
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
217
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
218
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
219
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
220
                src+= stride;
221
        }
222
        return numEq > c->ppMode.flatnessThreshold;
223
}
224

    
225
/**
 * Check that the first and the last pixel of a line differ by no more than 2*QP.
 * Only src[0] and src[7] are read; stride is accepted but not used.
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        const int spread= abs(src[0] - src[7]);

        return spread <= 2*QP;
}
231

    
232
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
233
{
234
        int y;
235
        for(y=0; y<BLOCK_SIZE; y++)
236
        {
237
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
238

    
239
                if(ABS(middleEnergy) < 8*QP)
240
                {
241
                        const int q=(dst[3] - dst[4])/2;
242
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
243
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
244

    
245
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
246
                        d= MAX(d, 0);
247

    
248
                        d= (5*d + 32) >> 6;
249
                        d*= SIGN(-middleEnergy);
250

    
251
                        if(q>0)
252
                        {
253
                                d= d<0 ? 0 : d;
254
                                d= d>q ? q : d;
255
                        }
256
                        else
257
                        {
258
                                d= d>0 ? 0 : d;
259
                                d= d<q ? q : d;
260
                        }
261

    
262
                        dst[3]-= d;
263
                        dst[4]+= d;
264
                }
265
                dst+= stride;
266
        }
267
}
268

    
269
/**
270
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
271
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
272
 */
273
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
274
{
275

    
276
        int y;
277
        for(y=0; y<BLOCK_SIZE; y++)
278
        {
279
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
280
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
281

    
282
                int sums[9];
283
                sums[0] = first + dst[0];
284
                sums[1] = dst[0] + dst[1];
285
                sums[2] = dst[1] + dst[2];
286
                sums[3] = dst[2] + dst[3];
287
                sums[4] = dst[3] + dst[4];
288
                sums[5] = dst[4] + dst[5];
289
                sums[6] = dst[5] + dst[6];
290
                sums[7] = dst[6] + dst[7];
291
                sums[8] = dst[7] + last;
292

    
293
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
294
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
295
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
296
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
297
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
298
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
299
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
300
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
301

    
302
                dst+= stride;
303
        }
304
}
305

    
306
/**
307
 * Experimental Filter 1 (Horizontal)
308
 * will not damage linear gradients
309
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
310
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
311
 * MMX2 version does correct clipping C version doesnt
312
 * not identical with the vertical one
313
 */
314
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
315
{
316
        int y;
317
        static uint64_t *lut= NULL;
318
        if(lut==NULL)
319
        {
320
                int i;
321
                lut= (uint64_t*)memalign(8, 256*8);
322
                for(i=0; i<256; i++)
323
                {
324
                        int v= i < 128 ? 2*i : 2*(i-256);
325
/*
326
//Simulate 112242211 9-Tap filter
327
                        uint64_t a= (v/16) & 0xFF;
328
                        uint64_t b= (v/8) & 0xFF;
329
                        uint64_t c= (v/4) & 0xFF;
330
                        uint64_t d= (3*v/8) & 0xFF;
331
*/
332
//Simulate piecewise linear interpolation
333
                        uint64_t a= (v/16) & 0xFF;
334
                        uint64_t b= (v*3/16) & 0xFF;
335
                        uint64_t c= (v*5/16) & 0xFF;
336
                        uint64_t d= (7*v/16) & 0xFF;
337
                        uint64_t A= (0x100 - a)&0xFF;
338
                        uint64_t B= (0x100 - b)&0xFF;
339
                        uint64_t C= (0x100 - c)&0xFF;
340
                        uint64_t D= (0x100 - c)&0xFF;
341

    
342
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
343
                                (D<<24) | (C<<16) | (B<<8) | (A);
344
                        //lut[i] = (v<<32) | (v<<24);
345
                }
346
        }
347

    
348
        for(y=0; y<BLOCK_SIZE; y++)
349
        {
350
                int a= src[1] - src[2];
351
                int b= src[3] - src[4];
352
                int c= src[5] - src[6];
353

    
354
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
355

    
356
                if(d < QP)
357
                {
358
                        int v = d * SIGN(-b);
359

    
360
                        src[1] +=v/8;
361
                        src[2] +=v/4;
362
                        src[3] +=3*v/8;
363
                        src[4] -=3*v/8;
364
                        src[5] -=v/4;
365
                        src[6] -=v/8;
366

    
367
                }
368
                src+=stride;
369
        }
370
}
371

    
372

    
373
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
// The COMPILE_xyz macros select which variants of postprocess_template.c get compiled:
// with RUNTIME_CPUDETECT all of them (the CPU specific ones only on ARCH_X86), otherwise
// only the one matching the HAVE_xyz configuration.
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// From here on the HAVE_xyz / ARCH_X86 macros no longer describe the build configuration:
// they are redefined before each inclusion of the template to select the code it emits.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

// Each inclusion below instantiates the template once; RENAME() appends a suffix to the
// names it is applied to (e.g. postProcess_C, postProcess_MMX2) so the variants can coexist.
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
440

    
441
// Minor note: the HAVE_xyz macros are in an inconsistent state after this point, so don't rely on them
442

    
443
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
444
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
445
{
446
        PPContext *c= (PPContext *)vc;
447
        PPMode *ppMode= (PPMode *)vm;
448
        c->ppMode= *ppMode; //FIXME
449

    
450
        // useing ifs here as they are faster than function pointers allthough the
451
        // difference wouldnt be messureable here but its much better because
452
        // someone might exchange the cpu whithout restarting mplayer ;)
453
#ifdef RUNTIME_CPUDETECT
454
#ifdef ARCH_X86
455
        // ordered per speed fasterst first
456
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
457
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
458
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
459
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
460
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
461
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
462
        else
463
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
464
#else
465
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
466
#endif
467
#else //RUNTIME_CPUDETECT
468
#ifdef HAVE_MMX2
469
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
470
#elif defined (HAVE_3DNOW)
471
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
472
#elif defined (HAVE_MMX)
473
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
474
#else
475
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
476
#endif
477
#endif //!RUNTIME_CPUDETECT
478
}
479

    
480
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
481
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
482

    
483
/* -pp Command line Help
 * The names listed here must match the filters[] and replaceTable[] entries exactly,
 * because the parser compares them case-sensitively.
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"        1. difference factor: default=64, higher -> more deblocking\n"
"        2. flatness threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa        fast                                        h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
520

    
521
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
522
{
523
        char temp[GET_MODE_BUFFER_SIZE];
524
        char *p= temp;
525
        char *filterDelimiters= ",/";
526
        char *optionDelimiters= ":";
527
        struct PPMode *ppMode;
528
        char *filterToken;
529

    
530
        ppMode= memalign(8, sizeof(PPMode));
531
        
532
        ppMode->lumMode= 0;
533
        ppMode->chromMode= 0;
534
        ppMode->maxTmpNoise[0]= 700;
535
        ppMode->maxTmpNoise[1]= 1500;
536
        ppMode->maxTmpNoise[2]= 3000;
537
        ppMode->maxAllowedY= 234;
538
        ppMode->minAllowedY= 16;
539
        ppMode->baseDcDiff= 256/4;
540
        ppMode->flatnessThreshold= 56-16;
541
        ppMode->maxClippedThreshold= 0.01;
542
        ppMode->error=0;
543

    
544
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
545

    
546
        if(verbose>1) printf("pp: %s\n", name);
547

    
548
        for(;;){
549
                char *filterName;
550
                int q= 1000000; //PP_QUALITY_MAX;
551
                int chrom=-1;
552
                char *option;
553
                char *options[OPTIONS_ARRAY_SIZE];
554
                int i;
555
                int filterNameOk=0;
556
                int numOfUnknownOptions=0;
557
                int enable=1; //does the user want us to enabled or disabled the filter
558

    
559
                filterToken= strtok(p, filterDelimiters);
560
                if(filterToken == NULL) break;
561
                p+= strlen(filterToken) + 1; // p points to next filterToken
562
                filterName= strtok(filterToken, optionDelimiters);
563
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
564

    
565
                if(*filterName == '-')
566
                {
567
                        enable=0;
568
                        filterName++;
569
                }
570

    
571
                for(;;){ //for all options
572
                        option= strtok(NULL, optionDelimiters);
573
                        if(option == NULL) break;
574

    
575
                        if(verbose>1) printf("pp: option: %s\n", option);
576
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
577
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
578
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
579
                        else
580
                        {
581
                                options[numOfUnknownOptions] = option;
582
                                numOfUnknownOptions++;
583
                        }
584
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
585
                }
586
                options[numOfUnknownOptions] = NULL;
587

    
588
                /* replace stuff from the replace Table */
589
                for(i=0; replaceTable[2*i]!=NULL; i++)
590
                {
591
                        if(!strcmp(replaceTable[2*i], filterName))
592
                        {
593
                                int newlen= strlen(replaceTable[2*i + 1]);
594
                                int plen;
595
                                int spaceLeft;
596

    
597
                                if(p==NULL) p= temp, *p=0;         //last filter
598
                                else p--, *p=',';                //not last filter
599

    
600
                                plen= strlen(p);
601
                                spaceLeft= p - temp + plen;
602
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
603
                                {
604
                                        ppMode->error++;
605
                                        break;
606
                                }
607
                                memmove(p + newlen, p, plen+1);
608
                                memcpy(p, replaceTable[2*i + 1], newlen);
609
                                filterNameOk=1;
610
                        }
611
                }
612

    
613
                for(i=0; filters[i].shortName!=NULL; i++)
614
                {
615
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
616
                        if(   !strcmp(filters[i].longName, filterName)
617
                           || !strcmp(filters[i].shortName, filterName))
618
                        {
619
                                ppMode->lumMode &= ~filters[i].mask;
620
                                ppMode->chromMode &= ~filters[i].mask;
621

    
622
                                filterNameOk=1;
623
                                if(!enable) break; // user wants to disable it
624

    
625
                                if(q >= filters[i].minLumQuality)
626
                                        ppMode->lumMode|= filters[i].mask;
627
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
628
                                        if(q >= filters[i].minChromQuality)
629
                                                ppMode->chromMode|= filters[i].mask;
630

    
631
                                if(filters[i].mask == LEVEL_FIX)
632
                                {
633
                                        int o;
634
                                        ppMode->minAllowedY= 16;
635
                                        ppMode->maxAllowedY= 234;
636
                                        for(o=0; options[o]!=NULL; o++)
637
                                        {
638
                                                if(  !strcmp(options[o],"fullyrange")
639
                                                   ||!strcmp(options[o],"f"))
640
                                                {
641
                                                        ppMode->minAllowedY= 0;
642
                                                        ppMode->maxAllowedY= 255;
643
                                                        numOfUnknownOptions--;
644
                                                }
645
                                        }
646
                                }
647
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
648
                                {
649
                                        int o;
650
                                        int numOfNoises=0;
651

    
652
                                        for(o=0; options[o]!=NULL; o++)
653
                                        {
654
                                                char *tail;
655
                                                ppMode->maxTmpNoise[numOfNoises]=
656
                                                        strtol(options[o], &tail, 0);
657
                                                if(tail!=options[o])
658
                                                {
659
                                                        numOfNoises++;
660
                                                        numOfUnknownOptions--;
661
                                                        if(numOfNoises >= 3) break;
662
                                                }
663
                                        }
664
                                }
665
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
666
                                {
667
                                        int o;
668

    
669
                                        for(o=0; options[o]!=NULL && o<2; o++)
670
                                        {
671
                                                char *tail;
672
                                                int val= strtol(options[o], &tail, 0);
673
                                                if(tail==options[o]) break;
674

    
675
                                                numOfUnknownOptions--;
676
                                                if(o==0) ppMode->baseDcDiff= val;
677
                                                else ppMode->flatnessThreshold= val;
678
                                        }
679
                                }
680
                                else if(filters[i].mask == FORCE_QUANT)
681
                                {
682
                                        int o;
683
                                        ppMode->forcedQuant= 15;
684

    
685
                                        for(o=0; options[o]!=NULL && o<1; o++)
686
                                        {
687
                                                char *tail;
688
                                                int val= strtol(options[o], &tail, 0);
689
                                                if(tail==options[o]) break;
690

    
691
                                                numOfUnknownOptions--;
692
                                                ppMode->forcedQuant= val;
693
                                        }
694
                                }
695
                        }
696
                }
697
                if(!filterNameOk) ppMode->error++;
698
                ppMode->error += numOfUnknownOptions;
699
        }
700

    
701
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
702
        if(ppMode->error)
703
        {
704
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
705
                free(ppMode);
706
                return NULL;
707
        }
708
        return ppMode;
709
}
710

    
711
/**
 * Releases a mode object with free().
 *
 * @param mode mode to release; NULL is accepted and ignored
 */
void pp_free_mode(pp_mode_t *mode){
        /* free(NULL) is defined to do nothing, so no explicit guard is needed */
        free(mode);
}
714

    
715
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
716
        PPContext *c= memalign(32, sizeof(PPContext));
717
        int i;
718
        int mbWidth = (width+15)>>4;
719
        int mbHeight= (height+15)>>4;
720
        
721
        c->cpuCaps= cpuCaps;
722

    
723
        c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
724
        c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
725
        for(i=0; i<256; i++)
726
                c->yHistogram[i]= width*height/64*15/256;
727

    
728
        for(i=0; i<3; i++)
729
        {
730
                //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
731
                c->tempBlured[i]= (uint8_t*)memalign(8, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024); //FIXME dstStride instead of width
732
                c->tempBluredPast[i]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
733

    
734
                memset(c->tempBlured[i], 0, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024);
735
                memset(c->tempBluredPast[i], 0, 256*((height+7)&(~7))/2 + 17*1024);
736
        }
737
        
738
        c->tempDst= (uint8_t*)memalign(8, 1024*24);
739
        c->tempSrc= (uint8_t*)memalign(8, 1024*24);
740
        c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
741
        c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
742
        c->deintTemp= (uint8_t*)memalign(8, width+16);
743
        c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
744
        memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
745

    
746
        c->frameNum=-1;
747

    
748
        return c;
749
}
750

    
751
void pp_free_context(void *vc){
752
        PPContext *c = (PPContext*)vc;
753
        int i;
754
        
755
        for(i=0; i<3; i++) free(c->tempBlured[i]);
756
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
757
        
758
        free(c->tempBlocks);
759
        free(c->yHistogram);
760
        free(c->tempDst);
761
        free(c->tempSrc);
762
        free(c->tempDstBlock);
763
        free(c->tempSrcBlock);
764
        free(c->deintTemp);
765
        free(c->nonBQPTable);
766
        
767
        free(c);
768
}
769

    
770
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
771
                 uint8_t * dst[3], int dstStride[3],
772
                 int width, int height,
773
                 QP_STORE_T *QP_store,  int QPStride,
774
                 pp_mode_t *vm,  void *vc, int pict_type)
775
{
776
        int mbWidth = (width+15)>>4;
777
        int mbHeight= (height+15)>>4;
778
        QP_STORE_T quantArray[2048/8];
779
        PPMode *mode = (PPMode*)vm;
780
        PPContext *c = (PPContext*)vc;
781

    
782
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
783
        {
784
                int i;
785
                QP_store= quantArray;
786
                QPStride= 0;
787
                if(mode->lumMode & FORCE_QUANT)
788
                        for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
789
                else
790
                        for(i=0; i<2048/8; i++) quantArray[i]= 1;
791
        }
792
if(0){
793
int x,y;
794
for(y=0; y<mbHeight; y++){
795
        for(x=0; x<mbWidth; x++){
796
                printf("%2d ", QP_store[x + y*QPStride]);
797
        }
798
        printf("\n");
799
}
800
        printf("\n");
801
}
802
//printf("pict_type:%d\n", pict_type);
803

    
804
        if(pict_type!=3)
805
        {
806
                int x,y;
807
                for(y=0; y<mbHeight; y++){
808
                        for(x=0; x<mbWidth; x++){
809
                                int qscale= QP_store[x + y*QPStride];
810
                                if(qscale&~31)
811
                                    qscale=31;
812
                                c->nonBQPTable[y*mbWidth + x]= qscale;
813
                        }
814
                }
815
        }
816

    
817
        if(verbose>2)
818
        {
819
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
820
        }
821

    
822
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
823
                width, height, QP_store, QPStride, 0, mode, c);
824

    
825
        width  = (width +1)>>1;
826
        height = (height+1)>>1;
827

    
828
        if(mode->chromMode)
829
        {
830
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
831
                        width, height, QP_store, QPStride, 1, mode, c);
832
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
833
                        width, height, QP_store, QPStride, 2, mode, c);
834
        }
835
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
836
        {
837
                memcpy(dst[1], src[1], srcStride[1]*height);
838
                memcpy(dst[2], src[2], srcStride[2]*height);
839
        }
840
        else
841
        {
842
                int y;
843
                for(y=0; y<height; y++)
844
                {
845
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
846
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
847
                }
848
        }
849
}
850