Statistics
| Branch: | Revision:

ffmpeg / postproc / postprocess.c @ 4407a3c4

History | View | Annotate | Download (24.7 KB)

1
/*
2
    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
                        C        MMX        MMX2        3DNow
21
isVertDC                Ec        Ec
22
isVertMinMaxOk                Ec        Ec
23
doVertLowPass                E                e        e
24
doVertDefFilter                Ec        Ec        e        e
25
isHorizDC                Ec        Ec
26
isHorizMinMaxOk                a        E
27
doHorizLowPass                E                e        e
28
doHorizDefFilter        Ec        Ec        e        e
29
deRing                        E                e        e*
30
Vertical RKAlgo1        E                a        a
31
Horizontal RKAlgo1                        a        a
32
Vertical X1#                a                E        E
33
Horizontal X1#                a                E        E
34
LinIpolDeinterlace        e                E        E*
35
CubicIpolDeinterlace        a                e        e*
36
LinBlendDeinterlace        e                E        E*
37
MedianDeinterlace#                 Ec        Ec
38
TempDeNoiser#                E                e        e
39

40
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
41
# more or less self-invented filters, so the exactness isn't too meaningful
42
E = Exact implementation
43
e = almost exact implementation (slightly different rounding, ...)
44
a = alternative / approximate impl
45
c = checked against the other implementations (-vo md5)
46
*/
47

    
48
/*
49
TODO:
50
reduce the time wasted on the mem transfer
51
unroll stuff if instructions depend too much on the prior one
52
move YScale thing to the end instead of fixing QP
53
write a faster and higher quality deblocking filter :)
54
make the mainloop more flexible (variable number of blocks at once
55
        (the if/else stuff per block is slowing things down)
56
compare the quality & speed of all filters
57
split this huge file
58
optimize c versions
59
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
60
...
61
*/
62

    
63
//Changelog: use the CVS log
64

    
65
#include "../config.h"
66
#include <inttypes.h>
67
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70
#ifdef HAVE_MALLOC_H
71
#include <malloc.h>
72
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78
#include "../libvo/fastmemcpy.h"
79
#include "postprocess.h"
80
#include "../mangle.h"
81

    
82
/* Small arithmetic helpers. They are macros, so each argument may be
   evaluated more than once: never pass an expression with side effects. */
#define MIN(a,b) ((b) < (a) ? (b) : (a))
#define MAX(a,b) ((b) > (a) ? (b) : (a))
#define ABS(a) (0 < (a) ? (a) : (-(a)))
#define SIGN(a) (0 < (a) ? 1 : -1) /* note: evaluates to -1 for zero */

/* size of the scratch copy of the filter string in pp_get_mode_by_name_and_quality() */
#define GET_MODE_BUFFER_SIZE 500
/* capacity of the per-filter options[] array in pp_get_mode_by_name_and_quality() */
#define OPTIONS_ARRAY_SIZE 10
/* number of rows processed per block by the filters below */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
92

    
93
#ifdef ARCH_X86
/* 8-byte aligned packed constants:
   wXX holds the 16-bit value 0x00XX replicated in all four words,
   bXX holds the 8-bit value 0xXX replicated in all eight bytes. */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
#endif

/* values above 1 make pp_get_mode_by_name_and_quality() print what it parses */
static int verbose= 0;

static const int deringThreshold= 20;

/* CPU capability flags: stored by pp_init(), tested by postProcess() */
static int cpuCaps=0;
108

    
109
/**
 * Describes one filter that can be named in a mode string
 * (see the filters[] table and pp_get_mode_by_name_and_quality()).
 */
struct PPFilter{
        char *shortName;        ///< short name accepted in the mode string
        char *longName;         ///< long name accepted in the mode string
        int chromDefault;       ///< is chrominance filtering on by default if this filter is manually activated
        int minLumQuality;      ///< minimum quality to turn luminance filtering on
        int minChromQuality;    ///< minimum quality to turn chrominance filtering on
        int mask;               ///< Bitmask to turn this filter on
};
117

    
118
typedef struct PPContext{
119
        uint8_t *tempBlocks; //used for the horizontal code
120

    
121
        /* we need 64bit here otherwise we?ll going to have a problem
122
           after watching a black picture for 5 hours*/
123
        uint64_t *yHistogram;
124

    
125
        uint64_t __attribute__((aligned(8))) packedYOffset;
126
        uint64_t __attribute__((aligned(8))) packedYScale;
127

    
128
        /* Temporal noise reducing buffers */
129
        uint8_t *tempBlured[3];
130
        int32_t *tempBluredPast[3];
131

    
132
        /* Temporary buffers for handling the last row(s) */
133
        uint8_t *tempDst;
134
        uint8_t *tempSrc;
135

    
136
        /* Temporary buffers for handling the last block */
137
        uint8_t *tempDstBlock;
138
        uint8_t *tempSrcBlock;
139
        uint8_t *deintTemp;
140

    
141
        uint64_t __attribute__((aligned(8))) pQPb;
142
        uint64_t __attribute__((aligned(8))) pQPb2;
143

    
144
        uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
145
        uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
146
        
147
        QP_STORE_T *nonBQPTable;
148
        
149
        int QP;
150
        int nonBQP;
151

    
152
        int frameNum;
153

    
154
        PPMode ppMode;
155
} PPContext;
156

    
157
static struct PPFilter filters[]=
158
{
159
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
160
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
161
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
162
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
163
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
164
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
165
        {"dr", "dering",                 1, 5, 6, DERING},
166
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
167
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
168
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
169
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
170
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
171
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
172
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
173
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
174
        {NULL, NULL,0,0,0,0} //End Marker
175
};
176

    
177
/* Alias table laid out as (name, expansion) pairs and closed by a single NULL.
   pp_get_mode_by_name_and_quality() splices the expansion into the mode
   string whenever a filter token equals the alias name. */
static char *replaceTable[]=
{
        "default",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL //End Marker
};
185

    
186
#ifdef ARCH_X86
/**
 * Reads every packed constant (and conditionally rewrites b00 with the value
 * it already has) so that, as the name says, the compiler has no reason to
 * report them as unused.
 */
static inline void unusedVariableWarningFixer()
{
        const uint64_t total= w05 + w20 + b00 + b01 + b02 + b08 + b80;

        if(total == 0)
                b00= 0;
}
#endif
192

    
193

    
194
#ifdef ARCH_X86
/* Thin wrappers, one per x86 prefetch instruction variant.
   Each one only issues the instruction for the address in p. */

/** Issues "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__(
                "prefetchnta (%0)\n\t"
                :
                : "r" (p)
        );
}

/** Issues "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__(
                "prefetcht0 (%0)\n\t"
                :
                : "r" (p)
        );
}

/** Issues "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__(
                "prefetcht1 (%0)\n\t"
                :
                : "r" (p)
        );
}

/** Issues "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__(
                "prefetcht2 (%0)\n\t"
                :
                : "r" (p)
        );
}
#endif
223

    
224
int pp_init(int caps){
225
    cpuCaps= caps;
226
    
227
    return 0;
228
}
229

    
230
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
231

    
232
/**
233
 * Check if the given 8x8 Block is mostly "flat"
234
 */
235
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
236
{
237
        int numEq= 0;
238
        int y;
239
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
240
        const int dcThreshold= dcOffset*2 + 1;
241
        for(y=0; y<BLOCK_SIZE; y++)
242
        {
243
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
244
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
245
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
246
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
247
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
248
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
249
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
250
                src+= stride;
251
        }
252
        return numEq > c->ppMode.flatnessThreshold;
253
}
254

    
255
/**
256
 * Check if the middle 8x8 Block in the given 8x16 block is flat
257
 */
258
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
259
        int numEq= 0;
260
        int y;
261
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
262
        const int dcThreshold= dcOffset*2 + 1;
263
        src+= stride*4; // src points to begin of the 8x8 Block
264
        for(y=0; y<BLOCK_SIZE-1; y++)
265
        {
266
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
267
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
268
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
269
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
270
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
271
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
272
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
273
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
274
                src+= stride;
275
        }
276
        return numEq > c->ppMode.flatnessThreshold;
277
}
278

    
279
/**
 * Compares the first and the last pixel of the row at src.
 * Only src[0] and src[7] are read; stride is not used.
 * @return 1 if they differ by at most 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        const int edgeDiff= src[0] - src[7];

        return abs(edgeDiff) <= 2*QP;
}
285

    
286
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
287
{
288
        int y;
289
        for(y=0; y<BLOCK_SIZE; y++)
290
        {
291
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
292

    
293
                if(ABS(middleEnergy) < 8*QP)
294
                {
295
                        const int q=(dst[3] - dst[4])/2;
296
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
297
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
298

    
299
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
300
                        d= MAX(d, 0);
301

    
302
                        d= (5*d + 32) >> 6;
303
                        d*= SIGN(-middleEnergy);
304

    
305
                        if(q>0)
306
                        {
307
                                d= d<0 ? 0 : d;
308
                                d= d>q ? q : d;
309
                        }
310
                        else
311
                        {
312
                                d= d>0 ? 0 : d;
313
                                d= d<q ? q : d;
314
                        }
315

    
316
                        dst[3]-= d;
317
                        dst[4]+= d;
318
                }
319
                dst+= stride;
320
        }
321
}
322

    
323
/**
324
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
325
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
326
 */
327
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
328
{
329

    
330
        int y;
331
        for(y=0; y<BLOCK_SIZE; y++)
332
        {
333
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
334
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
335

    
336
                int sums[9];
337
                sums[0] = first + dst[0];
338
                sums[1] = dst[0] + dst[1];
339
                sums[2] = dst[1] + dst[2];
340
                sums[3] = dst[2] + dst[3];
341
                sums[4] = dst[3] + dst[4];
342
                sums[5] = dst[4] + dst[5];
343
                sums[6] = dst[5] + dst[6];
344
                sums[7] = dst[6] + dst[7];
345
                sums[8] = dst[7] + last;
346

    
347
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
348
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
349
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
350
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
351
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
352
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
353
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
354
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
355

    
356
                dst+= stride;
357
        }
358
}
359

    
360
/**
361
 * Experimental Filter 1 (Horizontal)
362
 * will not damage linear gradients
363
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
364
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
365
 * MMX2 version does correct clipping C version doesnt
366
 * not identical with the vertical one
367
 */
368
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
369
{
370
        int y;
371
        static uint64_t *lut= NULL;
372
        if(lut==NULL)
373
        {
374
                int i;
375
                lut= (uint64_t*)memalign(8, 256*8);
376
                for(i=0; i<256; i++)
377
                {
378
                        int v= i < 128 ? 2*i : 2*(i-256);
379
/*
380
//Simulate 112242211 9-Tap filter
381
                        uint64_t a= (v/16) & 0xFF;
382
                        uint64_t b= (v/8) & 0xFF;
383
                        uint64_t c= (v/4) & 0xFF;
384
                        uint64_t d= (3*v/8) & 0xFF;
385
*/
386
//Simulate piecewise linear interpolation
387
                        uint64_t a= (v/16) & 0xFF;
388
                        uint64_t b= (v*3/16) & 0xFF;
389
                        uint64_t c= (v*5/16) & 0xFF;
390
                        uint64_t d= (7*v/16) & 0xFF;
391
                        uint64_t A= (0x100 - a)&0xFF;
392
                        uint64_t B= (0x100 - b)&0xFF;
393
                        uint64_t C= (0x100 - c)&0xFF;
394
                        uint64_t D= (0x100 - c)&0xFF;
395

    
396
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
397
                                (D<<24) | (C<<16) | (B<<8) | (A);
398
                        //lut[i] = (v<<32) | (v<<24);
399
                }
400
        }
401

    
402
        for(y=0; y<BLOCK_SIZE; y++)
403
        {
404
                int a= src[1] - src[2];
405
                int b= src[3] - src[4];
406
                int c= src[5] - src[6];
407

    
408
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
409

    
410
                if(d < QP)
411
                {
412
                        int v = d * SIGN(-b);
413

    
414
                        src[1] +=v/8;
415
                        src[2] +=v/4;
416
                        src[3] +=3*v/8;
417
                        src[4] -=3*v/8;
418
                        src[5] -=v/4;
419
                        src[6] -=v/8;
420

    
421
                }
422
                src+=stride;
423
        }
424
}
425

    
426

    
427
/* Template instantiation.
   postprocess_template.c is included once per CPU flavour that has to be
   built; RENAME() gives every function in it a flavour specific suffix and
   the HAVE_xxx / ARCH_X86 macros are forced to the values of that flavour
   right before each inclusion.
   Note: we have C, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one */

/* ---- step 1: decide which flavours to build ---- */

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#if defined (ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif

#endif //ARCH_X86

/* ---- step 2: drop the configured values, every flavour sets its own ---- */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

/* ---- step 3: one inclusion per selected flavour ---- */

//C versions
#if defined (COMPILE_C)
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#if defined (COMPILE_MMX)
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#if defined (COMPILE_MMX2)
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#if defined (COMPILE_3DNOW)
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
494

    
495
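// minor note: the HAVE_xyz macros have been undefined / redefined by the template inclusions above, so they no longer reflect config.h after this line -> don't rely on them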
496

    
497
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
498
        QP_STORE_T QPs[], int QPStride, int isColor, PPMode *ppMode, void *vc)
499
{
500
        PPContext *c= (PPContext *)vc;
501
        c->ppMode= *ppMode; //FIXME
502

    
503
        // useing ifs here as they are faster than function pointers allthough the
504
        // difference wouldnt be messureable here but its much better because
505
        // someone might exchange the cpu whithout restarting mplayer ;)
506
#ifdef RUNTIME_CPUDETECT
507
#ifdef ARCH_X86
508
        // ordered per speed fasterst first
509
        if(cpuCaps & PP_CPU_CAPS_MMX2)
510
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
511
        else if(cpuCaps & PP_CPU_CAPS_3DNOW)
512
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
513
        else if(cpuCaps & PP_CPU_CAPS_MMX)
514
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
515
        else
516
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
517
#else
518
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
519
#endif
520
#else //RUNTIME_CPUDETECT
521
#ifdef HAVE_MMX2
522
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
523
#elif defined (HAVE_3DNOW)
524
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
525
#elif defined (HAVE_MMX)
526
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
527
#else
528
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
529
#endif
530
#endif //!RUNTIME_CPUDETECT
531
}
532

    
533
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
534
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
535

    
536
/* -pp Command line Help
   Kept in sync with the tables and defaults used by the parser:
   - the filter is called "forcequant" (names are compared case sensitively)
   - "default" / "fast" expand to the strings in replaceTable[], which
     include the tmpnoise filter
   - the first deblocking threshold (baseDcDiff) defaults to 256/4 = 64
*/
char *pp_help=
"-npp <filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"-npp vdeblock:autoq,hdeblock:autoq,linblenddeint        -npp default,-vdeblock\n"
"short form example:\n"
"-npp vb:a,hb:a,lb                                        -npp de,-vb\n"
"more examples:\n"
"-npp tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"                        1. Threshold: default=64, higher -> more deblocking\n"
"                        2. Threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa        fast                                        h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
573

    
574
/**
575
 * returns a PPMode struct which will have a non 0 error variable if an error occured
576
 * name is the string after "-pp" on the command line
577
 * quality is a number from 0 to GET_PP_QUALITY_MAX
578
 */
579
struct PPMode pp_get_mode_by_name_and_quality(char *name, int quality)
580
{
581
        char temp[GET_MODE_BUFFER_SIZE];
582
        char *p= temp;
583
        char *filterDelimiters= ",/";
584
        char *optionDelimiters= ":";
585
        struct PPMode ppMode;
586
        char *filterToken;
587

    
588
        ppMode.lumMode= 0;
589
        ppMode.chromMode= 0;
590
        ppMode.maxTmpNoise[0]= 700;
591
        ppMode.maxTmpNoise[1]= 1500;
592
        ppMode.maxTmpNoise[2]= 3000;
593
        ppMode.maxAllowedY= 234;
594
        ppMode.minAllowedY= 16;
595
        ppMode.baseDcDiff= 256/4;
596
        ppMode.flatnessThreshold=40;
597
        ppMode.flatnessThreshold= 56-16;
598
        ppMode.maxClippedThreshold= 0.01;
599
        ppMode.error=0;
600

    
601
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
602

    
603
        if(verbose>1) printf("pp: %s\n", name);
604

    
605
        for(;;){
606
                char *filterName;
607
                int q= 1000000; //GET_PP_QUALITY_MAX;
608
                int chrom=-1;
609
                char *option;
610
                char *options[OPTIONS_ARRAY_SIZE];
611
                int i;
612
                int filterNameOk=0;
613
                int numOfUnknownOptions=0;
614
                int enable=1; //does the user want us to enabled or disabled the filter
615

    
616
                filterToken= strtok(p, filterDelimiters);
617
                if(filterToken == NULL) break;
618
                p+= strlen(filterToken) + 1; // p points to next filterToken
619
                filterName= strtok(filterToken, optionDelimiters);
620
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
621

    
622
                if(*filterName == '-')
623
                {
624
                        enable=0;
625
                        filterName++;
626
                }
627

    
628
                for(;;){ //for all options
629
                        option= strtok(NULL, optionDelimiters);
630
                        if(option == NULL) break;
631

    
632
                        if(verbose>1) printf("pp: option: %s\n", option);
633
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
634
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
635
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
636
                        else
637
                        {
638
                                options[numOfUnknownOptions] = option;
639
                                numOfUnknownOptions++;
640
                        }
641
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
642
                }
643
                options[numOfUnknownOptions] = NULL;
644

    
645
                /* replace stuff from the replace Table */
646
                for(i=0; replaceTable[2*i]!=NULL; i++)
647
                {
648
                        if(!strcmp(replaceTable[2*i], filterName))
649
                        {
650
                                int newlen= strlen(replaceTable[2*i + 1]);
651
                                int plen;
652
                                int spaceLeft;
653

    
654
                                if(p==NULL) p= temp, *p=0;         //last filter
655
                                else p--, *p=',';                //not last filter
656

    
657
                                plen= strlen(p);
658
                                spaceLeft= p - temp + plen;
659
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
660
                                {
661
                                        ppMode.error++;
662
                                        break;
663
                                }
664
                                memmove(p + newlen, p, plen+1);
665
                                memcpy(p, replaceTable[2*i + 1], newlen);
666
                                filterNameOk=1;
667
                        }
668
                }
669

    
670
                for(i=0; filters[i].shortName!=NULL; i++)
671
                {
672
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
673
                        if(   !strcmp(filters[i].longName, filterName)
674
                           || !strcmp(filters[i].shortName, filterName))
675
                        {
676
                                ppMode.lumMode &= ~filters[i].mask;
677
                                ppMode.chromMode &= ~filters[i].mask;
678

    
679
                                filterNameOk=1;
680
                                if(!enable) break; // user wants to disable it
681

    
682
                                if(q >= filters[i].minLumQuality)
683
                                        ppMode.lumMode|= filters[i].mask;
684
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
685
                                        if(q >= filters[i].minChromQuality)
686
                                                ppMode.chromMode|= filters[i].mask;
687

    
688
                                if(filters[i].mask == LEVEL_FIX)
689
                                {
690
                                        int o;
691
                                        ppMode.minAllowedY= 16;
692
                                        ppMode.maxAllowedY= 234;
693
                                        for(o=0; options[o]!=NULL; o++)
694
                                        {
695
                                                if(  !strcmp(options[o],"fullyrange")
696
                                                   ||!strcmp(options[o],"f"))
697
                                                {
698
                                                        ppMode.minAllowedY= 0;
699
                                                        ppMode.maxAllowedY= 255;
700
                                                        numOfUnknownOptions--;
701
                                                }
702
                                        }
703
                                }
704
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
705
                                {
706
                                        int o;
707
                                        int numOfNoises=0;
708

    
709
                                        for(o=0; options[o]!=NULL; o++)
710
                                        {
711
                                                char *tail;
712
                                                ppMode.maxTmpNoise[numOfNoises]=
713
                                                        strtol(options[o], &tail, 0);
714
                                                if(tail!=options[o])
715
                                                {
716
                                                        numOfNoises++;
717
                                                        numOfUnknownOptions--;
718
                                                        if(numOfNoises >= 3) break;
719
                                                }
720
                                        }
721
                                }
722
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
723
                                {
724
                                        int o;
725

    
726
                                        for(o=0; options[o]!=NULL && o<2; o++)
727
                                        {
728
                                                char *tail;
729
                                                int val= strtol(options[o], &tail, 0);
730
                                                if(tail==options[o]) break;
731

    
732
                                                numOfUnknownOptions--;
733
                                                if(o==0) ppMode.baseDcDiff= val;
734
                                                else ppMode.flatnessThreshold= val;
735
                                        }
736
                                }
737
                                else if(filters[i].mask == FORCE_QUANT)
738
                                {
739
                                        int o;
740
                                        ppMode.forcedQuant= 15;
741

    
742
                                        for(o=0; options[o]!=NULL && o<1; o++)
743
                                        {
744
                                                char *tail;
745
                                                int val= strtol(options[o], &tail, 0);
746
                                                if(tail==options[o]) break;
747

    
748
                                                numOfUnknownOptions--;
749
                                                ppMode.forcedQuant= val;
750
                                        }
751
                                }
752
                        }
753
                }
754
                if(!filterNameOk) ppMode.error++;
755
                ppMode.error += numOfUnknownOptions;
756
        }
757

    
758
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode.lumMode, ppMode.chromMode);
759
        return ppMode;
760
}
761

    
762
void *pp_get_context(int width, int height){
763
        PPContext *c= memalign(32, sizeof(PPContext));
764
        int i;
765
        int mbWidth = (width+15)>>4;
766
        int mbHeight= (height+15)>>4;
767

    
768
        c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
769
        c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
770
        for(i=0; i<256; i++)
771
                c->yHistogram[i]= width*height/64*15/256;
772

    
773
        for(i=0; i<3; i++)
774
        {
775
                //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
776
                c->tempBlured[i]= (uint8_t*)memalign(8, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024); //FIXME dstStride instead of width
777
                c->tempBluredPast[i]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
778

    
779
                memset(c->tempBlured[i], 0, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024);
780
                memset(c->tempBluredPast[i], 0, 256*((height+7)&(~7))/2 + 17*1024);
781
        }
782
        
783
        c->tempDst= (uint8_t*)memalign(8, 1024*24);
784
        c->tempSrc= (uint8_t*)memalign(8, 1024*24);
785
        c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
786
        c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
787
        c->deintTemp= (uint8_t*)memalign(8, width+16);
788
        c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
789
        memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
790

    
791
        c->frameNum=-1;
792

    
793
        return c;
794
}
795

    
796
void pp_free_context(void *vc){
797
        PPContext *c = (PPContext*)vc;
798
        int i;
799
        
800
        for(i=0; i<3; i++) free(c->tempBlured[i]);
801
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
802
        
803
        free(c->tempBlocks);
804
        free(c->yHistogram);
805
        free(c->tempDst);
806
        free(c->tempSrc);
807
        free(c->tempDstBlock);
808
        free(c->tempSrcBlock);
809
        free(c->deintTemp);
810
        free(c->nonBQPTable);
811
        
812
        free(c);
813
}
814

    
815
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
816
                 uint8_t * dst[3], int dstStride[3],
817
                 int width, int height,
818
                 QP_STORE_T *QP_store,  int QPStride,
819
                 PPMode *mode,  void *vc, int pict_type)
820
{
821
        int mbWidth = (width+15)>>4;
822
        int mbHeight= (height+15)>>4;
823
        QP_STORE_T quantArray[2048/8];
824
        PPContext *c = (PPContext*)vc;
825

    
826
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
827
        {
828
                int i;
829
                QP_store= quantArray;
830
                QPStride= 0;
831
                if(mode->lumMode & FORCE_QUANT)
832
                        for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
833
                else
834
                        for(i=0; i<2048/8; i++) quantArray[i]= 1;
835
        }
836
if(0){
837
int x,y;
838
for(y=0; y<mbHeight; y++){
839
        for(x=0; x<mbWidth; x++){
840
                printf("%2d ", QP_store[x + y*QPStride]);
841
        }
842
        printf("\n");
843
}
844
        printf("\n");
845
}
846
//printf("pict_type:%d\n", pict_type);
847

    
848
        if(pict_type!=3)
849
        {
850
                int x,y;
851
                for(y=0; y<mbHeight; y++){
852
                        for(x=0; x<mbWidth; x++){
853
                                int qscale= QP_store[x + y*QPStride];
854
                                if(qscale&~31)
855
                                    qscale=31;
856
                                c->nonBQPTable[y*mbWidth + x]= qscale;
857
                        }
858
                }
859
        }
860

    
861
        if(verbose>2)
862
        {
863
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
864
        }
865

    
866
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
867
                width, height, QP_store, QPStride, 0, mode, c);
868

    
869
        width  = (width +1)>>1;
870
        height = (height+1)>>1;
871

    
872
        if(mode->chromMode)
873
        {
874
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
875
                        width, height, QP_store, QPStride, 1, mode, c);
876
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
877
                        width, height, QP_store, QPStride, 2, mode, c);
878
        }
879
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
880
        {
881
                memcpy(dst[1], src[1], srcStride[1]*height);
882
                memcpy(dst[2], src[2], srcStride[2]*height);
883
        }
884
        else
885
        {
886
                int y;
887
                for(y=0; y<height; y++)
888
                {
889
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
890
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
891
                }
892
        }
893
}
894