Statistics
| Branch: | Revision:

ffmpeg / postproc / postprocess.c @ 1d9324fd

History | View | Annotate | Download (24.7 KB)

1
/*
2
    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
                        C        MMX        MMX2        3DNow
21
isVertDC                Ec        Ec
22
isVertMinMaxOk                Ec        Ec
23
doVertLowPass                E                e        e
24
doVertDefFilter                Ec        Ec        e        e
25
isHorizDC                Ec        Ec
26
isHorizMinMaxOk                a        E
27
doHorizLowPass                E                e        e
28
doHorizDefFilter        Ec        Ec        e        e
29
deRing                        E                e        e*
30
Vertical RKAlgo1        E                a        a
31
Horizontal RKAlgo1                        a        a
32
Vertical X1#                a                E        E
33
Horizontal X1#                a                E        E
34
LinIpolDeinterlace        e                E        E*
35
CubicIpolDeinterlace        a                e        e*
36
LinBlendDeinterlace        e                E        E*
37
MedianDeinterlace#                 Ec        Ec
38
TempDeNoiser#                E                e        e
39

40
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
41
# more or less self-invented filters, so the exactness isn't too meaningful
42
E = Exact implementation
43
e = almost exact implementation (slightly different rounding,...)
44
a = alternative / approximate impl
45
c = checked against the other implementations (-vo md5)
46
*/
47

    
48
/*
49
TODO:
50
reduce the time wasted on the mem transfer
51
unroll stuff if instructions depend too much on the prior one
52
move YScale thing to the end instead of fixing QP
53
write a faster and higher quality deblocking filter :)
54
make the mainloop more flexible (variable number of blocks at once
55
        (the if/else stuff per block is slowing things down)
56
compare the quality & speed of all filters
57
split this huge file
58
optimize c versions
59
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
60
...
61
*/
62

    
63
//Changelog: use the CVS log
64

    
65
#include "../config.h"
66
#include <inttypes.h>
67
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70
#ifdef HAVE_MALLOC_H
71
#include <malloc.h>
72
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78
#include "../libvo/fastmemcpy.h"
79
#include "postprocess.h"
80
#include "../mangle.h"
81

    
82
/* Small arithmetic helpers. Every argument may be evaluated more than once,
   so never pass an expression with side effects. */
#define MIN(x,y) ((x) > (y) ? (y) : (x))
#define MAX(x,y) ((x) < (y) ? (y) : (x))
#define ABS(x) ((x) > 0 ? (x) : (-(x)))
/* Note: SIGN(0) yields -1, there is no zero case. */
#define SIGN(x) ((x) > 0 ? 1 : -1)
86

    
87
#define GET_MODE_BUFFER_SIZE 500
88
#define OPTIONS_ARRAY_SIZE 10
89
#define BLOCK_SIZE 8
90
#define TEMP_STRIDE 8
91
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
92

    
93
#ifdef ARCH_X86
94
static uint64_t __attribute__((aligned(8))) w05=                0x0005000500050005LL;
95
static uint64_t __attribute__((aligned(8))) w20=                0x0020002000200020LL;
96
static uint64_t __attribute__((aligned(8))) b00=                 0x0000000000000000LL;
97
static uint64_t __attribute__((aligned(8))) b01=                 0x0101010101010101LL;
98
static uint64_t __attribute__((aligned(8))) b02=                 0x0202020202020202LL;
99
static uint64_t __attribute__((aligned(8))) b08=                 0x0808080808080808LL;
100
static uint64_t __attribute__((aligned(8))) b80=                 0x8080808080808080LL;
101
#endif
102

    
103
static int verbose= 0;
104

    
105
static const int deringThreshold= 20;
106

    
107
/**
 * Description of one selectable postprocessing filter.
 * The field order matters: the filters[] table uses positional initializers.
 */
struct PPFilter{
        char *shortName;        /* short name accepted in the filter string */
        char *longName;         /* long name accepted in the filter string */
        int chromDefault;       /* is chrominance filtering on by default if this filter is manually activated */
        int minLumQuality;      /* minimum quality to turn luminance filtering on */
        int minChromQuality;    /* minimum quality to turn chrominance filtering on */
        int mask;               /* bitmask to turn this filter on */
};
115

    
116
typedef struct PPContext{
117
        uint8_t *tempBlocks; //used for the horizontal code
118

    
119
        /* we need 64bit here otherwise we?ll going to have a problem
120
           after watching a black picture for 5 hours*/
121
        uint64_t *yHistogram;
122

    
123
        uint64_t __attribute__((aligned(8))) packedYOffset;
124
        uint64_t __attribute__((aligned(8))) packedYScale;
125

    
126
        /* Temporal noise reducing buffers */
127
        uint8_t *tempBlured[3];
128
        int32_t *tempBluredPast[3];
129

    
130
        /* Temporary buffers for handling the last row(s) */
131
        uint8_t *tempDst;
132
        uint8_t *tempSrc;
133

    
134
        /* Temporary buffers for handling the last block */
135
        uint8_t *tempDstBlock;
136
        uint8_t *tempSrcBlock;
137
        uint8_t *deintTemp;
138

    
139
        uint64_t __attribute__((aligned(8))) pQPb;
140
        uint64_t __attribute__((aligned(8))) pQPb2;
141

    
142
        uint64_t __attribute__((aligned(8))) mmxDcOffset[32];
143
        uint64_t __attribute__((aligned(8))) mmxDcThreshold[32];
144
        
145
        QP_STORE_T *nonBQPTable;
146
        
147
        int QP;
148
        int nonBQP;
149

    
150
        int frameNum;
151
        
152
        int cpuCaps;
153

    
154
        PPMode ppMode;
155
} PPContext;
156

    
157
static struct PPFilter filters[]=
158
{
159
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
160
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
161
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
162
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
163
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
164
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
165
        {"dr", "dering",                 1, 5, 6, DERING},
166
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
167
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
168
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
169
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
170
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
171
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
172
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
173
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
174
        {NULL, NULL,0,0,0,0} //End Marker
175
};
176

    
177
/*
 * Alias table: pairs of (alias name, filter string it expands to),
 * terminated by a single NULL. "de" and "fa" are the short forms of
 * "default" and "fast" and expand to the same strings.
 */
static char *replaceTable[]=
{
        "default",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL /* end marker */
};
185

    
186
#ifdef ARCH_X86
187
static inline void unusedVariableWarningFixer()
188
{
189
        if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
190
}
191
#endif
192

    
193

    
194
#ifdef ARCH_X86
195
/* Issue an x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
        __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
201

    
202
/* Issue an x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
        __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
208

    
209
/* Issue an x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
        __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
215

    
216
/* Issue an x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
        __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
222
#endif
223

    
224
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
225

    
226
/**
227
 * Check if the given 8x8 Block is mostly "flat"
228
 */
229
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
230
{
231
        int numEq= 0;
232
        int y;
233
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
234
        const int dcThreshold= dcOffset*2 + 1;
235
        for(y=0; y<BLOCK_SIZE; y++)
236
        {
237
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
238
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
239
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
240
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
241
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
242
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
243
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
244
                src+= stride;
245
        }
246
        return numEq > c->ppMode.flatnessThreshold;
247
}
248

    
249
/**
250
 * Check if the middle 8x8 Block in the given 8x16 block is flat
251
 */
252
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
253
        int numEq= 0;
254
        int y;
255
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
256
        const int dcThreshold= dcOffset*2 + 1;
257
        src+= stride*4; // src points to begin of the 8x8 Block
258
        for(y=0; y<BLOCK_SIZE-1; y++)
259
        {
260
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
261
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
262
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
263
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
264
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
265
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
266
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
267
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
268
                src+= stride;
269
        }
270
        return numEq > c->ppMode.flatnessThreshold;
271
}
272

    
273
/**
 * Approximate min/max check for horizontal deblocking: only the first and
 * the last pixel of the first row are compared.
 * @param src    first pixel of the row
 * @param stride unused
 * @param QP     quantizer, the allowed difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        const int edgeDiff= src[0] - src[7];

        return abs(edgeDiff) > 2*QP ? 0 : 1;
}
279

    
280
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
281
{
282
        int y;
283
        for(y=0; y<BLOCK_SIZE; y++)
284
        {
285
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
286

    
287
                if(ABS(middleEnergy) < 8*QP)
288
                {
289
                        const int q=(dst[3] - dst[4])/2;
290
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
291
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
292

    
293
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
294
                        d= MAX(d, 0);
295

    
296
                        d= (5*d + 32) >> 6;
297
                        d*= SIGN(-middleEnergy);
298

    
299
                        if(q>0)
300
                        {
301
                                d= d<0 ? 0 : d;
302
                                d= d>q ? q : d;
303
                        }
304
                        else
305
                        {
306
                                d= d>0 ? 0 : d;
307
                                d= d<q ? q : d;
308
                        }
309

    
310
                        dst[3]-= d;
311
                        dst[4]+= d;
312
                }
313
                dst+= stride;
314
        }
315
}
316

    
317
/**
318
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
319
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
320
 */
321
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
322
{
323

    
324
        int y;
325
        for(y=0; y<BLOCK_SIZE; y++)
326
        {
327
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
328
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
329

    
330
                int sums[9];
331
                sums[0] = first + dst[0];
332
                sums[1] = dst[0] + dst[1];
333
                sums[2] = dst[1] + dst[2];
334
                sums[3] = dst[2] + dst[3];
335
                sums[4] = dst[3] + dst[4];
336
                sums[5] = dst[4] + dst[5];
337
                sums[6] = dst[5] + dst[6];
338
                sums[7] = dst[6] + dst[7];
339
                sums[8] = dst[7] + last;
340

    
341
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
342
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
343
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
344
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
345
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
346
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
347
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
348
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
349

    
350
                dst+= stride;
351
        }
352
}
353

    
354
/**
355
 * Experimental Filter 1 (Horizontal)
356
 * will not damage linear gradients
357
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
358
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
359
 * MMX2 version does correct clipping C version doesnt
360
 * not identical with the vertical one
361
 */
362
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
363
{
364
        int y;
365
        static uint64_t *lut= NULL;
366
        if(lut==NULL)
367
        {
368
                int i;
369
                lut= (uint64_t*)memalign(8, 256*8);
370
                for(i=0; i<256; i++)
371
                {
372
                        int v= i < 128 ? 2*i : 2*(i-256);
373
/*
374
//Simulate 112242211 9-Tap filter
375
                        uint64_t a= (v/16) & 0xFF;
376
                        uint64_t b= (v/8) & 0xFF;
377
                        uint64_t c= (v/4) & 0xFF;
378
                        uint64_t d= (3*v/8) & 0xFF;
379
*/
380
//Simulate piecewise linear interpolation
381
                        uint64_t a= (v/16) & 0xFF;
382
                        uint64_t b= (v*3/16) & 0xFF;
383
                        uint64_t c= (v*5/16) & 0xFF;
384
                        uint64_t d= (7*v/16) & 0xFF;
385
                        uint64_t A= (0x100 - a)&0xFF;
386
                        uint64_t B= (0x100 - b)&0xFF;
387
                        uint64_t C= (0x100 - c)&0xFF;
388
                        uint64_t D= (0x100 - c)&0xFF;
389

    
390
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
391
                                (D<<24) | (C<<16) | (B<<8) | (A);
392
                        //lut[i] = (v<<32) | (v<<24);
393
                }
394
        }
395

    
396
        for(y=0; y<BLOCK_SIZE; y++)
397
        {
398
                int a= src[1] - src[2];
399
                int b= src[3] - src[4];
400
                int c= src[5] - src[6];
401

    
402
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
403

    
404
                if(d < QP)
405
                {
406
                        int v = d * SIGN(-b);
407

    
408
                        src[1] +=v/8;
409
                        src[2] +=v/4;
410
                        src[3] +=3*v/8;
411
                        src[4] -=3*v/8;
412
                        src[5] -=v/4;
413
                        src[6] -=v/8;
414

    
415
                }
416
                src+=stride;
417
        }
418
}
419

    
420

    
421
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
422
//Plain C versions
423
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
424
#define COMPILE_C
425
#endif
426

    
427
#ifdef ARCH_X86
428

    
429
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
430
#define COMPILE_MMX
431
#endif
432

    
433
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
434
#define COMPILE_MMX2
435
#endif
436

    
437
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
438
#define COMPILE_3DNOW
439
#endif
440
#endif //ARCH_X86
441

    
442
#undef HAVE_MMX
443
#undef HAVE_MMX2
444
#undef HAVE_3DNOW
445
#undef ARCH_X86
446

    
447
#ifdef COMPILE_C
448
#undef HAVE_MMX
449
#undef HAVE_MMX2
450
#undef HAVE_3DNOW
451
#undef ARCH_X86
452
#define RENAME(a) a ## _C
453
#include "postprocess_template.c"
454
#endif
455

    
456
//MMX versions
457
#ifdef COMPILE_MMX
458
#undef RENAME
459
#define HAVE_MMX
460
#undef HAVE_MMX2
461
#undef HAVE_3DNOW
462
#define ARCH_X86
463
#define RENAME(a) a ## _MMX
464
#include "postprocess_template.c"
465
#endif
466

    
467
//MMX2 versions
468
#ifdef COMPILE_MMX2
469
#undef RENAME
470
#define HAVE_MMX
471
#define HAVE_MMX2
472
#undef HAVE_3DNOW
473
#define ARCH_X86
474
#define RENAME(a) a ## _MMX2
475
#include "postprocess_template.c"
476
#endif
477

    
478
//3DNOW versions
479
#ifdef COMPILE_3DNOW
480
#undef RENAME
481
#define HAVE_MMX
482
#undef HAVE_MMX2
483
#define HAVE_3DNOW
484
#define ARCH_X86
485
#define RENAME(a) a ## _3DNow
486
#include "postprocess_template.c"
487
#endif
488

    
489
// minor note: the HAVE_xyz is messed up after that line so dont use it
490

    
491
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
492
        QP_STORE_T QPs[], int QPStride, int isColor, PPMode *ppMode, pp_context *vc)
493
{
494
        PPContext *c= (PPContext *)vc;
495
        c->ppMode= *ppMode; //FIXME
496

    
497
        // useing ifs here as they are faster than function pointers allthough the
498
        // difference wouldnt be messureable here but its much better because
499
        // someone might exchange the cpu whithout restarting mplayer ;)
500
#ifdef RUNTIME_CPUDETECT
501
#ifdef ARCH_X86
502
        // ordered per speed fasterst first
503
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
504
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
505
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
506
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
507
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
508
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
509
        else
510
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
511
#else
512
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
513
#endif
514
#else //RUNTIME_CPUDETECT
515
#ifdef HAVE_MMX2
516
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
517
#elif defined (HAVE_3DNOW)
518
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
519
#elif defined (HAVE_MMX)
520
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
521
#else
522
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
523
#endif
524
#endif //!RUNTIME_CPUDETECT
525
}
526

    
527
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
528
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
529

    
530
/* -pp command line help.
   The filter names listed here have to be spelled exactly as in the filters
   and replaceTable tables, since the parser compares names case sensitively.
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq,hdeblock:autoq,linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a,hb:a,lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"                        1. Threshold: default=1, higher -> more deblocking\n"
"                        2. Threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa        fast                                        h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
567

    
568
/**
569
 * returns a PPMode struct which will have a non 0 error variable if an error occured
570
 * name is the string after "-pp" on the command line
571
 * quality is a number from 0 to GET_PP_QUALITY_MAX
572
 */
573
struct PPMode pp_get_mode_by_name_and_quality(char *name, int quality)
574
{
575
        char temp[GET_MODE_BUFFER_SIZE];
576
        char *p= temp;
577
        char *filterDelimiters= ",/";
578
        char *optionDelimiters= ":";
579
        struct PPMode ppMode;
580
        char *filterToken;
581

    
582
        ppMode.lumMode= 0;
583
        ppMode.chromMode= 0;
584
        ppMode.maxTmpNoise[0]= 700;
585
        ppMode.maxTmpNoise[1]= 1500;
586
        ppMode.maxTmpNoise[2]= 3000;
587
        ppMode.maxAllowedY= 234;
588
        ppMode.minAllowedY= 16;
589
        ppMode.baseDcDiff= 256/4;
590
        ppMode.flatnessThreshold=40;
591
        ppMode.flatnessThreshold= 56-16;
592
        ppMode.maxClippedThreshold= 0.01;
593
        ppMode.error=0;
594

    
595
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
596

    
597
        if(verbose>1) printf("pp: %s\n", name);
598

    
599
        for(;;){
600
                char *filterName;
601
                int q= 1000000; //GET_PP_QUALITY_MAX;
602
                int chrom=-1;
603
                char *option;
604
                char *options[OPTIONS_ARRAY_SIZE];
605
                int i;
606
                int filterNameOk=0;
607
                int numOfUnknownOptions=0;
608
                int enable=1; //does the user want us to enabled or disabled the filter
609

    
610
                filterToken= strtok(p, filterDelimiters);
611
                if(filterToken == NULL) break;
612
                p+= strlen(filterToken) + 1; // p points to next filterToken
613
                filterName= strtok(filterToken, optionDelimiters);
614
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
615

    
616
                if(*filterName == '-')
617
                {
618
                        enable=0;
619
                        filterName++;
620
                }
621

    
622
                for(;;){ //for all options
623
                        option= strtok(NULL, optionDelimiters);
624
                        if(option == NULL) break;
625

    
626
                        if(verbose>1) printf("pp: option: %s\n", option);
627
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
628
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
629
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
630
                        else
631
                        {
632
                                options[numOfUnknownOptions] = option;
633
                                numOfUnknownOptions++;
634
                        }
635
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
636
                }
637
                options[numOfUnknownOptions] = NULL;
638

    
639
                /* replace stuff from the replace Table */
640
                for(i=0; replaceTable[2*i]!=NULL; i++)
641
                {
642
                        if(!strcmp(replaceTable[2*i], filterName))
643
                        {
644
                                int newlen= strlen(replaceTable[2*i + 1]);
645
                                int plen;
646
                                int spaceLeft;
647

    
648
                                if(p==NULL) p= temp, *p=0;         //last filter
649
                                else p--, *p=',';                //not last filter
650

    
651
                                plen= strlen(p);
652
                                spaceLeft= p - temp + plen;
653
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
654
                                {
655
                                        ppMode.error++;
656
                                        break;
657
                                }
658
                                memmove(p + newlen, p, plen+1);
659
                                memcpy(p, replaceTable[2*i + 1], newlen);
660
                                filterNameOk=1;
661
                        }
662
                }
663

    
664
                for(i=0; filters[i].shortName!=NULL; i++)
665
                {
666
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
667
                        if(   !strcmp(filters[i].longName, filterName)
668
                           || !strcmp(filters[i].shortName, filterName))
669
                        {
670
                                ppMode.lumMode &= ~filters[i].mask;
671
                                ppMode.chromMode &= ~filters[i].mask;
672

    
673
                                filterNameOk=1;
674
                                if(!enable) break; // user wants to disable it
675

    
676
                                if(q >= filters[i].minLumQuality)
677
                                        ppMode.lumMode|= filters[i].mask;
678
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
679
                                        if(q >= filters[i].minChromQuality)
680
                                                ppMode.chromMode|= filters[i].mask;
681

    
682
                                if(filters[i].mask == LEVEL_FIX)
683
                                {
684
                                        int o;
685
                                        ppMode.minAllowedY= 16;
686
                                        ppMode.maxAllowedY= 234;
687
                                        for(o=0; options[o]!=NULL; o++)
688
                                        {
689
                                                if(  !strcmp(options[o],"fullyrange")
690
                                                   ||!strcmp(options[o],"f"))
691
                                                {
692
                                                        ppMode.minAllowedY= 0;
693
                                                        ppMode.maxAllowedY= 255;
694
                                                        numOfUnknownOptions--;
695
                                                }
696
                                        }
697
                                }
698
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
699
                                {
700
                                        int o;
701
                                        int numOfNoises=0;
702

    
703
                                        for(o=0; options[o]!=NULL; o++)
704
                                        {
705
                                                char *tail;
706
                                                ppMode.maxTmpNoise[numOfNoises]=
707
                                                        strtol(options[o], &tail, 0);
708
                                                if(tail!=options[o])
709
                                                {
710
                                                        numOfNoises++;
711
                                                        numOfUnknownOptions--;
712
                                                        if(numOfNoises >= 3) break;
713
                                                }
714
                                        }
715
                                }
716
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
717
                                {
718
                                        int o;
719

    
720
                                        for(o=0; options[o]!=NULL && o<2; o++)
721
                                        {
722
                                                char *tail;
723
                                                int val= strtol(options[o], &tail, 0);
724
                                                if(tail==options[o]) break;
725

    
726
                                                numOfUnknownOptions--;
727
                                                if(o==0) ppMode.baseDcDiff= val;
728
                                                else ppMode.flatnessThreshold= val;
729
                                        }
730
                                }
731
                                else if(filters[i].mask == FORCE_QUANT)
732
                                {
733
                                        int o;
734
                                        ppMode.forcedQuant= 15;
735

    
736
                                        for(o=0; options[o]!=NULL && o<1; o++)
737
                                        {
738
                                                char *tail;
739
                                                int val= strtol(options[o], &tail, 0);
740
                                                if(tail==options[o]) break;
741

    
742
                                                numOfUnknownOptions--;
743
                                                ppMode.forcedQuant= val;
744
                                        }
745
                                }
746
                        }
747
                }
748
                if(!filterNameOk) ppMode.error++;
749
                ppMode.error += numOfUnknownOptions;
750
        }
751

    
752
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode.lumMode, ppMode.chromMode);
753
        return ppMode;
754
}
755

    
756
void *pp_get_context(int width, int height, int cpuCaps){
757
        PPContext *c= memalign(32, sizeof(PPContext));
758
        int i;
759
        int mbWidth = (width+15)>>4;
760
        int mbHeight= (height+15)>>4;
761
        
762
        c->cpuCaps= cpuCaps;
763

    
764
        c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
765
        c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
766
        for(i=0; i<256; i++)
767
                c->yHistogram[i]= width*height/64*15/256;
768

    
769
        for(i=0; i<3; i++)
770
        {
771
                //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
772
                c->tempBlured[i]= (uint8_t*)memalign(8, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024); //FIXME dstStride instead of width
773
                c->tempBluredPast[i]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
774

    
775
                memset(c->tempBlured[i], 0, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024);
776
                memset(c->tempBluredPast[i], 0, 256*((height+7)&(~7))/2 + 17*1024);
777
        }
778
        
779
        c->tempDst= (uint8_t*)memalign(8, 1024*24);
780
        c->tempSrc= (uint8_t*)memalign(8, 1024*24);
781
        c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
782
        c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
783
        c->deintTemp= (uint8_t*)memalign(8, width+16);
784
        c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
785
        memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
786

    
787
        c->frameNum=-1;
788

    
789
        return c;
790
}
791

    
792
void pp_free_context(void *vc){
793
        PPContext *c = (PPContext*)vc;
794
        int i;
795
        
796
        for(i=0; i<3; i++) free(c->tempBlured[i]);
797
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
798
        
799
        free(c->tempBlocks);
800
        free(c->yHistogram);
801
        free(c->tempDst);
802
        free(c->tempSrc);
803
        free(c->tempDstBlock);
804
        free(c->tempSrcBlock);
805
        free(c->deintTemp);
806
        free(c->nonBQPTable);
807
        
808
        free(c);
809
}
810

    
811
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
812
                 uint8_t * dst[3], int dstStride[3],
813
                 int width, int height,
814
                 QP_STORE_T *QP_store,  int QPStride,
815
                 PPMode *mode,  void *vc, int pict_type)
816
{
817
        int mbWidth = (width+15)>>4;
818
        int mbHeight= (height+15)>>4;
819
        QP_STORE_T quantArray[2048/8];
820
        PPContext *c = (PPContext*)vc;
821

    
822
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
823
        {
824
                int i;
825
                QP_store= quantArray;
826
                QPStride= 0;
827
                if(mode->lumMode & FORCE_QUANT)
828
                        for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
829
                else
830
                        for(i=0; i<2048/8; i++) quantArray[i]= 1;
831
        }
832
if(0){
833
int x,y;
834
for(y=0; y<mbHeight; y++){
835
        for(x=0; x<mbWidth; x++){
836
                printf("%2d ", QP_store[x + y*QPStride]);
837
        }
838
        printf("\n");
839
}
840
        printf("\n");
841
}
842
//printf("pict_type:%d\n", pict_type);
843

    
844
        if(pict_type!=3)
845
        {
846
                int x,y;
847
                for(y=0; y<mbHeight; y++){
848
                        for(x=0; x<mbWidth; x++){
849
                                int qscale= QP_store[x + y*QPStride];
850
                                if(qscale&~31)
851
                                    qscale=31;
852
                                c->nonBQPTable[y*mbWidth + x]= qscale;
853
                        }
854
                }
855
        }
856

    
857
        if(verbose>2)
858
        {
859
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
860
        }
861

    
862
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
863
                width, height, QP_store, QPStride, 0, mode, c);
864

    
865
        width  = (width +1)>>1;
866
        height = (height+1)>>1;
867

    
868
        if(mode->chromMode)
869
        {
870
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
871
                        width, height, QP_store, QPStride, 1, mode, c);
872
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
873
                        width, height, QP_store, QPStride, 2, mode, c);
874
        }
875
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
876
        {
877
                memcpy(dst[1], src[1], srcStride[1]*height);
878
                memcpy(dst[2], src[2], srcStride[2]*height);
879
        }
880
        else
881
        {
882
                int y;
883
                for(y=0; y<height; y++)
884
                {
885
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
886
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
887
                }
888
        }
889
}
890