Statistics
| Branch: | Revision:

ffmpeg / libavcodec / libpostproc / postprocess.c @ ca390e72

History | View | Annotate | Download (24.1 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
                        C        MMX        MMX2        3DNow
21
isVertDC                Ec        Ec
22
isVertMinMaxOk                Ec        Ec
23
doVertLowPass                E                e        e
24
doVertDefFilter                Ec        Ec        e        e
25
isHorizDC                Ec        Ec
26
isHorizMinMaxOk                a        E
27
doHorizLowPass                E                e        e
28
doHorizDefFilter        Ec        Ec        e        e
29
deRing                        E                e        e*
30
Vertical RKAlgo1        E                a        a
31
Horizontal RKAlgo1                        a        a
32
Vertical X1#                a                E        E
33
Horizontal X1#                a                E        E
34
LinIpolDeinterlace        e                E        E*
35
CubicIpolDeinterlace        a                e        e*
36
LinBlendDeinterlace        e                E        E*
37
MedianDeinterlace#        E        Ec        Ec
38
TempDeNoiser#                E                e        e
39

40
* I don't have a 3DNow CPU, so it is untested, but no one has said it doesn't work, so it seems to work
41
# more or less self-invented filters, so the exactness isn't too meaningful
42
E = Exact implementation
43
e = almost exact implementation (slightly different rounding, ...)
44
a = alternative / approximate impl
45
c = checked against the other implementations (-vo md5)
46
*/
47

    
48
/*
49
TODO:
50
reduce the time wasted on the mem transfer
51
unroll stuff if instructions depend too much on the prior one
52
move YScale thing to the end instead of fixing QP
53
write a faster and higher quality deblocking filter :)
54
make the mainloop more flexible (variable number of blocks at once
55
        (the if/else stuff per block is slowing things down)
56
compare the quality & speed of all filters
57
split this huge file
58
optimize c versions
59
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
60
...
61
*/
62

    
63
//Changelog: use the CVS log
64

    
65
#include "config.h"
66
#include <inttypes.h>
67
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70
#ifdef HAVE_MALLOC_H
71
#include <malloc.h>
72
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78
#ifdef USE_FASTMEMCPY
79
#include "libvo/fastmemcpy.h"
80
#endif
81
#include "postprocess.h"
82
#include "postprocess_internal.h"
83

    
84
#include "mangle.h" //FIXME should be supressed
85

    
86
/* Fallback when memalign() is unavailable: plain malloc(), so the requested
 * alignment argument is ignored. */
#ifndef HAVE_MEMALIGN
#define memalign(alignment, size) malloc(size)
#endif
89

    
90
/*
 * Small arithmetic helpers. They are macros, so an argument may be evaluated
 * more than once: do not pass expressions with side effects.
 * Note that SIGN(0) yields -1, not 0.
 */
#define MIN(x, y) ((x) > (y) ? (y) : (x))
#define MAX(x, y) ((x) < (y) ? (y) : (x))
#define ABS(x)    ((x) > 0 ? (x) : (-(x)))
#define SIGN(x)   ((x) > 0 ? 1 : -1)
94

    
95
/* Capacity of the scratch copy of the mode string made by the mode parser. */
#define GET_MODE_BUFFER_SIZE    500
/* Capacity of the per-filter array of unrecognised option strings. */
#define OPTIONS_ARRAY_SIZE      10
/* Edge length, in pixels, of the blocks the filters in this file iterate over. */
#define BLOCK_SIZE              8
#define TEMP_STRIDE             8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
100

    
101
#ifdef ARCH_X86
102
/*
 * 8-byte aligned 64-bit constants.
 * wNN holds the 16-bit value 0x00NN repeated four times,
 * bNN holds the byte 0xNN repeated eight times.
 */
static uint64_t __attribute__((aligned(8))) w05 = 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20 = 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00 = 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01 = 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02 = 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08 = 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80 = 0x8080808080808080LL;
109
#endif
110

    
111
/* Debug verbosity; values above 1 make the mode parser print what it parses. */
static int verbose = 0;

/* NOTE(review): not referenced in this part of the file; presumably consumed
 * by the dering filter in the included template -- confirm. */
static const int deringThreshold = 20;
114

    
115

    
116
static struct PPFilter filters[]=
117
{
118
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
119
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
120
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
121
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
122
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
123
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
124
        {"dr", "dering",                 1, 5, 6, DERING},
125
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
126
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
127
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
128
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
129
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
130
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
131
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
132
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
133
        {NULL, NULL,0,0,0,0} //End Marker
134
};
135

    
136
/*
 * Alias expansion table: entries come in pairs (alias, replacement text),
 * and a single NULL terminates the list.
 */
static char *replaceTable[]=
{
        "default",
        "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

        "de",
        "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

        "fast",
        "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

        "fa",
        "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",

        NULL //End Marker
};
144

    
145
#ifdef ARCH_X86
146
static inline void unusedVariableWarningFixer()
147
{
148
        if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
149
}
150
#endif
151

    
152

    
153
#ifdef ARCH_X86
154
static inline void prefetchnta(void *p)
155
{
156
        asm volatile(        "prefetchnta (%0)\n\t"
157
                : : "r" (p)
158
        );
159
}
160

    
161
static inline void prefetcht0(void *p)
162
{
163
        asm volatile(        "prefetcht0 (%0)\n\t"
164
                : : "r" (p)
165
        );
166
}
167

    
168
static inline void prefetcht1(void *p)
169
{
170
        asm volatile(        "prefetcht1 (%0)\n\t"
171
                : : "r" (p)
172
        );
173
}
174

    
175
static inline void prefetcht2(void *p)
176
{
177
        asm volatile(        "prefetcht2 (%0)\n\t"
178
                : : "r" (p)
179
        );
180
}
181
#endif
182

    
183
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
184

    
185
/**
186
 * Check if the given 8x8 Block is mostly "flat"
187
 */
188
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
189
{
190
        int numEq= 0;
191
        int y;
192
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
193
        const int dcThreshold= dcOffset*2 + 1;
194
        for(y=0; y<BLOCK_SIZE; y++)
195
        {
196
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
197
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
198
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
199
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
200
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
202
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
203
                src+= stride;
204
        }
205
        return numEq > c->ppMode.flatnessThreshold;
206
}
207

    
208
/**
209
 * Check if the middle 8x8 Block in the given 8x16 block is flat
210
 */
211
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
212
        int numEq= 0;
213
        int y;
214
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
215
        const int dcThreshold= dcOffset*2 + 1;
216
        src+= stride*4; // src points to begin of the 8x8 Block
217
        for(y=0; y<BLOCK_SIZE-1; y++)
218
        {
219
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227
                src+= stride;
228
        }
229
        return numEq > c->ppMode.flatnessThreshold;
230
}
231

    
232
/**
 * Test whether the first and last pixel of an 8 pixel row are close enough.
 *
 * @param src    the row; only src[0] and src[7] are read
 * @param stride unused
 * @param QP     quantizer; the allowed absolute difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        const int edgeDiff= src[0] - src[7];

        (void)stride;
        return abs(edgeDiff) <= 2*QP;
}
238

    
239
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
240
{
241
        int y;
242
        for(y=0; y<BLOCK_SIZE; y++)
243
        {
244
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
245

    
246
                if(ABS(middleEnergy) < 8*QP)
247
                {
248
                        const int q=(dst[3] - dst[4])/2;
249
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
250
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
251

    
252
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
253
                        d= MAX(d, 0);
254

    
255
                        d= (5*d + 32) >> 6;
256
                        d*= SIGN(-middleEnergy);
257

    
258
                        if(q>0)
259
                        {
260
                                d= d<0 ? 0 : d;
261
                                d= d>q ? q : d;
262
                        }
263
                        else
264
                        {
265
                                d= d>0 ? 0 : d;
266
                                d= d<q ? q : d;
267
                        }
268

    
269
                        dst[3]-= d;
270
                        dst[4]+= d;
271
                }
272
                dst+= stride;
273
        }
274
}
275

    
276
/**
277
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
278
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
279
 */
280
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
281
{
282

    
283
        int y;
284
        for(y=0; y<BLOCK_SIZE; y++)
285
        {
286
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
287
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
288

    
289
                int sums[9];
290
                sums[0] = first + dst[0];
291
                sums[1] = dst[0] + dst[1];
292
                sums[2] = dst[1] + dst[2];
293
                sums[3] = dst[2] + dst[3];
294
                sums[4] = dst[3] + dst[4];
295
                sums[5] = dst[4] + dst[5];
296
                sums[6] = dst[5] + dst[6];
297
                sums[7] = dst[6] + dst[7];
298
                sums[8] = dst[7] + last;
299

    
300
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
301
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
302
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
303
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
304
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
305
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
306
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
307
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
308

    
309
                dst+= stride;
310
        }
311
}
312

    
313
/**
314
 * Experimental Filter 1 (Horizontal)
315
 * will not damage linear gradients
316
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
317
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
318
 * MMX2 version does correct clipping C version doesnt
319
 * not identical with the vertical one
320
 */
321
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
322
{
323
        int y;
324
        static uint64_t *lut= NULL;
325
        if(lut==NULL)
326
        {
327
                int i;
328
                lut= (uint64_t*)memalign(8, 256*8);
329
                for(i=0; i<256; i++)
330
                {
331
                        int v= i < 128 ? 2*i : 2*(i-256);
332
/*
333
//Simulate 112242211 9-Tap filter
334
                        uint64_t a= (v/16) & 0xFF;
335
                        uint64_t b= (v/8) & 0xFF;
336
                        uint64_t c= (v/4) & 0xFF;
337
                        uint64_t d= (3*v/8) & 0xFF;
338
*/
339
//Simulate piecewise linear interpolation
340
                        uint64_t a= (v/16) & 0xFF;
341
                        uint64_t b= (v*3/16) & 0xFF;
342
                        uint64_t c= (v*5/16) & 0xFF;
343
                        uint64_t d= (7*v/16) & 0xFF;
344
                        uint64_t A= (0x100 - a)&0xFF;
345
                        uint64_t B= (0x100 - b)&0xFF;
346
                        uint64_t C= (0x100 - c)&0xFF;
347
                        uint64_t D= (0x100 - c)&0xFF;
348

    
349
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
350
                                (D<<24) | (C<<16) | (B<<8) | (A);
351
                        //lut[i] = (v<<32) | (v<<24);
352
                }
353
        }
354

    
355
        for(y=0; y<BLOCK_SIZE; y++)
356
        {
357
                int a= src[1] - src[2];
358
                int b= src[3] - src[4];
359
                int c= src[5] - src[6];
360

    
361
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
362

    
363
                if(d < QP)
364
                {
365
                        int v = d * SIGN(-b);
366

    
367
                        src[1] +=v/8;
368
                        src[2] +=v/4;
369
                        src[3] +=3*v/8;
370
                        src[4] -=3*v/8;
371
                        src[5] -=v/4;
372
                        src[6] -=v/8;
373

    
374
                }
375
                src+=stride;
376
        }
377
}
378

    
379

    
380
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
381
//Plain C versions
382
/* Decide which variants of the template get compiled. */
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif

#endif //ARCH_X86

/* From here on the feature macros only describe the variant being compiled. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

//Plain C versions (all feature macros are already undefined at this point)
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
447

    
448
// minor note: the HAVE_xyz macros are messed up after this point, so don't use them
449

    
450
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
451
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
452
{
453
        PPContext *c= (PPContext *)vc;
454
        PPMode *ppMode= (PPMode *)vm;
455
        c->ppMode= *ppMode; //FIXME
456

    
457
        // useing ifs here as they are faster than function pointers allthough the
458
        // difference wouldnt be messureable here but its much better because
459
        // someone might exchange the cpu whithout restarting mplayer ;)
460
#ifdef RUNTIME_CPUDETECT
461
#ifdef ARCH_X86
462
        // ordered per speed fasterst first
463
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
464
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
465
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
466
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
467
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
468
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
469
        else
470
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
471
#else
472
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
473
#endif
474
#else //RUNTIME_CPUDETECT
475
#ifdef HAVE_MMX2
476
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
477
#elif defined (HAVE_3DNOW)
478
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
479
#elif defined (HAVE_MMX)
480
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
481
#else
482
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
483
#endif
484
#endif //!RUNTIME_CPUDETECT
485
}
486

    
487
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
488
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
489

    
490
/* -pp Command line Help
 *
 * Every name listed here must be spelled exactly as the mode parser matches
 * it (the comparison is case sensitive), and the "default"/"fast" rows show
 * the full expansion stored in replaceTable, in short form.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"        1. difference factor: default=64, higher -> more deblocking\n"
"        2. flatness threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa        fast                                        h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
527

    
528
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
529
{
530
        char temp[GET_MODE_BUFFER_SIZE];
531
        char *p= temp;
532
        char *filterDelimiters= ",/";
533
        char *optionDelimiters= ":";
534
        struct PPMode *ppMode;
535
        char *filterToken;
536

    
537
        ppMode= memalign(8, sizeof(PPMode));
538
        
539
        ppMode->lumMode= 0;
540
        ppMode->chromMode= 0;
541
        ppMode->maxTmpNoise[0]= 700;
542
        ppMode->maxTmpNoise[1]= 1500;
543
        ppMode->maxTmpNoise[2]= 3000;
544
        ppMode->maxAllowedY= 234;
545
        ppMode->minAllowedY= 16;
546
        ppMode->baseDcDiff= 256/4;
547
        ppMode->flatnessThreshold= 56-16;
548
        ppMode->maxClippedThreshold= 0.01;
549
        ppMode->error=0;
550

    
551
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
552

    
553
        if(verbose>1) printf("pp: %s\n", name);
554

    
555
        for(;;){
556
                char *filterName;
557
                int q= 1000000; //PP_QUALITY_MAX;
558
                int chrom=-1;
559
                char *option;
560
                char *options[OPTIONS_ARRAY_SIZE];
561
                int i;
562
                int filterNameOk=0;
563
                int numOfUnknownOptions=0;
564
                int enable=1; //does the user want us to enabled or disabled the filter
565

    
566
                filterToken= strtok(p, filterDelimiters);
567
                if(filterToken == NULL) break;
568
                p+= strlen(filterToken) + 1; // p points to next filterToken
569
                filterName= strtok(filterToken, optionDelimiters);
570
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
571

    
572
                if(*filterName == '-')
573
                {
574
                        enable=0;
575
                        filterName++;
576
                }
577

    
578
                for(;;){ //for all options
579
                        option= strtok(NULL, optionDelimiters);
580
                        if(option == NULL) break;
581

    
582
                        if(verbose>1) printf("pp: option: %s\n", option);
583
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
584
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
585
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
586
                        else
587
                        {
588
                                options[numOfUnknownOptions] = option;
589
                                numOfUnknownOptions++;
590
                        }
591
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
592
                }
593
                options[numOfUnknownOptions] = NULL;
594

    
595
                /* replace stuff from the replace Table */
596
                for(i=0; replaceTable[2*i]!=NULL; i++)
597
                {
598
                        if(!strcmp(replaceTable[2*i], filterName))
599
                        {
600
                                int newlen= strlen(replaceTable[2*i + 1]);
601
                                int plen;
602
                                int spaceLeft;
603

    
604
                                if(p==NULL) p= temp, *p=0;         //last filter
605
                                else p--, *p=',';                //not last filter
606

    
607
                                plen= strlen(p);
608
                                spaceLeft= p - temp + plen;
609
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
610
                                {
611
                                        ppMode->error++;
612
                                        break;
613
                                }
614
                                memmove(p + newlen, p, plen+1);
615
                                memcpy(p, replaceTable[2*i + 1], newlen);
616
                                filterNameOk=1;
617
                        }
618
                }
619

    
620
                for(i=0; filters[i].shortName!=NULL; i++)
621
                {
622
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
623
                        if(   !strcmp(filters[i].longName, filterName)
624
                           || !strcmp(filters[i].shortName, filterName))
625
                        {
626
                                ppMode->lumMode &= ~filters[i].mask;
627
                                ppMode->chromMode &= ~filters[i].mask;
628

    
629
                                filterNameOk=1;
630
                                if(!enable) break; // user wants to disable it
631

    
632
                                if(q >= filters[i].minLumQuality)
633
                                        ppMode->lumMode|= filters[i].mask;
634
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
635
                                        if(q >= filters[i].minChromQuality)
636
                                                ppMode->chromMode|= filters[i].mask;
637

    
638
                                if(filters[i].mask == LEVEL_FIX)
639
                                {
640
                                        int o;
641
                                        ppMode->minAllowedY= 16;
642
                                        ppMode->maxAllowedY= 234;
643
                                        for(o=0; options[o]!=NULL; o++)
644
                                        {
645
                                                if(  !strcmp(options[o],"fullyrange")
646
                                                   ||!strcmp(options[o],"f"))
647
                                                {
648
                                                        ppMode->minAllowedY= 0;
649
                                                        ppMode->maxAllowedY= 255;
650
                                                        numOfUnknownOptions--;
651
                                                }
652
                                        }
653
                                }
654
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
655
                                {
656
                                        int o;
657
                                        int numOfNoises=0;
658

    
659
                                        for(o=0; options[o]!=NULL; o++)
660
                                        {
661
                                                char *tail;
662
                                                ppMode->maxTmpNoise[numOfNoises]=
663
                                                        strtol(options[o], &tail, 0);
664
                                                if(tail!=options[o])
665
                                                {
666
                                                        numOfNoises++;
667
                                                        numOfUnknownOptions--;
668
                                                        if(numOfNoises >= 3) break;
669
                                                }
670
                                        }
671
                                }
672
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
673
                                {
674
                                        int o;
675

    
676
                                        for(o=0; options[o]!=NULL && o<2; o++)
677
                                        {
678
                                                char *tail;
679
                                                int val= strtol(options[o], &tail, 0);
680
                                                if(tail==options[o]) break;
681

    
682
                                                numOfUnknownOptions--;
683
                                                if(o==0) ppMode->baseDcDiff= val;
684
                                                else ppMode->flatnessThreshold= val;
685
                                        }
686
                                }
687
                                else if(filters[i].mask == FORCE_QUANT)
688
                                {
689
                                        int o;
690
                                        ppMode->forcedQuant= 15;
691

    
692
                                        for(o=0; options[o]!=NULL && o<1; o++)
693
                                        {
694
                                                char *tail;
695
                                                int val= strtol(options[o], &tail, 0);
696
                                                if(tail==options[o]) break;
697

    
698
                                                numOfUnknownOptions--;
699
                                                ppMode->forcedQuant= val;
700
                                        }
701
                                }
702
                        }
703
                }
704
                if(!filterNameOk) ppMode->error++;
705
                ppMode->error += numOfUnknownOptions;
706
        }
707

    
708
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
709
        if(ppMode->error)
710
        {
711
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
712
                free(ppMode);
713
                return NULL;
714
        }
715
        return ppMode;
716
}
717

    
718
/**
 * Releases a mode object.
 * @param mode mode to free; NULL is accepted and nothing happens in that case
 */
void pp_free_mode(pp_mode_t *mode){
    /* free() ignores a NULL pointer by itself, no separate test is required */
    free(mode);
}
721

    
722
/**
 * Replaces the buffer referenced by *p with a freshly allocated, zero-filled one.
 * The previous buffer (if any) is freed first; its contents are NOT carried over.
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment handed to memalign()
 * @param size      size of the new buffer in bytes
 * If size is negative or the allocation fails, *p is left NULL and nothing is
 * written, so the caller never ends up with a dangling pointer.
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= NULL;

        /* a negative int would turn into an enormous size_t request */
        if(size < 0) return;

        *p= memalign(alignment, size);
        /* only clear the block when the allocation actually succeeded */
        if(*p) memset(*p, 0, size);
}
727

    
728
/**
 * (Re)allocates all scratch buffers of the context for the given picture
 * geometry and records the stride they were sized for in c->stride.
 * Every buffer is obtained through reallocAlign() with 8 byte alignment;
 * yHistogram is additionally seeded with a uniform start value.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride){
        /* picture size in 16 pixel units, rounded up */
        const int mbCols= (width +15)>>4;
        const int mbRows= (height+15)>>4;
        /* start value written into each of the 256 histogram bins */
        const int histSeed= width*height/64*15/256;
        /* the extra 17*1024 bytes are slack, so reads/writes a bit past the
           end of the picture data need no special handling */
        const int bluredSize= stride*mbRows*16 + 17*1024;
        const int bluredPastSize= 256*((height+7)&(~7))/2 + 17*1024; //FIXME size
        int bin, plane;

        c->stride= stride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        for(bin=0; bin<256; bin++)
        {
                c->yHistogram[bin]= histSeed;
        }

        for(plane=0; plane<3; plane++)
        {
                reallocAlign((void **)&c->tempBlured[plane], 8, bluredSize);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, bluredPastSize);
        }

        reallocAlign((void **)&c->deintTemp, 8, width+16);
        /* one QP entry per macroblock for nonBQPTable, one macroblock row for forcedQPTable */
        reallocAlign((void **)&c->nonBQPTable, 8, mbCols*mbRows*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbCols*sizeof(QP_STORE_T));
}
753

    
754
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
755
        PPContext *c= memalign(32, sizeof(PPContext));
756
        int stride= (width+15)&(~15); //assumed / will realloc if needed
757
        
758
        memset(c, 0, sizeof(PPContext));
759
        c->cpuCaps= cpuCaps;
760
        if(cpuCaps&PP_FORMAT){
761
                c->hChromaSubSample= cpuCaps&0x3;
762
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
763
        }else{
764
                c->hChromaSubSample= 1;
765
                c->vChromaSubSample= 1;
766
        }
767

    
768
        reallocBuffers(c, width, height, stride);
769
        
770
        c->frameNum=-1;
771

    
772
        return c;
773
}
774

    
775
void pp_free_context(void *vc){
776
        PPContext *c = (PPContext*)vc;
777
        int i;
778
        
779
        for(i=0; i<3; i++) free(c->tempBlured[i]);
780
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
781
        
782
        free(c->tempBlocks);
783
        free(c->yHistogram);
784
        free(c->tempDst);
785
        free(c->tempSrc);
786
        free(c->deintTemp);
787
        free(c->nonBQPTable);
788
        free(c->forcedQPTable);
789
        
790
        memset(c, 0, sizeof(PPContext));
791

    
792
        free(c);
793
}
794

    
795
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
796
                 uint8_t * dst[3], int dstStride[3],
797
                 int width, int height,
798
                 QP_STORE_T *QP_store,  int QPStride,
799
                 pp_mode_t *vm,  void *vc, int pict_type)
800
{
801
        int mbWidth = (width+15)>>4;
802
        int mbHeight= (height+15)>>4;
803
        PPMode *mode = (PPMode*)vm;
804
        PPContext *c = (PPContext*)vc;
805
        int minStride= MAX(srcStride[0], dstStride[0]);
806
        
807
        if(c->stride < minStride)
808
                reallocBuffers(c, width, height, minStride);
809

    
810
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
811
        {
812
                int i;
813
                QP_store= c->forcedQPTable;
814
                QPStride= 0;
815
                if(mode->lumMode & FORCE_QUANT)
816
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
817
                else
818
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
819
        }
820
if(0){
821
int x,y;
822
for(y=0; y<mbHeight; y++){
823
        for(x=0; x<mbWidth; x++){
824
                printf("%2d ", QP_store[x + y*QPStride]);
825
        }
826
        printf("\n");
827
}
828
        printf("\n");
829
}
830
//printf("pict_type:%d\n", pict_type);
831

    
832
        if(pict_type!=3)
833
        {
834
                int x,y;
835
                for(y=0; y<mbHeight; y++){
836
                        for(x=0; x<mbWidth; x++){
837
                                int qscale= QP_store[x + y*QPStride];
838
                                if(qscale&~31)
839
                                    qscale=31;
840
                                c->nonBQPTable[y*mbWidth + x]= qscale;
841
                        }
842
                }
843
        }
844

    
845
        if(verbose>2)
846
        {
847
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
848
        }
849

    
850
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
851
                width, height, QP_store, QPStride, 0, mode, c);
852

    
853
        width  = (width )>>c->hChromaSubSample;
854
        height = (height)>>c->vChromaSubSample;
855

    
856
        if(mode->chromMode)
857
        {
858
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
859
                        width, height, QP_store, QPStride, 1, mode, c);
860
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
861
                        width, height, QP_store, QPStride, 2, mode, c);
862
        }
863
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
864
        {
865
                memcpy(dst[1], src[1], srcStride[1]*height);
866
                memcpy(dst[2], src[2], srcStride[2]*height);
867
        }
868
        else
869
        {
870
                int y;
871
                for(y=0; y<height; y++)
872
                {
873
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
874
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
875
                }
876
        }
877
}
878