Statistics
| Branch: | Revision:

ffmpeg / postproc / postprocess.c @ 326d40af

History | View | Annotate | Download (23.6 KB)

1
/*
2
    Copyright (C) 2001-2002 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/*
20
                        C        MMX        MMX2        3DNow
21
isVertDC                Ec        Ec
22
isVertMinMaxOk                Ec        Ec
23
doVertLowPass                E                e        e
24
doVertDefFilter                Ec        Ec        e        e
25
isHorizDC                Ec        Ec
26
isHorizMinMaxOk                a        E
27
doHorizLowPass                E                e        e
28
doHorizDefFilter        Ec        Ec        e        e
29
deRing                        E                e        e*
30
Vertical RKAlgo1        E                a        a
31
Horizontal RKAlgo1                        a        a
32
Vertical X1#                a                E        E
33
Horizontal X1#                a                E        E
34
LinIpolDeinterlace        e                E        E*
35
CubicIpolDeinterlace        a                e        e*
36
LinBlendDeinterlace        e                E        E*
37
MedianDeinterlace#                 Ec        Ec
38
TempDeNoiser#                E                e        e
39

40
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
41
# more or less self-invented filters, so the exactness isn't too meaningful
42
E = Exact implementation
43
e = almost exact implementation (slightly different rounding, ...)
44
a = alternative / approximate impl
45
c = checked against the other implementations (-vo md5)
46
*/
47

    
48
/*
49
TODO:
50
reduce the time wasted on the mem transfer
51
unroll stuff if instructions depend too much on the prior one
52
move YScale thing to the end instead of fixing QP
53
write a faster and higher quality deblocking filter :)
54
make the mainloop more flexible (variable number of blocks at once
55
        (the if/else stuff per block is slowing things down)
56
compare the quality & speed of all filters
57
split this huge file
58
optimize c versions
59
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
60
...
61
*/
62

    
63
//Changelog: use the CVS log
64

    
65
#include "../config.h"
66
#include <inttypes.h>
67
#include <stdio.h>
68
#include <stdlib.h>
69
#include <string.h>
70
#ifdef HAVE_MALLOC_H
71
#include <malloc.h>
72
#endif
73
//#undef HAVE_MMX2
74
//#define HAVE_3DNOW
75
//#undef HAVE_MMX
76
//#undef ARCH_X86
77
//#define DEBUG_BRIGHTNESS
78
#include "../libvo/fastmemcpy.h"
79
#include "postprocess.h"
80
#include "postprocess_internal.h"
81
#include "../mangle.h"
82

    
83
/* Small arithmetic helpers. Each argument may be evaluated more than once,
   so never pass an expression with side effects. */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Note: SIGN(0) evaluates to -1. */
#define SIGN(a) ((a) > 0 ? 1 : -1)

/* Size of the scratch buffer the mode string is copied into while parsing. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity (including the NULL terminator slot) of the per-filter option list. */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters operate on. */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
92
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
93

    
94
#ifdef ARCH_X86
/* 8-byte aligned 64-bit constants: wNN repeats the 16-bit word 0x00NN four
   times, bNN repeats the byte 0xNN eight times.
   NOTE(review): presumably referenced from the inline assembly in
   postprocess_template.c -- confirm there. */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080LL;
#endif
103

    
104
/* Diagnostic verbosity; values above 1 make the mode-string parser print
   each filter and option it sees. */
static int verbose= 0;

/* Threshold for the deringing filter (not referenced in this part of the file). */
static const int deringThreshold= 20;
107

    
108

    
109
static struct PPFilter filters[]=
110
{
111
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
112
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
113
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
114
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
115
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
116
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
117
        {"dr", "dering",                 1, 5, 6, DERING},
118
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
119
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
120
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
121
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
122
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
123
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
124
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
125
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
126
        {NULL, NULL,0,0,0,0} //End Marker
127
};
128

    
129
/* Alias table for the mode-string parser: entries come in pairs
   (alias name, filter list it expands to); a single NULL ends the table. */
static char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",           "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL //End Marker
};
137

    
138
#ifdef ARCH_X86
/**
 * References every file-scope MMX constant once so that, as the name says,
 * the compiler does not warn about unused variables.
 */
static inline void unusedVariableWarningFixer(void)
{
        if(w05 + w20 + b00 + b01 + b02 + b08 + b80 == 0) b00=0;
}
#endif
144

    
145

    
146
#ifdef ARCH_X86
/** Issue a "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
        asm volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}

/** Issue a "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
        asm volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}

/** Issue a "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
        asm volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}

/** Issue a "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
        asm volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
#endif
175

    
176
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
177

    
178
/**
179
 * Check if the given 8x8 Block is mostly "flat"
180
 */
181
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
182
{
183
        int numEq= 0;
184
        int y;
185
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
186
        const int dcThreshold= dcOffset*2 + 1;
187
        for(y=0; y<BLOCK_SIZE; y++)
188
        {
189
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
190
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
191
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
192
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
193
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
194
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
195
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
196
                src+= stride;
197
        }
198
        return numEq > c->ppMode.flatnessThreshold;
199
}
200

    
201
/**
202
 * Check if the middle 8x8 Block in the given 8x16 block is flat
203
 */
204
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
205
        int numEq= 0;
206
        int y;
207
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
208
        const int dcThreshold= dcOffset*2 + 1;
209
        src+= stride*4; // src points to begin of the 8x8 Block
210
        for(y=0; y<BLOCK_SIZE-1; y++)
211
        {
212
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
213
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
214
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
215
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
216
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
217
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
218
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
219
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
220
                src+= stride;
221
        }
222
        return numEq > c->ppMode.flatnessThreshold;
223
}
224

    
225
/**
 * Check whether the first and the last pixel of the first row differ by at
 * most 2*QP. Only the row src points at is examined.
 * @param src    first pixel of the row
 * @param stride unused
 * @param QP     quantizer the limit is derived from
 * @return 1 if |src[0] - src[7]| <= 2*QP, 0 otherwise
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        (void)stride; // kept in the signature for the callers, not needed here

        if(abs(src[0] - src[7]) > 2*QP) return 0;

        return 1;
}
231

    
232
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
233
{
234
        int y;
235
        for(y=0; y<BLOCK_SIZE; y++)
236
        {
237
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
238

    
239
                if(ABS(middleEnergy) < 8*QP)
240
                {
241
                        const int q=(dst[3] - dst[4])/2;
242
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
243
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
244

    
245
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
246
                        d= MAX(d, 0);
247

    
248
                        d= (5*d + 32) >> 6;
249
                        d*= SIGN(-middleEnergy);
250

    
251
                        if(q>0)
252
                        {
253
                                d= d<0 ? 0 : d;
254
                                d= d>q ? q : d;
255
                        }
256
                        else
257
                        {
258
                                d= d>0 ? 0 : d;
259
                                d= d<q ? q : d;
260
                        }
261

    
262
                        dst[3]-= d;
263
                        dst[4]+= d;
264
                }
265
                dst+= stride;
266
        }
267
}
268

    
269
/**
270
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
271
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
272
 */
273
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
274
{
275

    
276
        int y;
277
        for(y=0; y<BLOCK_SIZE; y++)
278
        {
279
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
280
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
281

    
282
                int sums[9];
283
                sums[0] = first + dst[0];
284
                sums[1] = dst[0] + dst[1];
285
                sums[2] = dst[1] + dst[2];
286
                sums[3] = dst[2] + dst[3];
287
                sums[4] = dst[3] + dst[4];
288
                sums[5] = dst[4] + dst[5];
289
                sums[6] = dst[5] + dst[6];
290
                sums[7] = dst[6] + dst[7];
291
                sums[8] = dst[7] + last;
292

    
293
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
294
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
295
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
296
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
297
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
298
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
299
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
300
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
301

    
302
                dst+= stride;
303
        }
304
}
305

    
306
/**
307
 * Experimental Filter 1 (Horizontal)
308
 * will not damage linear gradients
309
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
310
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
311
 * MMX2 version does correct clipping C version doesnt
312
 * not identical with the vertical one
313
 */
314
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
315
{
316
        int y;
317
        static uint64_t *lut= NULL;
318
        if(lut==NULL)
319
        {
320
                int i;
321
                lut= (uint64_t*)memalign(8, 256*8);
322
                for(i=0; i<256; i++)
323
                {
324
                        int v= i < 128 ? 2*i : 2*(i-256);
325
/*
326
//Simulate 112242211 9-Tap filter
327
                        uint64_t a= (v/16) & 0xFF;
328
                        uint64_t b= (v/8) & 0xFF;
329
                        uint64_t c= (v/4) & 0xFF;
330
                        uint64_t d= (3*v/8) & 0xFF;
331
*/
332
//Simulate piecewise linear interpolation
333
                        uint64_t a= (v/16) & 0xFF;
334
                        uint64_t b= (v*3/16) & 0xFF;
335
                        uint64_t c= (v*5/16) & 0xFF;
336
                        uint64_t d= (7*v/16) & 0xFF;
337
                        uint64_t A= (0x100 - a)&0xFF;
338
                        uint64_t B= (0x100 - b)&0xFF;
339
                        uint64_t C= (0x100 - c)&0xFF;
340
                        uint64_t D= (0x100 - c)&0xFF;
341

    
342
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
343
                                (D<<24) | (C<<16) | (B<<8) | (A);
344
                        //lut[i] = (v<<32) | (v<<24);
345
                }
346
        }
347

    
348
        for(y=0; y<BLOCK_SIZE; y++)
349
        {
350
                int a= src[1] - src[2];
351
                int b= src[3] - src[4];
352
                int c= src[5] - src[6];
353

    
354
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
355

    
356
                if(d < QP)
357
                {
358
                        int v = d * SIGN(-b);
359

    
360
                        src[1] +=v/8;
361
                        src[2] +=v/4;
362
                        src[3] +=3*v/8;
363
                        src[4] -=3*v/8;
364
                        src[5] -=v/4;
365
                        src[6] -=v/8;
366

    
367
                }
368
                src+=stride;
369
        }
370
}
371

    
372

    
373
//Note: we have C, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one

// Decide which variants of postprocess_template.c get compiled.
// With RUNTIME_CPUDETECT all of them are built, otherwise only the one
// matching the configured CPU features.

//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// Start from a clean slate; every section below defines exactly the feature
// macros its variant of the template needs.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

// Each section includes the template once; RENAME() appends the variant
// suffix to the identifiers the template passes through it.
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
440

    
441
// minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
442

    
443
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
444
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
445
{
446
        PPContext *c= (PPContext *)vc;
447
        PPMode *ppMode= (PPMode *)vm;
448
        c->ppMode= *ppMode; //FIXME
449

    
450
        // useing ifs here as they are faster than function pointers allthough the
451
        // difference wouldnt be messureable here but its much better because
452
        // someone might exchange the cpu whithout restarting mplayer ;)
453
#ifdef RUNTIME_CPUDETECT
454
#ifdef ARCH_X86
455
        // ordered per speed fasterst first
456
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
457
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
458
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
459
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
460
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
461
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
462
        else
463
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
464
#else
465
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
466
#endif
467
#else //RUNTIME_CPUDETECT
468
#ifdef HAVE_MMX2
469
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
470
#elif defined (HAVE_3DNOW)
471
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
472
#elif defined (HAVE_MMX)
473
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
474
#else
475
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
476
#endif
477
#endif //!RUNTIME_CPUDETECT
478
}
479

    
480
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
481
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
482

    
483
/* -pp Command line Help
   Describes the syntax of the mode string and lists the filter names and options.
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][,[-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq,hdeblock:autoq,linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a,hb:a,lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"                        1. Threshold: default=1, higher -> more deblocking\n"
"                        2. Threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al\n"
"fa        fast                                        h1:a,v1:a,dr:a,al\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forceQuant        <quantizer>                Force quantizer\n"
;
520

    
521
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
522
{
523
        char temp[GET_MODE_BUFFER_SIZE];
524
        char *p= temp;
525
        char *filterDelimiters= ",/";
526
        char *optionDelimiters= ":";
527
        struct PPMode *ppMode;
528
        char *filterToken;
529

    
530
        ppMode= memalign(8, sizeof(PPMode));
531
        
532
        ppMode->lumMode= 0;
533
        ppMode->chromMode= 0;
534
        ppMode->maxTmpNoise[0]= 700;
535
        ppMode->maxTmpNoise[1]= 1500;
536
        ppMode->maxTmpNoise[2]= 3000;
537
        ppMode->maxAllowedY= 234;
538
        ppMode->minAllowedY= 16;
539
        ppMode->baseDcDiff= 256/4;
540
        ppMode->flatnessThreshold=40;
541
        ppMode->flatnessThreshold= 56-16;
542
        ppMode->maxClippedThreshold= 0.01;
543
        ppMode->error=0;
544

    
545
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
546

    
547
        if(verbose>1) printf("pp: %s\n", name);
548

    
549
        for(;;){
550
                char *filterName;
551
                int q= 1000000; //PP_QUALITY_MAX;
552
                int chrom=-1;
553
                char *option;
554
                char *options[OPTIONS_ARRAY_SIZE];
555
                int i;
556
                int filterNameOk=0;
557
                int numOfUnknownOptions=0;
558
                int enable=1; //does the user want us to enabled or disabled the filter
559

    
560
                filterToken= strtok(p, filterDelimiters);
561
                if(filterToken == NULL) break;
562
                p+= strlen(filterToken) + 1; // p points to next filterToken
563
                filterName= strtok(filterToken, optionDelimiters);
564
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
565

    
566
                if(*filterName == '-')
567
                {
568
                        enable=0;
569
                        filterName++;
570
                }
571

    
572
                for(;;){ //for all options
573
                        option= strtok(NULL, optionDelimiters);
574
                        if(option == NULL) break;
575

    
576
                        if(verbose>1) printf("pp: option: %s\n", option);
577
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
578
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
579
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
580
                        else
581
                        {
582
                                options[numOfUnknownOptions] = option;
583
                                numOfUnknownOptions++;
584
                        }
585
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
586
                }
587
                options[numOfUnknownOptions] = NULL;
588

    
589
                /* replace stuff from the replace Table */
590
                for(i=0; replaceTable[2*i]!=NULL; i++)
591
                {
592
                        if(!strcmp(replaceTable[2*i], filterName))
593
                        {
594
                                int newlen= strlen(replaceTable[2*i + 1]);
595
                                int plen;
596
                                int spaceLeft;
597

    
598
                                if(p==NULL) p= temp, *p=0;         //last filter
599
                                else p--, *p=',';                //not last filter
600

    
601
                                plen= strlen(p);
602
                                spaceLeft= p - temp + plen;
603
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
604
                                {
605
                                        ppMode->error++;
606
                                        break;
607
                                }
608
                                memmove(p + newlen, p, plen+1);
609
                                memcpy(p, replaceTable[2*i + 1], newlen);
610
                                filterNameOk=1;
611
                        }
612
                }
613

    
614
                for(i=0; filters[i].shortName!=NULL; i++)
615
                {
616
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
617
                        if(   !strcmp(filters[i].longName, filterName)
618
                           || !strcmp(filters[i].shortName, filterName))
619
                        {
620
                                ppMode->lumMode &= ~filters[i].mask;
621
                                ppMode->chromMode &= ~filters[i].mask;
622

    
623
                                filterNameOk=1;
624
                                if(!enable) break; // user wants to disable it
625

    
626
                                if(q >= filters[i].minLumQuality)
627
                                        ppMode->lumMode|= filters[i].mask;
628
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
629
                                        if(q >= filters[i].minChromQuality)
630
                                                ppMode->chromMode|= filters[i].mask;
631

    
632
                                if(filters[i].mask == LEVEL_FIX)
633
                                {
634
                                        int o;
635
                                        ppMode->minAllowedY= 16;
636
                                        ppMode->maxAllowedY= 234;
637
                                        for(o=0; options[o]!=NULL; o++)
638
                                        {
639
                                                if(  !strcmp(options[o],"fullyrange")
640
                                                   ||!strcmp(options[o],"f"))
641
                                                {
642
                                                        ppMode->minAllowedY= 0;
643
                                                        ppMode->maxAllowedY= 255;
644
                                                        numOfUnknownOptions--;
645
                                                }
646
                                        }
647
                                }
648
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
649
                                {
650
                                        int o;
651
                                        int numOfNoises=0;
652

    
653
                                        for(o=0; options[o]!=NULL; o++)
654
                                        {
655
                                                char *tail;
656
                                                ppMode->maxTmpNoise[numOfNoises]=
657
                                                        strtol(options[o], &tail, 0);
658
                                                if(tail!=options[o])
659
                                                {
660
                                                        numOfNoises++;
661
                                                        numOfUnknownOptions--;
662
                                                        if(numOfNoises >= 3) break;
663
                                                }
664
                                        }
665
                                }
666
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
667
                                {
668
                                        int o;
669

    
670
                                        for(o=0; options[o]!=NULL && o<2; o++)
671
                                        {
672
                                                char *tail;
673
                                                int val= strtol(options[o], &tail, 0);
674
                                                if(tail==options[o]) break;
675

    
676
                                                numOfUnknownOptions--;
677
                                                if(o==0) ppMode->baseDcDiff= val;
678
                                                else ppMode->flatnessThreshold= val;
679
                                        }
680
                                }
681
                                else if(filters[i].mask == FORCE_QUANT)
682
                                {
683
                                        int o;
684
                                        ppMode->forcedQuant= 15;
685

    
686
                                        for(o=0; options[o]!=NULL && o<1; o++)
687
                                        {
688
                                                char *tail;
689
                                                int val= strtol(options[o], &tail, 0);
690
                                                if(tail==options[o]) break;
691

    
692
                                                numOfUnknownOptions--;
693
                                                ppMode->forcedQuant= val;
694
                                        }
695
                                }
696
                        }
697
                }
698
                if(!filterNameOk) ppMode->error++;
699
                ppMode->error += numOfUnknownOptions;
700
        }
701

    
702
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
703
        if(ppMode->error)
704
        {
705
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
706
                free(ppMode);
707
                return NULL;
708
        }
709
        return ppMode;
710
}
711

    
712
/**
 * Releases a mode object.
 * @param mode mode to release; NULL is accepted and ignored
 */
void pp_free_mode(pp_mode_t *mode){
    /* free(NULL) is defined to do nothing, so no explicit NULL check is needed */
    free(mode);
}
715

    
716
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
717
        PPContext *c= memalign(32, sizeof(PPContext));
718
        int i;
719
        int mbWidth = (width+15)>>4;
720
        int mbHeight= (height+15)>>4;
721
        
722
        c->cpuCaps= cpuCaps;
723

    
724
        c->tempBlocks= (uint8_t*)memalign(8, 2*16*8);
725
        c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t));
726
        for(i=0; i<256; i++)
727
                c->yHistogram[i]= width*height/64*15/256;
728

    
729
        for(i=0; i<3; i++)
730
        {
731
                //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
732
                c->tempBlured[i]= (uint8_t*)memalign(8, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024); //FIXME dstStride instead of width
733
                c->tempBluredPast[i]= (uint32_t*)memalign(8, 256*((height+7)&(~7))/2 + 17*1024);
734

    
735
                memset(c->tempBlured[i], 0, ((width+7)&(~7))*2*((height+7)&(~7)) + 17*1024);
736
                memset(c->tempBluredPast[i], 0, 256*((height+7)&(~7))/2 + 17*1024);
737
        }
738
        
739
        c->tempDst= (uint8_t*)memalign(8, 1024*24);
740
        c->tempSrc= (uint8_t*)memalign(8, 1024*24);
741
        c->tempDstBlock= (uint8_t*)memalign(8, 1024*24);
742
        c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
743
        c->deintTemp= (uint8_t*)memalign(8, width+16);
744
        c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T));
745
        memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T));
746

    
747
        c->frameNum=-1;
748

    
749
        return c;
750
}
751

    
752
void pp_free_context(void *vc){
753
        PPContext *c = (PPContext*)vc;
754
        int i;
755
        
756
        for(i=0; i<3; i++) free(c->tempBlured[i]);
757
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
758
        
759
        free(c->tempBlocks);
760
        free(c->yHistogram);
761
        free(c->tempDst);
762
        free(c->tempSrc);
763
        free(c->tempDstBlock);
764
        free(c->tempSrcBlock);
765
        free(c->deintTemp);
766
        free(c->nonBQPTable);
767
        
768
        free(c);
769
}
770

    
771
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
772
                 uint8_t * dst[3], int dstStride[3],
773
                 int width, int height,
774
                 QP_STORE_T *QP_store,  int QPStride,
775
                 pp_mode_t *vm,  void *vc, int pict_type)
776
{
777
        int mbWidth = (width+15)>>4;
778
        int mbHeight= (height+15)>>4;
779
        QP_STORE_T quantArray[2048/8];
780
        PPMode *mode = (PPMode*)vm;
781
        PPContext *c = (PPContext*)vc;
782

    
783
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
784
        {
785
                int i;
786
                QP_store= quantArray;
787
                QPStride= 0;
788
                if(mode->lumMode & FORCE_QUANT)
789
                        for(i=0; i<2048/8; i++) quantArray[i]= mode->forcedQuant;
790
                else
791
                        for(i=0; i<2048/8; i++) quantArray[i]= 1;
792
        }
793
if(0){
794
int x,y;
795
for(y=0; y<mbHeight; y++){
796
        for(x=0; x<mbWidth; x++){
797
                printf("%2d ", QP_store[x + y*QPStride]);
798
        }
799
        printf("\n");
800
}
801
        printf("\n");
802
}
803
//printf("pict_type:%d\n", pict_type);
804

    
805
        if(pict_type!=3)
806
        {
807
                int x,y;
808
                for(y=0; y<mbHeight; y++){
809
                        for(x=0; x<mbWidth; x++){
810
                                int qscale= QP_store[x + y*QPStride];
811
                                if(qscale&~31)
812
                                    qscale=31;
813
                                c->nonBQPTable[y*mbWidth + x]= qscale;
814
                        }
815
                }
816
        }
817

    
818
        if(verbose>2)
819
        {
820
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
821
        }
822

    
823
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
824
                width, height, QP_store, QPStride, 0, mode, c);
825

    
826
        width  = (width +1)>>1;
827
        height = (height+1)>>1;
828

    
829
        if(mode->chromMode)
830
        {
831
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
832
                        width, height, QP_store, QPStride, 1, mode, c);
833
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
834
                        width, height, QP_store, QPStride, 2, mode, c);
835
        }
836
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
837
        {
838
                memcpy(dst[1], src[1], srcStride[1]*height);
839
                memcpy(dst[2], src[2], srcStride[2]*height);
840
        }
841
        else
842
        {
843
                int y;
844
                for(y=0; y<height; y++)
845
                {
846
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
847
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
848
                }
849
        }
850
}
851