Statistics
| Branch: | Revision:

ffmpeg / libavcodec / libpostproc / postprocess.c @ 4cfbf61b

History | View | Annotate | Download (25.1 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/**
20
 * @file postprocess.c
21
 * postprocessing.
22
 */
23
 
24
/*
25
                        C        MMX        MMX2        3DNow
26
isVertDC                Ec        Ec
27
isVertMinMaxOk                Ec        Ec
28
doVertLowPass                E                e        e
29
doVertDefFilter                Ec        Ec        e        e
30
isHorizDC                Ec        Ec
31
isHorizMinMaxOk                a        E
32
doHorizLowPass                E                e        e
33
doHorizDefFilter        Ec        Ec        e        e
34
deRing                        E                e        e*
35
Vertical RKAlgo1        E                a        a
36
Horizontal RKAlgo1                        a        a
37
Vertical X1#                a                E        E
38
Horizontal X1#                a                E        E
39
LinIpolDeinterlace        e                E        E*
40
CubicIpolDeinterlace        a                e        e*
41
LinBlendDeinterlace        e                E        E*
42
MedianDeinterlace#        E        Ec        Ec
43
TempDeNoiser#                E                e        e
44

45
* I don't have a 3DNow CPU -> it's untested, but no one said it doesn't work, so it seems to work
46
# more or less self-invented filters, so the exactness isn't too meaningful
47
E = Exact implementation
48
e = almost exact implementation (slightly different rounding, ...)
49
a = alternative / approximate impl
50
c = checked against the other implementations (-vo md5)
51
*/
52

    
53
/*
54
TODO:
55
reduce the time wasted on the mem transfer
56
unroll stuff if instructions depend too much on the prior one
57
move YScale thing to the end instead of fixing QP
58
write a faster and higher quality deblocking filter :)
59
make the mainloop more flexible (variable number of blocks at once
60
        (the if/else stuff per block is slowing things down)
61
compare the quality & speed of all filters
62
split this huge file
63
optimize c versions
64
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
65
...
66
*/
67

    
68
//Changelog: use the CVS log
69

    
70
#include "config.h"
71
#include <inttypes.h>
72
#include <stdio.h>
73
#include <stdlib.h>
74
#include <string.h>
75
#ifdef HAVE_MALLOC_H
76
#include <malloc.h>
77
#endif
78
//#undef HAVE_MMX2
79
//#define HAVE_3DNOW
80
//#undef HAVE_MMX
81
//#undef ARCH_X86
82
//#define DEBUG_BRIGHTNESS
83
#ifdef USE_FASTMEMCPY
84
#include "../fastmemcpy.h"
85
#endif
86
#include "postprocess.h"
87
#include "postprocess_internal.h"
88

    
89
#include "mangle.h" //FIXME should be supressed
90

    
91
#ifndef HAVE_MEMALIGN
92
#define memalign(a,b) malloc(b)
93
#endif
94

    
95
/*
 * Small arithmetic helpers. They are plain macros, so an argument may be
 * evaluated more than once: do not pass expressions with side effects.
 */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Note: SIGN(0) evaluates to -1, not 0. */
#define SIGN(a) ((a) > 0 ? 1 : -1)

/* Size of the scratch copy of the mode string in pp_get_mode_by_name_and_quality() */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity (including the NULL end marker) of the per-filter option list */
#define OPTIONS_ARRAY_SIZE 10
/* Edge length, in pixels, of the blocks the filters in this file iterate over */
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
104
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
105

    
106
#ifdef ARCH_X86
/*
 * 8-byte aligned packed constants: the w* values repeat one 16-bit word four
 * times, the b* values repeat one byte eight times.
 * NOTE(review): presumably referenced from the inline asm in
 * postprocess_template.c -- confirm before renaming or removing any of them.
 */
static uint64_t __attribute__((aligned(8))) w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80= 0x8080808080808080ULL;
#endif
115

    
116

    
117
/* Backing store of the clipping lookup table; filled in by global_init(). */
static uint8_t clip_table[3*256];
/* Points at the middle third of clip_table, so indices -256..511 stay in bounds. */
static uint8_t * const clip_tab= clip_table + 256;

/* Diagnostic level; values above 1 enable the "pp:" trace output of the mode parser. */
static int verbose= 0;

static const int deringThreshold= 20;
123

    
124

    
125
static struct PPFilter filters[]=
126
{
127
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
128
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
129
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
130
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
131
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
132
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
133
        {"dr", "dering",                 1, 5, 6, DERING},
134
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
135
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
136
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
137
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
138
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
139
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
140
        {"l5", "lowpass5",                 1, 1, 4, LOWPASS5_DEINT_FILTER},
141
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
142
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
143
        {NULL, NULL,0,0,0,0} //End Marker
144
};
145

    
146
/*
 * Alias expansion used by the mode-string parser: entries come in
 * (alias, expansion) pairs and the list ends with a single NULL.
 */
static char *replaceTable[]=
{
        "default",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "de",
                "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fast",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        "fa",
                "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400",
        NULL //End Marker
};
154

    
155
#ifdef ARCH_X86
/**
 * Reads every packed constant (and conditionally writes b00) so that the
 * compiler does not report them as unused static variables.
 */
static inline void unusedVariableWarningFixer(void)
{
        const uint64_t total= w05 + w20 + b00 + b01 + b02 + b08 + b80;

        if(total == 0)
                b00= 0;
}
#endif
161

    
162

    
163
#ifdef ARCH_X86
/* Thin wrappers that each issue one x86 prefetch instruction for address p. */

/** Issues "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
        asm volatile("prefetchnta (%0)\n\t" : : "r" (p));
}

/** Issues "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
        asm volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}

/** Issues "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
        asm volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}

/** Issues "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
        asm volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
#endif
192

    
193
// The horizontal functions exist only in C because the MMX code is faster with vertical filters plus transposing
194

    
195
/**
196
 * Check if the given 8x8 Block is mostly "flat"
197
 */
198
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
199
{
200
        int numEq= 0;
201
        int y;
202
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
203
        const int dcThreshold= dcOffset*2 + 1;
204

    
205
        for(y=0; y<BLOCK_SIZE; y++)
206
        {
207
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
208
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
209
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
210
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
211
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
212
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
213
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
214
                src+= stride;
215
        }
216
        return numEq > c->ppMode.flatnessThreshold;
217
}
218

    
219
/**
220
 * Check if the middle 8x8 Block in the given 8x16 block is flat
221
 */
222
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
223
        int numEq= 0;
224
        int y;
225
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
226
        const int dcThreshold= dcOffset*2 + 1;
227

    
228
        src+= stride*4; // src points to begin of the 8x8 Block
229
        for(y=0; y<BLOCK_SIZE-1; y++)
230
        {
231
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
232
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
233
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
234
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
235
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
236
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
237
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
238
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
239
                src+= stride;
240
        }
241
        return numEq > c->ppMode.flatnessThreshold;
242
}
243

    
244
/**
 * Cheap range check for horizontal filtering: compares only the first and
 * the last pixel of the first row.
 * @param src    first pixel of the row
 * @param stride unused
 * @param QP     quantizer; the allowed difference is 2*QP
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
        const int edgeDiff= src[0] - src[7];

        return abs(edgeDiff) <= 2*QP;
}
250

    
251
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
252
{
253
        int y;
254
        for(y=0; y<BLOCK_SIZE; y++)
255
        {
256
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
257

    
258
                if(ABS(middleEnergy) < 8*QP)
259
                {
260
                        const int q=(dst[3] - dst[4])/2;
261
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
262
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
263

    
264
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
265
                        d= MAX(d, 0);
266

    
267
                        d= (5*d + 32) >> 6;
268
                        d*= SIGN(-middleEnergy);
269

    
270
                        if(q>0)
271
                        {
272
                                d= d<0 ? 0 : d;
273
                                d= d>q ? q : d;
274
                        }
275
                        else
276
                        {
277
                                d= d>0 ? 0 : d;
278
                                d= d<q ? q : d;
279
                        }
280

    
281
                        dst[3]-= d;
282
                        dst[4]+= d;
283
                }
284
                dst+= stride;
285
        }
286
}
287

    
288
/**
289
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
290
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
291
 */
292
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
293
{
294

    
295
        int y;
296
        for(y=0; y<BLOCK_SIZE; y++)
297
        {
298
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
299
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
300

    
301
                int sums[9];
302
                sums[0] = first + dst[0];
303
                sums[1] = dst[0] + dst[1];
304
                sums[2] = dst[1] + dst[2];
305
                sums[3] = dst[2] + dst[3];
306
                sums[4] = dst[3] + dst[4];
307
                sums[5] = dst[4] + dst[5];
308
                sums[6] = dst[5] + dst[6];
309
                sums[7] = dst[6] + dst[7];
310
                sums[8] = dst[7] + last;
311

    
312
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
313
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
314
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
315
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
316
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
317
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
318
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
319
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
320

    
321
                dst+= stride;
322
        }
323
}
324

    
325
/**
326
 * Experimental Filter 1 (Horizontal)
327
 * will not damage linear gradients
328
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
329
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
330
 * MMX2 version does correct clipping C version doesnt
331
 * not identical with the vertical one
332
 */
333
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
334
{
335
        int y;
336
        static uint64_t *lut= NULL;
337
        if(lut==NULL)
338
        {
339
                int i;
340
                lut= (uint64_t*)memalign(8, 256*8);
341
                for(i=0; i<256; i++)
342
                {
343
                        int v= i < 128 ? 2*i : 2*(i-256);
344
/*
345
//Simulate 112242211 9-Tap filter
346
                        uint64_t a= (v/16) & 0xFF;
347
                        uint64_t b= (v/8) & 0xFF;
348
                        uint64_t c= (v/4) & 0xFF;
349
                        uint64_t d= (3*v/8) & 0xFF;
350
*/
351
//Simulate piecewise linear interpolation
352
                        uint64_t a= (v/16) & 0xFF;
353
                        uint64_t b= (v*3/16) & 0xFF;
354
                        uint64_t c= (v*5/16) & 0xFF;
355
                        uint64_t d= (7*v/16) & 0xFF;
356
                        uint64_t A= (0x100 - a)&0xFF;
357
                        uint64_t B= (0x100 - b)&0xFF;
358
                        uint64_t C= (0x100 - c)&0xFF;
359
                        uint64_t D= (0x100 - c)&0xFF;
360

    
361
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
362
                                (D<<24) | (C<<16) | (B<<8) | (A);
363
                        //lut[i] = (v<<32) | (v<<24);
364
                }
365
        }
366

    
367
        for(y=0; y<BLOCK_SIZE; y++)
368
        {
369
                int a= src[1] - src[2];
370
                int b= src[3] - src[4];
371
                int c= src[5] - src[6];
372

    
373
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
374

    
375
                if(d < QP)
376
                {
377
                        int v = d * SIGN(-b);
378

    
379
                        src[1] +=v/8;
380
                        src[2] +=v/4;
381
                        src[3] +=3*v/8;
382
                        src[4] -=3*v/8;
383
                        src[5] -=v/4;
384
                        src[6] -=v/8;
385

    
386
                }
387
                src+=stride;
388
        }
389
}
390

    
391

    
392
// Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one.
// Each selected variant is built by including postprocess_template.c once, with
// RENAME() appending the variant suffix to the names it wraps.

// Plain C versions
#if !defined(HAVE_MMX) || defined(RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined(HAVE_MMX) && !defined(HAVE_3DNOW) && !defined(HAVE_MMX2)) || defined(RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined(HAVE_MMX2) || defined(RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined(HAVE_3DNOW) && !defined(HAVE_MMX2)) || defined(RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

// From here on the feature macros are redefined per variant, not taken from config.h.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

// C versions
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

// MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

// MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

// 3DNow versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Minor note: after this point HAVE_xyz / ARCH_X86 only reflect the last variant
// compiled above, not the configuration, so do not rely on them for anything else.
461

    
462
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
463
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
464
{
465
        PPContext *c= (PPContext *)vc;
466
        PPMode *ppMode= (PPMode *)vm;
467
        c->ppMode= *ppMode; //FIXME
468

    
469
        // useing ifs here as they are faster than function pointers allthough the
470
        // difference wouldnt be messureable here but its much better because
471
        // someone might exchange the cpu whithout restarting mplayer ;)
472
#ifdef RUNTIME_CPUDETECT
473
#ifdef ARCH_X86
474
        // ordered per speed fasterst first
475
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
476
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
477
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
478
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
479
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
480
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
481
        else
482
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
483
#else
484
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
485
#endif
486
#else //RUNTIME_CPUDETECT
487
#ifdef HAVE_MMX2
488
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
489
#elif defined (HAVE_3DNOW)
490
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
491
#elif defined (HAVE_MMX)
492
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
493
#else
494
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
495
#endif
496
#endif //!RUNTIME_CPUDETECT
497
}
498

    
499
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
500
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
501

    
502
/* -pp Command line Help
 * The names listed here must match the filters[] table exactly, because the
 * parser compares them case-sensitively ("forcequant", not "forceQuant").
*/
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"        1. difference factor: default=32, higher -> more deblocking\n"
"        2. flatness threshold: default=39, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"l5        lowpass5                                low-pass deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al\n"
"fa        fast                                        h1:a,v1:a,dr:a,al\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
539

    
540
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
541
{
542
        char temp[GET_MODE_BUFFER_SIZE];
543
        char *p= temp;
544
        char *filterDelimiters= ",/";
545
        char *optionDelimiters= ":";
546
        struct PPMode *ppMode;
547
        char *filterToken;
548

    
549
        ppMode= memalign(8, sizeof(PPMode));
550
        
551
        ppMode->lumMode= 0;
552
        ppMode->chromMode= 0;
553
        ppMode->maxTmpNoise[0]= 700;
554
        ppMode->maxTmpNoise[1]= 1500;
555
        ppMode->maxTmpNoise[2]= 3000;
556
        ppMode->maxAllowedY= 234;
557
        ppMode->minAllowedY= 16;
558
        ppMode->baseDcDiff= 256/8;
559
        ppMode->flatnessThreshold= 56-16-1;
560
        ppMode->maxClippedThreshold= 0.01;
561
        ppMode->error=0;
562

    
563
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
564

    
565
        if(verbose>1) printf("pp: %s\n", name);
566

    
567
        for(;;){
568
                char *filterName;
569
                int q= 1000000; //PP_QUALITY_MAX;
570
                int chrom=-1;
571
                char *option;
572
                char *options[OPTIONS_ARRAY_SIZE];
573
                int i;
574
                int filterNameOk=0;
575
                int numOfUnknownOptions=0;
576
                int enable=1; //does the user want us to enabled or disabled the filter
577

    
578
                filterToken= strtok(p, filterDelimiters);
579
                if(filterToken == NULL) break;
580
                p+= strlen(filterToken) + 1; // p points to next filterToken
581
                filterName= strtok(filterToken, optionDelimiters);
582
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
583

    
584
                if(*filterName == '-')
585
                {
586
                        enable=0;
587
                        filterName++;
588
                }
589

    
590
                for(;;){ //for all options
591
                        option= strtok(NULL, optionDelimiters);
592
                        if(option == NULL) break;
593

    
594
                        if(verbose>1) printf("pp: option: %s\n", option);
595
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
596
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
597
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
598
                        else
599
                        {
600
                                options[numOfUnknownOptions] = option;
601
                                numOfUnknownOptions++;
602
                        }
603
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
604
                }
605
                options[numOfUnknownOptions] = NULL;
606

    
607
                /* replace stuff from the replace Table */
608
                for(i=0; replaceTable[2*i]!=NULL; i++)
609
                {
610
                        if(!strcmp(replaceTable[2*i], filterName))
611
                        {
612
                                int newlen= strlen(replaceTable[2*i + 1]);
613
                                int plen;
614
                                int spaceLeft;
615

    
616
                                if(p==NULL) p= temp, *p=0;         //last filter
617
                                else p--, *p=',';                //not last filter
618

    
619
                                plen= strlen(p);
620
                                spaceLeft= p - temp + plen;
621
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
622
                                {
623
                                        ppMode->error++;
624
                                        break;
625
                                }
626
                                memmove(p + newlen, p, plen+1);
627
                                memcpy(p, replaceTable[2*i + 1], newlen);
628
                                filterNameOk=1;
629
                        }
630
                }
631

    
632
                for(i=0; filters[i].shortName!=NULL; i++)
633
                {
634
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
635
                        if(   !strcmp(filters[i].longName, filterName)
636
                           || !strcmp(filters[i].shortName, filterName))
637
                        {
638
                                ppMode->lumMode &= ~filters[i].mask;
639
                                ppMode->chromMode &= ~filters[i].mask;
640

    
641
                                filterNameOk=1;
642
                                if(!enable) break; // user wants to disable it
643

    
644
                                if(q >= filters[i].minLumQuality)
645
                                        ppMode->lumMode|= filters[i].mask;
646
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
647
                                        if(q >= filters[i].minChromQuality)
648
                                                ppMode->chromMode|= filters[i].mask;
649

    
650
                                if(filters[i].mask == LEVEL_FIX)
651
                                {
652
                                        int o;
653
                                        ppMode->minAllowedY= 16;
654
                                        ppMode->maxAllowedY= 234;
655
                                        for(o=0; options[o]!=NULL; o++)
656
                                        {
657
                                                if(  !strcmp(options[o],"fullyrange")
658
                                                   ||!strcmp(options[o],"f"))
659
                                                {
660
                                                        ppMode->minAllowedY= 0;
661
                                                        ppMode->maxAllowedY= 255;
662
                                                        numOfUnknownOptions--;
663
                                                }
664
                                        }
665
                                }
666
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
667
                                {
668
                                        int o;
669
                                        int numOfNoises=0;
670

    
671
                                        for(o=0; options[o]!=NULL; o++)
672
                                        {
673
                                                char *tail;
674
                                                ppMode->maxTmpNoise[numOfNoises]=
675
                                                        strtol(options[o], &tail, 0);
676
                                                if(tail!=options[o])
677
                                                {
678
                                                        numOfNoises++;
679
                                                        numOfUnknownOptions--;
680
                                                        if(numOfNoises >= 3) break;
681
                                                }
682
                                        }
683
                                }
684
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
685
                                {
686
                                        int o;
687

    
688
                                        for(o=0; options[o]!=NULL && o<2; o++)
689
                                        {
690
                                                char *tail;
691
                                                int val= strtol(options[o], &tail, 0);
692
                                                if(tail==options[o]) break;
693

    
694
                                                numOfUnknownOptions--;
695
                                                if(o==0) ppMode->baseDcDiff= val;
696
                                                else ppMode->flatnessThreshold= val;
697
                                        }
698
                                }
699
                                else if(filters[i].mask == FORCE_QUANT)
700
                                {
701
                                        int o;
702
                                        ppMode->forcedQuant= 15;
703

    
704
                                        for(o=0; options[o]!=NULL && o<1; o++)
705
                                        {
706
                                                char *tail;
707
                                                int val= strtol(options[o], &tail, 0);
708
                                                if(tail==options[o]) break;
709

    
710
                                                numOfUnknownOptions--;
711
                                                ppMode->forcedQuant= val;
712
                                        }
713
                                }
714
                        }
715
                }
716
                if(!filterNameOk) ppMode->error++;
717
                ppMode->error += numOfUnknownOptions;
718
        }
719

    
720
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
721
        if(ppMode->error)
722
        {
723
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
724
                free(ppMode);
725
                return NULL;
726
        }
727
        return ppMode;
728
}
729

    
730
/**
 * Releases a postprocessing mode with free().
 * @param mode mode to release; a NULL pointer is accepted and ignored
 */
void pp_free_mode(pp_mode_t *mode){
        /* free(NULL) is a no-op, so no explicit NULL test is needed */
        free(mode);
}
733

    
734
/**
 * Replaces the buffer referenced by *p with a new zero-filled, aligned one.
 * The previous buffer (if any) is freed first; its contents are NOT preserved.
 * @param p         address of the buffer pointer to replace
 * @param alignment alignment passed to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * On failure (negative size or out of memory) an error is printed to stderr
 * and *p is left NULL instead of being passed to memset().
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= NULL; // never leave a dangling pointer behind if we bail out below

        if(size < 0)
        {
                fprintf(stderr, "pp: invalid buffer size %d\n", size);
                return;
        }

        *p= memalign(alignment, size);
        if(*p == NULL)
        {
                fprintf(stderr, "pp: failed to allocate %d bytes\n", size);
                return;
        }
        memset(*p, 0, size);
}
739

    
740
/**
 * Drops and re-creates all work buffers of a context for the given geometry.
 * Every buffer is obtained through reallocAlign(), so previous contents are
 * discarded and the new buffers start out zeroed.
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size stored in c->stride and used to size the line buffers
 * @param qpStride QP table line size stored in c->qpStride
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        const int mbCols= (width+15)>>4;  // picture width in 16 pixel units, rounded up
        const int mbRows= (height+15)>>4; // picture height in 16 pixel units, rounded up
        const int histogramSeed= width*height/64*15/256; // start value of every histogram bin
        int bin, plane;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        bin= 0;
        while(bin < 256)
                c->yHistogram[bin++]= histogramSeed;

        for(plane=0; plane<3; plane++)
        {
                // Note: the +17*1024 is slack so that reads/writes past the end need no special care
                reallocAlign((void **)&c->tempBlured[plane], 8, stride*mbRows*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbRows*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbRows*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbCols*sizeof(QP_STORE_T));
}
767

    
768
static void global_init(void){
769
        int i;
770
        memset(clip_table, 0, 256);
771
        for(i=256; i<512; i++)
772
                clip_table[i]= i;
773
        memset(clip_table+512, 0, 256);
774
}
775

    
776
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
777
        PPContext *c= memalign(32, sizeof(PPContext));
778
        int stride= (width+15)&(~15); //assumed / will realloc if needed
779
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
780
        
781
        global_init();
782

    
783
        memset(c, 0, sizeof(PPContext));
784
        c->cpuCaps= cpuCaps;
785
        if(cpuCaps&PP_FORMAT){
786
                c->hChromaSubSample= cpuCaps&0x3;
787
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
788
        }else{
789
                c->hChromaSubSample= 1;
790
                c->vChromaSubSample= 1;
791
        }
792

    
793
        reallocBuffers(c, width, height, stride, qpStride);
794
        
795
        c->frameNum=-1;
796

    
797
        return c;
798
}
799

    
800
void pp_free_context(void *vc){
801
        PPContext *c = (PPContext*)vc;
802
        int i;
803
        
804
        for(i=0; i<3; i++) free(c->tempBlured[i]);
805
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
806
        
807
        free(c->tempBlocks);
808
        free(c->yHistogram);
809
        free(c->tempDst);
810
        free(c->tempSrc);
811
        free(c->deintTemp);
812
        free(c->stdQPTable);
813
        free(c->nonBQPTable);
814
        free(c->forcedQPTable);
815
        
816
        memset(c, 0, sizeof(PPContext));
817

    
818
        free(c);
819
}
820

    
821
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
822
                 uint8_t * dst[3], int dstStride[3],
823
                 int width, int height,
824
                 QP_STORE_T *QP_store,  int QPStride,
825
                 pp_mode_t *vm,  void *vc, int pict_type)
826
{
827
        int mbWidth = (width+15)>>4;
828
        int mbHeight= (height+15)>>4;
829
        PPMode *mode = (PPMode*)vm;
830
        PPContext *c = (PPContext*)vc;
831
        int minStride= MAX(srcStride[0], dstStride[0]);
832

    
833
        if(c->stride < minStride || c->qpStride < QPStride)
834
                reallocBuffers(c, width, height, 
835
                                MAX(minStride, c->stride), 
836
                                MAX(c->qpStride, QPStride));
837

    
838
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
839
        {
840
                int i;
841
                QP_store= c->forcedQPTable;
842
                QPStride= 0;
843
                if(mode->lumMode & FORCE_QUANT)
844
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
845
                else
846
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
847
        }
848
//printf("pict_type:%d\n", pict_type);
849

    
850
        if(pict_type & PP_PICT_TYPE_QP2){
851
                int i;
852
                const int count= mbHeight * QPStride;
853
                for(i=0; i<(count>>2); i++){
854
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
855
                }
856
                for(i<<=2; i<count; i++){
857
                        c->stdQPTable[i] = QP_store[i]>>1;
858
                }
859
                QP_store= c->stdQPTable;
860
        }
861

    
862
if(0){
863
int x,y;
864
for(y=0; y<mbHeight; y++){
865
        for(x=0; x<mbWidth; x++){
866
                printf("%2d ", QP_store[x + y*QPStride]);
867
        }
868
        printf("\n");
869
}
870
        printf("\n");
871
}
872

    
873
        if((pict_type&7)!=3)
874
        {
875
                int i;
876
                const int count= mbHeight * QPStride;
877
                for(i=0; i<(count>>2); i++){
878
                        ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x1F1F1F1F;
879
                }
880
                for(i<<=2; i<count; i++){
881
                        c->nonBQPTable[i] = QP_store[i] & 0x1F;
882
                }
883
        }
884

    
885
        if(verbose>2)
886
        {
887
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
888
        }
889

    
890
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
891
                width, height, QP_store, QPStride, 0, mode, c);
892

    
893
        width  = (width )>>c->hChromaSubSample;
894
        height = (height)>>c->vChromaSubSample;
895

    
896
        if(mode->chromMode)
897
        {
898
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
899
                        width, height, QP_store, QPStride, 1, mode, c);
900
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
901
                        width, height, QP_store, QPStride, 2, mode, c);
902
        }
903
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
904
        {
905
                memcpy(dst[1], src[1], srcStride[1]*height);
906
                memcpy(dst[2], src[2], srcStride[2]*height);
907
        }
908
        else
909
        {
910
                int y;
911
                for(y=0; y<height; y++)
912
                {
913
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
914
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
915
                }
916
        }
917
}
918