Statistics
| Branch: | Revision:

ffmpeg / libavcodec / libpostproc / postprocess.c @ b304569a

History | View | Annotate | Download (24.1 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3

4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

    
19
/**
20
 * @file postprocess.c
21
 * postprocessing.
22
 */
23
 
24
/*
25
                        C        MMX        MMX2        3DNow
26
isVertDC                Ec        Ec
27
isVertMinMaxOk                Ec        Ec
28
doVertLowPass                E                e        e
29
doVertDefFilter                Ec        Ec        e        e
30
isHorizDC                Ec        Ec
31
isHorizMinMaxOk                a        E
32
doHorizLowPass                E                e        e
33
doHorizDefFilter        Ec        Ec        e        e
34
deRing                        E                e        e*
35
Vertical RKAlgo1        E                a        a
36
Horizontal RKAlgo1                        a        a
37
Vertical X1#                a                E        E
38
Horizontal X1#                a                E        E
39
LinIpolDeinterlace        e                E        E*
40
CubicIpolDeinterlace        a                e        e*
41
LinBlendDeinterlace        e                E        E*
42
MedianDeinterlace#        E        Ec        Ec
43
TempDeNoiser#                E                e        e
44

45
* I don't have a 3DNow CPU -> it's untested, but no one has said it doesn't work, so it seems to work
46
# more or less self-invented filters, so the exactness isn't too meaningful
47
E = Exact implementation
48
e = almost exact implementation (slightly different rounding, ...)
49
a = alternative / approximate impl
50
c = checked against the other implementations (-vo md5)
51
*/
52

    
53
/*
54
TODO:
55
reduce the time wasted on the mem transfer
56
unroll stuff if instructions depend too much on the prior one
57
move YScale thing to the end instead of fixing QP
58
write a faster and higher quality deblocking filter :)
59
make the mainloop more flexible (variable number of blocks at once
60
        (the if/else stuff per block is slowing things down)
61
compare the quality & speed of all filters
62
split this huge file
63
optimize c versions
64
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
65
...
66
*/
67

    
68
//Changelog: use the CVS log
69

    
70
#include "config.h"
71
#include <inttypes.h>
72
#include <stdio.h>
73
#include <stdlib.h>
74
#include <string.h>
75
#ifdef HAVE_MALLOC_H
76
#include <malloc.h>
77
#endif
78
//#undef HAVE_MMX2
79
//#define HAVE_3DNOW
80
//#undef HAVE_MMX
81
//#undef ARCH_X86
82
//#define DEBUG_BRIGHTNESS
83
#ifdef USE_FASTMEMCPY
84
#include "libvo/fastmemcpy.h"
85
#endif
86
#include "postprocess.h"
87
#include "postprocess_internal.h"
88

    
89
#include "mangle.h" //FIXME should be supressed
90

    
91
/* Fallback for builds without memalign(): the alignment argument is dropped
   and the request is served by plain malloc(). */
#ifndef HAVE_MEMALIGN
#define memalign(alignment, size) malloc(size)
#endif
94

    
95
/* Small arithmetic helpers. Each argument may be evaluated more than once,
   so do not pass expressions with side effects. */
#define MIN(a, b)  ((a) > (b) ? (b) : (a))
#define MAX(a, b)  ((a) < (b) ? (b) : (a))
#define ABS(a)     ((a) > 0 ? (a) : (-(a)))
/* Note: SIGN() yields -1 for zero as well as for negative values. */
#define SIGN(a)    ((a) > 0 ? 1 : -1)

/* Size of the scratch buffer used while parsing a mode string. */
#define GET_MODE_BUFFER_SIZE 500
/* Capacity of the per-filter option pointer array (including its NULL end marker). */
#define OPTIONS_ARRAY_SIZE   10
/* Edge length of the blocks the filters operate on. */
#define BLOCK_SIZE           8
#define TEMP_STRIDE          8
104
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
105

    
106
#ifdef ARCH_X86
/*
 * 8-byte aligned 64-bit constants.
 * wNN: four 16-bit lanes each holding 0x00NN.
 * bNN: eight bytes each holding 0xNN.
 * NOTE(review): presumably operands for the MMX code in the template file -
 * they are not read by any filter visible in this part of the file.
 */
static uint64_t __attribute__((aligned(8))) w05 = 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) w20 = 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) b00 = 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) b01 = 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) b02 = 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) b08 = 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) b80 = 0x8080808080808080ULL;
#endif
115

    
116
/* Verbosity level; the mode parser prints what it is parsing when this is above 1. */
static int verbose = 0;

/* NOTE(review): presumably the threshold of the deringing filter - it is not
   referenced in this part of the file. */
static const int deringThreshold = 20;
119

    
120

    
121
static struct PPFilter filters[]=
122
{
123
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
124
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
125
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
126
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
127
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
128
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
129
        {"dr", "dering",                 1, 5, 6, DERING},
130
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
131
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
132
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
133
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
134
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
135
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
136
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
137
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
138
        {NULL, NULL,0,0,0,0} //End Marker
139
};
140

    
141
/*
 * Alias table used by the mode parser: consecutive (alias, expansion) pairs,
 * terminated by a single NULL. An alias found in a mode string is replaced
 * by its expansion before the filter names are looked up.
 */
#define PP_CHAIN_DEFAULT "hdeblock:a,vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400"
#define PP_CHAIN_FAST    "x1hdeblock:a,x1vdeblock:a,dering:a,autolevels,tmpnoise:a:150:200:400"
static char *replaceTable[]=
{
    "default", PP_CHAIN_DEFAULT,
    "de",      PP_CHAIN_DEFAULT,
    "fast",    PP_CHAIN_FAST,
    "fa",      PP_CHAIN_FAST,
    NULL /* end marker */
};
#undef PP_CHAIN_DEFAULT
#undef PP_CHAIN_FAST
149

    
150
#ifdef ARCH_X86
/*
 * Does no useful work: it only references every packed constant once so the
 * compiler does not report them as unused.
 */
static inline void unusedVariableWarningFixer()
{
    const uint64_t sum= w05 + w20 + b00 + b01 + b02 + b08 + b80;

    if(sum == 0)
        b00= 0;
}
#endif
156

    
157

    
158
#ifdef ARCH_X86
159
/**
 * Execute the x86 "prefetchnta" instruction on the given address.
 * The __asm__ spelling is used because the plain asm keyword is not
 * available when compiling in a strict ISO mode (-std=c99 / -std=c11).
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(    "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
165

    
166
/**
 * Execute the x86 "prefetcht0" instruction on the given address.
 * The __asm__ spelling is used because the plain asm keyword is not
 * available when compiling in a strict ISO mode (-std=c99 / -std=c11).
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(    "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
172

    
173
/**
 * Execute the x86 "prefetcht1" instruction on the given address.
 * The __asm__ spelling is used because the plain asm keyword is not
 * available when compiling in a strict ISO mode (-std=c99 / -std=c11).
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(    "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
179

    
180
/**
 * Execute the x86 "prefetcht2" instruction on the given address.
 * The __asm__ spelling is used because the plain asm keyword is not
 * available when compiling in a strict ISO mode (-std=c99 / -std=c11).
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(    "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
186
#endif
187

    
188
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
189

    
190
/**
191
 * Check if the given 8x8 Block is mostly "flat"
192
 */
193
static inline int isHorizDC(uint8_t src[], int stride, PPContext *c)
194
{
195
        int numEq= 0;
196
        int y;
197
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
198
        const int dcThreshold= dcOffset*2 + 1;
199
        for(y=0; y<BLOCK_SIZE; y++)
200
        {
201
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
202
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
203
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
204
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
205
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
206
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
207
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
208
                src+= stride;
209
        }
210
        return numEq > c->ppMode.flatnessThreshold;
211
}
212

    
213
/**
214
 * Check if the middle 8x8 Block in the given 8x16 block is flat
215
 */
216
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
217
        int numEq= 0;
218
        int y;
219
        const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1;
220
        const int dcThreshold= dcOffset*2 + 1;
221
        src+= stride*4; // src points to begin of the 8x8 Block
222
        for(y=0; y<BLOCK_SIZE-1; y++)
223
        {
224
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232
                src+= stride;
233
        }
234
        return numEq > c->ppMode.flatnessThreshold;
235
}
236

    
237
/**
 * Check that the first and the last pixel of a row differ by at most 2*QP.
 * Only src[0] and src[7] are examined; stride is not used.
 * @return 1 if |src[0] - src[7]| <= 2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk(uint8_t src[], int stride, int QP)
{
    const int spread= abs(src[0] - src[7]);

    return spread <= 2*QP;
}
243

    
244
static inline void doHorizDefFilter(uint8_t dst[], int stride, int QP)
245
{
246
        int y;
247
        for(y=0; y<BLOCK_SIZE; y++)
248
        {
249
                const int middleEnergy= 5*(dst[4] - dst[5]) + 2*(dst[2] - dst[5]);
250

    
251
                if(ABS(middleEnergy) < 8*QP)
252
                {
253
                        const int q=(dst[3] - dst[4])/2;
254
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
255
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
256

    
257
                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
258
                        d= MAX(d, 0);
259

    
260
                        d= (5*d + 32) >> 6;
261
                        d*= SIGN(-middleEnergy);
262

    
263
                        if(q>0)
264
                        {
265
                                d= d<0 ? 0 : d;
266
                                d= d>q ? q : d;
267
                        }
268
                        else
269
                        {
270
                                d= d>0 ? 0 : d;
271
                                d= d<q ? q : d;
272
                        }
273

    
274
                        dst[3]-= d;
275
                        dst[4]+= d;
276
                }
277
                dst+= stride;
278
        }
279
}
280

    
281
/**
282
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
283
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
284
 */
285
static inline void doHorizLowPass(uint8_t dst[], int stride, int QP)
286
{
287

    
288
        int y;
289
        for(y=0; y<BLOCK_SIZE; y++)
290
        {
291
                const int first= ABS(dst[-1] - dst[0]) < QP ? dst[-1] : dst[0];
292
                const int last= ABS(dst[8] - dst[7]) < QP ? dst[8] : dst[7];
293

    
294
                int sums[9];
295
                sums[0] = first + dst[0];
296
                sums[1] = dst[0] + dst[1];
297
                sums[2] = dst[1] + dst[2];
298
                sums[3] = dst[2] + dst[3];
299
                sums[4] = dst[3] + dst[4];
300
                sums[5] = dst[4] + dst[5];
301
                sums[6] = dst[5] + dst[6];
302
                sums[7] = dst[6] + dst[7];
303
                sums[8] = dst[7] + last;
304

    
305
                dst[0]= ((sums[0]<<2) + ((first + sums[2])<<1) + sums[4] + 8)>>4;
306
                dst[1]= ((dst[1]<<2) + ((first + sums[0] + sums[3])<<1) + sums[5] + 8)>>4;
307
                dst[2]= ((dst[2]<<2) + ((first + sums[1] + sums[4])<<1) + sums[6] + 8)>>4;
308
                dst[3]= ((dst[3]<<2) + ((sums[2] + sums[5])<<1) + sums[0] + sums[7] + 8)>>4;
309
                dst[4]= ((dst[4]<<2) + ((sums[3] + sums[6])<<1) + sums[1] + sums[8] + 8)>>4;
310
                dst[5]= ((dst[5]<<2) + ((last + sums[7] + sums[4])<<1) + sums[2] + 8)>>4;
311
                dst[6]= (((last + dst[6])<<2) + ((dst[7] + sums[5])<<1) + sums[3] + 8)>>4;
312
                dst[7]= ((sums[8]<<2) + ((last + sums[6])<<1) + sums[4] + 8)>>4;
313

    
314
                dst+= stride;
315
        }
316
}
317

    
318
/**
319
 * Experimental Filter 1 (Horizontal)
320
 * will not damage linear gradients
321
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
322
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
323
 * MMX2 version does correct clipping C version doesnt
324
 * not identical with the vertical one
325
 */
326
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
327
{
328
        int y;
329
        static uint64_t *lut= NULL;
330
        if(lut==NULL)
331
        {
332
                int i;
333
                lut= (uint64_t*)memalign(8, 256*8);
334
                for(i=0; i<256; i++)
335
                {
336
                        int v= i < 128 ? 2*i : 2*(i-256);
337
/*
338
//Simulate 112242211 9-Tap filter
339
                        uint64_t a= (v/16) & 0xFF;
340
                        uint64_t b= (v/8) & 0xFF;
341
                        uint64_t c= (v/4) & 0xFF;
342
                        uint64_t d= (3*v/8) & 0xFF;
343
*/
344
//Simulate piecewise linear interpolation
345
                        uint64_t a= (v/16) & 0xFF;
346
                        uint64_t b= (v*3/16) & 0xFF;
347
                        uint64_t c= (v*5/16) & 0xFF;
348
                        uint64_t d= (7*v/16) & 0xFF;
349
                        uint64_t A= (0x100 - a)&0xFF;
350
                        uint64_t B= (0x100 - b)&0xFF;
351
                        uint64_t C= (0x100 - c)&0xFF;
352
                        uint64_t D= (0x100 - c)&0xFF;
353

    
354
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
355
                                (D<<24) | (C<<16) | (B<<8) | (A);
356
                        //lut[i] = (v<<32) | (v<<24);
357
                }
358
        }
359

    
360
        for(y=0; y<BLOCK_SIZE; y++)
361
        {
362
                int a= src[1] - src[2];
363
                int b= src[3] - src[4];
364
                int c= src[5] - src[6];
365

    
366
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
367

    
368
                if(d < QP)
369
                {
370
                        int v = d * SIGN(-b);
371

    
372
                        src[1] +=v/8;
373
                        src[2] +=v/4;
374
                        src[3] +=3*v/8;
375
                        src[4] -=3*v/8;
376
                        src[5] -=v/4;
377
                        src[6] -=v/8;
378

    
379
                }
380
                src+=stride;
381
        }
382
}
383

    
384

    
385
/*
 * Selection of the postprocess_template.c variants that get compiled.
 * There are C, MMX, MMX2 and 3DNow variants; there is no combined
 * 3DNow+MMX2 one.
 */

/* Plain C: needed when MMX is not available or the CPU is detected at run time. */
#if !defined(HAVE_MMX) || defined(RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_X86

#if (defined(HAVE_MMX) && !defined(HAVE_3DNOW) && !defined(HAVE_MMX2)) || defined(RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined(HAVE_MMX2) || defined(RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined(HAVE_3DNOW) && !defined(HAVE_MMX2)) || defined(RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

/* Clear the configured feature macros; every section below defines exactly
   the set its template instance is meant to see. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86

/* C variant: functions get the suffix _C */
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

/* MMX variant: functions get the suffix _MMX */
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

/* MMX2 variant: functions get the suffix _MMX2 */
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

/* 3DNow variant: functions get the suffix _3DNow */
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
452

    
453
// minor note: the HAVE_xyz macros no longer reflect the build configuration after this point, so don't rely on them
454

    
455
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
456
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
457
{
458
        PPContext *c= (PPContext *)vc;
459
        PPMode *ppMode= (PPMode *)vm;
460
        c->ppMode= *ppMode; //FIXME
461

    
462
        // useing ifs here as they are faster than function pointers allthough the
463
        // difference wouldnt be messureable here but its much better because
464
        // someone might exchange the cpu whithout restarting mplayer ;)
465
#ifdef RUNTIME_CPUDETECT
466
#ifdef ARCH_X86
467
        // ordered per speed fasterst first
468
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
469
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
470
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
471
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
472
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
473
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
474
        else
475
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
476
#else
477
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
478
#endif
479
#else //RUNTIME_CPUDETECT
480
#ifdef HAVE_MMX2
481
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
482
#elif defined (HAVE_3DNOW)
483
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
484
#elif defined (HAVE_MMX)
485
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
486
#else
487
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
488
#endif
489
#endif //!RUNTIME_CPUDETECT
490
}
491

    
492
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
493
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
494

    
495
/* -pp command line help.
 * Kept in sync with the parser tables: the long name of "fq" is spelled
 * "forcequant" (names are compared case-sensitively), and the "de"/"fa"
 * lines show the complete expansions, including the tmpnoise part.
 */
char *pp_help=
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint        default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                        de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"Filters                        Options\n"
"short        long name        short        long option        Description\n"
"*        *                a        autoq                cpu power dependant enabler\n"
"                        c        chrom                chrominance filtring enabled\n"
"                        y        nochrom                chrominance filtring disabled\n"
"hb        hdeblock        (2 Threshold)                horizontal deblocking filter\n"
"        1. difference factor: default=64, higher -> more deblocking\n"
"        2. flatness threshold: default=40, lower -> more deblocking\n"
"                        the h & v deblocking filters share these\n"
"                        so u cant set different thresholds for h / v\n"
"vb        vdeblock        (2 Threshold)                vertical deblocking filter\n"
"h1        x1hdeblock                                Experimental h deblock filter 1\n"
"v1        x1vdeblock                                Experimental v deblock filter 1\n"
"dr        dering                                        Deringing filter\n"
"al        autolevels                                automatic brightness / contrast\n"
"                        f        fullyrange        stretch luminance to (0..255)\n"
"lb        linblenddeint                                linear blend deinterlacer\n"
"li        linipoldeint                                linear interpolating deinterlace\n"
"ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
"md        mediandeint                                median deinterlacer\n"
"fd        ffmpegdeint                                ffmpeg deinterlacer\n"
"de        default                                        hb:a,vb:a,dr:a,al,tn:a:150:200:400\n"
"fa        fast                                        h1:a,v1:a,dr:a,al,tn:a:150:200:400\n"
"tn        tmpnoise        (3 Thresholds)                Temporal Noise Reducer\n"
"                        1. <= 2. <= 3.                larger -> stronger filtering\n"
"fq        forcequant        <quantizer>                Force quantizer\n"
;
532

    
533
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
534
{
535
        char temp[GET_MODE_BUFFER_SIZE];
536
        char *p= temp;
537
        char *filterDelimiters= ",/";
538
        char *optionDelimiters= ":";
539
        struct PPMode *ppMode;
540
        char *filterToken;
541

    
542
        ppMode= memalign(8, sizeof(PPMode));
543
        
544
        ppMode->lumMode= 0;
545
        ppMode->chromMode= 0;
546
        ppMode->maxTmpNoise[0]= 700;
547
        ppMode->maxTmpNoise[1]= 1500;
548
        ppMode->maxTmpNoise[2]= 3000;
549
        ppMode->maxAllowedY= 234;
550
        ppMode->minAllowedY= 16;
551
        ppMode->baseDcDiff= 256/4;
552
        ppMode->flatnessThreshold= 56-16;
553
        ppMode->maxClippedThreshold= 0.01;
554
        ppMode->error=0;
555

    
556
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
557

    
558
        if(verbose>1) printf("pp: %s\n", name);
559

    
560
        for(;;){
561
                char *filterName;
562
                int q= 1000000; //PP_QUALITY_MAX;
563
                int chrom=-1;
564
                char *option;
565
                char *options[OPTIONS_ARRAY_SIZE];
566
                int i;
567
                int filterNameOk=0;
568
                int numOfUnknownOptions=0;
569
                int enable=1; //does the user want us to enabled or disabled the filter
570

    
571
                filterToken= strtok(p, filterDelimiters);
572
                if(filterToken == NULL) break;
573
                p+= strlen(filterToken) + 1; // p points to next filterToken
574
                filterName= strtok(filterToken, optionDelimiters);
575
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
576

    
577
                if(*filterName == '-')
578
                {
579
                        enable=0;
580
                        filterName++;
581
                }
582

    
583
                for(;;){ //for all options
584
                        option= strtok(NULL, optionDelimiters);
585
                        if(option == NULL) break;
586

    
587
                        if(verbose>1) printf("pp: option: %s\n", option);
588
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
589
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
590
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
591
                        else
592
                        {
593
                                options[numOfUnknownOptions] = option;
594
                                numOfUnknownOptions++;
595
                        }
596
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
597
                }
598
                options[numOfUnknownOptions] = NULL;
599

    
600
                /* replace stuff from the replace Table */
601
                for(i=0; replaceTable[2*i]!=NULL; i++)
602
                {
603
                        if(!strcmp(replaceTable[2*i], filterName))
604
                        {
605
                                int newlen= strlen(replaceTable[2*i + 1]);
606
                                int plen;
607
                                int spaceLeft;
608

    
609
                                if(p==NULL) p= temp, *p=0;         //last filter
610
                                else p--, *p=',';                //not last filter
611

    
612
                                plen= strlen(p);
613
                                spaceLeft= p - temp + plen;
614
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
615
                                {
616
                                        ppMode->error++;
617
                                        break;
618
                                }
619
                                memmove(p + newlen, p, plen+1);
620
                                memcpy(p, replaceTable[2*i + 1], newlen);
621
                                filterNameOk=1;
622
                        }
623
                }
624

    
625
                for(i=0; filters[i].shortName!=NULL; i++)
626
                {
627
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
628
                        if(   !strcmp(filters[i].longName, filterName)
629
                           || !strcmp(filters[i].shortName, filterName))
630
                        {
631
                                ppMode->lumMode &= ~filters[i].mask;
632
                                ppMode->chromMode &= ~filters[i].mask;
633

    
634
                                filterNameOk=1;
635
                                if(!enable) break; // user wants to disable it
636

    
637
                                if(q >= filters[i].minLumQuality)
638
                                        ppMode->lumMode|= filters[i].mask;
639
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
640
                                        if(q >= filters[i].minChromQuality)
641
                                                ppMode->chromMode|= filters[i].mask;
642

    
643
                                if(filters[i].mask == LEVEL_FIX)
644
                                {
645
                                        int o;
646
                                        ppMode->minAllowedY= 16;
647
                                        ppMode->maxAllowedY= 234;
648
                                        for(o=0; options[o]!=NULL; o++)
649
                                        {
650
                                                if(  !strcmp(options[o],"fullyrange")
651
                                                   ||!strcmp(options[o],"f"))
652
                                                {
653
                                                        ppMode->minAllowedY= 0;
654
                                                        ppMode->maxAllowedY= 255;
655
                                                        numOfUnknownOptions--;
656
                                                }
657
                                        }
658
                                }
659
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
660
                                {
661
                                        int o;
662
                                        int numOfNoises=0;
663

    
664
                                        for(o=0; options[o]!=NULL; o++)
665
                                        {
666
                                                char *tail;
667
                                                ppMode->maxTmpNoise[numOfNoises]=
668
                                                        strtol(options[o], &tail, 0);
669
                                                if(tail!=options[o])
670
                                                {
671
                                                        numOfNoises++;
672
                                                        numOfUnknownOptions--;
673
                                                        if(numOfNoises >= 3) break;
674
                                                }
675
                                        }
676
                                }
677
                                else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK)
678
                                {
679
                                        int o;
680

    
681
                                        for(o=0; options[o]!=NULL && o<2; o++)
682
                                        {
683
                                                char *tail;
684
                                                int val= strtol(options[o], &tail, 0);
685
                                                if(tail==options[o]) break;
686

    
687
                                                numOfUnknownOptions--;
688
                                                if(o==0) ppMode->baseDcDiff= val;
689
                                                else ppMode->flatnessThreshold= val;
690
                                        }
691
                                }
692
                                else if(filters[i].mask == FORCE_QUANT)
693
                                {
694
                                        int o;
695
                                        ppMode->forcedQuant= 15;
696

    
697
                                        for(o=0; options[o]!=NULL && o<1; o++)
698
                                        {
699
                                                char *tail;
700
                                                int val= strtol(options[o], &tail, 0);
701
                                                if(tail==options[o]) break;
702

    
703
                                                numOfUnknownOptions--;
704
                                                ppMode->forcedQuant= val;
705
                                        }
706
                                }
707
                        }
708
                }
709
                if(!filterNameOk) ppMode->error++;
710
                ppMode->error += numOfUnknownOptions;
711
        }
712

    
713
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
714
        if(ppMode->error)
715
        {
716
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
717
                free(ppMode);
718
                return NULL;
719
        }
720
        return ppMode;
721
}
722

    
723
/**
 * Releases a postprocessing mode handle.
 *
 * @param mode handle to release; passing NULL is allowed and does nothing
 */
void pp_free_mode(pp_mode_t *mode)
{
        /* free() already ignores a NULL pointer, so no explicit test is needed */
        free(mode);
}
726

    
727
/**
 * Replaces the buffer stored in *p with a freshly allocated, zero-filled one.
 *
 * The previous buffer (if any) is released with free(); its contents are NOT
 * carried over, so despite the name this is "free + aligned calloc", not a
 * realloc().
 *
 * @param p         address of the pointer to replace; must not be NULL
 * @param alignment required alignment in bytes, forwarded to memalign()
 * @param size      number of bytes to allocate and clear
 *
 * On return *p is either the new zeroed buffer, or NULL when size is negative
 * or the allocation failed. It never points at the released old buffer.
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);       /* free(NULL) is a no-op */
        *p= NULL;       /* never leave a dangling pointer behind */

        /* a negative int would turn into a huge size_t below */
        if(size < 0) return;

        *p= memalign(alignment, size);
        /* only clear the block if we actually got one */
        if(*p) memset(*p, 0, size);
}
732

    
733
/**
 * (Re)allocates every work buffer owned by a context for the given geometry.
 *
 * All buffers are replaced through reallocAlign(), and c->stride is updated
 * to the supplied stride.
 *
 * @param c      context whose buffers are replaced
 * @param width  picture width in pixels
 * @param height picture height in pixels
 * @param stride line size used to dimension the stride-dependent buffers
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride)
{
        const int mbWidth  = (width  + 15) >> 4;   /* width in 16-pixel units, rounded up  */
        const int mbHeight = (height + 15) >> 4;   /* height in 16-pixel units, rounded up */
        const int lineBufSize = stride*24;
        /* every histogram bin starts out with the same value */
        const int histogramSeed = width*height/64*15/256;
        int bin, plane;

        c->stride = stride;

        reallocAlign((void **)&c->tempDst,    8, lineBufSize);
        reallocAlign((void **)&c->tempSrc,    8, lineBufSize);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        for (bin = 0; bin < 256; bin++) {
                c->yHistogram[bin] = histogramSeed;
        }

        for (plane = 0; plane < 3; plane++) {
                /* Note: the extra 17*1024 bytes are padding so that reads/writes
                   running past the end of the buffer do not have to be worried about */
                reallocAlign((void **)&c->tempBlured[plane], 8,
                             stride*mbHeight*16 + 17*1024);
                /* FIXME size */
                reallocAlign((void **)&c->tempBluredPast[plane], 8,
                             256*((height+7)&(~7))/2 + 17*1024);
        }

        reallocAlign((void **)&c->deintTemp,     8, width+16);
        reallocAlign((void **)&c->nonBQPTable,   8, mbWidth*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
758

    
759
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
760
        PPContext *c= memalign(32, sizeof(PPContext));
761
        int stride= (width+15)&(~15); //assumed / will realloc if needed
762
        
763
        memset(c, 0, sizeof(PPContext));
764
        c->cpuCaps= cpuCaps;
765
        if(cpuCaps&PP_FORMAT){
766
                c->hChromaSubSample= cpuCaps&0x3;
767
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
768
        }else{
769
                c->hChromaSubSample= 1;
770
                c->vChromaSubSample= 1;
771
        }
772

    
773
        reallocBuffers(c, width, height, stride);
774
        
775
        c->frameNum=-1;
776

    
777
        return c;
778
}
779

    
780
void pp_free_context(void *vc){
781
        PPContext *c = (PPContext*)vc;
782
        int i;
783
        
784
        for(i=0; i<3; i++) free(c->tempBlured[i]);
785
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
786
        
787
        free(c->tempBlocks);
788
        free(c->yHistogram);
789
        free(c->tempDst);
790
        free(c->tempSrc);
791
        free(c->deintTemp);
792
        free(c->nonBQPTable);
793
        free(c->forcedQPTable);
794
        
795
        memset(c, 0, sizeof(PPContext));
796

    
797
        free(c);
798
}
799

    
800
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
801
                 uint8_t * dst[3], int dstStride[3],
802
                 int width, int height,
803
                 QP_STORE_T *QP_store,  int QPStride,
804
                 pp_mode_t *vm,  void *vc, int pict_type)
805
{
806
        int mbWidth = (width+15)>>4;
807
        int mbHeight= (height+15)>>4;
808
        PPMode *mode = (PPMode*)vm;
809
        PPContext *c = (PPContext*)vc;
810
        int minStride= MAX(srcStride[0], dstStride[0]);
811
        
812
        if(c->stride < minStride)
813
                reallocBuffers(c, width, height, minStride);
814

    
815
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
816
        {
817
                int i;
818
                QP_store= c->forcedQPTable;
819
                QPStride= 0;
820
                if(mode->lumMode & FORCE_QUANT)
821
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
822
                else
823
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
824
        }
825
if(0){
826
int x,y;
827
for(y=0; y<mbHeight; y++){
828
        for(x=0; x<mbWidth; x++){
829
                printf("%2d ", QP_store[x + y*QPStride]);
830
        }
831
        printf("\n");
832
}
833
        printf("\n");
834
}
835
//printf("pict_type:%d\n", pict_type);
836

    
837
        if(pict_type!=3)
838
        {
839
                int x,y;
840
                for(y=0; y<mbHeight; y++){
841
                        for(x=0; x<mbWidth; x++){
842
                                int qscale= QP_store[x + y*QPStride];
843
                                if(qscale&~31)
844
                                    qscale=31;
845
                                c->nonBQPTable[y*mbWidth + x]= qscale;
846
                        }
847
                }
848
        }
849

    
850
        if(verbose>2)
851
        {
852
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
853
        }
854

    
855
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
856
                width, height, QP_store, QPStride, 0, mode, c);
857

    
858
        width  = (width )>>c->hChromaSubSample;
859
        height = (height)>>c->vChromaSubSample;
860

    
861
        if(mode->chromMode)
862
        {
863
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
864
                        width, height, QP_store, QPStride, 1, mode, c);
865
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
866
                        width, height, QP_store, QPStride, 2, mode, c);
867
        }
868
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
869
        {
870
                memcpy(dst[1], src[1], srcStride[1]*height);
871
                memcpy(dst[2], src[2], srcStride[2]*height);
872
        }
873
        else
874
        {
875
                int y;
876
                for(y=0; y<height; y++)
877
                {
878
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
879
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
880
                }
881
        }
882
}
883