Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 59006372

History | View | Annotate | Download (44.1 KB)

1 3057fa66 Arpi
/*
2 b78e7197 Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6 ef85972b Diego Biurrun
 * This file is part of FFmpeg.
7 b78e7197 Diego Biurrun
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22 3057fa66 Arpi
23 b304569a Michael Niedermayer
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27 115329f1 Diego Biurrun
28 3057fa66 Arpi
/*
29 bb270c08 Diego Biurrun
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49 d5a1a995 Michael Niedermayer

50 117e45b0 Michael Niedermayer
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
51
# more or less self-invented filters, so the exactness isn't too meaningful
52 3057fa66 Arpi
E = Exact implementation
53 acced553 Michael Niedermayer
e = almost exact implementation (slightly different rounding,...)
54 3057fa66 Arpi
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56 b0ac780a Michael Niedermayer
p = partially optimized, still some work to do
57 3057fa66 Arpi
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64 13e00528 Arpi
write a faster and higher quality deblocking filter :)
65 d5a1a995 Michael Niedermayer
make the mainloop more flexible (variable number of blocks at once
66 bb270c08 Diego Biurrun
        (the if/else stuff per block is slowing things down)
67 9f45d04d Michael Niedermayer
compare the quality & speed of all filters
68
split this huge file
69 8405b3fd Michael Niedermayer
optimize c versions
70 117e45b0 Michael Niedermayer
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 3057fa66 Arpi
...
72 13e00528 Arpi
*/
73
74 36b1b0bc Diego Biurrun
//Changelog: use the Subversion log
75 3057fa66 Arpi
76 9858f773 Michael Niedermayer
#include "config.h"
77 6ab6c7c3 Luca Barbato
#include "avutil.h"
78 3057fa66 Arpi
#include <inttypes.h>
79
#include <stdio.h>
80 d5a1a995 Michael Niedermayer
#include <stdlib.h>
81 911879d1 Michael Niedermayer
#include <string.h>
82 dda87e9f Pierre Lombard
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85 3057fa66 Arpi
//#undef HAVE_MMX2
86 13e00528 Arpi
//#define HAVE_3DNOW
87 3057fa66 Arpi
//#undef HAVE_MMX
88 cc9b0679 Michael Niedermayer
//#undef ARCH_X86
89 7f16f6e6 Michael Niedermayer
//#define DEBUG_BRIGHTNESS
90 bba9b16c Michael Niedermayer
#ifdef USE_FASTMEMCPY
91 f4bd289a Diego Biurrun
#include "libvo/fastmemcpy.h"
92 70d4f2da Michael Niedermayer
#endif
93 13e00528 Arpi
#include "postprocess.h"
94 c41d972d Michael Niedermayer
#include "postprocess_internal.h"
95 bba9b16c Michael Niedermayer
96
#include "mangle.h" //FIXME should be supressed
97 3057fa66 Arpi
98 a7b2871c Romain Dolbeau
#ifdef HAVE_ALTIVEC_H
99
#include <altivec.h>
100
#endif
101
102 911879d1 Michael Niedermayer
#define GET_MODE_BUFFER_SIZE 500
103
#define OPTIONS_ARRAY_SIZE 10
104 9c9e467d Michael Niedermayer
#define BLOCK_SIZE 8
105
#define TEMP_STRIDE 8
106
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
107 911879d1 Michael Niedermayer
108 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
109 bb270c08 Diego Biurrun
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
110
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
111
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
112
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
113
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
114
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
115
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
116
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
117 b28daef8 Michael Niedermayer
#endif
118 3057fa66 Arpi
119 134eb1e5 Michael Niedermayer
static uint8_t clip_table[3*256];
120
static uint8_t * const clip_tab= clip_table + 256;
121
122 3f1d4e96 Dmitry Baryshkov
static const int attribute_used deringThreshold= 20;
123 3057fa66 Arpi
124 9c9e467d Michael Niedermayer
125 911879d1 Michael Niedermayer
static struct PPFilter filters[]=
126
{
127 bb270c08 Diego Biurrun
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
128
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
129
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
130
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
131
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
132
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
133
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
134
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
135
        {"dr", "dering",                1, 5, 6, DERING},
136
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
137
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
138
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
139
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
140
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
141
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
142
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
143
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
144
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
145
        {NULL, NULL,0,0,0,0} //End Marker
146 911879d1 Michael Niedermayer
};
147
148 7b49ce2e Stefan Huehner
static const char *replaceTable[]=
149 911879d1 Michael Niedermayer
{
150 bb270c08 Diego Biurrun
        "default",      "hdeblock:a,vdeblock:a,dering:a",
151
        "de",           "hdeblock:a,vdeblock:a,dering:a",
152
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
153
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
154
        "ac",           "ha:a:128:7,va:a,dering:a",
155
        NULL //End Marker
156 911879d1 Michael Niedermayer
};
157
158 3057fa66 Arpi
159 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
160 3057fa66 Arpi
/**
 * Emit the x86 "prefetchnta" instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
        __asm__ volatile(
                "prefetchnta (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}

/**
 * Emit the x86 "prefetcht0" instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
        __asm__ volatile(
                "prefetcht0 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}

/**
 * Emit the x86 "prefetcht1" instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
        __asm__ volatile(
                "prefetcht1 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}

/**
 * Emit the x86 "prefetcht2" instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
        __asm__ volatile(
                "prefetcht2 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
187 9a722af7 Arpi
#endif
188 3057fa66 Arpi
189 cc9b0679 Michael Niedermayer
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
190 3057fa66 Arpi
191 cf5ec61d Michael Niedermayer
/**
192
 * Check if the given 8x8 Block is mostly "flat"
193
 */
194 b0ac780a Michael Niedermayer
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
{
        /* Two horizontally adjacent pixels count as "equal" when
         * (unsigned)(difference + dcOffset) < dcThreshold; for a non-negative
         * dcOffset that is the same as |difference| <= dcOffset. */
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
        const int dcThreshold= dcOffset*2 + 1;
        int numEq= 0;
        int row, col;

        for(row=0; row<BLOCK_SIZE; row++, src+= stride)
        {
                /* 7 neighbour pairs: (0,1), (1,2), ... (6,7) */
                for(col=0; col<7; col++)
                {
                        if(((unsigned)(src[col] - src[col+1] + dcOffset)) < dcThreshold)
                                numEq++;
                }
        }

        /* flat if more pairs matched than the configured threshold */
        return numEq > c->ppMode.flatnessThreshold;
}
214
215
/**
216
 * Check if the middle 8x8 Block in the given 8x16 block is flat
217
 */
218
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
        const int dcThreshold= dcOffset*2 + 1;
        const uint8_t *line= src + stride*4; // first line of the 8x8 block
        int numEq= 0;
        int x, y;

        /* compare each of the first 7 lines with the line below it,
         * 8 pixels per line */
        for(y=0; y<BLOCK_SIZE-1; y++, line+= stride)
        {
                for(x=0; x<8; x++)
                {
                        if(((unsigned)(line[x] - line[x+stride] + dcOffset)) < dcThreshold)
                                numEq++;
                }
        }

        return numEq > c->ppMode.flatnessThreshold;
}
239
240 b0ac780a Michael Niedermayer
/**
 * Sparse range check on 8 rows: returns 1 if every sampled pixel pair
 * differs by at most 2*QP, 0 as soon as one pair exceeds that.
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        const unsigned limit= 4*QP;
        int row;
#if 1
        /* one column pair is sampled per row; the pattern repeats every 4 rows */
        static const uint8_t pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };

        for(row=0; row<8; row++, src+= stride){
                const uint8_t *p= pairs[row & 3];
                /* diff + 2*QP as unsigned > 4*QP  <=>  diff outside [-2*QP, 2*QP] */
                if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > limit) return 0;
        }
#else
        for(row=0; row<8; row++, src+= stride){
                if((unsigned)(src[0] - src[7] + 2*QP) > limit) return 0;
        }
#endif
        return 1;
}
262 cf5ec61d Michael Niedermayer
263 cb482d25 Michael Niedermayer
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
264
{
265
#if 1
266
#if 1
267 bb270c08 Diego Biurrun
        int x;
268
        src+= stride*4;
269
        for(x=0; x<BLOCK_SIZE; x+=4)
270
        {
271
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
272
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275
        }
276 cb482d25 Michael Niedermayer
#else
277 bb270c08 Diego Biurrun
        int x;
278
        src+= stride*3;
279
        for(x=0; x<BLOCK_SIZE; x++)
280
        {
281
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
282
        }
283 cb482d25 Michael Niedermayer
#endif
284 bb270c08 Diego Biurrun
        return 1;
285 cb482d25 Michael Niedermayer
#else
286 bb270c08 Diego Biurrun
        int x;
287
        src+= stride*4;
288
        for(x=0; x<BLOCK_SIZE; x++)
289
        {
290
                int min=255;
291
                int max=0;
292
                int y;
293
                for(y=0; y<8; y++){
294
                        int v= src[x + y*stride];
295
                        if(v>max) max=v;
296
                        if(v<min) min=v;
297
                }
298
                if(max-min > 2*QP) return 0;
299
        }
300
        return 1;
301 cb482d25 Michael Niedermayer
#endif
302
}
303
304 b0ac780a Michael Niedermayer
/**
 * Classify a block for horizontal filtering.
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() succeeds,
 *         0 if it is flat but isHorizMinMaxOk_C() fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
314
315 cb482d25 Michael Niedermayer
/**
 * Classify a block for vertical filtering.
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() succeeds,
 *         0 if it is flat but isVertMinMaxOk_C() fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
325
326 b0ac780a Michael Niedermayer
/**
 * Default horizontal deblocking filter: for each of the 8 rows, adjusts the
 * two pixels next to the edge between dst[3] and dst[4].
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int row;

        for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
        {
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int leftEnergy, rightEnergy, q, d;

                /* leave rows with a strong step across the edge untouched */
                if(FFABS(middleEnergy) >= 8*c->QP)
                        continue;

                q= (dst[3] - dst[4])/2;
                leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                /* clamp the correction to the range between 0 and q */
                if(q>0)
                {
                        if(d<0) d= 0;
                        if(d>q) d= q;
                }
                else
                {
                        if(d>0) d= 0;
                        if(d<q) d= q;
                }

                dst[3]-= d;
                dst[4]+= d;
        }
}
362
363
/**
364
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
366
 */
367 b0ac780a Michael Niedermayer
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
{
        int row;

        for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
        {
                /* the pixels just outside the block are used only if they are
                 * within QP of the block's border pixels */
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
                int sums[10];
                int i;

                /* running sums, each derived from the previous one */
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
                for(i=1; i<=4; i++)
                        sums[i] = sums[i-1] - first + dst[i+2];
                sums[5] = sums[4] - dst[0] + dst[7];
                for(i=6; i<=9; i++)
                        sums[i] = sums[i-1] - dst[i-5] + last;

                /* all sums are complete, so dst can be overwritten in place */
                for(i=0; i<8; i++)
                        dst[i]= (sums[i] + sums[i+2] + 2*dst[i])>>4;
        }
}
399
400 4e4dcbc5 Michael Niedermayer
/**
401 cc9b0679 Michael Niedermayer
 * Experimental Filter 1 (Horizontal)
402
 * will not damage linear gradients
403
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404
 * can only smooth blocks at the expected locations (it can't smooth them if they moved)
405
 * the MMX2 version does correct clipping, the C version doesn't
406
 * not identical with the vertical one
407 4e4dcbc5 Michael Niedermayer
 */
408 cc9b0679 Michael Niedermayer
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
409
{
410 bb270c08 Diego Biurrun
        int y;
411
        static uint64_t *lut= NULL;
412
        if(lut==NULL)
413
        {
414
                int i;
415 6ab6c7c3 Luca Barbato
                lut = av_malloc(256*8);
416 bb270c08 Diego Biurrun
                for(i=0; i<256; i++)
417
                {
418
                        int v= i < 128 ? 2*i : 2*(i-256);
419 117e45b0 Michael Niedermayer
/*
420 cc9b0679 Michael Niedermayer
//Simulate 112242211 9-Tap filter
421 bb270c08 Diego Biurrun
                        uint64_t a= (v/16) & 0xFF;
422
                        uint64_t b= (v/8) & 0xFF;
423
                        uint64_t c= (v/4) & 0xFF;
424
                        uint64_t d= (3*v/8) & 0xFF;
425 117e45b0 Michael Niedermayer
*/
426 cc9b0679 Michael Niedermayer
//Simulate piecewise linear interpolation
427 bb270c08 Diego Biurrun
                        uint64_t a= (v/16) & 0xFF;
428
                        uint64_t b= (v*3/16) & 0xFF;
429
                        uint64_t c= (v*5/16) & 0xFF;
430
                        uint64_t d= (7*v/16) & 0xFF;
431
                        uint64_t A= (0x100 - a)&0xFF;
432
                        uint64_t B= (0x100 - b)&0xFF;
433
                        uint64_t C= (0x100 - c)&0xFF;
434
                        uint64_t D= (0x100 - c)&0xFF;
435
436
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437
                                (D<<24) | (C<<16) | (B<<8) | (A);
438
                        //lut[i] = (v<<32) | (v<<24);
439
                }
440
        }
441
442
        for(y=0; y<BLOCK_SIZE; y++)
443
        {
444
                int a= src[1] - src[2];
445
                int b= src[3] - src[4];
446
                int c= src[5] - src[6];
447
448 c26abfa5 Diego Biurrun
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449 bb270c08 Diego Biurrun
450
                if(d < QP)
451
                {
452 02305ff3 Diego Biurrun
                        int v = d * FFSIGN(-b);
453 bb270c08 Diego Biurrun
454
                        src[1] +=v/8;
455
                        src[2] +=v/4;
456
                        src[3] +=3*v/8;
457
                        src[4] -=3*v/8;
458
                        src[5] -=v/4;
459
                        src[6] -=v/8;
460
461
                }
462
                src+=stride;
463
        }
464 cc9b0679 Michael Niedermayer
}
465
466 12eebd26 Michael Niedermayer
/**
467
 * accurate deblock filter
468
 */
469 849f1035 Måns Rullgård
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
        const int QP= c->QP;
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
        const int dcThreshold= dcOffset*2 + 1;
        int line;
//START_TIMER
        src+= step*4; // src points to begin of the 8x8 Block

        /* step separates the samples that are filtered together,
         * stride separates the 8 independent lines */
        for(line=0; line<8; line++, src+= stride){
                int numEq= 0;
                int k;

                /* count near-equal neighbours among src[-1*step] .. src[8*step] */
                for(k=-1; k<8; k++){
                        if(((unsigned)(src[k*step] - src[(k+1)*step] + dcOffset)) < dcThreshold)
                                numEq++;
                }

                if(numEq > c->ppMode.flatnessThreshold){
                        /* flat line: low pass it if its value range is small enough */
                        int min= src[0];
                        int max= src[0];

                        for(k=1; k<8; k++){
                                const int v= src[k*step];
                                if(v > max) max= v;
                                if(v < min) min= v;
                        }

                        if(max-min < 2*QP){
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
                                int sums[10];

                                /* running sums, each derived from the previous one */
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
                                for(k=1; k<=4; k++)
                                        sums[k] = sums[k-1] - first + src[(k+2)*step];
                                sums[5] = sums[4] - src[0*step] + src[7*step];
                                for(k=6; k<=9; k++)
                                        sums[k] = sums[k-1] - src[(k-5)*step] + last;

                                /* all sums are complete, so src can be overwritten in place */
                                for(k=0; k<8; k++)
                                        src[k*step]= (sums[k] + sums[k+2] + 2*src[k*step])>>4;
                        }
                }else{
                        /* non-flat line: adjust only the two samples next to the edge */
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);

                        if(FFABS(middleEnergy) < 8*QP){
                                const int q=(src[3*step] - src[4*step])/2;
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );

                                d= FFMAX(d, 0);
                                d= (5*d + 32) >> 6;
                                d*= FFSIGN(-middleEnergy);

                                /* clamp the correction to the range between 0 and q */
                                if(q>0){
                                        if(d<0) d= 0;
                                        if(d>q) d= q;
                                }else{
                                        if(d>0) d= 0;
                                        if(d<q) d= q;
                                }

                                src[3*step]-= d;
                                src[4*step]+= d;
                        }
                }
        }
/*if(step==16){
    STOP_TIMER("step16")
}else{
    STOP_TIMER("stepX")
}*/
}
571 cc9b0679 Michael Niedermayer
572 e89952aa Michael Niedermayer
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
573 cc9b0679 Michael Niedermayer
//Plain C versions
574 e89952aa Michael Niedermayer
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
575
#define COMPILE_C
576
#endif
577
578 b0ac780a Michael Niedermayer
#ifdef ARCH_POWERPC
579
#ifdef HAVE_ALTIVEC
580
#define COMPILE_ALTIVEC
581
#endif //HAVE_ALTIVEC
582
#endif //ARCH_POWERPC
583
584 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
585 e89952aa Michael Niedermayer
586
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587
#define COMPILE_MMX
588
#endif
589
590
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
591
#define COMPILE_MMX2
592
#endif
593
594
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
595
#define COMPILE_3DNOW
596
#endif
597 3cd52279 Diego Biurrun
#endif /* defined(ARCH_X86) */
598 e89952aa Michael Niedermayer
599
#undef HAVE_MMX
600
#undef HAVE_MMX2
601
#undef HAVE_3DNOW
602 b0ac780a Michael Niedermayer
#undef HAVE_ALTIVEC
603 e89952aa Michael Niedermayer
604
#ifdef COMPILE_C
605 cc9b0679 Michael Niedermayer
#undef HAVE_MMX
606
#undef HAVE_MMX2
607
#undef HAVE_3DNOW
608
#define RENAME(a) a ## _C
609
#include "postprocess_template.c"
610 e89952aa Michael Niedermayer
#endif
611 cc9b0679 Michael Niedermayer
612 b0ac780a Michael Niedermayer
#ifdef ARCH_POWERPC
613
#ifdef COMPILE_ALTIVEC
614
#undef RENAME
615
#define HAVE_ALTIVEC
616
#define RENAME(a) a ## _altivec
617
#include "postprocess_altivec_template.c"
618
#include "postprocess_template.c"
619
#endif
620
#endif //ARCH_POWERPC
621
622 cc9b0679 Michael Niedermayer
//MMX versions
623 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX
624 cc9b0679 Michael Niedermayer
#undef RENAME
625
#define HAVE_MMX
626
#undef HAVE_MMX2
627
#undef HAVE_3DNOW
628
#define RENAME(a) a ## _MMX
629
#include "postprocess_template.c"
630 e89952aa Michael Niedermayer
#endif
631 cc9b0679 Michael Niedermayer
632
//MMX2 versions
633 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX2
634 cc9b0679 Michael Niedermayer
#undef RENAME
635
#define HAVE_MMX
636
#define HAVE_MMX2
637
#undef HAVE_3DNOW
638
#define RENAME(a) a ## _MMX2
639
#include "postprocess_template.c"
640 e89952aa Michael Niedermayer
#endif
641 cc9b0679 Michael Niedermayer
642
//3DNOW versions
643 e89952aa Michael Niedermayer
#ifdef COMPILE_3DNOW
644 cc9b0679 Michael Niedermayer
#undef RENAME
645
#define HAVE_MMX
646
#undef HAVE_MMX2
647
#define HAVE_3DNOW
648
#define RENAME(a) a ## _3DNow
649
#include "postprocess_template.c"
650 e89952aa Michael Niedermayer
#endif
651 cc9b0679 Michael Niedermayer
652
// minor note: the HAVE_xyz macros are messed up after this point, so don't use them
653
654
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
655 bb270c08 Diego Biurrun
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
656 cc9b0679 Michael Niedermayer
{
657 bb270c08 Diego Biurrun
        PPContext *c= (PPContext *)vc;
658
        PPMode *ppMode= (PPMode *)vm;
659
        c->ppMode= *ppMode; //FIXME
660 9c9e467d Michael Niedermayer
661 bb270c08 Diego Biurrun
        // useing ifs here as they are faster than function pointers allthough the
662
        // difference wouldnt be messureable here but its much better because
663
        // someone might exchange the cpu whithout restarting mplayer ;)
664 e89952aa Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
665 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
666 bb270c08 Diego Biurrun
        // ordered per speed fasterst first
667
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673
        else
674
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 cc9b0679 Michael Niedermayer
#else
676 b0ac780a Michael Niedermayer
#ifdef ARCH_POWERPC
677
#ifdef HAVE_ALTIVEC
678 71487254 Michael Niedermayer
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
679 bb270c08 Diego Biurrun
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680 b0ac780a Michael Niedermayer
        else
681
#endif
682
#endif
683 bb270c08 Diego Biurrun
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684 be44a4d7 Michael Niedermayer
#endif
685 e89952aa Michael Niedermayer
#else //RUNTIME_CPUDETECT
686
#ifdef HAVE_MMX2
687 bb270c08 Diego Biurrun
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688 e89952aa Michael Niedermayer
#elif defined (HAVE_3DNOW)
689 bb270c08 Diego Biurrun
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690 e89952aa Michael Niedermayer
#elif defined (HAVE_MMX)
691 bb270c08 Diego Biurrun
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692 b0ac780a Michael Niedermayer
#elif defined (HAVE_ALTIVEC)
693 bb270c08 Diego Biurrun
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
694 e89952aa Michael Niedermayer
#else
695 bb270c08 Diego Biurrun
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696 e89952aa Michael Niedermayer
#endif
697
#endif //!RUNTIME_CPUDETECT
698 117e45b0 Michael Niedermayer
}
699
700 cc9b0679 Michael Niedermayer
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
701 bb270c08 Diego Biurrun
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
702 13e00528 Arpi
703 911879d1 Michael Niedermayer
/* -pp Command line Help
704
*/
705 4407a3c4 Michael Niedermayer
/* Help text listing the filter names, their options and usage examples
 * for the postprocessing mode string. */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
748 911879d1 Michael Niedermayer
749 c41d972d Michael Niedermayer
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
750 911879d1 Michael Niedermayer
{
751 bb270c08 Diego Biurrun
        char temp[GET_MODE_BUFFER_SIZE];
752
        char *p= temp;
753 7b49ce2e Stefan Huehner
        const char *filterDelimiters= ",/";
754
        const char *optionDelimiters= ":";
755 bb270c08 Diego Biurrun
        struct PPMode *ppMode;
756
        char *filterToken;
757
758 6ab6c7c3 Luca Barbato
        ppMode= av_malloc(sizeof(PPMode));
759 bb270c08 Diego Biurrun
760
        ppMode->lumMode= 0;
761
        ppMode->chromMode= 0;
762
        ppMode->maxTmpNoise[0]= 700;
763
        ppMode->maxTmpNoise[1]= 1500;
764
        ppMode->maxTmpNoise[2]= 3000;
765
        ppMode->maxAllowedY= 234;
766
        ppMode->minAllowedY= 16;
767
        ppMode->baseDcDiff= 256/8;
768
        ppMode->flatnessThreshold= 56-16-1;
769
        ppMode->maxClippedThreshold= 0.01;
770
        ppMode->error=0;
771
772
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
773
774 e7becfb2 Diego Biurrun
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
775 bb270c08 Diego Biurrun
776
        for(;;){
777
                char *filterName;
778
                int q= 1000000; //PP_QUALITY_MAX;
779
                int chrom=-1;
780
                int luma=-1;
781
                char *option;
782
                char *options[OPTIONS_ARRAY_SIZE];
783
                int i;
784
                int filterNameOk=0;
785
                int numOfUnknownOptions=0;
786
                int enable=1; //does the user want us to enabled or disabled the filter
787
788
                filterToken= strtok(p, filterDelimiters);
789
                if(filterToken == NULL) break;
790
                p+= strlen(filterToken) + 1; // p points to next filterToken
791
                filterName= strtok(filterToken, optionDelimiters);
792 e7becfb2 Diego Biurrun
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
793 bb270c08 Diego Biurrun
794
                if(*filterName == '-')
795
                {
796
                        enable=0;
797
                        filterName++;
798
                }
799
800
                for(;;){ //for all options
801
                        option= strtok(NULL, optionDelimiters);
802
                        if(option == NULL) break;
803
804 e7becfb2 Diego Biurrun
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
805 bb270c08 Diego Biurrun
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
809
                        else
810
                        {
811
                                options[numOfUnknownOptions] = option;
812
                                numOfUnknownOptions++;
813
                        }
814
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
815
                }
816
                options[numOfUnknownOptions] = NULL;
817
818
                /* replace stuff from the replace Table */
819
                for(i=0; replaceTable[2*i]!=NULL; i++)
820
                {
821
                        if(!strcmp(replaceTable[2*i], filterName))
822
                        {
823
                                int newlen= strlen(replaceTable[2*i + 1]);
824
                                int plen;
825
                                int spaceLeft;
826
827
                                if(p==NULL) p= temp, *p=0;      //last filter
828
                                else p--, *p=',';               //not last filter
829
830
                                plen= strlen(p);
831
                                spaceLeft= p - temp + plen;
832
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
833
                                {
834
                                        ppMode->error++;
835
                                        break;
836
                                }
837
                                memmove(p + newlen, p, plen+1);
838
                                memcpy(p, replaceTable[2*i + 1], newlen);
839
                                filterNameOk=1;
840
                        }
841
                }
842
843
                for(i=0; filters[i].shortName!=NULL; i++)
844
                {
845
                        if(   !strcmp(filters[i].longName, filterName)
846
                           || !strcmp(filters[i].shortName, filterName))
847
                        {
848
                                ppMode->lumMode &= ~filters[i].mask;
849
                                ppMode->chromMode &= ~filters[i].mask;
850
851
                                filterNameOk=1;
852
                                if(!enable) break; // user wants to disable it
853
854
                                if(q >= filters[i].minLumQuality && luma)
855
                                        ppMode->lumMode|= filters[i].mask;
856
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857
                                        if(q >= filters[i].minChromQuality)
858
                                                ppMode->chromMode|= filters[i].mask;
859
860
                                if(filters[i].mask == LEVEL_FIX)
861
                                {
862
                                        int o;
863
                                        ppMode->minAllowedY= 16;
864
                                        ppMode->maxAllowedY= 234;
865
                                        for(o=0; options[o]!=NULL; o++)
866
                                        {
867
                                                if(  !strcmp(options[o],"fullyrange")
868
                                                   ||!strcmp(options[o],"f"))
869
                                                {
870
                                                        ppMode->minAllowedY= 0;
871
                                                        ppMode->maxAllowedY= 255;
872
                                                        numOfUnknownOptions--;
873
                                                }
874
                                        }
875
                                }
876
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
877
                                {
878
                                        int o;
879
                                        int numOfNoises=0;
880
881
                                        for(o=0; options[o]!=NULL; o++)
882
                                        {
883
                                                char *tail;
884
                                                ppMode->maxTmpNoise[numOfNoises]=
885
                                                        strtol(options[o], &tail, 0);
886
                                                if(tail!=options[o])
887
                                                {
888
                                                        numOfNoises++;
889
                                                        numOfUnknownOptions--;
890
                                                        if(numOfNoises >= 3) break;
891
                                                }
892
                                        }
893
                                }
894
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
895
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
896
                                {
897
                                        int o;
898
899
                                        for(o=0; options[o]!=NULL && o<2; o++)
900
                                        {
901
                                                char *tail;
902
                                                int val= strtol(options[o], &tail, 0);
903
                                                if(tail==options[o]) break;
904
905
                                                numOfUnknownOptions--;
906
                                                if(o==0) ppMode->baseDcDiff= val;
907
                                                else ppMode->flatnessThreshold= val;
908
                                        }
909
                                }
910
                                else if(filters[i].mask == FORCE_QUANT)
911
                                {
912
                                        int o;
913
                                        ppMode->forcedQuant= 15;
914
915
                                        for(o=0; options[o]!=NULL && o<1; o++)
916
                                        {
917
                                                char *tail;
918
                                                int val= strtol(options[o], &tail, 0);
919
                                                if(tail==options[o]) break;
920
921
                                                numOfUnknownOptions--;
922
                                                ppMode->forcedQuant= val;
923
                                        }
924
                                }
925
                        }
926
                }
927
                if(!filterNameOk) ppMode->error++;
928
                ppMode->error += numOfUnknownOptions;
929
        }
930
931 e7becfb2 Diego Biurrun
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
932 bb270c08 Diego Biurrun
        if(ppMode->error)
933
        {
934 e7becfb2 Diego Biurrun
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
935 6ab6c7c3 Luca Barbato
                av_free(ppMode);
936 bb270c08 Diego Biurrun
                return NULL;
937
        }
938
        return ppMode;
939 911879d1 Michael Niedermayer
}
940
941 c41d972d Michael Niedermayer
/** Release a mode returned by pp_get_mode_by_name_and_quality(). */
void pp_free_mode(pp_mode_t *mode){
    av_free(mode);
}
944
945 88c0bc7e Michael Niedermayer
/**
 * Free *p and replace it with a fresh zero-initialized buffer of size bytes.
 * The old contents are not preserved. The alignment argument is not used by
 * this implementation. *p is whatever av_mallocz() returned, so callers that
 * need to detect failure must check it themselves.
 */
static void reallocAlign(void **p, int alignment, int size){
        av_free(*p);
        *p= av_mallocz(size);
}
949
950 0426af31 Michael Niedermayer
/**
 * (Re)allocate all per-context work buffers for the given dimensions and
 * record the new stride and qpStride in the context. Every buffer is freed
 * and replaced by a zeroed one, so previous contents are lost.
 *
 * @param c         context whose buffers are replaced
 * @param width     picture width in pixels
 * @param height    picture height in pixels
 * @param stride    line stride the temporary buffers are sized for
 * @param qpStride  stride the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        int mbWidth = (width+15)>>4;
        int mbHeight= (height+15)>>4;
        int i;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        // Seed every histogram bin with the same starting value; skipped if
        // the allocation above failed so we do not write through NULL.
        if(c->yHistogram)
                for(i=0; i<256; i++)
                        c->yHistogram[i]= width*height/64*15/256;

        for(i=0; i<3; i++)
        {
                //Note: the +17*1024 is just there so I don't have to worry about r/w over the end
                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
977
978 4cfbf61b Falk Hüffner
static void global_init(void){
979 bb270c08 Diego Biurrun
        int i;
980
        memset(clip_table, 0, 256);
981
        for(i=256; i<512; i++)
982
                clip_table[i]= i;
983
        memset(clip_table+512, 0, 256);
984 134eb1e5 Michael Niedermayer
}
985
986 e7becfb2 Diego Biurrun
/* Name callback for av_codec_context_class: returns the fixed string
 * "postproc" regardless of the object passed in. */
static const char * context_to_name(void * ptr) {
    return "postproc";
}
989
990
/* Class descriptor stored in PPContext.av_class by pp_get_context(); its
 * class name is "Postproc" and its name callback is context_to_name(). */
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
991
992 88c0bc7e Michael Niedermayer
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
993 6ab6c7c3 Luca Barbato
        PPContext *c= av_malloc(sizeof(PPContext));
994 bb270c08 Diego Biurrun
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
995
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
996 115329f1 Diego Biurrun
997 bb270c08 Diego Biurrun
        global_init();
998 134eb1e5 Michael Niedermayer
999 bb270c08 Diego Biurrun
        memset(c, 0, sizeof(PPContext));
1000 e7becfb2 Diego Biurrun
        c->av_class = &av_codec_context_class;
1001 bb270c08 Diego Biurrun
        c->cpuCaps= cpuCaps;
1002
        if(cpuCaps&PP_FORMAT){
1003
                c->hChromaSubSample= cpuCaps&0x3;
1004
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1005
        }else{
1006
                c->hChromaSubSample= 1;
1007
                c->vChromaSubSample= 1;
1008
        }
1009 88c0bc7e Michael Niedermayer
1010 bb270c08 Diego Biurrun
        reallocBuffers(c, width, height, stride, qpStride);
1011 115329f1 Diego Biurrun
1012 bb270c08 Diego Biurrun
        c->frameNum=-1;
1013 45b4f285 Michael Niedermayer
1014 bb270c08 Diego Biurrun
        return c;
1015 45b4f285 Michael Niedermayer
}
1016
1017 9cb54f43 Michael Niedermayer
void pp_free_context(void *vc){
1018 bb270c08 Diego Biurrun
        PPContext *c = (PPContext*)vc;
1019
        int i;
1020 115329f1 Diego Biurrun
1021 6ab6c7c3 Luca Barbato
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1023 115329f1 Diego Biurrun
1024 6ab6c7c3 Luca Barbato
        av_free(c->tempBlocks);
1025
        av_free(c->yHistogram);
1026
        av_free(c->tempDst);
1027
        av_free(c->tempSrc);
1028
        av_free(c->deintTemp);
1029
        av_free(c->stdQPTable);
1030
        av_free(c->nonBQPTable);
1031
        av_free(c->forcedQPTable);
1032 115329f1 Diego Biurrun
1033 bb270c08 Diego Biurrun
        memset(c, 0, sizeof(PPContext));
1034 88c0bc7e Michael Niedermayer
1035 6ab6c7c3 Luca Barbato
        av_free(c);
1036 9c9e467d Michael Niedermayer
}
1037
1038 9cb54f43 Michael Niedermayer
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1039 9c9e467d Michael Niedermayer
                 uint8_t * dst[3], int dstStride[3],
1040 ec487e5d Michael Niedermayer
                 int width, int height,
1041 9c9e467d Michael Niedermayer
                 QP_STORE_T *QP_store,  int QPStride,
1042 bb270c08 Diego Biurrun
                 pp_mode_t *vm,  void *vc, int pict_type)
1043 911879d1 Michael Niedermayer
{
1044 bb270c08 Diego Biurrun
        int mbWidth = (width+15)>>4;
1045
        int mbHeight= (height+15)>>4;
1046
        PPMode *mode = (PPMode*)vm;
1047
        PPContext *c = (PPContext*)vc;
1048 c26abfa5 Diego Biurrun
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1049
        int absQPStride = FFABS(QPStride);
1050 bb270c08 Diego Biurrun
1051
        // c->stride and c->QPStride are always positive
1052
        if(c->stride < minStride || c->qpStride < absQPStride)
1053
                reallocBuffers(c, width, height,
1054 8925915f Diego Biurrun
                                FFMAX(minStride, c->stride),
1055
                                FFMAX(c->qpStride, absQPStride));
1056 bb270c08 Diego Biurrun
1057
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1058
        {
1059
                int i;
1060
                QP_store= c->forcedQPTable;
1061
                absQPStride = QPStride = 0;
1062
                if(mode->lumMode & FORCE_QUANT)
1063
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1064
                else
1065
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1066
        }
1067 0426af31 Michael Niedermayer
1068 bb270c08 Diego Biurrun
        if(pict_type & PP_PICT_TYPE_QP2){
1069
                int i;
1070
                const int count= mbHeight * absQPStride;
1071
                for(i=0; i<(count>>2); i++){
1072
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1073
                }
1074
                for(i<<=2; i<count; i++){
1075
                        c->stdQPTable[i] = QP_store[i]>>1;
1076
                }
1077 0426af31 Michael Niedermayer
                QP_store= c->stdQPTable;
1078 bb270c08 Diego Biurrun
                QPStride= absQPStride;
1079
        }
1080 0426af31 Michael Niedermayer
1081 ec487e5d Michael Niedermayer
if(0){
1082
int x,y;
1083
for(y=0; y<mbHeight; y++){
1084 bb270c08 Diego Biurrun
        for(x=0; x<mbWidth; x++){
1085 e7becfb2 Diego Biurrun
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1086 bb270c08 Diego Biurrun
        }
1087 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1088 ec487e5d Michael Niedermayer
}
1089 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1090 ec487e5d Michael Niedermayer
}
1091 51e19dcc Michael Niedermayer
1092 bb270c08 Diego Biurrun
        if((pict_type&7)!=3)
1093
        {
1094
                if (QPStride >= 0) {
1095
                        int i;
1096
                        const int count= mbHeight * QPStride;
1097
                        for(i=0; i<(count>>2); i++){
1098
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1099
                        }
1100
                        for(i<<=2; i<count; i++){
1101
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102
                        }
1103
                } else {
1104
                        int i,j;
1105
                        for(i=0; i<mbHeight; i++) {
1106
                                    for(j=0; j<absQPStride; j++) {
1107
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1108
                                }
1109
                        }
1110
                }
1111
        }
1112
1113 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114
               mode->lumMode, mode->chromMode);
1115 bb270c08 Diego Biurrun
1116
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117
                width, height, QP_store, QPStride, 0, mode, c);
1118
1119
        width  = (width )>>c->hChromaSubSample;
1120
        height = (height)>>c->vChromaSubSample;
1121
1122
        if(mode->chromMode)
1123
        {
1124
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125
                        width, height, QP_store, QPStride, 1, mode, c);
1126
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127
                        width, height, QP_store, QPStride, 2, mode, c);
1128
        }
1129
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1130
        {
1131
                linecpy(dst[1], src[1], height, srcStride[1]);
1132
                linecpy(dst[2], src[2], height, srcStride[2]);
1133
        }
1134
        else
1135
        {
1136
                int y;
1137
                for(y=0; y<height; y++)
1138
                {
1139
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1141
                }
1142
        }
1143 911879d1 Michael Niedermayer
}