Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ a8988916

History | View | Annotate | Download (44 KB)

1 3057fa66 Arpi
/*
2 b78e7197 Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6 ef85972b Diego Biurrun
 * This file is part of FFmpeg.
7 b78e7197 Diego Biurrun
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22 3057fa66 Arpi
23 b304569a Michael Niedermayer
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27 115329f1 Diego Biurrun
28 3057fa66 Arpi
/*
29 bb270c08 Diego Biurrun
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49 d5a1a995 Michael Niedermayer

50 755bfeab Diego Biurrun
* i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52 3057fa66 Arpi
E = Exact implementation
53 acced553 Michael Niedermayer
e = almost exact implementation (slightly different rounding,...)
54 3057fa66 Arpi
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56 b0ac780a Michael Niedermayer
p = partially optimized, still some work to do
57 3057fa66 Arpi
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64 13e00528 Arpi
write a faster and higher quality deblocking filter :)
65 d5a1a995 Michael Niedermayer
make the mainloop more flexible (variable number of blocks at once
66 bb270c08 Diego Biurrun
        (the if/else stuff per block is slowing things down)
67 9f45d04d Michael Niedermayer
compare the quality & speed of all filters
68
split this huge file
69 8405b3fd Michael Niedermayer
optimize c versions
70 117e45b0 Michael Niedermayer
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 3057fa66 Arpi
...
72 13e00528 Arpi
*/
73
74 36b1b0bc Diego Biurrun
//Changelog: use the Subversion log
75 3057fa66 Arpi
76 9858f773 Michael Niedermayer
#include "config.h"
77 6ab6c7c3 Luca Barbato
#include "avutil.h"
78 3057fa66 Arpi
#include <inttypes.h>
79
#include <stdio.h>
80 d5a1a995 Michael Niedermayer
#include <stdlib.h>
81 911879d1 Michael Niedermayer
#include <string.h>
82 dda87e9f Pierre Lombard
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85 3057fa66 Arpi
//#undef HAVE_MMX2
86 13e00528 Arpi
//#define HAVE_3DNOW
87 3057fa66 Arpi
//#undef HAVE_MMX
88 cc9b0679 Michael Niedermayer
//#undef ARCH_X86
89 7f16f6e6 Michael Niedermayer
//#define DEBUG_BRIGHTNESS
90 13e00528 Arpi
#include "postprocess.h"
91 c41d972d Michael Niedermayer
#include "postprocess_internal.h"
92 bba9b16c Michael Niedermayer
93 a7b2871c Romain Dolbeau
#ifdef HAVE_ALTIVEC_H
94
#include <altivec.h>
95
#endif
96
97 911879d1 Michael Niedermayer
#define GET_MODE_BUFFER_SIZE 500
98
#define OPTIONS_ARRAY_SIZE 10
99 9c9e467d Michael Niedermayer
#define BLOCK_SIZE 8
100
#define TEMP_STRIDE 8
101
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
102 911879d1 Michael Niedermayer
103 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
104 0bda7817 Reimar Döffinger
/*
 * 64-bit constants declared with 8-byte alignment; this group sits inside
 * the "#if defined(ARCH_X86)" section.
 * wNN: the 16-bit value 0x00NN repeated in each of the four 16-bit lanes.
 * bNN: the 8-bit value 0xNN repeated in each of the eight bytes.
 * NOTE(review): nothing in the visible part of this file references them;
 * presumably they are used by the inline assembly in the included template
 * (hence attribute_used) -- confirm.
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
105
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
106
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
107
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
108
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
109
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
110
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
111
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
112 b28daef8 Michael Niedermayer
#endif
113 3057fa66 Arpi
114 134eb1e5 Michael Niedermayer
static uint8_t clip_table[3*256];
115
static uint8_t * const clip_tab= clip_table + 256;
116
117 3f1d4e96 Dmitry Baryshkov
static const int attribute_used deringThreshold= 20;
118 3057fa66 Arpi
119 9c9e467d Michael Niedermayer
120 911879d1 Michael Niedermayer
static struct PPFilter filters[]=
121
{
122 bb270c08 Diego Biurrun
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
123
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
124
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
125
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
126
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
127
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
128
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
129
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
130
        {"dr", "dering",                1, 5, 6, DERING},
131
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
132
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
133
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
134
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
135
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
136
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
137
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
138
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
139
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
140
        {NULL, NULL,0,0,0,0} //End Marker
141 911879d1 Michael Niedermayer
};
142
143 7b49ce2e Stefan Huehner
/**
 * Flat list of string pairs terminated by a single NULL: the string at an
 * even index is a short alias and the string following it is the filter
 * list that alias stands for (presumably substituted by the mode parser --
 * the lookup code is not part of this table).
 *
 * Both the strings and the pointer array itself are constant.
 */
static const char * const replaceTable[]=
{
    "default",      "hdeblock:a,vdeblock:a,dering:a",
    "de",           "hdeblock:a,vdeblock:a,dering:a",
    "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
    "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
    "ac",           "ha:a:128:7,va:a,dering:a",
    NULL //End Marker
};
152
153 3057fa66 Arpi
154 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
155 3057fa66 Arpi
/**
 * Emit an x86 "prefetchnta" instruction for the memory at p.
 * Uses __asm__ rather than the bare asm keyword so the file also builds in
 * strict ISO C modes (-std=c99 / -std=c11).
 *
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}
161
162
/**
 * Emit an x86 "prefetcht0" instruction for the memory at p.
 * Uses __asm__ rather than the bare asm keyword so the file also builds in
 * strict ISO C modes (-std=c99 / -std=c11).
 *
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}
168
169
/**
 * Emit an x86 "prefetcht1" instruction for the memory at p.
 * Uses __asm__ rather than the bare asm keyword so the file also builds in
 * strict ISO C modes (-std=c99 / -std=c11).
 *
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}
175
176
/**
 * Emit an x86 "prefetcht2" instruction for the memory at p.
 * Uses __asm__ rather than the bare asm keyword so the file also builds in
 * strict ISO C modes (-std=c99 / -std=c11).
 *
 * @param p address handed to the instruction as a register operand
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
182 9a722af7 Arpi
#endif
183 3057fa66 Arpi
184 cc9b0679 Michael Niedermayer
// The horizontal functions exist only in C because the MMX code is faster when it transposes the block and uses the vertical filters
185 3057fa66 Arpi
186 cf5ec61d Michael Niedermayer
/**
187
 * Check if the given 8x8 Block is mostly "flat"
188
 */
189 b0ac780a Michael Niedermayer
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
190 cf5ec61d Michael Niedermayer
{
191 bb270c08 Diego Biurrun
        int numEq= 0;
192
        int y;
193
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194
        const int dcThreshold= dcOffset*2 + 1;
195
196
        for(y=0; y<BLOCK_SIZE; y++)
197
        {
198
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
199
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
200
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
202
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
203
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
204
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
205
                src+= stride;
206
        }
207
        return numEq > c->ppMode.flatnessThreshold;
208 9c9e467d Michael Niedermayer
}
209
210
/**
211
 * Check if the middle 8x8 Block in the given 8x16 block is flat
212
 */
213
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
214 bb270c08 Diego Biurrun
        int numEq= 0;
215
        int y;
216
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217
        const int dcThreshold= dcOffset*2 + 1;
218
219
        src+= stride*4; // src points to begin of the 8x8 Block
220
        for(y=0; y<BLOCK_SIZE-1; y++)
221
        {
222
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
227
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
228
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
229
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
230
                src+= stride;
231
        }
232
        return numEq > c->ppMode.flatnessThreshold;
233 cf5ec61d Michael Niedermayer
}
234
235 b0ac780a Michael Niedermayer
/**
 * Cheap horizontal range check over 8 rows.
 *
 * Only one pair of columns is sampled per row, cycling through
 * (0,5), (2,7), (4,1), (6,3) every four rows. The check fails as soon as a
 * sampled pair differs by more than 2*QP (for non-negative QP).
 * A disabled variant that compared columns 0 and 7 on every row has been
 * removed.
 *
 * @param src    first sample of the first row (only read)
 * @param stride distance between the starts of consecutive rows
 * @param QP     quantiser; 2*QP is the largest accepted difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    /* columns sampled on row r are colPair[r & 3] */
    static const uint8_t colPair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    const int      bias  = 2*QP;
    const unsigned limit = 4*QP;
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *pair = colPair[row & 3];

        /* unsigned compare: true when the difference is outside
         * [-2*QP, 2*QP] */
        if ((unsigned)(src[pair[0]] - src[pair[1]] + bias) > limit)
            return 0;
        src += stride;
    }
    return 1;
}
257 cf5ec61d Michael Niedermayer
258 cb482d25 Michael Niedermayer
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
259
{
260
#if 1
261
#if 1
262 bb270c08 Diego Biurrun
        int x;
263
        src+= stride*4;
264
        for(x=0; x<BLOCK_SIZE; x+=4)
265
        {
266
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
267
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
268
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
269
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
270
        }
271 cb482d25 Michael Niedermayer
#else
272 bb270c08 Diego Biurrun
        int x;
273
        src+= stride*3;
274
        for(x=0; x<BLOCK_SIZE; x++)
275
        {
276
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277
        }
278 cb482d25 Michael Niedermayer
#endif
279 bb270c08 Diego Biurrun
        return 1;
280 cb482d25 Michael Niedermayer
#else
281 bb270c08 Diego Biurrun
        int x;
282
        src+= stride*4;
283
        for(x=0; x<BLOCK_SIZE; x++)
284
        {
285
                int min=255;
286
                int max=0;
287
                int y;
288
                for(y=0; y<8; y++){
289
                        int v= src[x + y*stride];
290
                        if(v>max) max=v;
291
                        if(v<min) min=v;
292
                }
293
                if(max-min > 2*QP) return 0;
294
        }
295
        return 1;
296 cb482d25 Michael Niedermayer
#endif
297
}
298
299 b0ac780a Michael Niedermayer
/**
 * Classify a block for horizontal filtering.
 *
 * @param src    first sample of the block
 * @param stride distance between the starts of consecutive rows
 * @param c      context, passed to isHorizDC_C(); c->QP is passed to
 *               isHorizMinMaxOk_C()
 * @return 2 if isHorizDC_C() reports the block as not flat;
 *         1 if it is flat and isHorizMinMaxOk_C() succeeds;
 *         0 if it is flat but isHorizMinMaxOk_C() fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
    if (!isHorizDC_C(src, stride, c))
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
309
310 cb482d25 Michael Niedermayer
/**
 * Classify a block for vertical filtering.
 *
 * @param src    first sample of the area
 * @param stride distance between the starts of consecutive rows
 * @param c      context, passed to isVertDC_C(); c->QP is passed to
 *               isVertMinMaxOk_C()
 * @return 2 if isVertDC_C() reports the block as not flat;
 *         1 if it is flat and isVertMinMaxOk_C() succeeds;
 *         0 if it is flat but isVertMinMaxOk_C() fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
    if (!isVertDC_C(src, stride, c))
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
320
321 b0ac780a Michael Niedermayer
/**
 * Horizontal default deblocking filter (C version).
 *
 * For each of BLOCK_SIZE rows, samples dst[0..7] are read and only dst[3]
 * and dst[4] may be modified. A row is left untouched when the absolute
 * "middle energy" 5*(dst[4]-dst[3]) + 2*(dst[2]-dst[5]) is not below 8*QP.
 *
 * @param dst    first sample of the first row, filtered in place
 * @param stride distance between the starts of consecutive rows
 * @param c      context; only c->QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for (row = 0; row < BLOCK_SIZE; row++, dst += stride) {
        const int middleEnergy = 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int leftEnergy, rightEnergy, halfDiff, d;

        if (FFABS(middleEnergy) >= 8*c->QP)
            continue;

        halfDiff    = (dst[3] - dst[4])/2;
        leftEnergy  = 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy = 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        /* correction magnitude: how much the middle energy exceeds the
         * weaker of its two neighbours, scaled by 5/64 with rounding */
        d = FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        d = FFMAX(d, 0);
        d = (5*d + 32) >> 6;
        d *= FFSIGN(-middleEnergy);

        /* clamp d to the range between 0 and halfDiff */
        if (halfDiff > 0) {
            d = FFMAX(d, 0);
            d = FFMIN(d, halfDiff);
        } else {
            d = FFMIN(d, 0);
            d = FFMAX(d, halfDiff);
        }

        dst[3] -= d;
        dst[4] += d;
    }
}
357
358
/**
359
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
360
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
361
 */
362 b0ac780a Michael Niedermayer
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
363 cf5ec61d Michael Niedermayer
{
364 bb270c08 Diego Biurrun
        int y;
365
        for(y=0; y<BLOCK_SIZE; y++)
366
        {
367 c26abfa5 Diego Biurrun
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
368
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
369 bb270c08 Diego Biurrun
370
                int sums[10];
371
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
372
                sums[1] = sums[0] - first  + dst[3];
373
                sums[2] = sums[1] - first  + dst[4];
374
                sums[3] = sums[2] - first  + dst[5];
375
                sums[4] = sums[3] - first  + dst[6];
376
                sums[5] = sums[4] - dst[0] + dst[7];
377
                sums[6] = sums[5] - dst[1] + last;
378
                sums[7] = sums[6] - dst[2] + last;
379
                sums[8] = sums[7] - dst[3] + last;
380
                sums[9] = sums[8] - dst[4] + last;
381
382
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
383
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
384
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
385
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
386
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
387
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
388
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
389
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
390
391
                dst+= stride;
392
        }
393 cf5ec61d Michael Niedermayer
}
394
395 4e4dcbc5 Michael Niedermayer
/**
396 cc9b0679 Michael Niedermayer
 * Experimental Filter 1 (Horizontal)
397
 * will not damage linear gradients
398
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
399 755bfeab Diego Biurrun
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
400
 * MMX2 version does correct clipping C version does not
401 cc9b0679 Michael Niedermayer
 * not identical with the vertical one
402 4e4dcbc5 Michael Niedermayer
 */
403 cc9b0679 Michael Niedermayer
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404
{
405 bb270c08 Diego Biurrun
        int y;
406
        static uint64_t *lut= NULL;
407
        if(lut==NULL)
408
        {
409
                int i;
410 6ab6c7c3 Luca Barbato
                lut = av_malloc(256*8);
411 bb270c08 Diego Biurrun
                for(i=0; i<256; i++)
412
                {
413
                        int v= i < 128 ? 2*i : 2*(i-256);
414 117e45b0 Michael Niedermayer
/*
415 cc9b0679 Michael Niedermayer
//Simulate 112242211 9-Tap filter
416 bb270c08 Diego Biurrun
                        uint64_t a= (v/16) & 0xFF;
417
                        uint64_t b= (v/8) & 0xFF;
418
                        uint64_t c= (v/4) & 0xFF;
419
                        uint64_t d= (3*v/8) & 0xFF;
420 117e45b0 Michael Niedermayer
*/
421 cc9b0679 Michael Niedermayer
//Simulate piecewise linear interpolation
422 bb270c08 Diego Biurrun
                        uint64_t a= (v/16) & 0xFF;
423
                        uint64_t b= (v*3/16) & 0xFF;
424
                        uint64_t c= (v*5/16) & 0xFF;
425
                        uint64_t d= (7*v/16) & 0xFF;
426
                        uint64_t A= (0x100 - a)&0xFF;
427
                        uint64_t B= (0x100 - b)&0xFF;
428
                        uint64_t C= (0x100 - c)&0xFF;
429
                        uint64_t D= (0x100 - c)&0xFF;
430
431
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
432
                                (D<<24) | (C<<16) | (B<<8) | (A);
433
                        //lut[i] = (v<<32) | (v<<24);
434
                }
435
        }
436
437
        for(y=0; y<BLOCK_SIZE; y++)
438
        {
439
                int a= src[1] - src[2];
440
                int b= src[3] - src[4];
441
                int c= src[5] - src[6];
442
443 c26abfa5 Diego Biurrun
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
444 bb270c08 Diego Biurrun
445
                if(d < QP)
446
                {
447 02305ff3 Diego Biurrun
                        int v = d * FFSIGN(-b);
448 bb270c08 Diego Biurrun
449
                        src[1] +=v/8;
450
                        src[2] +=v/4;
451
                        src[3] +=3*v/8;
452
                        src[4] -=3*v/8;
453
                        src[5] -=v/4;
454
                        src[6] -=v/8;
455
456
                }
457
                src+=stride;
458
        }
459 cc9b0679 Michael Niedermayer
}
460
461 12eebd26 Michael Niedermayer
/**
462
 * accurate deblock filter
463
 */
464 849f1035 Måns Rullgård
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
465 bb270c08 Diego Biurrun
        int y;
466
        const int QP= c->QP;
467
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
468
        const int dcThreshold= dcOffset*2 + 1;
469 12eebd26 Michael Niedermayer
//START_TIMER
470 bb270c08 Diego Biurrun
        src+= step*4; // src points to begin of the 8x8 Block
471
        for(y=0; y<8; y++){
472
                int numEq= 0;
473
474
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
475
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
476
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
477
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
478
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
479
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
481
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
482
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
483
                if(numEq > c->ppMode.flatnessThreshold){
484
                        int min, max, x;
485
486
                        if(src[0] > src[step]){
487
                            max= src[0];
488
                            min= src[step];
489
                        }else{
490
                            max= src[step];
491
                            min= src[0];
492
                        }
493
                        for(x=2; x<8; x+=2){
494
                                if(src[x*step] > src[(x+1)*step]){
495
                                        if(src[x    *step] > max) max= src[ x   *step];
496
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
497
                                }else{
498
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
499
                                        if(src[ x   *step] < min) min= src[ x   *step];
500
                                }
501
                        }
502
                        if(max-min < 2*QP){
503 c26abfa5 Diego Biurrun
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
504
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
505 bb270c08 Diego Biurrun
506
                                int sums[10];
507
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
508
                                sums[1] = sums[0] - first       + src[3*step];
509
                                sums[2] = sums[1] - first       + src[4*step];
510
                                sums[3] = sums[2] - first       + src[5*step];
511
                                sums[4] = sums[3] - first       + src[6*step];
512
                                sums[5] = sums[4] - src[0*step] + src[7*step];
513
                                sums[6] = sums[5] - src[1*step] + last;
514
                                sums[7] = sums[6] - src[2*step] + last;
515
                                sums[8] = sums[7] - src[3*step] + last;
516
                                sums[9] = sums[8] - src[4*step] + last;
517
518
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
519
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
520
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
521
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
522
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
523
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
524
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
525
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
526
                        }
527
                }else{
528
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
529
530 c26abfa5 Diego Biurrun
                        if(FFABS(middleEnergy) < 8*QP)
531 bb270c08 Diego Biurrun
                        {
532
                                const int q=(src[3*step] - src[4*step])/2;
533
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
534
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
535
536 c26abfa5 Diego Biurrun
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
537 8925915f Diego Biurrun
                                d= FFMAX(d, 0);
538 bb270c08 Diego Biurrun
539
                                d= (5*d + 32) >> 6;
540 02305ff3 Diego Biurrun
                                d*= FFSIGN(-middleEnergy);
541 bb270c08 Diego Biurrun
542
                                if(q>0)
543
                                {
544
                                        d= d<0 ? 0 : d;
545
                                        d= d>q ? q : d;
546
                                }
547
                                else
548
                                {
549
                                        d= d>0 ? 0 : d;
550
                                        d= d<q ? q : d;
551
                                }
552
553
                                src[3*step]-= d;
554
                                src[4*step]+= d;
555
                        }
556
                }
557
558
                src += stride;
559
        }
560 12eebd26 Michael Niedermayer
/*if(step==16){
561
    STOP_TIMER("step16")
562
}else{
563
    STOP_TIMER("stepX")
564
}*/
565
}
566 cc9b0679 Michael Niedermayer
567 e89952aa Michael Niedermayer
//Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 version
568 cc9b0679 Michael Niedermayer
//Plain C versions
569 e89952aa Michael Niedermayer
/*
 * Decide which variants get compiled (COMPILE_* macros).
 * With RUNTIME_CPUDETECT defined, the C variant and, on ARCH_X86, all three
 * x86 variants are selected. Without it the C variant is selected only when
 * HAVE_MMX is not defined, and at most one x86 variant is selected, in the
 * order MMX2, then 3DNOW, then plain MMX.
 */
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
570
#define COMPILE_C
571
#endif
572
573 b0ac780a Michael Niedermayer
/* AltiVec variant: needs both ARCH_POWERPC and HAVE_ALTIVEC */
#ifdef ARCH_POWERPC
574
#ifdef HAVE_ALTIVEC
575
#define COMPILE_ALTIVEC
576
#endif //HAVE_ALTIVEC
577
#endif //ARCH_POWERPC
578
579 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
580 e89952aa Michael Niedermayer
581
/* plain MMX: only when neither 3DNOW nor MMX2 is available */
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
582
#define COMPILE_MMX
583
#endif
584
585
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
586
#define COMPILE_MMX2
587
#endif
588
589
/* 3DNOW: only when MMX2 is not available */
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
590
#define COMPILE_3DNOW
591
#endif
592 3cd52279 Diego Biurrun
#endif /* defined(ARCH_X86) */
593 e89952aa Michael Niedermayer
594
/*
 * Template instantiation.
 * The HAVE_* feature macros are cleared first. Each section below then sets
 * exactly the macros that describe one variant, defines RENAME() to append
 * that variant's suffix to a name, and includes postprocess_template.c
 * (presumably the template wraps its function names in RENAME(), so every
 * inclusion yields one suffixed set of functions -- the template is not
 * part of this file).
 * After this block the HAVE_* macros hold whatever the last compiled
 * section left behind.
 */
#undef HAVE_MMX
595
#undef HAVE_MMX2
596
#undef HAVE_3DNOW
597 b0ac780a Michael Niedermayer
#undef HAVE_ALTIVEC
598 e89952aa Michael Niedermayer
599
/* plain C variant, suffix _C */
#ifdef COMPILE_C
600 cc9b0679 Michael Niedermayer
#undef HAVE_MMX
601
#undef HAVE_MMX2
602
#undef HAVE_3DNOW
603
#define RENAME(a) a ## _C
604
#include "postprocess_template.c"
605 e89952aa Michael Niedermayer
#endif
606 cc9b0679 Michael Niedermayer
607 b0ac780a Michael Niedermayer
/* AltiVec variant, suffix _altivec; also pulls in the AltiVec-specific template */
#ifdef ARCH_POWERPC
608
#ifdef COMPILE_ALTIVEC
609
#undef RENAME
610
#define HAVE_ALTIVEC
611
#define RENAME(a) a ## _altivec
612
#include "postprocess_altivec_template.c"
613
#include "postprocess_template.c"
614
#endif
615
#endif //ARCH_POWERPC
616
617 cc9b0679 Michael Niedermayer
//MMX versions
618 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX
619 cc9b0679 Michael Niedermayer
#undef RENAME
620
#define HAVE_MMX
621
#undef HAVE_MMX2
622
#undef HAVE_3DNOW
623
#define RENAME(a) a ## _MMX
624
#include "postprocess_template.c"
625 e89952aa Michael Niedermayer
#endif
626 cc9b0679 Michael Niedermayer
627
//MMX2 versions
628 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX2
629 cc9b0679 Michael Niedermayer
#undef RENAME
630
#define HAVE_MMX
631
#define HAVE_MMX2
632
#undef HAVE_3DNOW
633
#define RENAME(a) a ## _MMX2
634
#include "postprocess_template.c"
635 e89952aa Michael Niedermayer
#endif
636 cc9b0679 Michael Niedermayer
637
//3DNOW versions
638 e89952aa Michael Niedermayer
#ifdef COMPILE_3DNOW
639 cc9b0679 Michael Niedermayer
#undef RENAME
640
#define HAVE_MMX
641
#undef HAVE_MMX2
642
#define HAVE_3DNOW
643
#define RENAME(a) a ## _3DNow
644
#include "postprocess_template.c"
645 e89952aa Michael Niedermayer
#endif
646 cc9b0679 Michael Niedermayer
647 755bfeab Diego Biurrun
// minor note: the HAVE_xyz is messed up after that line so do not use it.
648 cc9b0679 Michael Niedermayer
649
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650 bb270c08 Diego Biurrun
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
651 cc9b0679 Michael Niedermayer
{
652 bb270c08 Diego Biurrun
        PPContext *c= (PPContext *)vc;
653
        PPMode *ppMode= (PPMode *)vm;
654
        c->ppMode= *ppMode; //FIXME
655 9c9e467d Michael Niedermayer
656 755bfeab Diego Biurrun
        // Using ifs here as they are faster than function pointers although the
657
        // difference would not be measureable here but it is much better because
658
        // someone might exchange the CPU whithout restarting MPlayer ;)
659 e89952aa Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
660 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
661 bb270c08 Diego Biurrun
        // ordered per speed fasterst first
662
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
        else
669
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 cc9b0679 Michael Niedermayer
#else
671 b0ac780a Michael Niedermayer
#ifdef ARCH_POWERPC
672
#ifdef HAVE_ALTIVEC
673 71487254 Michael Niedermayer
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
674 bb270c08 Diego Biurrun
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675 b0ac780a Michael Niedermayer
        else
676
#endif
677
#endif
678 bb270c08 Diego Biurrun
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
679 be44a4d7 Michael Niedermayer
#endif
680 e89952aa Michael Niedermayer
#else //RUNTIME_CPUDETECT
681
#ifdef HAVE_MMX2
682 bb270c08 Diego Biurrun
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683 e89952aa Michael Niedermayer
#elif defined (HAVE_3DNOW)
684 bb270c08 Diego Biurrun
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 e89952aa Michael Niedermayer
#elif defined (HAVE_MMX)
686 bb270c08 Diego Biurrun
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 b0ac780a Michael Niedermayer
#elif defined (HAVE_ALTIVEC)
688 bb270c08 Diego Biurrun
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 e89952aa Michael Niedermayer
#else
690 bb270c08 Diego Biurrun
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691 e89952aa Michael Niedermayer
#endif
692
#endif //!RUNTIME_CPUDETECT
693 117e45b0 Michael Niedermayer
}
694
695 cc9b0679 Michael Niedermayer
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
696 bb270c08 Diego Biurrun
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
697 13e00528 Arpi
698 911879d1 Michael Niedermayer
/* -pp Command line Help
 *
 * Human-readable description of the filter string syntax accepted by
 * pp_get_mode_by_name_and_quality(): one line per filter (short name,
 * long name, options, description) followed by usage examples.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
743 911879d1 Michael Niedermayer
744 c41d972d Michael Niedermayer
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
745 911879d1 Michael Niedermayer
{
746 bb270c08 Diego Biurrun
        char temp[GET_MODE_BUFFER_SIZE];
747
        char *p= temp;
748 a8988916 Diego Pettenò
        static const char filterDelimiters[] = ",/";
749
        static const char optionDelimiters[] = ":";
750 bb270c08 Diego Biurrun
        struct PPMode *ppMode;
751
        char *filterToken;
752
753 6ab6c7c3 Luca Barbato
        ppMode= av_malloc(sizeof(PPMode));
754 bb270c08 Diego Biurrun
755
        ppMode->lumMode= 0;
756
        ppMode->chromMode= 0;
757
        ppMode->maxTmpNoise[0]= 700;
758
        ppMode->maxTmpNoise[1]= 1500;
759
        ppMode->maxTmpNoise[2]= 3000;
760
        ppMode->maxAllowedY= 234;
761
        ppMode->minAllowedY= 16;
762
        ppMode->baseDcDiff= 256/8;
763
        ppMode->flatnessThreshold= 56-16-1;
764
        ppMode->maxClippedThreshold= 0.01;
765
        ppMode->error=0;
766
767
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
768
769 e7becfb2 Diego Biurrun
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
770 bb270c08 Diego Biurrun
771
        for(;;){
772
                char *filterName;
773
                int q= 1000000; //PP_QUALITY_MAX;
774
                int chrom=-1;
775
                int luma=-1;
776
                char *option;
777
                char *options[OPTIONS_ARRAY_SIZE];
778
                int i;
779
                int filterNameOk=0;
780
                int numOfUnknownOptions=0;
781
                int enable=1; //does the user want us to enabled or disabled the filter
782
783
                filterToken= strtok(p, filterDelimiters);
784
                if(filterToken == NULL) break;
785
                p+= strlen(filterToken) + 1; // p points to next filterToken
786
                filterName= strtok(filterToken, optionDelimiters);
787 e7becfb2 Diego Biurrun
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
788 bb270c08 Diego Biurrun
789
                if(*filterName == '-')
790
                {
791
                        enable=0;
792
                        filterName++;
793
                }
794
795
                for(;;){ //for all options
796
                        option= strtok(NULL, optionDelimiters);
797
                        if(option == NULL) break;
798
799 e7becfb2 Diego Biurrun
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
800 bb270c08 Diego Biurrun
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
801
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
802
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
803
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
804
                        else
805
                        {
806
                                options[numOfUnknownOptions] = option;
807
                                numOfUnknownOptions++;
808
                        }
809
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810
                }
811
                options[numOfUnknownOptions] = NULL;
812
813
                /* replace stuff from the replace Table */
814
                for(i=0; replaceTable[2*i]!=NULL; i++)
815
                {
816
                        if(!strcmp(replaceTable[2*i], filterName))
817
                        {
818
                                int newlen= strlen(replaceTable[2*i + 1]);
819
                                int plen;
820
                                int spaceLeft;
821
822
                                if(p==NULL) p= temp, *p=0;      //last filter
823
                                else p--, *p=',';               //not last filter
824
825
                                plen= strlen(p);
826
                                spaceLeft= p - temp + plen;
827
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
828
                                {
829
                                        ppMode->error++;
830
                                        break;
831
                                }
832
                                memmove(p + newlen, p, plen+1);
833
                                memcpy(p, replaceTable[2*i + 1], newlen);
834
                                filterNameOk=1;
835
                        }
836
                }
837
838
                for(i=0; filters[i].shortName!=NULL; i++)
839
                {
840
                        if(   !strcmp(filters[i].longName, filterName)
841
                           || !strcmp(filters[i].shortName, filterName))
842
                        {
843
                                ppMode->lumMode &= ~filters[i].mask;
844
                                ppMode->chromMode &= ~filters[i].mask;
845
846
                                filterNameOk=1;
847
                                if(!enable) break; // user wants to disable it
848
849
                                if(q >= filters[i].minLumQuality && luma)
850
                                        ppMode->lumMode|= filters[i].mask;
851
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
852
                                        if(q >= filters[i].minChromQuality)
853
                                                ppMode->chromMode|= filters[i].mask;
854
855
                                if(filters[i].mask == LEVEL_FIX)
856
                                {
857
                                        int o;
858
                                        ppMode->minAllowedY= 16;
859
                                        ppMode->maxAllowedY= 234;
860
                                        for(o=0; options[o]!=NULL; o++)
861
                                        {
862
                                                if(  !strcmp(options[o],"fullyrange")
863
                                                   ||!strcmp(options[o],"f"))
864
                                                {
865
                                                        ppMode->minAllowedY= 0;
866
                                                        ppMode->maxAllowedY= 255;
867
                                                        numOfUnknownOptions--;
868
                                                }
869
                                        }
870
                                }
871
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
872
                                {
873
                                        int o;
874
                                        int numOfNoises=0;
875
876
                                        for(o=0; options[o]!=NULL; o++)
877
                                        {
878
                                                char *tail;
879
                                                ppMode->maxTmpNoise[numOfNoises]=
880
                                                        strtol(options[o], &tail, 0);
881
                                                if(tail!=options[o])
882
                                                {
883
                                                        numOfNoises++;
884
                                                        numOfUnknownOptions--;
885
                                                        if(numOfNoises >= 3) break;
886
                                                }
887
                                        }
888
                                }
889
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
890
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
891
                                {
892
                                        int o;
893
894
                                        for(o=0; options[o]!=NULL && o<2; o++)
895
                                        {
896
                                                char *tail;
897
                                                int val= strtol(options[o], &tail, 0);
898
                                                if(tail==options[o]) break;
899
900
                                                numOfUnknownOptions--;
901
                                                if(o==0) ppMode->baseDcDiff= val;
902
                                                else ppMode->flatnessThreshold= val;
903
                                        }
904
                                }
905
                                else if(filters[i].mask == FORCE_QUANT)
906
                                {
907
                                        int o;
908
                                        ppMode->forcedQuant= 15;
909
910
                                        for(o=0; options[o]!=NULL && o<1; o++)
911
                                        {
912
                                                char *tail;
913
                                                int val= strtol(options[o], &tail, 0);
914
                                                if(tail==options[o]) break;
915
916
                                                numOfUnknownOptions--;
917
                                                ppMode->forcedQuant= val;
918
                                        }
919
                                }
920
                        }
921
                }
922
                if(!filterNameOk) ppMode->error++;
923
                ppMode->error += numOfUnknownOptions;
924
        }
925
926 e7becfb2 Diego Biurrun
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
927 bb270c08 Diego Biurrun
        if(ppMode->error)
928
        {
929 e7becfb2 Diego Biurrun
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
930 6ab6c7c3 Luca Barbato
                av_free(ppMode);
931 bb270c08 Diego Biurrun
                return NULL;
932
        }
933
        return ppMode;
934 911879d1 Michael Niedermayer
}
935
936 c41d972d Michael Niedermayer
/**
 * Release a mode returned by pp_get_mode_by_name_and_quality().
 *
 * @param mode  mode to free; handed directly to av_free()
 */
void pp_free_mode(pp_mode_t *mode){
    av_free(mode);
}
939
940 88c0bc7e Michael Niedermayer
/**
 * Replace the buffer at *p with a fresh zero-filled one of the given size.
 *
 * The old contents are discarded, not copied. *p is NULL afterwards if
 * av_mallocz() fails.
 *
 * @param p          address of the buffer pointer to replace
 * @param alignment  currently ignored; the allocation is left to av_mallocz()
 * @param size       size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
        av_free(*p);
        *p= av_mallocz(size);
}
944
945 0426af31 Michael Niedermayer
/**
 * (Re)allocate all per-context work buffers for the given geometry.
 *
 * Every buffer is freed and replaced by a zero-filled one (see
 * reallocAlign()), so previous contents are lost. c->stride and c->qpStride
 * are updated to the values passed in.
 *
 * Allocation failures are not reported; the affected pointers are left NULL.
 *
 * @param c         context whose buffers are replaced
 * @param width     plane width in pixels
 * @param height    plane height in pixels
 * @param stride    line size used to dimension the line/plane buffers
 * @param qpStride  line size of the QP tables, in entries
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        int mbWidth = (width+15)>>4;
        int mbHeight= (height+15)>>4;
        int i;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        if(c->yHistogram != NULL)
        {
                // Start from a flat histogram; the product is formed in 64 bit
                // so it cannot overflow int for large frames.
                const uint64_t initial= (uint64_t)width*height/64*15/256;
                for(i=0; i<256; i++)
                        c->yHistogram[i]= initial;
        }

        for(i=0; i<3; i++)
        {
                //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
972
973 4cfbf61b Falk Hüffner
static void global_init(void){
974 bb270c08 Diego Biurrun
        int i;
975
        memset(clip_table, 0, 256);
976
        for(i=256; i<512; i++)
977
                clip_table[i]= i;
978
        memset(clip_table+512, 0, 256);
979 134eb1e5 Michael Niedermayer
}
980
981 e7becfb2 Diego Biurrun
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr  unused
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    return "postproc";
}
984
985
// Class descriptor that pp_get_context() stores in PPContext.av_class.
// Presumably the three initializers are the class name, the name callback
// and an (absent) option table -- confirm against the AVClass declaration.
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
986
987 88c0bc7e Michael Niedermayer
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
988 6ab6c7c3 Luca Barbato
        PPContext *c= av_malloc(sizeof(PPContext));
989 bb270c08 Diego Biurrun
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
990
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
991 115329f1 Diego Biurrun
992 bb270c08 Diego Biurrun
        global_init();
993 134eb1e5 Michael Niedermayer
994 bb270c08 Diego Biurrun
        memset(c, 0, sizeof(PPContext));
995 e7becfb2 Diego Biurrun
        c->av_class = &av_codec_context_class;
996 bb270c08 Diego Biurrun
        c->cpuCaps= cpuCaps;
997
        if(cpuCaps&PP_FORMAT){
998
                c->hChromaSubSample= cpuCaps&0x3;
999
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1000
        }else{
1001
                c->hChromaSubSample= 1;
1002
                c->vChromaSubSample= 1;
1003
        }
1004 88c0bc7e Michael Niedermayer
1005 bb270c08 Diego Biurrun
        reallocBuffers(c, width, height, stride, qpStride);
1006 115329f1 Diego Biurrun
1007 bb270c08 Diego Biurrun
        c->frameNum=-1;
1008 45b4f285 Michael Niedermayer
1009 bb270c08 Diego Biurrun
        return c;
1010 45b4f285 Michael Niedermayer
}
1011
1012 9cb54f43 Michael Niedermayer
void pp_free_context(void *vc){
1013 bb270c08 Diego Biurrun
        PPContext *c = (PPContext*)vc;
1014
        int i;
1015 115329f1 Diego Biurrun
1016 6ab6c7c3 Luca Barbato
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1017
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1018 115329f1 Diego Biurrun
1019 6ab6c7c3 Luca Barbato
        av_free(c->tempBlocks);
1020
        av_free(c->yHistogram);
1021
        av_free(c->tempDst);
1022
        av_free(c->tempSrc);
1023
        av_free(c->deintTemp);
1024
        av_free(c->stdQPTable);
1025
        av_free(c->nonBQPTable);
1026
        av_free(c->forcedQPTable);
1027 115329f1 Diego Biurrun
1028 bb270c08 Diego Biurrun
        memset(c, 0, sizeof(PPContext));
1029 88c0bc7e Michael Niedermayer
1030 6ab6c7c3 Luca Barbato
        av_free(c);
1031 9c9e467d Michael Niedermayer
}
1032
1033 9cb54f43 Michael Niedermayer
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1034 9c9e467d Michael Niedermayer
                 uint8_t * dst[3], int dstStride[3],
1035 ec487e5d Michael Niedermayer
                 int width, int height,
1036 9c9e467d Michael Niedermayer
                 QP_STORE_T *QP_store,  int QPStride,
1037 bb270c08 Diego Biurrun
                 pp_mode_t *vm,  void *vc, int pict_type)
1038 911879d1 Michael Niedermayer
{
1039 bb270c08 Diego Biurrun
        int mbWidth = (width+15)>>4;
1040
        int mbHeight= (height+15)>>4;
1041
        PPMode *mode = (PPMode*)vm;
1042
        PPContext *c = (PPContext*)vc;
1043 c26abfa5 Diego Biurrun
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1044
        int absQPStride = FFABS(QPStride);
1045 bb270c08 Diego Biurrun
1046
        // c->stride and c->QPStride are always positive
1047
        if(c->stride < minStride || c->qpStride < absQPStride)
1048
                reallocBuffers(c, width, height,
1049 8925915f Diego Biurrun
                                FFMAX(minStride, c->stride),
1050
                                FFMAX(c->qpStride, absQPStride));
1051 bb270c08 Diego Biurrun
1052
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1053
        {
1054
                int i;
1055
                QP_store= c->forcedQPTable;
1056
                absQPStride = QPStride = 0;
1057
                if(mode->lumMode & FORCE_QUANT)
1058
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1059
                else
1060
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1061
        }
1062 0426af31 Michael Niedermayer
1063 bb270c08 Diego Biurrun
        if(pict_type & PP_PICT_TYPE_QP2){
1064
                int i;
1065
                const int count= mbHeight * absQPStride;
1066
                for(i=0; i<(count>>2); i++){
1067
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1068
                }
1069
                for(i<<=2; i<count; i++){
1070
                        c->stdQPTable[i] = QP_store[i]>>1;
1071
                }
1072 0426af31 Michael Niedermayer
                QP_store= c->stdQPTable;
1073 bb270c08 Diego Biurrun
                QPStride= absQPStride;
1074
        }
1075 0426af31 Michael Niedermayer
1076 ec487e5d Michael Niedermayer
if(0){
1077
int x,y;
1078
for(y=0; y<mbHeight; y++){
1079 bb270c08 Diego Biurrun
        for(x=0; x<mbWidth; x++){
1080 e7becfb2 Diego Biurrun
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1081 bb270c08 Diego Biurrun
        }
1082 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1083 ec487e5d Michael Niedermayer
}
1084 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1085 ec487e5d Michael Niedermayer
}
1086 51e19dcc Michael Niedermayer
1087 bb270c08 Diego Biurrun
        if((pict_type&7)!=3)
1088
        {
1089
                if (QPStride >= 0) {
1090
                        int i;
1091
                        const int count= mbHeight * QPStride;
1092
                        for(i=0; i<(count>>2); i++){
1093
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1094
                        }
1095
                        for(i<<=2; i<count; i++){
1096
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1097
                        }
1098
                } else {
1099
                        int i,j;
1100
                        for(i=0; i<mbHeight; i++) {
1101
                                    for(j=0; j<absQPStride; j++) {
1102
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1103
                                }
1104
                        }
1105
                }
1106
        }
1107
1108 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1109
               mode->lumMode, mode->chromMode);
1110 bb270c08 Diego Biurrun
1111
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1112
                width, height, QP_store, QPStride, 0, mode, c);
1113
1114
        width  = (width )>>c->hChromaSubSample;
1115
        height = (height)>>c->vChromaSubSample;
1116
1117
        if(mode->chromMode)
1118
        {
1119
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1120
                        width, height, QP_store, QPStride, 1, mode, c);
1121
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1122
                        width, height, QP_store, QPStride, 2, mode, c);
1123
        }
1124
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1125
        {
1126
                linecpy(dst[1], src[1], height, srcStride[1]);
1127
                linecpy(dst[2], src[2], height, srcStride[2]);
1128
        }
1129
        else
1130
        {
1131
                int y;
1132
                for(y=0; y<height; y++)
1133
                {
1134
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1135
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1136
                }
1137
        }
1138 911879d1 Michael Niedermayer
}