Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ ebba2b3e

History | View | Annotate | Download (37.5 KB)

1 3057fa66 Arpi
/*
2 b78e7197 Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6 2912e87a Mans Rullgard
 * This file is part of Libav.
7 b78e7197 Diego Biurrun
 *
8 2912e87a Mans Rullgard
 * Libav is free software; you can redistribute it and/or modify
9 b78e7197 Diego Biurrun
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13 2912e87a Mans Rullgard
 * Libav is distributed in the hope that it will be useful,
14 b78e7197 Diego Biurrun
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19 2912e87a Mans Rullgard
 * along with Libav; if not, write to the Free Software
20 b78e7197 Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22 3057fa66 Arpi
23 b304569a Michael Niedermayer
/**
24 ba87f080 Diego Biurrun
 * @file
25 b304569a Michael Niedermayer
 * postprocessing.
26
 */
27 115329f1 Diego Biurrun
28 3057fa66 Arpi
/*
29 bb270c08 Diego Biurrun
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49 d5a1a995 Michael Niedermayer

50 2cab6401 Diego Biurrun
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 755bfeab Diego Biurrun
# more or less selfinvented filters so the exactness is not too meaningful
52 3057fa66 Arpi
E = Exact implementation
53 04932b0d Diego Biurrun
e = almost exact implementation (slightly different rounding,...)
54 3057fa66 Arpi
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56 b0ac780a Michael Niedermayer
p = partially optimized, still some work to do
57 3057fa66 Arpi
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64 13e00528 Arpi
write a faster and higher quality deblocking filter :)
65 d5a1a995 Michael Niedermayer
make the mainloop more flexible (variable number of blocks at once
66 bb270c08 Diego Biurrun
        (the if/else stuff per block is slowing things down))
67 9f45d04d Michael Niedermayer
compare the quality & speed of all filters
68
split this huge file
69 8405b3fd Michael Niedermayer
optimize c versions
70 117e45b0 Michael Niedermayer
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 3057fa66 Arpi
...
72 13e00528 Arpi
*/
73
74 faa6f1c3 Janne Grunau
//Changelog: use git log
75 3057fa66 Arpi
76 9858f773 Michael Niedermayer
#include "config.h"
77 245976da Diego Biurrun
#include "libavutil/avutil.h"
78 3057fa66 Arpi
#include <inttypes.h>
79
#include <stdio.h>
80 d5a1a995 Michael Niedermayer
#include <stdlib.h>
81 911879d1 Michael Niedermayer
#include <string.h>
82 3057fa66 Arpi
//#undef HAVE_MMX2
83 ebc3209a Diego Biurrun
//#define HAVE_AMD3DNOW
84 3057fa66 Arpi
//#undef HAVE_MMX
85 cc9b0679 Michael Niedermayer
//#undef ARCH_X86
86 7f16f6e6 Michael Niedermayer
//#define DEBUG_BRIGHTNESS
87 13e00528 Arpi
#include "postprocess.h"
88 c41d972d Michael Niedermayer
#include "postprocess_internal.h"
89 bba9b16c Michael Niedermayer
90 2a4a62bf Stefano Sabatini
unsigned postproc_version(void)
91
{
92
    return LIBPOSTPROC_VERSION_INT;
93
}
94
95 41600690 Stefano Sabatini
const char *postproc_configuration(void)
96 c1736936 Diego Biurrun
{
97 29ba0911 Janne Grunau
    return LIBAV_CONFIGURATION;
98 c1736936 Diego Biurrun
}
99
100 41600690 Stefano Sabatini
const char *postproc_license(void)
101 c1736936 Diego Biurrun
{
102
#define LICENSE_PREFIX "libpostproc license: "
103 a03be6e1 Janne Grunau
    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
104 c1736936 Diego Biurrun
}
105
106 b250f9c6 Aurelien Jacobs
#if HAVE_ALTIVEC_H
107 a7b2871c Romain Dolbeau
#include <altivec.h>
108
#endif
109
110 911879d1 Michael Niedermayer
#define GET_MODE_BUFFER_SIZE 500
111
#define OPTIONS_ARRAY_SIZE 10
112 9c9e467d Michael Niedermayer
#define BLOCK_SIZE 8
113
#define TEMP_STRIDE 8
114
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
115 911879d1 Michael Niedermayer
116 b250f9c6 Aurelien Jacobs
#if ARCH_X86
117 2b858d0b Reimar Döffinger
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
118
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
119
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
120
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
121
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
122
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
123
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
124
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
125 b28daef8 Michael Niedermayer
#endif
126 3057fa66 Arpi
127 2722e362 Reimar Döffinger
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
128 3057fa66 Arpi
129 9c9e467d Michael Niedermayer
130 911879d1 Michael Niedermayer
static struct PPFilter filters[]=
131
{
132 16e0bf73 Diego Biurrun
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
133
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
134
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
135
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
136
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
137
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
138
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
139
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
140
    {"dr", "dering",                1, 5, 6, DERING},
141
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
142
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
143
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
144
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
145
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
146
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
147
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
148
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
149
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
150
    {NULL, NULL,0,0,0,0} //End Marker
151 911879d1 Michael Niedermayer
};
152
153 7b49ce2e Stefan Huehner
/*
 * Flat list of string pairs, terminated by a single NULL.
 * Even indices hold a name, the following odd index holds the filter
 * string associated with it.
 * NOTE(review): presumably the name is an alias that gets replaced by
 * the filter string when a mode is parsed -- the consumer is not visible
 * here, confirm.
 */
static const char *replaceTable[]=
{
    /* name */      /* filter string */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",

    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",

    "ac",           "ha:a:128:7,va:a,dr:a",

    NULL            /* end marker */
};
162
163 3057fa66 Arpi
164 b250f9c6 Aurelien Jacobs
#if ARCH_X86
165 3057fa66 Arpi
/** Emit an x86 `prefetchnta` instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
171
172
/** Emit an x86 `prefetcht0` instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
178
179
/** Emit an x86 `prefetcht1` instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
185
186
/** Emit an x86 `prefetcht2` instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
192 9a722af7 Arpi
#endif
193 3057fa66 Arpi
194 04932b0d Diego Biurrun
/* The horizontal functions exist only in C because the MMX
195
 * code is faster with vertical filters and transposing. */
196 3057fa66 Arpi
197 cf5ec61d Michael Niedermayer
/**
198
 * Check if the given 8x8 Block is mostly "flat"
199
 */
200 b0ac780a Michael Niedermayer
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
201 cf5ec61d Michael Niedermayer
{
202 16e0bf73 Diego Biurrun
    int numEq= 0;
203
    int y;
204
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205
    const int dcThreshold= dcOffset*2 + 1;
206
207
    for(y=0; y<BLOCK_SIZE; y++){
208
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
215
        src+= stride;
216
    }
217
    return numEq > c->ppMode.flatnessThreshold;
218 9c9e467d Michael Niedermayer
}
219
220
/**
221
 * Check if the middle 8x8 Block in the given 8x16 block is flat
222
 */
223 16e0bf73 Diego Biurrun
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
224
{
225
    int numEq= 0;
226
    int y;
227
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
228
    const int dcThreshold= dcOffset*2 + 1;
229
230
    src+= stride*4; // src points to begin of the 8x8 Block
231
    for(y=0; y<BLOCK_SIZE-1; y++){
232
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
240
        src+= stride;
241
    }
242
    return numEq > c->ppMode.flatnessThreshold;
243 cf5ec61d Michael Niedermayer
}
244
245 b0ac780a Michael Niedermayer
/**
 * Cheap range check over 8 rows of 8 pixels.
 *
 * Instead of computing a real min/max, one pair of pixels is sampled per
 * row; the sampled columns rotate with a period of four rows:
 * (0,5), (2,7), (4,1), (6,3). The block passes when every sampled pair
 * differs by at most 2*QP.
 *
 * @param src    first pixel of the block
 * @param stride distance between two rows, in array elements
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* {first column, second column} sampled in rows 0..3 (and again in 4..7) */
    static const int colPair[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++){
        const uint8_t *line = src + row*stride;
        const int diff = line[colPair[row & 3][0]] - line[colPair[row & 3][1]];

        /* unsigned compare rejects both diff < -2*QP and diff > 2*QP */
        if((unsigned)(diff + 2*QP) > 4*QP)
            return 0;
    }

    return 1;
}
267 cf5ec61d Michael Niedermayer
268 cb482d25 Michael Niedermayer
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269
{
270
#if 1
271
#if 1
272 16e0bf73 Diego Biurrun
    int x;
273
    src+= stride*4;
274
    for(x=0; x<BLOCK_SIZE; x+=4){
275
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
276
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279
    }
280 cb482d25 Michael Niedermayer
#else
281 16e0bf73 Diego Biurrun
    int x;
282
    src+= stride*3;
283
    for(x=0; x<BLOCK_SIZE; x++){
284
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
285
    }
286 cb482d25 Michael Niedermayer
#endif
287 16e0bf73 Diego Biurrun
    return 1;
288 cb482d25 Michael Niedermayer
#else
289 16e0bf73 Diego Biurrun
    int x;
290
    src+= stride*4;
291
    for(x=0; x<BLOCK_SIZE; x++){
292
        int min=255;
293
        int max=0;
294
        int y;
295
        for(y=0; y<8; y++){
296
            int v= src[x + y*stride];
297
            if(v>max) max=v;
298
            if(v<min) min=v;
299 bb270c08 Diego Biurrun
        }
300 16e0bf73 Diego Biurrun
        if(max-min > 2*QP) return 0;
301
    }
302
    return 1;
303 cb482d25 Michael Niedermayer
#endif
304
}
305
306 16e0bf73 Diego Biurrun
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() does not consider the block flat;
 *         otherwise 1 if isHorizMinMaxOk_C() accepts it for c->QP,
 *         and 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
317
318 16e0bf73 Diego Biurrun
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() does not consider the block flat;
 *         otherwise 1 if isVertMinMaxOk_C() accepts it for c->QP,
 *         and 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
329
330 b0ac780a Michael Niedermayer
/**
 * Default horizontal deblocking filter (C version).
 *
 * Works on BLOCK_SIZE rows of 8 pixels. In each row the step between
 * dst[3] and dst[4] is reduced, but only when the "energy" measured
 * around that step is below 8*QP. The correction is limited to the range
 * between 0 and half of the step, so the two pixels never cross.
 *
 * @param dst    first pixel of the row segment dst[0..7]; modified in place
 * @param stride distance between two rows, in array elements
 * @param c      context; only c->QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for(row=0; row<BLOCK_SIZE; row++, dst+=stride){
        const int midEnergy = 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int halfStep, lEnergy, rEnergy, corr;

        if(FFABS(midEnergy) >= 8*c->QP)
            continue;

        halfStep = (dst[3] - dst[4])/2;
        lEnergy  = 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rEnergy  = 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        /* how much the middle energy exceeds the weaker neighbour, never negative */
        corr = FFABS(midEnergy) - FFMIN( FFABS(lEnergy), FFABS(rEnergy) );
        corr = FFMAX(corr, 0);

        corr  = (5*corr + 32) >> 6;
        corr *= FFSIGN(-midEnergy);

        /* keep the correction between 0 and halfStep, whichever sign halfStep has */
        if(halfStep > 0)
            corr = FFMIN(FFMAX(corr, 0), halfStep);
        else
            corr = FFMAX(FFMIN(corr, 0), halfStep);

        dst[3] -= corr;
        dst[4] += corr;
    }
}
364
365
/**
366
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
368
 */
369 b0ac780a Michael Niedermayer
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370 cf5ec61d Michael Niedermayer
{
371 16e0bf73 Diego Biurrun
    int y;
372
    for(y=0; y<BLOCK_SIZE; y++){
373
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
374
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
375
376
        int sums[10];
377
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
378
        sums[1] = sums[0] - first  + dst[3];
379
        sums[2] = sums[1] - first  + dst[4];
380
        sums[3] = sums[2] - first  + dst[5];
381
        sums[4] = sums[3] - first  + dst[6];
382
        sums[5] = sums[4] - dst[0] + dst[7];
383
        sums[6] = sums[5] - dst[1] + last;
384
        sums[7] = sums[6] - dst[2] + last;
385
        sums[8] = sums[7] - dst[3] + last;
386
        sums[9] = sums[8] - dst[4] + last;
387
388
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
389
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
390
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
391
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
392
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
393
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
394
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
395
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
396
397
        dst+= stride;
398
    }
399 cf5ec61d Michael Niedermayer
}
400
401 4e4dcbc5 Michael Niedermayer
/**
402 cc9b0679 Michael Niedermayer
 * Experimental Filter 1 (Horizontal)
403
 * will not damage linear gradients
404 bd107136 Diego Biurrun
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
405 755bfeab Diego Biurrun
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
406
 * MMX2 version does correct clipping C version does not
407 cc9b0679 Michael Niedermayer
 * not identical with the vertical one
408 4e4dcbc5 Michael Niedermayer
 */
409 cc9b0679 Michael Niedermayer
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
410
{
411 16e0bf73 Diego Biurrun
    int y;
412
    static uint64_t *lut= NULL;
413
    if(lut==NULL)
414
    {
415
        int i;
416
        lut = av_malloc(256*8);
417
        for(i=0; i<256; i++)
418 bb270c08 Diego Biurrun
        {
419 16e0bf73 Diego Biurrun
            int v= i < 128 ? 2*i : 2*(i-256);
420 117e45b0 Michael Niedermayer
/*
421 cc9b0679 Michael Niedermayer
//Simulate 112242211 9-Tap filter
422 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)  & 0xFF;
423
            uint64_t b= (v/8)   & 0xFF;
424
            uint64_t c= (v/4)   & 0xFF;
425
            uint64_t d= (3*v/8) & 0xFF;
426 117e45b0 Michael Niedermayer
*/
427 cc9b0679 Michael Niedermayer
//Simulate piecewise linear interpolation
428 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)   & 0xFF;
429
            uint64_t b= (v*3/16) & 0xFF;
430
            uint64_t c= (v*5/16) & 0xFF;
431
            uint64_t d= (7*v/16) & 0xFF;
432
            uint64_t A= (0x100 - a)&0xFF;
433
            uint64_t B= (0x100 - b)&0xFF;
434
            uint64_t C= (0x100 - c)&0xFF;
435
            uint64_t D= (0x100 - c)&0xFF;
436
437
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
438
                       (D<<24) | (C<<16) | (B<<8)  | (A);
439
            //lut[i] = (v<<32) | (v<<24);
440 bb270c08 Diego Biurrun
        }
441 16e0bf73 Diego Biurrun
    }
442 bb270c08 Diego Biurrun
443 16e0bf73 Diego Biurrun
    for(y=0; y<BLOCK_SIZE; y++){
444
        int a= src[1] - src[2];
445
        int b= src[3] - src[4];
446
        int c= src[5] - src[6];
447 bb270c08 Diego Biurrun
448 16e0bf73 Diego Biurrun
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449 bb270c08 Diego Biurrun
450 16e0bf73 Diego Biurrun
        if(d < QP){
451
            int v = d * FFSIGN(-b);
452 bb270c08 Diego Biurrun
453 16e0bf73 Diego Biurrun
            src[1] +=v/8;
454
            src[2] +=v/4;
455
            src[3] +=3*v/8;
456
            src[4] -=3*v/8;
457
            src[5] -=v/4;
458
            src[6] -=v/8;
459 bb270c08 Diego Biurrun
        }
460 16e0bf73 Diego Biurrun
        src+=stride;
461
    }
462 cc9b0679 Michael Niedermayer
}
463
464 12eebd26 Michael Niedermayer
/**
465
 * accurate deblock filter
466
 */
467 849f1035 Måns Rullgård
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
468 16e0bf73 Diego Biurrun
    int y;
469
    const int QP= c->QP;
470
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
471
    const int dcThreshold= dcOffset*2 + 1;
472 12eebd26 Michael Niedermayer
//START_TIMER
473 16e0bf73 Diego Biurrun
    src+= step*4; // src points to begin of the 8x8 Block
474
    for(y=0; y<8; y++){
475
        int numEq= 0;
476
477
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
478
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
479
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
480
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
481
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
482
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
483
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
484
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
485
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
486
        if(numEq > c->ppMode.flatnessThreshold){
487
            int min, max, x;
488
489
            if(src[0] > src[step]){
490
                max= src[0];
491
                min= src[step];
492
            }else{
493
                max= src[step];
494
                min= src[0];
495
            }
496
            for(x=2; x<8; x+=2){
497
                if(src[x*step] > src[(x+1)*step]){
498
                        if(src[x    *step] > max) max= src[ x   *step];
499
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
500 bb270c08 Diego Biurrun
                }else{
501 16e0bf73 Diego Biurrun
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
502
                        if(src[ x   *step] < min) min= src[ x   *step];
503
                }
504
            }
505
            if(max-min < 2*QP){
506
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
507
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
508
509
                int sums[10];
510
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
511
                sums[1] = sums[0] - first       + src[3*step];
512
                sums[2] = sums[1] - first       + src[4*step];
513
                sums[3] = sums[2] - first       + src[5*step];
514
                sums[4] = sums[3] - first       + src[6*step];
515
                sums[5] = sums[4] - src[0*step] + src[7*step];
516
                sums[6] = sums[5] - src[1*step] + last;
517
                sums[7] = sums[6] - src[2*step] + last;
518
                sums[8] = sums[7] - src[3*step] + last;
519
                sums[9] = sums[8] - src[4*step] + last;
520
521
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
522
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
523
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
524
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
525
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
526
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
527
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
528
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
529
            }
530
        }else{
531
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
532
533
            if(FFABS(middleEnergy) < 8*QP){
534
                const int q=(src[3*step] - src[4*step])/2;
535
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537
538
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
539
                d= FFMAX(d, 0);
540
541
                d= (5*d + 32) >> 6;
542
                d*= FFSIGN(-middleEnergy);
543
544
                if(q>0){
545
                    d= d<0 ? 0 : d;
546
                    d= d>q ? q : d;
547
                }else{
548
                    d= d>0 ? 0 : d;
549
                    d= d<q ? q : d;
550 bb270c08 Diego Biurrun
                }
551
552 16e0bf73 Diego Biurrun
                src[3*step]-= d;
553
                src[4*step]+= d;
554
            }
555 bb270c08 Diego Biurrun
        }
556 16e0bf73 Diego Biurrun
557
        src += stride;
558
    }
559 12eebd26 Michael Niedermayer
/*if(step==16){
560
    STOP_TIMER("step16")
561
}else{
562
    STOP_TIMER("stepX")
563
}*/
564
}
565 cc9b0679 Michael Niedermayer
566 e89952aa Michael Niedermayer
//Note: there are C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
567 cc9b0679 Michael Niedermayer
//Plain C versions
568 e90f5b5a Ramiro Polla
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
569 e89952aa Michael Niedermayer
#define COMPILE_C
570
#endif
571
572 b250f9c6 Aurelien Jacobs
#if HAVE_ALTIVEC
573 b0ac780a Michael Niedermayer
#define COMPILE_ALTIVEC
574
#endif //HAVE_ALTIVEC
575
576 b250f9c6 Aurelien Jacobs
#if ARCH_X86
577 e89952aa Michael Niedermayer
578 e90f5b5a Ramiro Polla
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
579 e89952aa Michael Niedermayer
#define COMPILE_MMX
580
#endif
581
582 e90f5b5a Ramiro Polla
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
583 e89952aa Michael Niedermayer
#define COMPILE_MMX2
584
#endif
585
586 e90f5b5a Ramiro Polla
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
587 e89952aa Michael Niedermayer
#define COMPILE_3DNOW
588
#endif
589 b250f9c6 Aurelien Jacobs
#endif /* ARCH_X86 */
590 e89952aa Michael Niedermayer
591
#undef HAVE_MMX
592 b250f9c6 Aurelien Jacobs
#define HAVE_MMX 0
593 e89952aa Michael Niedermayer
#undef HAVE_MMX2
594 b250f9c6 Aurelien Jacobs
#define HAVE_MMX2 0
595 ebc3209a Diego Biurrun
#undef HAVE_AMD3DNOW
596
#define HAVE_AMD3DNOW 0
597 b0ac780a Michael Niedermayer
#undef HAVE_ALTIVEC
598 b250f9c6 Aurelien Jacobs
#define HAVE_ALTIVEC 0
599 e89952aa Michael Niedermayer
600
#ifdef COMPILE_C
601 cc9b0679 Michael Niedermayer
#define RENAME(a) a ## _C
602
#include "postprocess_template.c"
603 e89952aa Michael Niedermayer
#endif
604 cc9b0679 Michael Niedermayer
605 b0ac780a Michael Niedermayer
#ifdef COMPILE_ALTIVEC
606
#undef RENAME
607 b250f9c6 Aurelien Jacobs
#undef HAVE_ALTIVEC
608
#define HAVE_ALTIVEC 1
609 b0ac780a Michael Niedermayer
#define RENAME(a) a ## _altivec
610
#include "postprocess_altivec_template.c"
611
#include "postprocess_template.c"
612
#endif
613
614 cc9b0679 Michael Niedermayer
//MMX versions
615 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX
616 cc9b0679 Michael Niedermayer
#undef RENAME
617 b250f9c6 Aurelien Jacobs
#undef HAVE_MMX
618
#define HAVE_MMX 1
619 cc9b0679 Michael Niedermayer
#define RENAME(a) a ## _MMX
620
#include "postprocess_template.c"
621 e89952aa Michael Niedermayer
#endif
622 cc9b0679 Michael Niedermayer
623
//MMX2 versions
624 e89952aa Michael Niedermayer
#ifdef COMPILE_MMX2
625 cc9b0679 Michael Niedermayer
#undef RENAME
626 b250f9c6 Aurelien Jacobs
#undef HAVE_MMX
627
#undef HAVE_MMX2
628
#define HAVE_MMX 1
629
#define HAVE_MMX2 1
630 cc9b0679 Michael Niedermayer
#define RENAME(a) a ## _MMX2
631
#include "postprocess_template.c"
632 e89952aa Michael Niedermayer
#endif
633 cc9b0679 Michael Niedermayer
634
//3DNOW versions
635 e89952aa Michael Niedermayer
#ifdef COMPILE_3DNOW
636 cc9b0679 Michael Niedermayer
#undef RENAME
637 b250f9c6 Aurelien Jacobs
#undef HAVE_MMX
638 cc9b0679 Michael Niedermayer
#undef HAVE_MMX2
639 ebc3209a Diego Biurrun
#undef HAVE_AMD3DNOW
640 b250f9c6 Aurelien Jacobs
#define HAVE_MMX 1
641
#define HAVE_MMX2 0
642 ebc3209a Diego Biurrun
#define HAVE_AMD3DNOW 1
643 cc9b0679 Michael Niedermayer
#define RENAME(a) a ## _3DNow
644
#include "postprocess_template.c"
645 e89952aa Michael Niedermayer
#endif
646 cc9b0679 Michael Niedermayer
647 755bfeab Diego Biurrun
// Minor note: from this line on the HAVE_xyz macros no longer hold their configured values (they were redefined for the template includes above), so do not rely on them.
648 cc9b0679 Michael Niedermayer
649 6c51fd3f Michael Niedermayer
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650 7dfea342 Diego Biurrun
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
651 cc9b0679 Michael Niedermayer
{
652 16e0bf73 Diego Biurrun
    PPContext *c= (PPContext *)vc;
653
    PPMode *ppMode= (PPMode *)vm;
654
    c->ppMode= *ppMode; //FIXME
655 9c9e467d Michael Niedermayer
656 16e0bf73 Diego Biurrun
    // Using ifs here as they are faster than function pointers although the
657
    // difference would not be measurable here but it is much better because
658
    // someone might exchange the CPU whithout restarting MPlayer ;)
659 e90f5b5a Ramiro Polla
#if CONFIG_RUNTIME_CPUDETECT
660 b250f9c6 Aurelien Jacobs
#if ARCH_X86
661 16e0bf73 Diego Biurrun
    // ordered per speed fastest first
662
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
    else
669
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 cc9b0679 Michael Niedermayer
#else
671 b250f9c6 Aurelien Jacobs
#if HAVE_ALTIVEC
672 16e0bf73 Diego Biurrun
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
673
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674
    else
675 b0ac780a Michael Niedermayer
#endif
676 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677 be44a4d7 Michael Niedermayer
#endif
678 e90f5b5a Ramiro Polla
#else //CONFIG_RUNTIME_CPUDETECT
679 b250f9c6 Aurelien Jacobs
#if   HAVE_MMX2
680 16e0bf73 Diego Biurrun
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
681 ebc3209a Diego Biurrun
#elif HAVE_AMD3DNOW
682 16e0bf73 Diego Biurrun
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683 b250f9c6 Aurelien Jacobs
#elif HAVE_MMX
684 16e0bf73 Diego Biurrun
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685 b250f9c6 Aurelien Jacobs
#elif HAVE_ALTIVEC
686 16e0bf73 Diego Biurrun
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687 e89952aa Michael Niedermayer
#else
688 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689 e89952aa Michael Niedermayer
#endif
690 e90f5b5a Ramiro Polla
#endif //!CONFIG_RUNTIME_CPUDETECT
691 117e45b0 Michael Niedermayer
}
692
693 cc9b0679 Michael Niedermayer
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
694 bb270c08 Diego Biurrun
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
695 13e00528 Arpi
696 911879d1 Michael Niedermayer
/**
 * Help text for the -pp command line option.
 *
 * Lists every filter with its short and long name and its options,
 * followed by usage examples. The symbol is a pointer to the text for
 * library versions below 52 and an array holding the text from 52 on.
 */
#if LIBPOSTPROC_VERSION_INT >= (52<<16)
const char pp_help[] =
#else
const char *const pp_help=
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
745 911879d1 Michael Niedermayer
746 7dfea342 Diego Biurrun
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
747 911879d1 Michael Niedermayer
{
748 16e0bf73 Diego Biurrun
    char temp[GET_MODE_BUFFER_SIZE];
749
    char *p= temp;
750
    static const char filterDelimiters[] = ",/";
751
    static const char optionDelimiters[] = ":";
752
    struct PPMode *ppMode;
753
    char *filterToken;
754
755
    ppMode= av_malloc(sizeof(PPMode));
756
757
    ppMode->lumMode= 0;
758
    ppMode->chromMode= 0;
759
    ppMode->maxTmpNoise[0]= 700;
760
    ppMode->maxTmpNoise[1]= 1500;
761
    ppMode->maxTmpNoise[2]= 3000;
762
    ppMode->maxAllowedY= 234;
763
    ppMode->minAllowedY= 16;
764
    ppMode->baseDcDiff= 256/8;
765
    ppMode->flatnessThreshold= 56-16-1;
766
    ppMode->maxClippedThreshold= 0.01;
767
    ppMode->error=0;
768
769
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770
771
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772
773
    for(;;){
774
        char *filterName;
775
        int q= 1000000; //PP_QUALITY_MAX;
776
        int chrom=-1;
777
        int luma=-1;
778
        char *option;
779
        char *options[OPTIONS_ARRAY_SIZE];
780
        int i;
781
        int filterNameOk=0;
782
        int numOfUnknownOptions=0;
783
        int enable=1; //does the user want us to enabled or disabled the filter
784
785
        filterToken= strtok(p, filterDelimiters);
786
        if(filterToken == NULL) break;
787
        p+= strlen(filterToken) + 1; // p points to next filterToken
788
        filterName= strtok(filterToken, optionDelimiters);
789
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
790
791
        if(*filterName == '-'){
792
            enable=0;
793
            filterName++;
794
        }
795 bb270c08 Diego Biurrun
796 16e0bf73 Diego Biurrun
        for(;;){ //for all options
797
            option= strtok(NULL, optionDelimiters);
798
            if(option == NULL) break;
799
800
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
801
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
802
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
803
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
804
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
805
            else{
806
                options[numOfUnknownOptions] = option;
807
                numOfUnknownOptions++;
808
            }
809
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810
        }
811
        options[numOfUnknownOptions] = NULL;
812
813
        /* replace stuff from the replace Table */
814
        for(i=0; replaceTable[2*i]!=NULL; i++){
815
            if(!strcmp(replaceTable[2*i], filterName)){
816
                int newlen= strlen(replaceTable[2*i + 1]);
817
                int plen;
818
                int spaceLeft;
819
820
                if(p==NULL) p= temp, *p=0;      //last filter
821
                else p--, *p=',';               //not last filter
822
823
                plen= strlen(p);
824
                spaceLeft= p - temp + plen;
825
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
826
                    ppMode->error++;
827
                    break;
828 bb270c08 Diego Biurrun
                }
829 16e0bf73 Diego Biurrun
                memmove(p + newlen, p, plen+1);
830
                memcpy(p, replaceTable[2*i + 1], newlen);
831
                filterNameOk=1;
832
            }
833
        }
834 bb270c08 Diego Biurrun
835 16e0bf73 Diego Biurrun
        for(i=0; filters[i].shortName!=NULL; i++){
836
            if(   !strcmp(filters[i].longName, filterName)
837
               || !strcmp(filters[i].shortName, filterName)){
838
                ppMode->lumMode &= ~filters[i].mask;
839
                ppMode->chromMode &= ~filters[i].mask;
840
841
                filterNameOk=1;
842
                if(!enable) break; // user wants to disable it
843
844
                if(q >= filters[i].minLumQuality && luma)
845
                    ppMode->lumMode|= filters[i].mask;
846
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847
                    if(q >= filters[i].minChromQuality)
848
                            ppMode->chromMode|= filters[i].mask;
849
850
                if(filters[i].mask == LEVEL_FIX){
851
                    int o;
852
                    ppMode->minAllowedY= 16;
853
                    ppMode->maxAllowedY= 234;
854
                    for(o=0; options[o]!=NULL; o++){
855
                        if(  !strcmp(options[o],"fullyrange")
856
                           ||!strcmp(options[o],"f")){
857
                            ppMode->minAllowedY= 0;
858
                            ppMode->maxAllowedY= 255;
859
                            numOfUnknownOptions--;
860 bb270c08 Diego Biurrun
                        }
861 16e0bf73 Diego Biurrun
                    }
862 bb270c08 Diego Biurrun
                }
863 16e0bf73 Diego Biurrun
                else if(filters[i].mask == TEMP_NOISE_FILTER)
864 bb270c08 Diego Biurrun
                {
865 16e0bf73 Diego Biurrun
                    int o;
866
                    int numOfNoises=0;
867
868
                    for(o=0; options[o]!=NULL; o++){
869
                        char *tail;
870
                        ppMode->maxTmpNoise[numOfNoises]=
871
                            strtol(options[o], &tail, 0);
872
                        if(tail!=options[o]){
873
                            numOfNoises++;
874
                            numOfUnknownOptions--;
875
                            if(numOfNoises >= 3) break;
876 bb270c08 Diego Biurrun
                        }
877 16e0bf73 Diego Biurrun
                    }
878 bb270c08 Diego Biurrun
                }
879 16e0bf73 Diego Biurrun
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
880
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
881
                    int o;
882
883
                    for(o=0; options[o]!=NULL && o<2; o++){
884
                        char *tail;
885
                        int val= strtol(options[o], &tail, 0);
886
                        if(tail==options[o]) break;
887
888
                        numOfUnknownOptions--;
889
                        if(o==0) ppMode->baseDcDiff= val;
890
                        else ppMode->flatnessThreshold= val;
891
                    }
892
                }
893
                else if(filters[i].mask == FORCE_QUANT){
894
                    int o;
895
                    ppMode->forcedQuant= 15;
896
897
                    for(o=0; options[o]!=NULL && o<1; o++){
898
                        char *tail;
899
                        int val= strtol(options[o], &tail, 0);
900
                        if(tail==options[o]) break;
901
902
                        numOfUnknownOptions--;
903
                        ppMode->forcedQuant= val;
904
                    }
905
                }
906
            }
907 bb270c08 Diego Biurrun
        }
908 16e0bf73 Diego Biurrun
        if(!filterNameOk) ppMode->error++;
909
        ppMode->error += numOfUnknownOptions;
910
    }
911
912
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
913
    if(ppMode->error){
914
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
915
        av_free(ppMode);
916
        return NULL;
917
    }
918
    return ppMode;
919 911879d1 Michael Niedermayer
}
920
921 7dfea342 Diego Biurrun
/**
 * Release a mode obtained from pp_get_mode_by_name_and_quality().
 *
 * @param mode mode to free; it is handed straight to av_free()
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
924
925 88c0bc7e Michael Niedermayer
/**
 * Replace the buffer at *p with a fresh zero-filled one of the given size.
 * The previous contents are discarded, not copied.
 *
 * @param p         address of the buffer pointer; the old buffer is freed
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
929
930 0426af31 Michael Niedermayer
/**
 * (Re)allocate all working buffers of a context for the given geometry.
 *
 * Every buffer is freed and replaced by a zeroed one; nothing is carried
 * over. The new strides are stored in the context.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line stride used to size the temporary picture buffers
 * @param qpStride stride used to size the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    if(c->yHistogram){
        /* Same value for every bin; computed in 64 bit so that
         * width*height cannot overflow int for large pictures. */
        const int64_t initial= (int64_t)width*height/64*15/256;
        for(i=0; i<256; i++)
            c->yHistogram[i]= initial;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
956
957 e7becfb2 Diego Biurrun
/**
 * Name callback for the logging class below.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
960
961 31bfd6f3 Diego Pettenò
/* Class descriptor that pp_get_context() stores in PPContext.av_class. */
static const AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
962 e7becfb2 Diego Biurrun
963 7dfea342 Diego Biurrun
pp_context *pp_get_context(int width, int height, int cpuCaps){
964 16e0bf73 Diego Biurrun
    PPContext *c= av_malloc(sizeof(PPContext));
965 ef516f73 David Conrad
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
966 16e0bf73 Diego Biurrun
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
967
968
    memset(c, 0, sizeof(PPContext));
969
    c->av_class = &av_codec_context_class;
970
    c->cpuCaps= cpuCaps;
971
    if(cpuCaps&PP_FORMAT){
972
        c->hChromaSubSample= cpuCaps&0x3;
973
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
974
    }else{
975
        c->hChromaSubSample= 1;
976
        c->vChromaSubSample= 1;
977
    }
978
979
    reallocBuffers(c, width, height, stride, qpStride);
980
981
    c->frameNum=-1;
982
983
    return c;
984 45b4f285 Michael Niedermayer
}
985
986 9cb54f43 Michael Niedermayer
void pp_free_context(void *vc){
987 16e0bf73 Diego Biurrun
    PPContext *c = (PPContext*)vc;
988
    int i;
989 115329f1 Diego Biurrun
990 aa089f6c Diego Biurrun
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
991
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
992 115329f1 Diego Biurrun
993 16e0bf73 Diego Biurrun
    av_free(c->tempBlocks);
994
    av_free(c->yHistogram);
995
    av_free(c->tempDst);
996
    av_free(c->tempSrc);
997
    av_free(c->deintTemp);
998
    av_free(c->stdQPTable);
999
    av_free(c->nonBQPTable);
1000
    av_free(c->forcedQPTable);
1001 115329f1 Diego Biurrun
1002 16e0bf73 Diego Biurrun
    memset(c, 0, sizeof(PPContext));
1003 88c0bc7e Michael Niedermayer
1004 16e0bf73 Diego Biurrun
    av_free(c);
1005 9c9e467d Michael Niedermayer
}
1006
1007 6c51fd3f Michael Niedermayer
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1008 16e0bf73 Diego Biurrun
                     uint8_t * dst[3], const int dstStride[3],
1009
                     int width, int height,
1010
                     const QP_STORE_T *QP_store,  int QPStride,
1011 7dfea342 Diego Biurrun
                     pp_mode *vm,  void *vc, int pict_type)
1012 911879d1 Michael Niedermayer
{
1013 16e0bf73 Diego Biurrun
    int mbWidth = (width+15)>>4;
1014
    int mbHeight= (height+15)>>4;
1015
    PPMode *mode = (PPMode*)vm;
1016
    PPContext *c = (PPContext*)vc;
1017
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1018
    int absQPStride = FFABS(QPStride);
1019
1020
    // c->stride and c->QPStride are always positive
1021
    if(c->stride < minStride || c->qpStride < absQPStride)
1022
        reallocBuffers(c, width, height,
1023
                       FFMAX(minStride, c->stride),
1024
                       FFMAX(c->qpStride, absQPStride));
1025
1026
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1027
        int i;
1028
        QP_store= c->forcedQPTable;
1029
        absQPStride = QPStride = 0;
1030
        if(mode->lumMode & FORCE_QUANT)
1031
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1032
        else
1033
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1034
    }
1035 0426af31 Michael Niedermayer
1036 16e0bf73 Diego Biurrun
    if(pict_type & PP_PICT_TYPE_QP2){
1037
        int i;
1038
        const int count= mbHeight * absQPStride;
1039
        for(i=0; i<(count>>2); i++){
1040
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1041 bb270c08 Diego Biurrun
        }
1042 16e0bf73 Diego Biurrun
        for(i<<=2; i<count; i++){
1043
            c->stdQPTable[i] = QP_store[i]>>1;
1044
        }
1045
        QP_store= c->stdQPTable;
1046
        QPStride= absQPStride;
1047
    }
1048
1049
    if(0){
1050
        int x,y;
1051
        for(y=0; y<mbHeight; y++){
1052
            for(x=0; x<mbWidth; x++){
1053 e7becfb2 Diego Biurrun
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1054 16e0bf73 Diego Biurrun
            }
1055
            av_log(c, AV_LOG_INFO, "\n");
1056 bb270c08 Diego Biurrun
        }
1057 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1058 16e0bf73 Diego Biurrun
    }
1059
1060
    if((pict_type&7)!=3){
1061
        if (QPStride >= 0){
1062
            int i;
1063
            const int count= mbHeight * QPStride;
1064
            for(i=0; i<(count>>2); i++){
1065
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1066
            }
1067
            for(i<<=2; i<count; i++){
1068
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1069
            }
1070
        } else {
1071
            int i,j;
1072
            for(i=0; i<mbHeight; i++) {
1073
                for(j=0; j<absQPStride; j++) {
1074
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1075 bb270c08 Diego Biurrun
                }
1076 16e0bf73 Diego Biurrun
            }
1077 bb270c08 Diego Biurrun
        }
1078 16e0bf73 Diego Biurrun
    }
1079 bb270c08 Diego Biurrun
1080 16e0bf73 Diego Biurrun
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1081
           mode->lumMode, mode->chromMode);
1082 bb270c08 Diego Biurrun
1083 16e0bf73 Diego Biurrun
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1084 bb270c08 Diego Biurrun
                width, height, QP_store, QPStride, 0, mode, c);
1085
1086 16e0bf73 Diego Biurrun
    width  = (width )>>c->hChromaSubSample;
1087
    height = (height)>>c->vChromaSubSample;
1088
1089
    if(mode->chromMode){
1090
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1091
                    width, height, QP_store, QPStride, 1, mode, c);
1092
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1093
                    width, height, QP_store, QPStride, 2, mode, c);
1094
    }
1095
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1096
        linecpy(dst[1], src[1], height, srcStride[1]);
1097
        linecpy(dst[2], src[2], height, srcStride[2]);
1098
    }else{
1099
        int y;
1100
        for(y=0; y<height; y++){
1101
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1102
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1103 bb270c08 Diego Biurrun
        }
1104 16e0bf73 Diego Biurrun
    }
1105 911879d1 Michael Niedermayer
}