Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ c60208e7

History | View | Annotate | Download (37.2 KB)

1 3057fa66 Arpi
/*
2 b78e7197 Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6 ef85972b Diego Biurrun
 * This file is part of FFmpeg.
7 b78e7197 Diego Biurrun
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22 3057fa66 Arpi
23 b304569a Michael Niedermayer
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27 115329f1 Diego Biurrun
28 3057fa66 Arpi
/*
29 bb270c08 Diego Biurrun
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49 d5a1a995 Michael Niedermayer

50 2cab6401 Diego Biurrun
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 755bfeab Diego Biurrun
# more or less self-invented filters, so the exactness is not too meaningful
52 3057fa66 Arpi
E = Exact implementation
53 04932b0d Diego Biurrun
e = almost exact implementation (slightly different rounding,...)
54 3057fa66 Arpi
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56 b0ac780a Michael Niedermayer
p = partially optimized, still some work to do
57 3057fa66 Arpi
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64 13e00528 Arpi
write a faster and higher quality deblocking filter :)
65 d5a1a995 Michael Niedermayer
make the mainloop more flexible (variable number of blocks at once
66 bb270c08 Diego Biurrun
        (the if/else stuff per block is slowing things down)
67 9f45d04d Michael Niedermayer
compare the quality & speed of all filters
68
split this huge file
69 8405b3fd Michael Niedermayer
optimize c versions
70 117e45b0 Michael Niedermayer
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 3057fa66 Arpi
...
72 13e00528 Arpi
*/
73
74 36b1b0bc Diego Biurrun
//Changelog: use the Subversion log
75 3057fa66 Arpi
76 9858f773 Michael Niedermayer
#include "config.h"
77 245976da Diego Biurrun
#include "libavutil/avutil.h"
78 3057fa66 Arpi
#include <inttypes.h>
79
#include <stdio.h>
80 d5a1a995 Michael Niedermayer
#include <stdlib.h>
81 911879d1 Michael Niedermayer
#include <string.h>
82 3057fa66 Arpi
//#undef HAVE_MMX2
83 13e00528 Arpi
//#define HAVE_3DNOW
84 3057fa66 Arpi
//#undef HAVE_MMX
85 cc9b0679 Michael Niedermayer
//#undef ARCH_X86
86 7f16f6e6 Michael Niedermayer
//#define DEBUG_BRIGHTNESS
87 13e00528 Arpi
#include "postprocess.h"
88 c41d972d Michael Niedermayer
#include "postprocess_internal.h"
89 bba9b16c Michael Niedermayer
90 2a4a62bf Stefano Sabatini
unsigned postproc_version(void)
91
{
92
    return LIBPOSTPROC_VERSION_INT;
93
}
94
95 b250f9c6 Aurelien Jacobs
#if HAVE_ALTIVEC_H
96 a7b2871c Romain Dolbeau
#include <altivec.h>
97
#endif
98
99 911879d1 Michael Niedermayer
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8 // loop bound used by the block filters below (rows / columns per block)
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
104 911879d1 Michael Niedermayer
105 b250f9c6 Aurelien Jacobs
#if ARCH_X86
106 2b858d0b Reimar Döffinger
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
107
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
108
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
109
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
110
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
111
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
112
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
113
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
114 b28daef8 Michael Niedermayer
#endif
115 3057fa66 Arpi
116 2722e362 Reimar Döffinger
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
117 3057fa66 Arpi
118 9c9e467d Michael Niedermayer
119 911879d1 Michael Niedermayer
static struct PPFilter filters[]=
120
{
121 16e0bf73 Diego Biurrun
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
122
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
123
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
124
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
125
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
126
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
127
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
128
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
129
    {"dr", "dering",                1, 5, 6, DERING},
130
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
131
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
132
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
133
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
134
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
135
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
136
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
137
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
138
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
139
    {NULL, NULL,0,0,0,0} //End Marker
140 911879d1 Michael Niedermayer
};
141
142 7b49ce2e Stefan Huehner
/* Flat list of string pairs terminated by a single NULL: entry 2*i is a
 * preset name, entry 2*i+1 is the filter string stored for that name. */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
151
152 3057fa66 Arpi
153 b250f9c6 Aurelien Jacobs
#if ARCH_X86
/* Thin wrappers, one per x86 prefetch instruction. Each emits the instruction
 * named like the function with p as its address operand. */

/** Issue a "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Issue a "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
182 3057fa66 Arpi
183 04932b0d Diego Biurrun
/* The horizontal functions exist only in C because the MMX
184
 * code is faster with vertical filters and transposing. */
185 3057fa66 Arpi
186 cf5ec61d Michael Niedermayer
/**
187
 * Check if the given 8x8 Block is mostly "flat"
188
 */
189 b0ac780a Michael Niedermayer
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
190 cf5ec61d Michael Niedermayer
{
191 16e0bf73 Diego Biurrun
    int numEq= 0;
192
    int y;
193
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194
    const int dcThreshold= dcOffset*2 + 1;
195
196
    for(y=0; y<BLOCK_SIZE; y++){
197
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
198
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
199
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
200
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
201
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
202
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
203
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
204
        src+= stride;
205
    }
206
    return numEq > c->ppMode.flatnessThreshold;
207 9c9e467d Michael Niedermayer
}
208
209
/**
210
 * Check if the middle 8x8 Block in the given 8x16 block is flat
211
 */
212 16e0bf73 Diego Biurrun
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
213
{
214
    int numEq= 0;
215
    int y;
216
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217
    const int dcThreshold= dcOffset*2 + 1;
218
219
    src+= stride*4; // src points to begin of the 8x8 Block
220
    for(y=0; y<BLOCK_SIZE-1; y++){
221
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
222
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
223
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
224
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
225
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
226
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
227
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
228
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
229
        src+= stride;
230
    }
231
    return numEq > c->ppMode.flatnessThreshold;
232 cf5ec61d Michael Niedermayer
}
233
234 b0ac780a Michael Niedermayer
/**
 * Cheap horizontal range check on 8 rows.
 *
 * Instead of computing the true min/max of every row, one pixel pair that is
 * 5 columns apart (3 for the wrapped pairs) is sampled per row, in the
 * repeating pattern (0,5), (2,7), (4,1), (6,3).
 * The check fails as soon as one sampled difference is outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride offset between the starts of two consecutive rows
 * @param QP     quantiser used to scale the allowed difference
 * @return 1 if all sampled differences are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    // (unsigned)(diff + 2*QP) > 4*QP rejects diff < -2*QP and diff > 2*QP
    // with one comparison; the conversion of 4*QP is spelled out here
    const unsigned limit= (unsigned)(4*QP);
    int i;

    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > limit) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > limit) return 0;
        src += stride;
    }
    return 1;
}
256 cf5ec61d Michael Niedermayer
257 cb482d25 Michael Niedermayer
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
258
{
259
#if 1
260
#if 1
261 16e0bf73 Diego Biurrun
    int x;
262
    src+= stride*4;
263
    for(x=0; x<BLOCK_SIZE; x+=4){
264
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
265
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
266
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
267
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
268
    }
269 cb482d25 Michael Niedermayer
#else
270 16e0bf73 Diego Biurrun
    int x;
271
    src+= stride*3;
272
    for(x=0; x<BLOCK_SIZE; x++){
273
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274
    }
275 cb482d25 Michael Niedermayer
#endif
276 16e0bf73 Diego Biurrun
    return 1;
277 cb482d25 Michael Niedermayer
#else
278 16e0bf73 Diego Biurrun
    int x;
279
    src+= stride*4;
280
    for(x=0; x<BLOCK_SIZE; x++){
281
        int min=255;
282
        int max=0;
283
        int y;
284
        for(y=0; y<8; y++){
285
            int v= src[x + y*stride];
286
            if(v>max) max=v;
287
            if(v<min) min=v;
288 bb270c08 Diego Biurrun
        }
289 16e0bf73 Diego Biurrun
        if(max-min > 2*QP) return 0;
290
    }
291
    return 1;
292 cb482d25 Michael Niedermayer
#endif
293
}
294
295 16e0bf73 Diego Biurrun
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() passes,
 *         0 if it is flat but isHorizMinMaxOk_C() fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
306
307 16e0bf73 Diego Biurrun
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() passes,
 *         0 if it is flat but isVertMinMaxOk_C() fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
318
319 b0ac780a Michael Niedermayer
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows only the two pixels dst[3] and dst[4] are
 * modified, and only when the "middle energy" across them is below 8*QP.
 * The correction d is derived from how much the middle energy exceeds the
 * smaller of the left/right energies and is limited to the range between 0
 * and q = (dst[3] - dst[4])/2.
 *
 * @param dst    first pixel of the first row (pixels 0..7 of each row are read)
 * @param stride offset between the starts of two consecutive rows
 * @param c      context; only QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            // clamp d to the interval spanned by 0 and q
            if(q>0)
            {
                d= d<0 ? 0 : d;
                d= d>q ? q : d;
            }
            else
            {
                d= d>0 ? 0 : d;
                d= d<q ? q : d;
            }

            dst[3]-= d;
            dst[4]+= d;
        }
        dst+= stride;
    }
}
353
354
/**
355
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
356
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
357
 */
358 b0ac780a Michael Niedermayer
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
359 cf5ec61d Michael Niedermayer
{
360 16e0bf73 Diego Biurrun
    int y;
361
    for(y=0; y<BLOCK_SIZE; y++){
362
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
363
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
364
365
        int sums[10];
366
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
367
        sums[1] = sums[0] - first  + dst[3];
368
        sums[2] = sums[1] - first  + dst[4];
369
        sums[3] = sums[2] - first  + dst[5];
370
        sums[4] = sums[3] - first  + dst[6];
371
        sums[5] = sums[4] - dst[0] + dst[7];
372
        sums[6] = sums[5] - dst[1] + last;
373
        sums[7] = sums[6] - dst[2] + last;
374
        sums[8] = sums[7] - dst[3] + last;
375
        sums[9] = sums[8] - dst[4] + last;
376
377
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
378
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
379
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
380
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
381
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
382
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
383
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
384
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
385
386
        dst+= stride;
387
    }
388 cf5ec61d Michael Niedermayer
}
389
390 4e4dcbc5 Michael Niedermayer
/**
391 cc9b0679 Michael Niedermayer
 * Experimental Filter 1 (Horizontal)
392
 * will not damage linear gradients
393 bd107136 Diego Biurrun
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
394 755bfeab Diego Biurrun
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
395
 * MMX2 version does correct clipping C version does not
396 cc9b0679 Michael Niedermayer
 * not identical with the vertical one
397 4e4dcbc5 Michael Niedermayer
 */
398 cc9b0679 Michael Niedermayer
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
399
{
400 16e0bf73 Diego Biurrun
    int y;
401
    static uint64_t *lut= NULL;
402
    if(lut==NULL)
403
    {
404
        int i;
405
        lut = av_malloc(256*8);
406
        for(i=0; i<256; i++)
407 bb270c08 Diego Biurrun
        {
408 16e0bf73 Diego Biurrun
            int v= i < 128 ? 2*i : 2*(i-256);
409 117e45b0 Michael Niedermayer
/*
410 cc9b0679 Michael Niedermayer
//Simulate 112242211 9-Tap filter
411 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)  & 0xFF;
412
            uint64_t b= (v/8)   & 0xFF;
413
            uint64_t c= (v/4)   & 0xFF;
414
            uint64_t d= (3*v/8) & 0xFF;
415 117e45b0 Michael Niedermayer
*/
416 cc9b0679 Michael Niedermayer
//Simulate piecewise linear interpolation
417 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)   & 0xFF;
418
            uint64_t b= (v*3/16) & 0xFF;
419
            uint64_t c= (v*5/16) & 0xFF;
420
            uint64_t d= (7*v/16) & 0xFF;
421
            uint64_t A= (0x100 - a)&0xFF;
422
            uint64_t B= (0x100 - b)&0xFF;
423
            uint64_t C= (0x100 - c)&0xFF;
424
            uint64_t D= (0x100 - c)&0xFF;
425
426
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
427
                       (D<<24) | (C<<16) | (B<<8)  | (A);
428
            //lut[i] = (v<<32) | (v<<24);
429 bb270c08 Diego Biurrun
        }
430 16e0bf73 Diego Biurrun
    }
431 bb270c08 Diego Biurrun
432 16e0bf73 Diego Biurrun
    for(y=0; y<BLOCK_SIZE; y++){
433
        int a= src[1] - src[2];
434
        int b= src[3] - src[4];
435
        int c= src[5] - src[6];
436 bb270c08 Diego Biurrun
437 16e0bf73 Diego Biurrun
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
438 bb270c08 Diego Biurrun
439 16e0bf73 Diego Biurrun
        if(d < QP){
440
            int v = d * FFSIGN(-b);
441 bb270c08 Diego Biurrun
442 16e0bf73 Diego Biurrun
            src[1] +=v/8;
443
            src[2] +=v/4;
444
            src[3] +=3*v/8;
445
            src[4] -=3*v/8;
446
            src[5] -=v/4;
447
            src[6] -=v/8;
448 bb270c08 Diego Biurrun
        }
449 16e0bf73 Diego Biurrun
        src+=stride;
450
    }
451 cc9b0679 Michael Niedermayer
}
452
453 12eebd26 Michael Niedermayer
/**
454
 * accurate deblock filter
455
 */
456 849f1035 Måns Rullgård
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
457 16e0bf73 Diego Biurrun
    int y;
458
    const int QP= c->QP;
459
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
460
    const int dcThreshold= dcOffset*2 + 1;
461 12eebd26 Michael Niedermayer
//START_TIMER
462 16e0bf73 Diego Biurrun
    src+= step*4; // src points to begin of the 8x8 Block
463
    for(y=0; y<8; y++){
464
        int numEq= 0;
465
466
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
467
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
468
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
469
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
470
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
471
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
472
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
473
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
474
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
475
        if(numEq > c->ppMode.flatnessThreshold){
476
            int min, max, x;
477
478
            if(src[0] > src[step]){
479
                max= src[0];
480
                min= src[step];
481
            }else{
482
                max= src[step];
483
                min= src[0];
484
            }
485
            for(x=2; x<8; x+=2){
486
                if(src[x*step] > src[(x+1)*step]){
487
                        if(src[x    *step] > max) max= src[ x   *step];
488
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
489 bb270c08 Diego Biurrun
                }else{
490 16e0bf73 Diego Biurrun
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
491
                        if(src[ x   *step] < min) min= src[ x   *step];
492
                }
493
            }
494
            if(max-min < 2*QP){
495
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
496
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
497
498
                int sums[10];
499
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
500
                sums[1] = sums[0] - first       + src[3*step];
501
                sums[2] = sums[1] - first       + src[4*step];
502
                sums[3] = sums[2] - first       + src[5*step];
503
                sums[4] = sums[3] - first       + src[6*step];
504
                sums[5] = sums[4] - src[0*step] + src[7*step];
505
                sums[6] = sums[5] - src[1*step] + last;
506
                sums[7] = sums[6] - src[2*step] + last;
507
                sums[8] = sums[7] - src[3*step] + last;
508
                sums[9] = sums[8] - src[4*step] + last;
509
510
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
511
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
512
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
513
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
514
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
515
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
516
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
517
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
518
            }
519
        }else{
520
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
521
522
            if(FFABS(middleEnergy) < 8*QP){
523
                const int q=(src[3*step] - src[4*step])/2;
524
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
525
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
526
527
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
528
                d= FFMAX(d, 0);
529
530
                d= (5*d + 32) >> 6;
531
                d*= FFSIGN(-middleEnergy);
532
533
                if(q>0){
534
                    d= d<0 ? 0 : d;
535
                    d= d>q ? q : d;
536
                }else{
537
                    d= d>0 ? 0 : d;
538
                    d= d<q ? q : d;
539 bb270c08 Diego Biurrun
                }
540
541 16e0bf73 Diego Biurrun
                src[3*step]-= d;
542
                src[4*step]+= d;
543
            }
544 bb270c08 Diego Biurrun
        }
545 16e0bf73 Diego Biurrun
546
        src += stride;
547
    }
548 12eebd26 Michael Niedermayer
/*if(step==16){
549
    STOP_TIMER("step16")
550
}else{
551
    STOP_TIMER("stepX")
552
}*/
553
}
554 cc9b0679 Michael Niedermayer
555 e89952aa Michael Niedermayer
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
556 cc9b0679 Michael Niedermayer
//Plain C versions
557 b250f9c6 Aurelien Jacobs
/* Select which variants of the template get compiled. With RUNTIME_CPUDETECT
 * the C variant and (on x86) all three x86 variants are built; otherwise the
 * HAVE_* configuration picks them. */
#if !(HAVE_MMX || HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if ARCH_X86

#if (HAVE_MMX && !HAVE_3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (HAVE_3DNOW && !HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* ARCH_X86 */

/* Reset every feature macro to 0. Each section below then re-defines the
 * macros for the variant it generates before including the template, and
 * RENAME() appends the variant's suffix to the template's identifiers. */
#undef HAVE_MMX
#define HAVE_MMX 0
#undef HAVE_MMX2
#define HAVE_MMX2 0
#undef HAVE_3DNOW
#define HAVE_3DNOW 0
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#undef HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#undef HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#define HAVE_MMX 1
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define HAVE_MMX 1
#define HAVE_MMX2 0
#define HAVE_3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
635 cc9b0679 Michael Niedermayer
636 755bfeab Diego Biurrun
// minor note: the HAVE_xyz is messed up after that line so do not use it.
637 cc9b0679 Michael Niedermayer
638 6c51fd3f Michael Niedermayer
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
639 7dfea342 Diego Biurrun
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
640 cc9b0679 Michael Niedermayer
{
641 16e0bf73 Diego Biurrun
    PPContext *c= (PPContext *)vc;
642
    PPMode *ppMode= (PPMode *)vm;
643
    c->ppMode= *ppMode; //FIXME
644 9c9e467d Michael Niedermayer
645 16e0bf73 Diego Biurrun
    // Using ifs here as they are faster than function pointers although the
646
    // difference would not be measurable here but it is much better because
647
    // someone might exchange the CPU whithout restarting MPlayer ;)
648 e89952aa Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
649 b250f9c6 Aurelien Jacobs
#if ARCH_X86
650 16e0bf73 Diego Biurrun
    // ordered per speed fastest first
651
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
652
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
654
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
656
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657
    else
658
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659 cc9b0679 Michael Niedermayer
#else
660 b250f9c6 Aurelien Jacobs
#if HAVE_ALTIVEC
661 16e0bf73 Diego Biurrun
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
662
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663
    else
664 b0ac780a Michael Niedermayer
#endif
665 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666 be44a4d7 Michael Niedermayer
#endif
667 e89952aa Michael Niedermayer
#else //RUNTIME_CPUDETECT
668 b250f9c6 Aurelien Jacobs
#if   HAVE_MMX2
669 16e0bf73 Diego Biurrun
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 b250f9c6 Aurelien Jacobs
#elif HAVE_3DNOW
671 16e0bf73 Diego Biurrun
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672 b250f9c6 Aurelien Jacobs
#elif HAVE_MMX
673 16e0bf73 Diego Biurrun
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 b250f9c6 Aurelien Jacobs
#elif HAVE_ALTIVEC
675 16e0bf73 Diego Biurrun
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 e89952aa Michael Niedermayer
#else
677 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 e89952aa Michael Niedermayer
#endif
679
#endif //!RUNTIME_CPUDETECT
680 117e45b0 Michael Niedermayer
}
681
682 cc9b0679 Michael Niedermayer
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
683 bb270c08 Diego Biurrun
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
684 13e00528 Arpi
685 911879d1 Michael Niedermayer
/**
 * Help text for the -pp command line option.
 *
 * Lists every filter with its short and long name, the options it accepts,
 * the predefined filter sets and a few usage examples.
 * The declaration form is selected by LIBPOSTPROC_VERSION_INT: a constant
 * pointer to the string below (52<<16), a character array otherwise.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
/* table header and options shared by all filters */
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
/* deblocking filters */
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
/* deringing and level correction */
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
/* deinterlacers */
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
/* predefined filter sets */
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
/* noise reduction and quantizer override */
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
/* usage and examples */
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
734 911879d1 Michael Niedermayer
735 7dfea342 Diego Biurrun
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
736 911879d1 Michael Niedermayer
{
737 16e0bf73 Diego Biurrun
    char temp[GET_MODE_BUFFER_SIZE];
738
    char *p= temp;
739
    static const char filterDelimiters[] = ",/";
740
    static const char optionDelimiters[] = ":";
741
    struct PPMode *ppMode;
742
    char *filterToken;
743
744
    ppMode= av_malloc(sizeof(PPMode));
745
746
    ppMode->lumMode= 0;
747
    ppMode->chromMode= 0;
748
    ppMode->maxTmpNoise[0]= 700;
749
    ppMode->maxTmpNoise[1]= 1500;
750
    ppMode->maxTmpNoise[2]= 3000;
751
    ppMode->maxAllowedY= 234;
752
    ppMode->minAllowedY= 16;
753
    ppMode->baseDcDiff= 256/8;
754
    ppMode->flatnessThreshold= 56-16-1;
755
    ppMode->maxClippedThreshold= 0.01;
756
    ppMode->error=0;
757
758
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
759
760
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
761
762
    for(;;){
763
        char *filterName;
764
        int q= 1000000; //PP_QUALITY_MAX;
765
        int chrom=-1;
766
        int luma=-1;
767
        char *option;
768
        char *options[OPTIONS_ARRAY_SIZE];
769
        int i;
770
        int filterNameOk=0;
771
        int numOfUnknownOptions=0;
772
        int enable=1; //does the user want us to enabled or disabled the filter
773
774
        filterToken= strtok(p, filterDelimiters);
775
        if(filterToken == NULL) break;
776
        p+= strlen(filterToken) + 1; // p points to next filterToken
777
        filterName= strtok(filterToken, optionDelimiters);
778
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
779
780
        if(*filterName == '-'){
781
            enable=0;
782
            filterName++;
783
        }
784 bb270c08 Diego Biurrun
785 16e0bf73 Diego Biurrun
        for(;;){ //for all options
786
            option= strtok(NULL, optionDelimiters);
787
            if(option == NULL) break;
788
789
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
790
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
791
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
792
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
793
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
794
            else{
795
                options[numOfUnknownOptions] = option;
796
                numOfUnknownOptions++;
797
            }
798
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
799
        }
800
        options[numOfUnknownOptions] = NULL;
801
802
        /* replace stuff from the replace Table */
803
        for(i=0; replaceTable[2*i]!=NULL; i++){
804
            if(!strcmp(replaceTable[2*i], filterName)){
805
                int newlen= strlen(replaceTable[2*i + 1]);
806
                int plen;
807
                int spaceLeft;
808
809
                if(p==NULL) p= temp, *p=0;      //last filter
810
                else p--, *p=',';               //not last filter
811
812
                plen= strlen(p);
813
                spaceLeft= p - temp + plen;
814
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
815
                    ppMode->error++;
816
                    break;
817 bb270c08 Diego Biurrun
                }
818 16e0bf73 Diego Biurrun
                memmove(p + newlen, p, plen+1);
819
                memcpy(p, replaceTable[2*i + 1], newlen);
820
                filterNameOk=1;
821
            }
822
        }
823 bb270c08 Diego Biurrun
824 16e0bf73 Diego Biurrun
        for(i=0; filters[i].shortName!=NULL; i++){
825
            if(   !strcmp(filters[i].longName, filterName)
826
               || !strcmp(filters[i].shortName, filterName)){
827
                ppMode->lumMode &= ~filters[i].mask;
828
                ppMode->chromMode &= ~filters[i].mask;
829
830
                filterNameOk=1;
831
                if(!enable) break; // user wants to disable it
832
833
                if(q >= filters[i].minLumQuality && luma)
834
                    ppMode->lumMode|= filters[i].mask;
835
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
836
                    if(q >= filters[i].minChromQuality)
837
                            ppMode->chromMode|= filters[i].mask;
838
839
                if(filters[i].mask == LEVEL_FIX){
840
                    int o;
841
                    ppMode->minAllowedY= 16;
842
                    ppMode->maxAllowedY= 234;
843
                    for(o=0; options[o]!=NULL; o++){
844
                        if(  !strcmp(options[o],"fullyrange")
845
                           ||!strcmp(options[o],"f")){
846
                            ppMode->minAllowedY= 0;
847
                            ppMode->maxAllowedY= 255;
848
                            numOfUnknownOptions--;
849 bb270c08 Diego Biurrun
                        }
850 16e0bf73 Diego Biurrun
                    }
851 bb270c08 Diego Biurrun
                }
852 16e0bf73 Diego Biurrun
                else if(filters[i].mask == TEMP_NOISE_FILTER)
853 bb270c08 Diego Biurrun
                {
854 16e0bf73 Diego Biurrun
                    int o;
855
                    int numOfNoises=0;
856
857
                    for(o=0; options[o]!=NULL; o++){
858
                        char *tail;
859
                        ppMode->maxTmpNoise[numOfNoises]=
860
                            strtol(options[o], &tail, 0);
861
                        if(tail!=options[o]){
862
                            numOfNoises++;
863
                            numOfUnknownOptions--;
864
                            if(numOfNoises >= 3) break;
865 bb270c08 Diego Biurrun
                        }
866 16e0bf73 Diego Biurrun
                    }
867 bb270c08 Diego Biurrun
                }
868 16e0bf73 Diego Biurrun
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
869
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
870
                    int o;
871
872
                    for(o=0; options[o]!=NULL && o<2; o++){
873
                        char *tail;
874
                        int val= strtol(options[o], &tail, 0);
875
                        if(tail==options[o]) break;
876
877
                        numOfUnknownOptions--;
878
                        if(o==0) ppMode->baseDcDiff= val;
879
                        else ppMode->flatnessThreshold= val;
880
                    }
881
                }
882
                else if(filters[i].mask == FORCE_QUANT){
883
                    int o;
884
                    ppMode->forcedQuant= 15;
885
886
                    for(o=0; options[o]!=NULL && o<1; o++){
887
                        char *tail;
888
                        int val= strtol(options[o], &tail, 0);
889
                        if(tail==options[o]) break;
890
891
                        numOfUnknownOptions--;
892
                        ppMode->forcedQuant= val;
893
                    }
894
                }
895
            }
896 bb270c08 Diego Biurrun
        }
897 16e0bf73 Diego Biurrun
        if(!filterNameOk) ppMode->error++;
898
        ppMode->error += numOfUnknownOptions;
899
    }
900
901
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
902
    if(ppMode->error){
903
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
904
        av_free(ppMode);
905
        return NULL;
906
    }
907
    return ppMode;
908 911879d1 Michael Niedermayer
}
909
910 7dfea342 Diego Biurrun
/**
 * Release a mode object; the memory is handed to av_free().
 *
 * @param mode mode to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
913
914 88c0bc7e Michael Niedermayer
/**
 * Replace the buffer referenced by *p with a fresh zero-filled one.
 * The previous contents are discarded, not copied.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
918
919 0426af31 Michael Niedermayer
/**
 * (Re)allocate all work buffers of a context for the given geometry.
 *
 * Every buffer is released and replaced by a zero-filled one via
 * reallocAlign(); previous contents are lost. The new stride and qpStride
 * are recorded in the context.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary line buffers
 * @param qpStride line size of the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    /* seed the histogram with a flat distribution; skipped when the
     * allocation above failed so that no NULL pointer is written through */
    if(c->yHistogram){
        for(i=0; i<256; i++)
            c->yHistogram[i]= width*height/64*15/256;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    /* one row of QP values, one entry per macroblock column */
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
945
946 e7becfb2 Diego Biurrun
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    (void)ptr;
    return "postproc";
}
949
950 31bfd6f3 Diego Pettenò
/* Class descriptor of postprocessing contexts; pp_get_context() stores its
 * address in the av_class field of every context it creates. */
static const AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
951 e7becfb2 Diego Biurrun
952 7dfea342 Diego Biurrun
pp_context *pp_get_context(int width, int height, int cpuCaps){
953 16e0bf73 Diego Biurrun
    PPContext *c= av_malloc(sizeof(PPContext));
954
    int stride= (width+15)&(~15);    //assumed / will realloc if needed
955
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
956
957
    memset(c, 0, sizeof(PPContext));
958
    c->av_class = &av_codec_context_class;
959
    c->cpuCaps= cpuCaps;
960
    if(cpuCaps&PP_FORMAT){
961
        c->hChromaSubSample= cpuCaps&0x3;
962
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
963
    }else{
964
        c->hChromaSubSample= 1;
965
        c->vChromaSubSample= 1;
966
    }
967
968
    reallocBuffers(c, width, height, stride, qpStride);
969
970
    c->frameNum=-1;
971
972
    return c;
973 45b4f285 Michael Niedermayer
}
974
975 9cb54f43 Michael Niedermayer
void pp_free_context(void *vc){
976 16e0bf73 Diego Biurrun
    PPContext *c = (PPContext*)vc;
977
    int i;
978 115329f1 Diego Biurrun
979 aa089f6c Diego Biurrun
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
980
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
981 115329f1 Diego Biurrun
982 16e0bf73 Diego Biurrun
    av_free(c->tempBlocks);
983
    av_free(c->yHistogram);
984
    av_free(c->tempDst);
985
    av_free(c->tempSrc);
986
    av_free(c->deintTemp);
987
    av_free(c->stdQPTable);
988
    av_free(c->nonBQPTable);
989
    av_free(c->forcedQPTable);
990 115329f1 Diego Biurrun
991 16e0bf73 Diego Biurrun
    memset(c, 0, sizeof(PPContext));
992 88c0bc7e Michael Niedermayer
993 16e0bf73 Diego Biurrun
    av_free(c);
994 9c9e467d Michael Niedermayer
}
995
996 6c51fd3f Michael Niedermayer
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
997 16e0bf73 Diego Biurrun
                     uint8_t * dst[3], const int dstStride[3],
998
                     int width, int height,
999
                     const QP_STORE_T *QP_store,  int QPStride,
1000 7dfea342 Diego Biurrun
                     pp_mode *vm,  void *vc, int pict_type)
1001 911879d1 Michael Niedermayer
{
1002 16e0bf73 Diego Biurrun
    int mbWidth = (width+15)>>4;
1003
    int mbHeight= (height+15)>>4;
1004
    PPMode *mode = (PPMode*)vm;
1005
    PPContext *c = (PPContext*)vc;
1006
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1007
    int absQPStride = FFABS(QPStride);
1008
1009
    // c->stride and c->QPStride are always positive
1010
    if(c->stride < minStride || c->qpStride < absQPStride)
1011
        reallocBuffers(c, width, height,
1012
                       FFMAX(minStride, c->stride),
1013
                       FFMAX(c->qpStride, absQPStride));
1014
1015
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1016
        int i;
1017
        QP_store= c->forcedQPTable;
1018
        absQPStride = QPStride = 0;
1019
        if(mode->lumMode & FORCE_QUANT)
1020
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1021
        else
1022
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1023
    }
1024 0426af31 Michael Niedermayer
1025 16e0bf73 Diego Biurrun
    if(pict_type & PP_PICT_TYPE_QP2){
1026
        int i;
1027
        const int count= mbHeight * absQPStride;
1028
        for(i=0; i<(count>>2); i++){
1029
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1030 bb270c08 Diego Biurrun
        }
1031 16e0bf73 Diego Biurrun
        for(i<<=2; i<count; i++){
1032
            c->stdQPTable[i] = QP_store[i]>>1;
1033
        }
1034
        QP_store= c->stdQPTable;
1035
        QPStride= absQPStride;
1036
    }
1037
1038
    if(0){
1039
        int x,y;
1040
        for(y=0; y<mbHeight; y++){
1041
            for(x=0; x<mbWidth; x++){
1042 e7becfb2 Diego Biurrun
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1043 16e0bf73 Diego Biurrun
            }
1044
            av_log(c, AV_LOG_INFO, "\n");
1045 bb270c08 Diego Biurrun
        }
1046 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1047 16e0bf73 Diego Biurrun
    }
1048
1049
    if((pict_type&7)!=3){
1050
        if (QPStride >= 0){
1051
            int i;
1052
            const int count= mbHeight * QPStride;
1053
            for(i=0; i<(count>>2); i++){
1054
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1055
            }
1056
            for(i<<=2; i<count; i++){
1057
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1058
            }
1059
        } else {
1060
            int i,j;
1061
            for(i=0; i<mbHeight; i++) {
1062
                for(j=0; j<absQPStride; j++) {
1063
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1064 bb270c08 Diego Biurrun
                }
1065 16e0bf73 Diego Biurrun
            }
1066 bb270c08 Diego Biurrun
        }
1067 16e0bf73 Diego Biurrun
    }
1068 bb270c08 Diego Biurrun
1069 16e0bf73 Diego Biurrun
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1070
           mode->lumMode, mode->chromMode);
1071 bb270c08 Diego Biurrun
1072 16e0bf73 Diego Biurrun
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1073 bb270c08 Diego Biurrun
                width, height, QP_store, QPStride, 0, mode, c);
1074
1075 16e0bf73 Diego Biurrun
    width  = (width )>>c->hChromaSubSample;
1076
    height = (height)>>c->vChromaSubSample;
1077
1078
    if(mode->chromMode){
1079
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1080
                    width, height, QP_store, QPStride, 1, mode, c);
1081
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1082
                    width, height, QP_store, QPStride, 2, mode, c);
1083
    }
1084
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1085
        linecpy(dst[1], src[1], height, srcStride[1]);
1086
        linecpy(dst[2], src[2], height, srcStride[2]);
1087
    }else{
1088
        int y;
1089
        for(y=0; y<height; y++){
1090
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1091
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1092 bb270c08 Diego Biurrun
        }
1093 16e0bf73 Diego Biurrun
    }
1094 911879d1 Michael Niedermayer
}