Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ d7fb5a18

History | View | Annotate | Download (37.3 KB)

1 3057fa66 Arpi
/*
2 b78e7197 Diego Biurrun
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6 ef85972b Diego Biurrun
 * This file is part of FFmpeg.
7 b78e7197 Diego Biurrun
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22 3057fa66 Arpi
23 b304569a Michael Niedermayer
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27 115329f1 Diego Biurrun
28 3057fa66 Arpi
/*
29 bb270c08 Diego Biurrun
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49 d5a1a995 Michael Niedermayer

50 2cab6401 Diego Biurrun
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51 755bfeab Diego Biurrun
# more or less selfinvented filters so the exactness is not too meaningful
52 3057fa66 Arpi
E = Exact implementation
53 04932b0d Diego Biurrun
e = almost exact implementation (slightly different rounding,...)
54 3057fa66 Arpi
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56 b0ac780a Michael Niedermayer
p = partially optimized, still some work to do
57 3057fa66 Arpi
*/
58
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64 13e00528 Arpi
write a faster and higher quality deblocking filter :)
65 d5a1a995 Michael Niedermayer
make the mainloop more flexible (variable number of blocks at once
66 bb270c08 Diego Biurrun
        (the if/else stuff per block is slowing things down)
67 9f45d04d Michael Niedermayer
compare the quality & speed of all filters
68
split this huge file
69 8405b3fd Michael Niedermayer
optimize c versions
70 117e45b0 Michael Niedermayer
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71 3057fa66 Arpi
...
72 13e00528 Arpi
*/
73
74 36b1b0bc Diego Biurrun
//Changelog: use the Subversion log
75 3057fa66 Arpi
76 9858f773 Michael Niedermayer
#include "config.h"
77 245976da Diego Biurrun
#include "libavutil/avutil.h"
78 3057fa66 Arpi
#include <inttypes.h>
79
#include <stdio.h>
80 d5a1a995 Michael Niedermayer
#include <stdlib.h>
81 911879d1 Michael Niedermayer
#include <string.h>
82 dda87e9f Pierre Lombard
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85 3057fa66 Arpi
//#undef HAVE_MMX2
86 13e00528 Arpi
//#define HAVE_3DNOW
87 3057fa66 Arpi
//#undef HAVE_MMX
88 cc9b0679 Michael Niedermayer
//#undef ARCH_X86
89 7f16f6e6 Michael Niedermayer
//#define DEBUG_BRIGHTNESS
90 13e00528 Arpi
#include "postprocess.h"
91 c41d972d Michael Niedermayer
#include "postprocess_internal.h"
92 bba9b16c Michael Niedermayer
93 2a4a62bf Stefano Sabatini
unsigned postproc_version(void)
94
{
95
    return LIBPOSTPROC_VERSION_INT;
96
}
97
98 a7b2871c Romain Dolbeau
#ifdef HAVE_ALTIVEC_H
99
#include <altivec.h>
100
#endif
101
102 911879d1 Michael Niedermayer
#define GET_MODE_BUFFER_SIZE 500
103
#define OPTIONS_ARRAY_SIZE 10
104 9c9e467d Michael Niedermayer
#define BLOCK_SIZE 8
105
#define TEMP_STRIDE 8
106
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
107 911879d1 Michael Niedermayer
108 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
109 2b858d0b Reimar Döffinger
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
110
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
111
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
112
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
113
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
114
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
115
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
116
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
117 b28daef8 Michael Niedermayer
#endif
118 3057fa66 Arpi
119 2722e362 Reimar Döffinger
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
120 3057fa66 Arpi
121 9c9e467d Michael Niedermayer
122 911879d1 Michael Niedermayer
static struct PPFilter filters[]=
123
{
124 16e0bf73 Diego Biurrun
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
125
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
126
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
127
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
128
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
129
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
130
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
131
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
132
    {"dr", "dering",                1, 5, 6, DERING},
133
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
134
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
138
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
139
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
140
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
141
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
142
    {NULL, NULL,0,0,0,0} //End Marker
143 911879d1 Michael Niedermayer
};
144
145 7b49ce2e Stefan Huehner
/*
 * Shorthand names and the filter strings that belong to them.
 *
 * Entries come in pairs: the shorthand first, the filter string second.
 * A single NULL ends the list.
 * NOTE(review): presumably the second string is substituted for the first
 * when a mode string is parsed; the code doing that is not in this part of
 * the file.
 */
static const char *replaceTable[]=
{
    "default", "hb:a,vb:a,dr:a",
    "de",      "hb:a,vb:a,dr:a",

    "fast",    "h1:a,v1:a,dr:a",
    "fa",      "h1:a,v1:a,dr:a",

    "ac",      "ha:a:128:7,va:a,dr:a",

    NULL //End Marker
};
154
155 3057fa66 Arpi
156 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
157 3057fa66 Arpi
/**
 * Execute the x86 "prefetchnta" instruction on the address p.
 * The asm statement has no outputs and no clobbers.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
163
164
/**
 * Execute the x86 "prefetcht0" instruction on the address p.
 * The asm statement has no outputs and no clobbers.
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
170
171
/**
 * Execute the x86 "prefetcht1" instruction on the address p.
 * The asm statement has no outputs and no clobbers.
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
177
178
/**
 * Execute the x86 "prefetcht2" instruction on the address p.
 * The asm statement has no outputs and no clobbers.
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
184 9a722af7 Arpi
#endif
185 3057fa66 Arpi
186 04932b0d Diego Biurrun
/* The horizontal functions exist only in C because the MMX
187
 * code is faster with vertical filters and transposing. */
188 3057fa66 Arpi
189 cf5ec61d Michael Niedermayer
/**
190
 * Check if the given 8x8 Block is mostly "flat"
191
 */
192 b0ac780a Michael Niedermayer
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
193 cf5ec61d Michael Niedermayer
{
194 16e0bf73 Diego Biurrun
    int numEq= 0;
195
    int y;
196
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
197
    const int dcThreshold= dcOffset*2 + 1;
198
199
    for(y=0; y<BLOCK_SIZE; y++){
200
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
207
        src+= stride;
208
    }
209
    return numEq > c->ppMode.flatnessThreshold;
210 9c9e467d Michael Niedermayer
}
211
212
/**
213
 * Check if the middle 8x8 Block in the given 8x16 block is flat
214
 */
215 16e0bf73 Diego Biurrun
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
216
{
217
    int numEq= 0;
218
    int y;
219
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
220
    const int dcThreshold= dcOffset*2 + 1;
221
222
    src+= stride*4; // src points to begin of the 8x8 Block
223
    for(y=0; y<BLOCK_SIZE-1; y++){
224
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232
        src+= stride;
233
    }
234
    return numEq > c->ppMode.flatnessThreshold;
235 cf5ec61d Michael Niedermayer
}
236
237 b0ac780a Michael Niedermayer
/**
 * Cheap range check on 8 lines of src.
 *
 * Only one pixel pair is examined per line; the columns used repeat every
 * four lines: (0,5), (2,7), (4,1), (6,3). A line fails when the difference
 * of its pair lies outside -2*QP .. 2*QP.
 *
 * @param src    first pixel of the first line
 * @param stride distance between two lines of src
 * @param QP     quantiser the limits are derived from
 * @return 1 if all 8 sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const int colA[4]= {0, 2, 4, 6};
    static const int colB[4]= {5, 7, 1, 3};
    int row;

    for(row=0; row<8; row++){
        const int k= row & 3;

        /* a negative sum wraps to a huge unsigned value, so one compare covers both limits */
        if((unsigned)(src[colA[k]] - src[colB[k]] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
259 cf5ec61d Michael Niedermayer
260 cb482d25 Michael Niedermayer
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
261
{
262
#if 1
263
#if 1
264 16e0bf73 Diego Biurrun
    int x;
265
    src+= stride*4;
266
    for(x=0; x<BLOCK_SIZE; x+=4){
267
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
268
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
269
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
270
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
271
    }
272 cb482d25 Michael Niedermayer
#else
273 16e0bf73 Diego Biurrun
    int x;
274
    src+= stride*3;
275
    for(x=0; x<BLOCK_SIZE; x++){
276
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277
    }
278 cb482d25 Michael Niedermayer
#endif
279 16e0bf73 Diego Biurrun
    return 1;
280 cb482d25 Michael Niedermayer
#else
281 16e0bf73 Diego Biurrun
    int x;
282
    src+= stride*4;
283
    for(x=0; x<BLOCK_SIZE; x++){
284
        int min=255;
285
        int max=0;
286
        int y;
287
        for(y=0; y<8; y++){
288
            int v= src[x + y*stride];
289
            if(v>max) max=v;
290
            if(v<min) min=v;
291 bb270c08 Diego Biurrun
        }
292 16e0bf73 Diego Biurrun
        if(max-min > 2*QP) return 0;
293
    }
294
    return 1;
295 cb482d25 Michael Niedermayer
#endif
296
}
297
298 16e0bf73 Diego Biurrun
/**
 * Classify a block by combining isHorizDC_C() and isHorizMinMaxOk_C().
 *
 * @return 2 if isHorizDC_C() rejects the block;
 *         otherwise 1 if isHorizMinMaxOk_C() accepts it for c->QP, else 0
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
309
310 16e0bf73 Diego Biurrun
/**
 * Classify a block by combining isVertDC_C() and isVertMinMaxOk_C().
 *
 * @return 2 if isVertDC_C() rejects the block;
 *         otherwise 1 if isVertMinMaxOk_C() accepts it for c->QP, else 0
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
321
322 b0ac780a Michael Niedermayer
/**
 * Default horizontal filter, applied in place to BLOCK_SIZE lines.
 *
 * On each line only dst[3] and dst[4] may change: a correction d is
 * derived from dst[0..7], limited to the range between 0 and
 * (dst[3]-dst[4])/2, subtracted from dst[3] and added to dst[4].
 * Lines whose |middleEnergy| is not below 8*c->QP are left untouched.
 *
 * @param dst    first pixel of the first line
 * @param stride distance between two lines of dst
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for(row=0; row<BLOCK_SIZE; row++, dst+= stride){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int leftEnergy, rightEnergy, q, d;

        if(FFABS(middleEnergy) >= 8*c->QP)
            continue;

        q= (dst[3] - dst[4])/2;
        leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        d= FFMAX(d, 0);

        d= (5*d + 32) >> 6;
        d*= FFSIGN(-middleEnergy);

        /* keep d between 0 and q, whichever sign q has */
        if(q>0) d= FFMIN(FFMAX(d, 0), q);
        else    d= FFMAX(FFMIN(d, 0), q);

        dst[3]-= d;
        dst[4]+= d;
    }
}
356
357
/**
358
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
359
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
360
 */
361 b0ac780a Michael Niedermayer
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
362 cf5ec61d Michael Niedermayer
{
363 16e0bf73 Diego Biurrun
    int y;
364
    for(y=0; y<BLOCK_SIZE; y++){
365
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
366
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
367
368
        int sums[10];
369
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
370
        sums[1] = sums[0] - first  + dst[3];
371
        sums[2] = sums[1] - first  + dst[4];
372
        sums[3] = sums[2] - first  + dst[5];
373
        sums[4] = sums[3] - first  + dst[6];
374
        sums[5] = sums[4] - dst[0] + dst[7];
375
        sums[6] = sums[5] - dst[1] + last;
376
        sums[7] = sums[6] - dst[2] + last;
377
        sums[8] = sums[7] - dst[3] + last;
378
        sums[9] = sums[8] - dst[4] + last;
379
380
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
381
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
382
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
383
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
384
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
385
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
386
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
387
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
388
389
        dst+= stride;
390
    }
391 cf5ec61d Michael Niedermayer
}
392
393 4e4dcbc5 Michael Niedermayer
/**
394 cc9b0679 Michael Niedermayer
 * Experimental Filter 1 (Horizontal)
395
 * will not damage linear gradients
396 bd107136 Diego Biurrun
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
397 755bfeab Diego Biurrun
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
398
 * MMX2 version does correct clipping C version does not
399 cc9b0679 Michael Niedermayer
 * not identical with the vertical one
400 4e4dcbc5 Michael Niedermayer
 */
401 cc9b0679 Michael Niedermayer
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
402
{
403 16e0bf73 Diego Biurrun
    int y;
404
    static uint64_t *lut= NULL;
405
    if(lut==NULL)
406
    {
407
        int i;
408
        lut = av_malloc(256*8);
409
        for(i=0; i<256; i++)
410 bb270c08 Diego Biurrun
        {
411 16e0bf73 Diego Biurrun
            int v= i < 128 ? 2*i : 2*(i-256);
412 117e45b0 Michael Niedermayer
/*
413 cc9b0679 Michael Niedermayer
//Simulate 112242211 9-Tap filter
414 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)  & 0xFF;
415
            uint64_t b= (v/8)   & 0xFF;
416
            uint64_t c= (v/4)   & 0xFF;
417
            uint64_t d= (3*v/8) & 0xFF;
418 117e45b0 Michael Niedermayer
*/
419 cc9b0679 Michael Niedermayer
//Simulate piecewise linear interpolation
420 16e0bf73 Diego Biurrun
            uint64_t a= (v/16)   & 0xFF;
421
            uint64_t b= (v*3/16) & 0xFF;
422
            uint64_t c= (v*5/16) & 0xFF;
423
            uint64_t d= (7*v/16) & 0xFF;
424
            uint64_t A= (0x100 - a)&0xFF;
425
            uint64_t B= (0x100 - b)&0xFF;
426
            uint64_t C= (0x100 - c)&0xFF;
427
            uint64_t D= (0x100 - c)&0xFF;
428
429
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
430
                       (D<<24) | (C<<16) | (B<<8)  | (A);
431
            //lut[i] = (v<<32) | (v<<24);
432 bb270c08 Diego Biurrun
        }
433 16e0bf73 Diego Biurrun
    }
434 bb270c08 Diego Biurrun
435 16e0bf73 Diego Biurrun
    for(y=0; y<BLOCK_SIZE; y++){
436
        int a= src[1] - src[2];
437
        int b= src[3] - src[4];
438
        int c= src[5] - src[6];
439 bb270c08 Diego Biurrun
440 16e0bf73 Diego Biurrun
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
441 bb270c08 Diego Biurrun
442 16e0bf73 Diego Biurrun
        if(d < QP){
443
            int v = d * FFSIGN(-b);
444 bb270c08 Diego Biurrun
445 16e0bf73 Diego Biurrun
            src[1] +=v/8;
446
            src[2] +=v/4;
447
            src[3] +=3*v/8;
448
            src[4] -=3*v/8;
449
            src[5] -=v/4;
450
            src[6] -=v/8;
451 bb270c08 Diego Biurrun
        }
452 16e0bf73 Diego Biurrun
        src+=stride;
453
    }
454 cc9b0679 Michael Niedermayer
}
455
456 12eebd26 Michael Niedermayer
/**
457
 * accurate deblock filter
458
 */
459 849f1035 Måns Rullgård
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
460 16e0bf73 Diego Biurrun
    int y;
461
    const int QP= c->QP;
462
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
463
    const int dcThreshold= dcOffset*2 + 1;
464 12eebd26 Michael Niedermayer
//START_TIMER
465 16e0bf73 Diego Biurrun
    src+= step*4; // src points to begin of the 8x8 Block
466
    for(y=0; y<8; y++){
467
        int numEq= 0;
468
469
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
470
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
471
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
472
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
473
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
474
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
475
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
476
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
477
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
478
        if(numEq > c->ppMode.flatnessThreshold){
479
            int min, max, x;
480
481
            if(src[0] > src[step]){
482
                max= src[0];
483
                min= src[step];
484
            }else{
485
                max= src[step];
486
                min= src[0];
487
            }
488
            for(x=2; x<8; x+=2){
489
                if(src[x*step] > src[(x+1)*step]){
490
                        if(src[x    *step] > max) max= src[ x   *step];
491
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
492 bb270c08 Diego Biurrun
                }else{
493 16e0bf73 Diego Biurrun
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
494
                        if(src[ x   *step] < min) min= src[ x   *step];
495
                }
496
            }
497
            if(max-min < 2*QP){
498
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
499
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
500
501
                int sums[10];
502
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
503
                sums[1] = sums[0] - first       + src[3*step];
504
                sums[2] = sums[1] - first       + src[4*step];
505
                sums[3] = sums[2] - first       + src[5*step];
506
                sums[4] = sums[3] - first       + src[6*step];
507
                sums[5] = sums[4] - src[0*step] + src[7*step];
508
                sums[6] = sums[5] - src[1*step] + last;
509
                sums[7] = sums[6] - src[2*step] + last;
510
                sums[8] = sums[7] - src[3*step] + last;
511
                sums[9] = sums[8] - src[4*step] + last;
512
513
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
514
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
515
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
516
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
517
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
518
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
519
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
520
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
521
            }
522
        }else{
523
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
524
525
            if(FFABS(middleEnergy) < 8*QP){
526
                const int q=(src[3*step] - src[4*step])/2;
527
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
528
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
529
530
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
531
                d= FFMAX(d, 0);
532
533
                d= (5*d + 32) >> 6;
534
                d*= FFSIGN(-middleEnergy);
535
536
                if(q>0){
537
                    d= d<0 ? 0 : d;
538
                    d= d>q ? q : d;
539
                }else{
540
                    d= d>0 ? 0 : d;
541
                    d= d<q ? q : d;
542 bb270c08 Diego Biurrun
                }
543
544 16e0bf73 Diego Biurrun
                src[3*step]-= d;
545
                src[4*step]+= d;
546
            }
547 bb270c08 Diego Biurrun
        }
548 16e0bf73 Diego Biurrun
549
        src += stride;
550
    }
551 12eebd26 Michael Niedermayer
/*if(step==16){
552
    STOP_TIMER("step16")
553
}else{
554
    STOP_TIMER("stepX")
555
}*/
556
}
557 cc9b0679 Michael Niedermayer
558 e89952aa Michael Niedermayer
// Note: there are C, MMX, MMX2, 3DNow and AltiVec versions; there is no combined 3DNow+MMX2 one.

/*
 * Step 1: decide which versions get compiled.
 * With RUNTIME_CPUDETECT the C version and, on x86, all three x86 versions
 * are built; otherwise the choice follows the HAVE_* macros.
 */

//Plain C versions
#if !(defined (HAVE_MMX) || defined (HAVE_ALTIVEC)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

/*
 * Step 2: include the template once per selected version.
 * Before each inclusion the HAVE_* macros are set to describe that version
 * and RENAME() is set to append the version's suffix to identifiers.
 */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//AltiVec versions
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Note: from here on the HAVE_MMX / HAVE_MMX2 / HAVE_3DNOW / HAVE_ALTIVEC macros
// no longer describe the build configuration; they hold whatever the blocks
// above left behind, so do not use them for new feature tests.
635 cc9b0679 Michael Niedermayer
636 6c51fd3f Michael Niedermayer
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
637 7dfea342 Diego Biurrun
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
638 cc9b0679 Michael Niedermayer
{
639 16e0bf73 Diego Biurrun
    PPContext *c= (PPContext *)vc;
640
    PPMode *ppMode= (PPMode *)vm;
641
    c->ppMode= *ppMode; //FIXME
642 9c9e467d Michael Niedermayer
643 16e0bf73 Diego Biurrun
    // Using ifs here as they are faster than function pointers although the
644
    // difference would not be measurable here but it is much better because
645
    // someone might exchange the CPU whithout restarting MPlayer ;)
646 e89952aa Michael Niedermayer
#ifdef RUNTIME_CPUDETECT
647 3cd52279 Diego Biurrun
#if defined(ARCH_X86)
648 16e0bf73 Diego Biurrun
    // ordered per speed fastest first
649
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
650
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
651
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
652
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
653
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
654
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655
    else
656
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657 cc9b0679 Michael Niedermayer
#else
658 b0ac780a Michael Niedermayer
#ifdef HAVE_ALTIVEC
659 16e0bf73 Diego Biurrun
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
660
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
    else
662 b0ac780a Michael Niedermayer
#endif
663 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664 be44a4d7 Michael Niedermayer
#endif
665 e89952aa Michael Niedermayer
#else //RUNTIME_CPUDETECT
666
#ifdef HAVE_MMX2
667 16e0bf73 Diego Biurrun
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668 e89952aa Michael Niedermayer
#elif defined (HAVE_3DNOW)
669 16e0bf73 Diego Biurrun
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 e89952aa Michael Niedermayer
#elif defined (HAVE_MMX)
671 16e0bf73 Diego Biurrun
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672 b0ac780a Michael Niedermayer
#elif defined (HAVE_ALTIVEC)
673 16e0bf73 Diego Biurrun
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 e89952aa Michael Niedermayer
#else
675 16e0bf73 Diego Biurrun
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 e89952aa Michael Niedermayer
#endif
677
#endif //!RUNTIME_CPUDETECT
678 117e45b0 Michael Niedermayer
}
679
680 cc9b0679 Michael Niedermayer
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
681 bb270c08 Diego Biurrun
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
682 13e00528 Arpi
683 911879d1 Michael Niedermayer
/**
 * Help text for the -pp command line option.
 * It lists every filter (short name, long name, options) followed by the
 * usage syntax and a few examples.
 * The symbol is a pointer to the text when LIBPOSTPROC_VERSION_INT is below
 * (52<<16) and an array holding the text otherwise.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
732 911879d1 Michael Niedermayer
733 7dfea342 Diego Biurrun
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
734 911879d1 Michael Niedermayer
{
735 16e0bf73 Diego Biurrun
    char temp[GET_MODE_BUFFER_SIZE];
736
    char *p= temp;
737
    static const char filterDelimiters[] = ",/";
738
    static const char optionDelimiters[] = ":";
739
    struct PPMode *ppMode;
740
    char *filterToken;
741
742
    ppMode= av_malloc(sizeof(PPMode));
743
744
    ppMode->lumMode= 0;
745
    ppMode->chromMode= 0;
746
    ppMode->maxTmpNoise[0]= 700;
747
    ppMode->maxTmpNoise[1]= 1500;
748
    ppMode->maxTmpNoise[2]= 3000;
749
    ppMode->maxAllowedY= 234;
750
    ppMode->minAllowedY= 16;
751
    ppMode->baseDcDiff= 256/8;
752
    ppMode->flatnessThreshold= 56-16-1;
753
    ppMode->maxClippedThreshold= 0.01;
754
    ppMode->error=0;
755
756
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
757
758
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
759
760
    for(;;){
761
        char *filterName;
762
        int q= 1000000; //PP_QUALITY_MAX;
763
        int chrom=-1;
764
        int luma=-1;
765
        char *option;
766
        char *options[OPTIONS_ARRAY_SIZE];
767
        int i;
768
        int filterNameOk=0;
769
        int numOfUnknownOptions=0;
770
        int enable=1; //does the user want us to enabled or disabled the filter
771
772
        filterToken= strtok(p, filterDelimiters);
773
        if(filterToken == NULL) break;
774
        p+= strlen(filterToken) + 1; // p points to next filterToken
775
        filterName= strtok(filterToken, optionDelimiters);
776
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
777
778
        if(*filterName == '-'){
779
            enable=0;
780
            filterName++;
781
        }
782 bb270c08 Diego Biurrun
783 16e0bf73 Diego Biurrun
        for(;;){ //for all options
784
            option= strtok(NULL, optionDelimiters);
785
            if(option == NULL) break;
786
787
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
788
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
789
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
790
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
791
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
792
            else{
793
                options[numOfUnknownOptions] = option;
794
                numOfUnknownOptions++;
795
            }
796
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
797
        }
798
        options[numOfUnknownOptions] = NULL;
799
800
        /* replace stuff from the replace Table */
801
        for(i=0; replaceTable[2*i]!=NULL; i++){
802
            if(!strcmp(replaceTable[2*i], filterName)){
803
                int newlen= strlen(replaceTable[2*i + 1]);
804
                int plen;
805
                int spaceLeft;
806
807
                if(p==NULL) p= temp, *p=0;      //last filter
808
                else p--, *p=',';               //not last filter
809
810
                plen= strlen(p);
811
                spaceLeft= p - temp + plen;
812
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
813
                    ppMode->error++;
814
                    break;
815 bb270c08 Diego Biurrun
                }
816 16e0bf73 Diego Biurrun
                memmove(p + newlen, p, plen+1);
817
                memcpy(p, replaceTable[2*i + 1], newlen);
818
                filterNameOk=1;
819
            }
820
        }
821 bb270c08 Diego Biurrun
822 16e0bf73 Diego Biurrun
        for(i=0; filters[i].shortName!=NULL; i++){
823
            if(   !strcmp(filters[i].longName, filterName)
824
               || !strcmp(filters[i].shortName, filterName)){
825
                ppMode->lumMode &= ~filters[i].mask;
826
                ppMode->chromMode &= ~filters[i].mask;
827
828
                filterNameOk=1;
829
                if(!enable) break; // user wants to disable it
830
831
                if(q >= filters[i].minLumQuality && luma)
832
                    ppMode->lumMode|= filters[i].mask;
833
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
834
                    if(q >= filters[i].minChromQuality)
835
                            ppMode->chromMode|= filters[i].mask;
836
837
                if(filters[i].mask == LEVEL_FIX){
838
                    int o;
839
                    ppMode->minAllowedY= 16;
840
                    ppMode->maxAllowedY= 234;
841
                    for(o=0; options[o]!=NULL; o++){
842
                        if(  !strcmp(options[o],"fullyrange")
843
                           ||!strcmp(options[o],"f")){
844
                            ppMode->minAllowedY= 0;
845
                            ppMode->maxAllowedY= 255;
846
                            numOfUnknownOptions--;
847 bb270c08 Diego Biurrun
                        }
848 16e0bf73 Diego Biurrun
                    }
849 bb270c08 Diego Biurrun
                }
850 16e0bf73 Diego Biurrun
                else if(filters[i].mask == TEMP_NOISE_FILTER)
851 bb270c08 Diego Biurrun
                {
852 16e0bf73 Diego Biurrun
                    int o;
853
                    int numOfNoises=0;
854
855
                    for(o=0; options[o]!=NULL; o++){
856
                        char *tail;
857
                        ppMode->maxTmpNoise[numOfNoises]=
858
                            strtol(options[o], &tail, 0);
859
                        if(tail!=options[o]){
860
                            numOfNoises++;
861
                            numOfUnknownOptions--;
862
                            if(numOfNoises >= 3) break;
863 bb270c08 Diego Biurrun
                        }
864 16e0bf73 Diego Biurrun
                    }
865 bb270c08 Diego Biurrun
                }
866 16e0bf73 Diego Biurrun
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
867
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
868
                    int o;
869
870
                    for(o=0; options[o]!=NULL && o<2; o++){
871
                        char *tail;
872
                        int val= strtol(options[o], &tail, 0);
873
                        if(tail==options[o]) break;
874
875
                        numOfUnknownOptions--;
876
                        if(o==0) ppMode->baseDcDiff= val;
877
                        else ppMode->flatnessThreshold= val;
878
                    }
879
                }
880
                else if(filters[i].mask == FORCE_QUANT){
881
                    int o;
882
                    ppMode->forcedQuant= 15;
883
884
                    for(o=0; options[o]!=NULL && o<1; o++){
885
                        char *tail;
886
                        int val= strtol(options[o], &tail, 0);
887
                        if(tail==options[o]) break;
888
889
                        numOfUnknownOptions--;
890
                        ppMode->forcedQuant= val;
891
                    }
892
                }
893
            }
894 bb270c08 Diego Biurrun
        }
895 16e0bf73 Diego Biurrun
        if(!filterNameOk) ppMode->error++;
896
        ppMode->error += numOfUnknownOptions;
897
    }
898
899
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
900
    if(ppMode->error){
901
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
902
        av_free(ppMode);
903
        return NULL;
904
    }
905
    return ppMode;
906 911879d1 Michael Niedermayer
}
907
908 7dfea342 Diego Biurrun
/**
 * Releases a mode object, e.g. one returned by
 * pp_get_mode_by_name_and_quality(). The pointer is simply handed to av_free().
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
911
912 88c0bc7e Michael Niedermayer
/**
 * Replaces the buffer *p by a freshly allocated, zero-filled one of size bytes.
 * The previous buffer is released first; its contents are not carried over.
 * The alignment argument is accepted but not used by this implementation.
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *previous = *p;

    av_free(previous);
    *p = av_mallocz(size);
}
916
917 0426af31 Michael Niedermayer
/**
 * (Re)allocates every work buffer of the context for the given geometry and
 * records the strides the buffers were sized for.
 * All buffers come back zero-filled (see reallocAlign()), except the luma
 * histogram, which is seeded with a value derived from the picture area.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    const int mbCols = (width  + 15) >> 4;
    const int mbRows = (height + 15) >> 4;
    const int histogramSeed = width*height/64*15/256;
    int n;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst,    8, stride*24);
    reallocAlign((void **)&c->tempSrc,    8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    for(n = 0; n < 256; n++)
        c->yHistogram[n] = histogramSeed;

    for(n = 0; n < 3; n++){
        // The extra 17*1024 bytes are slack so reads/writes slightly past the end stay harmless.
        reallocAlign((void **)&c->tempBlurred[n], 8, stride*mbRows*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[n], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbRows*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbRows*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbCols*sizeof(QP_STORE_T));
}
943
944 e7becfb2 Diego Biurrun
/**
 * Name callback for the logging class below: every context reports the same
 * fixed name, so the context pointer is ignored.
 */
static const char * context_to_name(void * ptr)
{
    static const char contextName[] = "postproc";

    (void)ptr;
    return contextName;
}
947
948 31bfd6f3 Diego Pettenò
/* Class descriptor stored in every PPContext (see pp_get_context()): class
   label "Postproc", context_to_name as the name callback, and NULL for the
   third member. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
949 e7becfb2 Diego Biurrun
950 7dfea342 Diego Biurrun
pp_context *pp_get_context(int width, int height, int cpuCaps){
951 16e0bf73 Diego Biurrun
    PPContext *c= av_malloc(sizeof(PPContext));
952
    int stride= (width+15)&(~15);    //assumed / will realloc if needed
953
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
954
955
    memset(c, 0, sizeof(PPContext));
956
    c->av_class = &av_codec_context_class;
957
    c->cpuCaps= cpuCaps;
958
    if(cpuCaps&PP_FORMAT){
959
        c->hChromaSubSample= cpuCaps&0x3;
960
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
961
    }else{
962
        c->hChromaSubSample= 1;
963
        c->vChromaSubSample= 1;
964
    }
965
966
    reallocBuffers(c, width, height, stride, qpStride);
967
968
    c->frameNum=-1;
969
970
    return c;
971 45b4f285 Michael Niedermayer
}
972
973 9cb54f43 Michael Niedermayer
void pp_free_context(void *vc){
974 16e0bf73 Diego Biurrun
    PPContext *c = (PPContext*)vc;
975
    int i;
976 115329f1 Diego Biurrun
977 aa089f6c Diego Biurrun
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
978
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
979 115329f1 Diego Biurrun
980 16e0bf73 Diego Biurrun
    av_free(c->tempBlocks);
981
    av_free(c->yHistogram);
982
    av_free(c->tempDst);
983
    av_free(c->tempSrc);
984
    av_free(c->deintTemp);
985
    av_free(c->stdQPTable);
986
    av_free(c->nonBQPTable);
987
    av_free(c->forcedQPTable);
988 115329f1 Diego Biurrun
989 16e0bf73 Diego Biurrun
    memset(c, 0, sizeof(PPContext));
990 88c0bc7e Michael Niedermayer
991 16e0bf73 Diego Biurrun
    av_free(c);
992 9c9e467d Michael Niedermayer
}
993
994 6c51fd3f Michael Niedermayer
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
995 16e0bf73 Diego Biurrun
                     uint8_t * dst[3], const int dstStride[3],
996
                     int width, int height,
997
                     const QP_STORE_T *QP_store,  int QPStride,
998 7dfea342 Diego Biurrun
                     pp_mode *vm,  void *vc, int pict_type)
999 911879d1 Michael Niedermayer
{
1000 16e0bf73 Diego Biurrun
    int mbWidth = (width+15)>>4;
1001
    int mbHeight= (height+15)>>4;
1002
    PPMode *mode = (PPMode*)vm;
1003
    PPContext *c = (PPContext*)vc;
1004
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1005
    int absQPStride = FFABS(QPStride);
1006
1007
    // c->stride and c->QPStride are always positive
1008
    if(c->stride < minStride || c->qpStride < absQPStride)
1009
        reallocBuffers(c, width, height,
1010
                       FFMAX(minStride, c->stride),
1011
                       FFMAX(c->qpStride, absQPStride));
1012
1013
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1014
        int i;
1015
        QP_store= c->forcedQPTable;
1016
        absQPStride = QPStride = 0;
1017
        if(mode->lumMode & FORCE_QUANT)
1018
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1019
        else
1020
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1021
    }
1022 0426af31 Michael Niedermayer
1023 16e0bf73 Diego Biurrun
    if(pict_type & PP_PICT_TYPE_QP2){
1024
        int i;
1025
        const int count= mbHeight * absQPStride;
1026
        for(i=0; i<(count>>2); i++){
1027
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1028 bb270c08 Diego Biurrun
        }
1029 16e0bf73 Diego Biurrun
        for(i<<=2; i<count; i++){
1030
            c->stdQPTable[i] = QP_store[i]>>1;
1031
        }
1032
        QP_store= c->stdQPTable;
1033
        QPStride= absQPStride;
1034
    }
1035
1036
    if(0){
1037
        int x,y;
1038
        for(y=0; y<mbHeight; y++){
1039
            for(x=0; x<mbWidth; x++){
1040 e7becfb2 Diego Biurrun
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1041 16e0bf73 Diego Biurrun
            }
1042
            av_log(c, AV_LOG_INFO, "\n");
1043 bb270c08 Diego Biurrun
        }
1044 e7becfb2 Diego Biurrun
        av_log(c, AV_LOG_INFO, "\n");
1045 16e0bf73 Diego Biurrun
    }
1046
1047
    if((pict_type&7)!=3){
1048
        if (QPStride >= 0){
1049
            int i;
1050
            const int count= mbHeight * QPStride;
1051
            for(i=0; i<(count>>2); i++){
1052
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1053
            }
1054
            for(i<<=2; i<count; i++){
1055
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1056
            }
1057
        } else {
1058
            int i,j;
1059
            for(i=0; i<mbHeight; i++) {
1060
                for(j=0; j<absQPStride; j++) {
1061
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1062 bb270c08 Diego Biurrun
                }
1063 16e0bf73 Diego Biurrun
            }
1064 bb270c08 Diego Biurrun
        }
1065 16e0bf73 Diego Biurrun
    }
1066 bb270c08 Diego Biurrun
1067 16e0bf73 Diego Biurrun
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1068
           mode->lumMode, mode->chromMode);
1069 bb270c08 Diego Biurrun
1070 16e0bf73 Diego Biurrun
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1071 bb270c08 Diego Biurrun
                width, height, QP_store, QPStride, 0, mode, c);
1072
1073 16e0bf73 Diego Biurrun
    width  = (width )>>c->hChromaSubSample;
1074
    height = (height)>>c->vChromaSubSample;
1075
1076
    if(mode->chromMode){
1077
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1078
                    width, height, QP_store, QPStride, 1, mode, c);
1079
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1080
                    width, height, QP_store, QPStride, 2, mode, c);
1081
    }
1082
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1083
        linecpy(dst[1], src[1], height, srcStride[1]);
1084
        linecpy(dst[2], src[2], height, srcStride[2]);
1085
    }else{
1086
        int y;
1087
        for(y=0; y<height; y++){
1088
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1089
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1090 bb270c08 Diego Biurrun
        }
1091 16e0bf73 Diego Biurrun
    }
1092 911879d1 Michael Niedermayer
}