ffmpeg / libpostproc / postprocess.c @ e90f5b5a
History | View | Annotate | Download (37.3 KB)
1 |
/*
|
---|---|
2 |
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
|
3 |
*
|
4 |
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
5 |
*
|
6 |
* This file is part of FFmpeg.
|
7 |
*
|
8 |
* FFmpeg is free software; you can redistribute it and/or modify
|
9 |
* it under the terms of the GNU General Public License as published by
|
10 |
* the Free Software Foundation; either version 2 of the License, or
|
11 |
* (at your option) any later version.
|
12 |
*
|
13 |
* FFmpeg is distributed in the hope that it will be useful,
|
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16 |
* GNU General Public License for more details.
|
17 |
*
|
18 |
* You should have received a copy of the GNU General Public License
|
19 |
* along with FFmpeg; if not, write to the Free Software
|
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
21 |
*/
|
22 |
|
23 |
/**
|
24 |
* @file libpostproc/postprocess.c
|
25 |
* postprocessing.
|
26 |
*/
|
27 |
|
28 |
/*
|
29 |
C MMX MMX2 3DNow AltiVec
|
30 |
isVertDC Ec Ec Ec
|
31 |
isVertMinMaxOk Ec Ec Ec
|
32 |
doVertLowPass E e e Ec
|
33 |
doVertDefFilter Ec Ec e e Ec
|
34 |
isHorizDC Ec Ec Ec
|
35 |
isHorizMinMaxOk a E Ec
|
36 |
doHorizLowPass E e e Ec
|
37 |
doHorizDefFilter Ec Ec e e Ec
|
38 |
do_a_deblock Ec E Ec E
|
39 |
deRing E e e* Ecp
|
40 |
Vertical RKAlgo1 E a a
|
41 |
Horizontal RKAlgo1 a a
|
42 |
Vertical X1# a E E
|
43 |
Horizontal X1# a E E
|
44 |
LinIpolDeinterlace e E E*
|
45 |
CubicIpolDeinterlace a e e*
|
46 |
LinBlendDeinterlace e E E*
|
47 |
MedianDeinterlace# E Ec Ec
|
48 |
TempDeNoiser# E e e Ec
|
49 |
|
50 |
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
|
51 |
# more or less self-invented filters, so the exactness is not too meaningful
|
52 |
E = Exact implementation
|
53 |
e = almost exact implementation (slightly different rounding,...)
|
54 |
a = alternative / approximate impl
|
55 |
c = checked against the other implementations (-vo md5)
|
56 |
p = partially optimized, still some work to do
|
57 |
*/
|
58 |
|
59 |
/*
|
60 |
TODO:
|
61 |
reduce the time wasted on the mem transfer
|
62 |
unroll stuff if instructions depend too much on the prior one
|
63 |
move YScale thing to the end instead of fixing QP
|
64 |
write a faster and higher quality deblocking filter :)
|
65 |
make the mainloop more flexible (variable number of blocks at once)
|
66 |
(the if/else stuff per block is slowing things down)
|
67 |
compare the quality & speed of all filters
|
68 |
split this huge file
|
69 |
optimize c versions
|
70 |
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
|
71 |
...
|
72 |
*/
|
73 |
|
74 |
//Changelog: use the Subversion log
|
75 |
|
76 |
#include "config.h" |
77 |
#include "libavutil/avutil.h" |
78 |
#include <inttypes.h> |
79 |
#include <stdio.h> |
80 |
#include <stdlib.h> |
81 |
#include <string.h> |
82 |
//#undef HAVE_MMX2
|
83 |
//#define HAVE_AMD3DNOW
|
84 |
//#undef HAVE_MMX
|
85 |
//#undef ARCH_X86
|
86 |
//#define DEBUG_BRIGHTNESS
|
87 |
#include "postprocess.h" |
88 |
#include "postprocess_internal.h" |
89 |
|
90 |
unsigned postproc_version(void) |
91 |
{ |
92 |
return LIBPOSTPROC_VERSION_INT;
|
93 |
} |
94 |
|
95 |
#if HAVE_ALTIVEC_H
|
96 |
#include <altivec.h> |
97 |
#endif
|
98 |
|
99 |
/* size in bytes of the scratch buffer a mode string is copied into for parsing */
#define GET_MODE_BUFFER_SIZE  500
/* capacity (including the NULL terminator slot) of the per-filter option list */
#define OPTIONS_ARRAY_SIZE    10
/* edge length in pixels of the blocks the filters operate on */
#define BLOCK_SIZE            8
/* not referenced in the visible part of this file; presumably used by the
 * included template code -- TODO confirm */
#define TEMP_STRIDE           8
103 |
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
|
104 |
|
105 |
#if ARCH_X86
|
106 |
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL; |
107 |
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL; |
108 |
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL; |
109 |
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL; |
110 |
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL; |
111 |
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL; |
112 |
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL; |
113 |
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL; |
114 |
#endif
|
115 |
|
116 |
DECLARE_ASM_CONST(8, int, deringThreshold)= 20; |
117 |
|
118 |
|
119 |
static struct PPFilter filters[]= |
120 |
{ |
121 |
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, |
122 |
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, |
123 |
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
|
124 |
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
|
125 |
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, |
126 |
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, |
127 |
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, |
128 |
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, |
129 |
{"dr", "dering", 1, 5, 6, DERING}, |
130 |
{"al", "autolevels", 0, 1, 2, LEVEL_FIX}, |
131 |
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, |
132 |
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, |
133 |
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, |
134 |
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, |
135 |
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, |
136 |
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, |
137 |
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, |
138 |
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, |
139 |
{NULL, NULL,0,0,0,0} //End Marker |
140 |
}; |
141 |
|
142 |
/*
 * Alias expansion table, stored as consecutive (alias, replacement) string
 * pairs and terminated by a single NULL in an alias slot.
 */
static const char *replaceTable[]=
{
    /* alias       expands to               */
    "default",     "hb:a,vb:a,dr:a",
    "de",          "hb:a,vb:a,dr:a",
    "fast",        "h1:a,v1:a,dr:a",
    "fa",          "h1:a,v1:a,dr:a",
    "ac",          "ha:a:128:7,va:a,dr:a",
    NULL /* end marker */
};
151 |
|
152 |
|
153 |
#if ARCH_X86
/*
 * Thin wrappers around the four x86 prefetch instructions. Each one issues
 * the instruction of the same name with p as its memory operand.
 */

/** Issue a "prefetchnta" for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}

/** Issue a "prefetcht0" for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}

/** Issue a "prefetcht1" for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}

/** Issue a "prefetcht2" for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%[addr])\n\t"
        : /* no outputs */
        : [addr] "r" (p)
    );
}
#endif /* ARCH_X86 */
|
182 |
|
183 |
/* The horizontal functions exist only in C because the MMX
|
184 |
* code is faster with vertical filters and transposing. */
|
185 |
|
186 |
/**
|
187 |
* Check if the given 8x8 Block is mostly "flat"
|
188 |
*/
|
189 |
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
190 |
{ |
191 |
int numEq= 0; |
192 |
int y;
|
193 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
194 |
const int dcThreshold= dcOffset*2 + 1; |
195 |
|
196 |
for(y=0; y<BLOCK_SIZE; y++){ |
197 |
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
198 |
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
199 |
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
200 |
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; |
201 |
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; |
202 |
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; |
203 |
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; |
204 |
src+= stride; |
205 |
} |
206 |
return numEq > c->ppMode.flatnessThreshold;
|
207 |
} |
208 |
|
209 |
/**
|
210 |
* Check if the middle 8x8 Block in the given 8x16 block is flat
|
211 |
*/
|
212 |
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c) |
213 |
{ |
214 |
int numEq= 0; |
215 |
int y;
|
216 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
217 |
const int dcThreshold= dcOffset*2 + 1; |
218 |
|
219 |
src+= stride*4; // src points to begin of the 8x8 Block |
220 |
for(y=0; y<BLOCK_SIZE-1; y++){ |
221 |
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
222 |
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
223 |
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; |
224 |
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; |
225 |
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; |
226 |
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; |
227 |
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; |
228 |
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; |
229 |
src+= stride; |
230 |
} |
231 |
return numEq > c->ppMode.flatnessThreshold;
|
232 |
} |
233 |
|
234 |
/**
 * Cheap horizontal range check on an 8x8 block.
 *
 * One pair of pixels is probed per row; the probed columns repeat every four
 * rows: (0,5), (2,7), (4,1), (6,3). The check fails as soon as
 * (a - b + 2*QP), taken as unsigned, exceeds 4*QP.
 *
 * The previously disabled alternative implementation (comparing columns 0
 * and 7 of every row) was unreachable and has been removed.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every probed pair passes, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    /* columns probed in rows 0..3; rows 4..7 reuse the same pattern */
    static const uint8_t probe[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int y;

    for (y = 0; y < 8; y++) {
        const int a = src[probe[y & 3][0]];
        const int b = src[probe[y & 3][1]];

        if ((unsigned)(a - b + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
256 |
|
257 |
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
258 |
{ |
259 |
#if 1 |
260 |
#if 1 |
261 |
int x;
|
262 |
src+= stride*4;
|
263 |
for(x=0; x<BLOCK_SIZE; x+=4){ |
264 |
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
265 |
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
266 |
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
267 |
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
268 |
} |
269 |
#else
|
270 |
int x;
|
271 |
src+= stride*3;
|
272 |
for(x=0; x<BLOCK_SIZE; x++){ |
273 |
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
274 |
} |
275 |
#endif
|
276 |
return 1; |
277 |
#else
|
278 |
int x;
|
279 |
src+= stride*4;
|
280 |
for(x=0; x<BLOCK_SIZE; x++){ |
281 |
int min=255; |
282 |
int max=0; |
283 |
int y;
|
284 |
for(y=0; y<8; y++){ |
285 |
int v= src[x + y*stride];
|
286 |
if(v>max) max=v;
|
287 |
if(v<min) min=v;
|
288 |
} |
289 |
if(max-min > 2*QP) return 0; |
290 |
} |
291 |
return 1; |
292 |
#endif
|
293 |
} |
294 |
|
295 |
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat;
 *         otherwise 1 if isHorizMinMaxOk_C() accepts it with c->QP,
 *         and 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isHorizDC_C(src, stride, c))
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
306 |
|
307 |
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat;
 *         otherwise 1 if isVertMinMaxOk_C() accepts it with c->QP,
 *         and 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isVertDC_C(src, stride, c))
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
318 |
|
319 |
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, pixels 3 and 4 are pulled towards each
 * other by a correction d, but only when the "middle energy" computed from
 * pixels 2..5 is below 8*QP. d is derived from how much the middle energy
 * exceeds the smaller of the left (pixels 0..3) and right (pixels 4..7)
 * energies, and is clamped to the range between 0 and (p3 - p4)/2.
 *
 * @param dst    first pixel of the 8 pixel wide row segment, modified in place
 * @param stride distance in bytes between two rows
 * @param c      context; only QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    uint8_t *row = dst;
    int line;

    for (line = 0; line < BLOCK_SIZE; line++, row += stride) {
        const int middleEnergy = 5*(row[4] - row[3]) + 2*(row[2] - row[5]);
        int leftEnergy, rightEnergy, q, d;

        if (FFABS(middleEnergy) >= 8*c->QP)
            continue; /* edge too strong, leave this row untouched */

        q           = (row[3] - row[4])/2;
        leftEnergy  = 5*(row[2] - row[1]) + 2*(row[0] - row[3]);
        rightEnergy = 5*(row[6] - row[5]) + 2*(row[4] - row[7]);

        d  = FFABS(middleEnergy) - FFMIN(FFABS(leftEnergy), FFABS(rightEnergy));
        d  = FFMAX(d, 0);
        d  = (5*d + 32) >> 6;
        d *= FFSIGN(-middleEnergy);

        /* clamp d to [0, q] for positive q, to [q, 0] otherwise */
        if (q > 0)
            d = FFMIN(FFMAX(d, 0), q);
        else
            d = FFMAX(FFMIN(d, 0), q);

        row[3] -= d;
        row[4] += d;
    }
}
353 |
|
354 |
/**
|
355 |
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
|
356 |
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
|
357 |
*/
|
358 |
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
359 |
{ |
360 |
int y;
|
361 |
for(y=0; y<BLOCK_SIZE; y++){ |
362 |
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
363 |
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
364 |
|
365 |
int sums[10]; |
366 |
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; |
367 |
sums[1] = sums[0] - first + dst[3]; |
368 |
sums[2] = sums[1] - first + dst[4]; |
369 |
sums[3] = sums[2] - first + dst[5]; |
370 |
sums[4] = sums[3] - first + dst[6]; |
371 |
sums[5] = sums[4] - dst[0] + dst[7]; |
372 |
sums[6] = sums[5] - dst[1] + last; |
373 |
sums[7] = sums[6] - dst[2] + last; |
374 |
sums[8] = sums[7] - dst[3] + last; |
375 |
sums[9] = sums[8] - dst[4] + last; |
376 |
|
377 |
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
378 |
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; |
379 |
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; |
380 |
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; |
381 |
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; |
382 |
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; |
383 |
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; |
384 |
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; |
385 |
|
386 |
dst+= stride; |
387 |
} |
388 |
} |
389 |
|
390 |
/**
|
391 |
* Experimental Filter 1 (Horizontal)
|
392 |
* will not damage linear gradients
|
393 |
* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
|
394 |
* can only smooth blocks at the expected locations (it cannot smooth them if they did move)
|
395 |
* MMX2 version does correct clipping C version does not
|
396 |
* not identical with the vertical one
|
397 |
*/
|
398 |
static inline void horizX1Filter(uint8_t *src, int stride, int QP) |
399 |
{ |
400 |
int y;
|
401 |
static uint64_t *lut= NULL; |
402 |
if(lut==NULL) |
403 |
{ |
404 |
int i;
|
405 |
lut = av_malloc(256*8); |
406 |
for(i=0; i<256; i++) |
407 |
{ |
408 |
int v= i < 128 ? 2*i : 2*(i-256); |
409 |
/*
|
410 |
//Simulate 112242211 9-Tap filter
|
411 |
uint64_t a= (v/16) & 0xFF;
|
412 |
uint64_t b= (v/8) & 0xFF;
|
413 |
uint64_t c= (v/4) & 0xFF;
|
414 |
uint64_t d= (3*v/8) & 0xFF;
|
415 |
*/
|
416 |
//Simulate piecewise linear interpolation
|
417 |
uint64_t a= (v/16) & 0xFF; |
418 |
uint64_t b= (v*3/16) & 0xFF; |
419 |
uint64_t c= (v*5/16) & 0xFF; |
420 |
uint64_t d= (7*v/16) & 0xFF; |
421 |
uint64_t A= (0x100 - a)&0xFF; |
422 |
uint64_t B= (0x100 - b)&0xFF; |
423 |
uint64_t C= (0x100 - c)&0xFF; |
424 |
uint64_t D= (0x100 - c)&0xFF; |
425 |
|
426 |
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
427 |
(D<<24) | (C<<16) | (B<<8) | (A); |
428 |
//lut[i] = (v<<32) | (v<<24);
|
429 |
} |
430 |
} |
431 |
|
432 |
for(y=0; y<BLOCK_SIZE; y++){ |
433 |
int a= src[1] - src[2]; |
434 |
int b= src[3] - src[4]; |
435 |
int c= src[5] - src[6]; |
436 |
|
437 |
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); |
438 |
|
439 |
if(d < QP){
|
440 |
int v = d * FFSIGN(-b);
|
441 |
|
442 |
src[1] +=v/8; |
443 |
src[2] +=v/4; |
444 |
src[3] +=3*v/8; |
445 |
src[4] -=3*v/8; |
446 |
src[5] -=v/4; |
447 |
src[6] -=v/8; |
448 |
} |
449 |
src+=stride; |
450 |
} |
451 |
} |
452 |
|
453 |
/**
|
454 |
* accurate deblock filter
|
455 |
*/
|
456 |
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
457 |
int y;
|
458 |
const int QP= c->QP; |
459 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
460 |
const int dcThreshold= dcOffset*2 + 1; |
461 |
//START_TIMER
|
462 |
src+= step*4; // src points to begin of the 8x8 Block |
463 |
for(y=0; y<8; y++){ |
464 |
int numEq= 0; |
465 |
|
466 |
if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
467 |
if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; |
468 |
if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; |
469 |
if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; |
470 |
if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; |
471 |
if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; |
472 |
if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; |
473 |
if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; |
474 |
if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; |
475 |
if(numEq > c->ppMode.flatnessThreshold){
|
476 |
int min, max, x;
|
477 |
|
478 |
if(src[0] > src[step]){ |
479 |
max= src[0];
|
480 |
min= src[step]; |
481 |
}else{
|
482 |
max= src[step]; |
483 |
min= src[0];
|
484 |
} |
485 |
for(x=2; x<8; x+=2){ |
486 |
if(src[x*step] > src[(x+1)*step]){ |
487 |
if(src[x *step] > max) max= src[ x *step];
|
488 |
if(src[(x+1)*step] < min) min= src[(x+1)*step]; |
489 |
}else{
|
490 |
if(src[(x+1)*step] > max) max= src[(x+1)*step]; |
491 |
if(src[ x *step] < min) min= src[ x *step];
|
492 |
} |
493 |
} |
494 |
if(max-min < 2*QP){ |
495 |
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
496 |
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; |
497 |
|
498 |
int sums[10]; |
499 |
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; |
500 |
sums[1] = sums[0] - first + src[3*step]; |
501 |
sums[2] = sums[1] - first + src[4*step]; |
502 |
sums[3] = sums[2] - first + src[5*step]; |
503 |
sums[4] = sums[3] - first + src[6*step]; |
504 |
sums[5] = sums[4] - src[0*step] + src[7*step]; |
505 |
sums[6] = sums[5] - src[1*step] + last; |
506 |
sums[7] = sums[6] - src[2*step] + last; |
507 |
sums[8] = sums[7] - src[3*step] + last; |
508 |
sums[9] = sums[8] - src[4*step] + last; |
509 |
|
510 |
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
511 |
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; |
512 |
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; |
513 |
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; |
514 |
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; |
515 |
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; |
516 |
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; |
517 |
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; |
518 |
} |
519 |
}else{
|
520 |
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); |
521 |
|
522 |
if(FFABS(middleEnergy) < 8*QP){ |
523 |
const int q=(src[3*step] - src[4*step])/2; |
524 |
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); |
525 |
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); |
526 |
|
527 |
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
|
528 |
d= FFMAX(d, 0);
|
529 |
|
530 |
d= (5*d + 32) >> 6; |
531 |
d*= FFSIGN(-middleEnergy); |
532 |
|
533 |
if(q>0){ |
534 |
d= d<0 ? 0 : d; |
535 |
d= d>q ? q : d; |
536 |
}else{
|
537 |
d= d>0 ? 0 : d; |
538 |
d= d<q ? q : d; |
539 |
} |
540 |
|
541 |
src[3*step]-= d;
|
542 |
src[4*step]+= d;
|
543 |
} |
544 |
} |
545 |
|
546 |
src += stride; |
547 |
} |
548 |
/*if(step==16){
|
549 |
STOP_TIMER("step16")
|
550 |
}else{
|
551 |
STOP_TIMER("stepX")
|
552 |
}*/
|
553 |
} |
554 |
|
555 |
/*
 * Template instantiation.
 *
 * postprocess_template.c is included once per enabled flavour (C, AltiVec,
 * MMX, MMX2, 3DNow); RENAME() gives the functions of each flavour their own
 * suffix. There is no combined 3DNow+MMX2 flavour.
 */

/* plain C flavour */
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_C
#endif

#if HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif /* HAVE_ALTIVEC */

#if ARCH_X86

#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX
#endif

#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_MMX2
#endif

#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#define COMPILE_3DNOW
#endif

#endif /* ARCH_X86 */

/* start every flavour from "no SIMD"; each section below re-enables its own */
#undef  HAVE_MMX
#define HAVE_MMX 0
#undef  HAVE_MMX2
#define HAVE_MMX2 0
#undef  HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef  HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* C flavour */
#if defined(COMPILE_C)
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

/* AltiVec flavour */
#if defined(COMPILE_ALTIVEC)
#undef  RENAME
#define RENAME(a) a ## _altivec
#undef  HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

/* MMX flavour */
#if defined(COMPILE_MMX)
#undef  RENAME
#define RENAME(a) a ## _MMX
#undef  HAVE_MMX
#define HAVE_MMX 1
#include "postprocess_template.c"
#endif

/* MMX2 flavour */
#if defined(COMPILE_MMX2)
#undef  RENAME
#define RENAME(a) a ## _MMX2
#undef  HAVE_MMX
#define HAVE_MMX 1
#undef  HAVE_MMX2
#define HAVE_MMX2 1
#include "postprocess_template.c"
#endif

/* 3DNow flavour */
#if defined(COMPILE_3DNOW)
#undef  RENAME
#define RENAME(a) a ## _3DNow
#undef  HAVE_MMX
#define HAVE_MMX 1
#undef  HAVE_MMX2
#define HAVE_MMX2 0
#undef  HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 1
#include "postprocess_template.c"
#endif

/* Note: from here on the HAVE_xyz macros no longer reflect the build
 * configuration, so do not rely on them for anything new. */
|
637 |
|
638 |
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
639 |
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc) |
640 |
{ |
641 |
PPContext *c= (PPContext *)vc; |
642 |
PPMode *ppMode= (PPMode *)vm; |
643 |
c->ppMode= *ppMode; //FIXME
|
644 |
|
645 |
// Using ifs here as they are faster than function pointers although the
|
646 |
// difference would not be measurable here but it is much better because
|
647 |
// someone might exchange the CPU whithout restarting MPlayer ;)
|
648 |
#if CONFIG_RUNTIME_CPUDETECT
|
649 |
#if ARCH_X86
|
650 |
// ordered per speed fastest first
|
651 |
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
|
652 |
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
653 |
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
654 |
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
655 |
else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
656 |
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
657 |
else
|
658 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
659 |
#else
|
660 |
#if HAVE_ALTIVEC
|
661 |
if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
|
662 |
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
663 |
else
|
664 |
#endif
|
665 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
666 |
#endif
|
667 |
#else //CONFIG_RUNTIME_CPUDETECT |
668 |
#if HAVE_MMX2
|
669 |
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
670 |
#elif HAVE_AMD3DNOW
|
671 |
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
672 |
#elif HAVE_MMX
|
673 |
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
674 |
#elif HAVE_ALTIVEC
|
675 |
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
676 |
#else
|
677 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
678 |
#endif
|
679 |
#endif //!CONFIG_RUNTIME_CPUDETECT |
680 |
} |
681 |
|
682 |
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
|
683 |
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
|
684 |
|
685 |
/* -pp Command line Help
 *
 * The long names listed here must match the names accepted by the parser
 * (see the filters table and pp_get_mode_by_name_and_quality()); the
 * comparison there is case sensitive. The entries for ha, va and fq used to
 * advertise "hadeblock", "vadeblock" and "forceQuant", none of which the
 * parser accepts.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
734 |
|
735 |
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality) |
736 |
{ |
737 |
char temp[GET_MODE_BUFFER_SIZE];
|
738 |
char *p= temp;
|
739 |
static const char filterDelimiters[] = ",/"; |
740 |
static const char optionDelimiters[] = ":"; |
741 |
struct PPMode *ppMode;
|
742 |
char *filterToken;
|
743 |
|
744 |
ppMode= av_malloc(sizeof(PPMode));
|
745 |
|
746 |
ppMode->lumMode= 0;
|
747 |
ppMode->chromMode= 0;
|
748 |
ppMode->maxTmpNoise[0]= 700; |
749 |
ppMode->maxTmpNoise[1]= 1500; |
750 |
ppMode->maxTmpNoise[2]= 3000; |
751 |
ppMode->maxAllowedY= 234;
|
752 |
ppMode->minAllowedY= 16;
|
753 |
ppMode->baseDcDiff= 256/8; |
754 |
ppMode->flatnessThreshold= 56-16-1; |
755 |
ppMode->maxClippedThreshold= 0.01; |
756 |
ppMode->error=0;
|
757 |
|
758 |
strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
759 |
|
760 |
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); |
761 |
|
762 |
for(;;){
|
763 |
char *filterName;
|
764 |
int q= 1000000; //PP_QUALITY_MAX; |
765 |
int chrom=-1; |
766 |
int luma=-1; |
767 |
char *option;
|
768 |
char *options[OPTIONS_ARRAY_SIZE];
|
769 |
int i;
|
770 |
int filterNameOk=0; |
771 |
int numOfUnknownOptions=0; |
772 |
int enable=1; //does the user want us to enabled or disabled the filter |
773 |
|
774 |
filterToken= strtok(p, filterDelimiters); |
775 |
if(filterToken == NULL) break; |
776 |
p+= strlen(filterToken) + 1; // p points to next filterToken |
777 |
filterName= strtok(filterToken, optionDelimiters); |
778 |
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); |
779 |
|
780 |
if(*filterName == '-'){ |
781 |
enable=0;
|
782 |
filterName++; |
783 |
} |
784 |
|
785 |
for(;;){ //for all options |
786 |
option= strtok(NULL, optionDelimiters);
|
787 |
if(option == NULL) break; |
788 |
|
789 |
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option); |
790 |
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
791 |
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; |
792 |
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; |
793 |
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; |
794 |
else{
|
795 |
options[numOfUnknownOptions] = option; |
796 |
numOfUnknownOptions++; |
797 |
} |
798 |
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; |
799 |
} |
800 |
options[numOfUnknownOptions] = NULL;
|
801 |
|
802 |
/* replace stuff from the replace Table */
|
803 |
for(i=0; replaceTable[2*i]!=NULL; i++){ |
804 |
if(!strcmp(replaceTable[2*i], filterName)){ |
805 |
int newlen= strlen(replaceTable[2*i + 1]); |
806 |
int plen;
|
807 |
int spaceLeft;
|
808 |
|
809 |
if(p==NULL) p= temp, *p=0; //last filter |
810 |
else p--, *p=','; //not last filter |
811 |
|
812 |
plen= strlen(p); |
813 |
spaceLeft= p - temp + plen; |
814 |
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
|
815 |
ppMode->error++; |
816 |
break;
|
817 |
} |
818 |
memmove(p + newlen, p, plen+1);
|
819 |
memcpy(p, replaceTable[2*i + 1], newlen); |
820 |
filterNameOk=1;
|
821 |
} |
822 |
} |
823 |
|
824 |
for(i=0; filters[i].shortName!=NULL; i++){ |
825 |
if( !strcmp(filters[i].longName, filterName)
|
826 |
|| !strcmp(filters[i].shortName, filterName)){ |
827 |
ppMode->lumMode &= ~filters[i].mask; |
828 |
ppMode->chromMode &= ~filters[i].mask; |
829 |
|
830 |
filterNameOk=1;
|
831 |
if(!enable) break; // user wants to disable it |
832 |
|
833 |
if(q >= filters[i].minLumQuality && luma)
|
834 |
ppMode->lumMode|= filters[i].mask; |
835 |
if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
836 |
if(q >= filters[i].minChromQuality)
|
837 |
ppMode->chromMode|= filters[i].mask; |
838 |
|
839 |
if(filters[i].mask == LEVEL_FIX){
|
840 |
int o;
|
841 |
ppMode->minAllowedY= 16;
|
842 |
ppMode->maxAllowedY= 234;
|
843 |
for(o=0; options[o]!=NULL; o++){ |
844 |
if( !strcmp(options[o],"fullyrange") |
845 |
||!strcmp(options[o],"f")){
|
846 |
ppMode->minAllowedY= 0;
|
847 |
ppMode->maxAllowedY= 255;
|
848 |
numOfUnknownOptions--; |
849 |
} |
850 |
} |
851 |
} |
852 |
else if(filters[i].mask == TEMP_NOISE_FILTER) |
853 |
{ |
854 |
int o;
|
855 |
int numOfNoises=0; |
856 |
|
857 |
for(o=0; options[o]!=NULL; o++){ |
858 |
char *tail;
|
859 |
ppMode->maxTmpNoise[numOfNoises]= |
860 |
strtol(options[o], &tail, 0);
|
861 |
if(tail!=options[o]){
|
862 |
numOfNoises++; |
863 |
numOfUnknownOptions--; |
864 |
if(numOfNoises >= 3) break; |
865 |
} |
866 |
} |
867 |
} |
868 |
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK |
869 |
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){ |
870 |
int o;
|
871 |
|
872 |
for(o=0; options[o]!=NULL && o<2; o++){ |
873 |
char *tail;
|
874 |
int val= strtol(options[o], &tail, 0); |
875 |
if(tail==options[o]) break; |
876 |
|
877 |
numOfUnknownOptions--; |
878 |
if(o==0) ppMode->baseDcDiff= val; |
879 |
else ppMode->flatnessThreshold= val;
|
880 |
} |
881 |
} |
882 |
else if(filters[i].mask == FORCE_QUANT){ |
883 |
int o;
|
884 |
ppMode->forcedQuant= 15;
|
885 |
|
886 |
for(o=0; options[o]!=NULL && o<1; o++){ |
887 |
char *tail;
|
888 |
int val= strtol(options[o], &tail, 0); |
889 |
if(tail==options[o]) break; |
890 |
|
891 |
numOfUnknownOptions--; |
892 |
ppMode->forcedQuant= val; |
893 |
} |
894 |
} |
895 |
} |
896 |
} |
897 |
if(!filterNameOk) ppMode->error++;
|
898 |
ppMode->error += numOfUnknownOptions; |
899 |
} |
900 |
|
901 |
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
902 |
if(ppMode->error){
|
903 |
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); |
904 |
av_free(ppMode); |
905 |
return NULL; |
906 |
} |
907 |
return ppMode;
|
908 |
} |
909 |
|
910 |
/**
 * Release a postprocessing mode.
 *
 * The mode is handed straight to av_free(); nothing else is torn down here.
 *
 * @param mode mode object to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
913 |
|
914 |
/**
 * Replace the buffer behind *p with a new, zero-filled one.
 *
 * The previous buffer is freed first; its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      number of bytes requested from av_mallocz()
 *
 * NOTE(review): the av_mallocz() result is stored unchecked, so *p is
 * presumably NULL after a failed allocation -- callers must cope with that.
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *stale = *p;

    av_free(stale);
    *p = av_mallocz(size);
}
918 |
|
919 |
/**
 * (Re)allocate every work buffer of a context for the given geometry.
 *
 * All buffers are replaced through reallocAlign(), so they come back
 * zero-filled and any previous contents are discarded. The new stride and
 * qpStride are recorded in the context before the buffers are sized.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width used for sizing
 * @param height   picture height used for sizing
 * @param stride   line stride the temporary picture buffers are sized for
 * @param qpStride row stride the QP tables are sized for
 *
 * NOTE(review): none of the allocations is checked; the histogram fill
 * below writes through c->yHistogram right after it has been allocated.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    const int mbWidth  = (width  + 15) >> 4;
    const int mbHeight = (height + 15) >> 4;
    // value every histogram bin starts out with
    const int histSeed = width*height/64*15/256;
    int bin, plane;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst,    8, stride*24);
    reallocAlign((void **)&c->tempSrc,    8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    for (bin = 0; bin < 256; bin++) {
        c->yHistogram[bin] = histSeed;
    }

    for (plane = 0; plane < 3; plane++) {
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[plane], 8,
                     stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[plane], 8,
                     256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
945 |
|
946 |
/**
 * Name callback used by the postprocessing log class.
 *
 * @param ptr logging context; not consulted
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    (void)ptr; // the name does not depend on the context
    return name;
}
949 |
|
950 |
/*
 * Log class for postprocessing contexts; pp_get_context() stores its address
 * in the context's av_class member.
 * The members are given positionally -- presumably class name, name callback
 * and option table, in AVClass declaration order (confirm against AVClass).
 */
static const AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
951 |
|
952 |
pp_context *pp_get_context(int width, int height, int cpuCaps){ |
953 |
PPContext *c= av_malloc(sizeof(PPContext));
|
954 |
int stride= (width+15)&(~15); //assumed / will realloc if needed |
955 |
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
956 |
|
957 |
memset(c, 0, sizeof(PPContext)); |
958 |
c->av_class = &av_codec_context_class; |
959 |
c->cpuCaps= cpuCaps; |
960 |
if(cpuCaps&PP_FORMAT){
|
961 |
c->hChromaSubSample= cpuCaps&0x3;
|
962 |
c->vChromaSubSample= (cpuCaps>>4)&0x3; |
963 |
}else{
|
964 |
c->hChromaSubSample= 1;
|
965 |
c->vChromaSubSample= 1;
|
966 |
} |
967 |
|
968 |
reallocBuffers(c, width, height, stride, qpStride); |
969 |
|
970 |
c->frameNum=-1;
|
971 |
|
972 |
return c;
|
973 |
} |
974 |
|
975 |
void pp_free_context(void *vc){ |
976 |
PPContext *c = (PPContext*)vc; |
977 |
int i;
|
978 |
|
979 |
for(i=0; i<3; i++) av_free(c->tempBlurred[i]); |
980 |
for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]); |
981 |
|
982 |
av_free(c->tempBlocks); |
983 |
av_free(c->yHistogram); |
984 |
av_free(c->tempDst); |
985 |
av_free(c->tempSrc); |
986 |
av_free(c->deintTemp); |
987 |
av_free(c->stdQPTable); |
988 |
av_free(c->nonBQPTable); |
989 |
av_free(c->forcedQPTable); |
990 |
|
991 |
memset(c, 0, sizeof(PPContext)); |
992 |
|
993 |
av_free(c); |
994 |
} |
995 |
|
996 |
void pp_postprocess(const uint8_t * src[3], const int srcStride[3], |
997 |
uint8_t * dst[3], const int dstStride[3], |
998 |
int width, int height, |
999 |
const QP_STORE_T *QP_store, int QPStride, |
1000 |
pp_mode *vm, void *vc, int pict_type) |
1001 |
{ |
1002 |
int mbWidth = (width+15)>>4; |
1003 |
int mbHeight= (height+15)>>4; |
1004 |
PPMode *mode = (PPMode*)vm; |
1005 |
PPContext *c = (PPContext*)vc; |
1006 |
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0])); |
1007 |
int absQPStride = FFABS(QPStride);
|
1008 |
|
1009 |
// c->stride and c->QPStride are always positive
|
1010 |
if(c->stride < minStride || c->qpStride < absQPStride)
|
1011 |
reallocBuffers(c, width, height, |
1012 |
FFMAX(minStride, c->stride), |
1013 |
FFMAX(c->qpStride, absQPStride)); |
1014 |
|
1015 |
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){ |
1016 |
int i;
|
1017 |
QP_store= c->forcedQPTable; |
1018 |
absQPStride = QPStride = 0;
|
1019 |
if(mode->lumMode & FORCE_QUANT)
|
1020 |
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant; |
1021 |
else
|
1022 |
for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1; |
1023 |
} |
1024 |
|
1025 |
if(pict_type & PP_PICT_TYPE_QP2){
|
1026 |
int i;
|
1027 |
const int count= mbHeight * absQPStride; |
1028 |
for(i=0; i<(count>>2); i++){ |
1029 |
((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; |
1030 |
} |
1031 |
for(i<<=2; i<count; i++){ |
1032 |
c->stdQPTable[i] = QP_store[i]>>1;
|
1033 |
} |
1034 |
QP_store= c->stdQPTable; |
1035 |
QPStride= absQPStride; |
1036 |
} |
1037 |
|
1038 |
if(0){ |
1039 |
int x,y;
|
1040 |
for(y=0; y<mbHeight; y++){ |
1041 |
for(x=0; x<mbWidth; x++){ |
1042 |
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
|
1043 |
} |
1044 |
av_log(c, AV_LOG_INFO, "\n");
|
1045 |
} |
1046 |
av_log(c, AV_LOG_INFO, "\n");
|
1047 |
} |
1048 |
|
1049 |
if((pict_type&7)!=3){ |
1050 |
if (QPStride >= 0){ |
1051 |
int i;
|
1052 |
const int count= mbHeight * QPStride; |
1053 |
for(i=0; i<(count>>2); i++){ |
1054 |
((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F; |
1055 |
} |
1056 |
for(i<<=2; i<count; i++){ |
1057 |
c->nonBQPTable[i] = QP_store[i] & 0x3F;
|
1058 |
} |
1059 |
} else {
|
1060 |
int i,j;
|
1061 |
for(i=0; i<mbHeight; i++) { |
1062 |
for(j=0; j<absQPStride; j++) { |
1063 |
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
|
1064 |
} |
1065 |
} |
1066 |
} |
1067 |
} |
1068 |
|
1069 |
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
|
1070 |
mode->lumMode, mode->chromMode); |
1071 |
|
1072 |
postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
1073 |
width, height, QP_store, QPStride, 0, mode, c);
|
1074 |
|
1075 |
width = (width )>>c->hChromaSubSample; |
1076 |
height = (height)>>c->vChromaSubSample; |
1077 |
|
1078 |
if(mode->chromMode){
|
1079 |
postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
1080 |
width, height, QP_store, QPStride, 1, mode, c);
|
1081 |
postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
1082 |
width, height, QP_store, QPStride, 2, mode, c);
|
1083 |
} |
1084 |
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){ |
1085 |
linecpy(dst[1], src[1], height, srcStride[1]); |
1086 |
linecpy(dst[2], src[2], height, srcStride[2]); |
1087 |
}else{
|
1088 |
int y;
|
1089 |
for(y=0; y<height; y++){ |
1090 |
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
1091 |
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
1092 |
} |
1093 |
} |
1094 |
} |
1095 |
|