ffmpeg / libpostproc / postprocess.c @ 849f1035
History | View | Annotate | Download (44.1 KB)
1 |
/*
|
---|---|
2 |
* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
|
3 |
*
|
4 |
* AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
|
5 |
*
|
6 |
* This file is part of FFmpeg.
|
7 |
*
|
8 |
* FFmpeg is free software; you can redistribute it and/or modify
|
9 |
* it under the terms of the GNU General Public License as published by
|
10 |
* the Free Software Foundation; either version 2 of the License, or
|
11 |
* (at your option) any later version.
|
12 |
*
|
13 |
* FFmpeg is distributed in the hope that it will be useful,
|
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16 |
* GNU General Public License for more details.
|
17 |
*
|
18 |
* You should have received a copy of the GNU General Public License
|
19 |
* along with FFmpeg; if not, write to the Free Software
|
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
21 |
*/
|
22 |
|
23 |
/**
|
24 |
* @file postprocess.c
|
25 |
* postprocessing.
|
26 |
*/
|
27 |
|
28 |
/*
|
29 |
C MMX MMX2 3DNow AltiVec
|
30 |
isVertDC Ec Ec Ec
|
31 |
isVertMinMaxOk Ec Ec Ec
|
32 |
doVertLowPass E e e Ec
|
33 |
doVertDefFilter Ec Ec e e Ec
|
34 |
isHorizDC Ec Ec Ec
|
35 |
isHorizMinMaxOk a E Ec
|
36 |
doHorizLowPass E e e Ec
|
37 |
doHorizDefFilter Ec Ec e e Ec
|
38 |
do_a_deblock Ec E Ec E
|
39 |
deRing E e e* Ecp
|
40 |
Vertical RKAlgo1 E a a
|
41 |
Horizontal RKAlgo1 a a
|
42 |
Vertical X1# a E E
|
43 |
Horizontal X1# a E E
|
44 |
LinIpolDeinterlace e E E*
|
45 |
CubicIpolDeinterlace a e e*
|
46 |
LinBlendDeinterlace e E E*
|
47 |
MedianDeinterlace# E Ec Ec
|
48 |
TempDeNoiser# E e e Ec
|
49 |
|
50 |
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
|
51 |
# more or less self-invented filters, so the exactness isn't too meaningful
|
52 |
E = Exact implementation
|
53 |
e = almost exact implementation (slightly different rounding,...)
|
54 |
a = alternative / approximate impl
|
55 |
c = checked against the other implementations (-vo md5)
|
56 |
p = partially optimized, still some work to do
|
57 |
*/
|
58 |
|
59 |
/*
|
60 |
TODO:
|
61 |
reduce the time wasted on the mem transfer
|
62 |
unroll stuff if instructions depend too much on the prior one
|
63 |
move YScale thing to the end instead of fixing QP
|
64 |
write a faster and higher quality deblocking filter :)
|
65 |
make the mainloop more flexible (variable number of blocks at once
|
66 |
(the if/else stuff per block is slowing things down)
|
67 |
compare the quality & speed of all filters
|
68 |
split this huge file
|
69 |
optimize c versions
|
70 |
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
|
71 |
...
|
72 |
*/
|
73 |
|
74 |
//Changelog: use the Subversion log
|
75 |
|
76 |
#include "config.h" |
77 |
#include "avutil.h" |
78 |
#include <inttypes.h> |
79 |
#include <stdio.h> |
80 |
#include <stdlib.h> |
81 |
#include <string.h> |
82 |
#ifdef HAVE_MALLOC_H
|
83 |
#include <malloc.h> |
84 |
#endif
|
85 |
//#undef HAVE_MMX2
|
86 |
//#define HAVE_3DNOW
|
87 |
//#undef HAVE_MMX
|
88 |
//#undef ARCH_X86
|
89 |
//#define DEBUG_BRIGHTNESS
|
90 |
#ifdef USE_FASTMEMCPY
|
91 |
#include "libvo/fastmemcpy.h" |
92 |
#endif
|
93 |
#include "postprocess.h" |
94 |
#include "postprocess_internal.h" |
95 |
|
96 |
#include "mangle.h" //FIXME should be supressed |
97 |
|
98 |
#ifdef HAVE_ALTIVEC_H
|
99 |
#include <altivec.h> |
100 |
#endif
|
101 |
|
102 |
#define GET_MODE_BUFFER_SIZE 500 |
103 |
#define OPTIONS_ARRAY_SIZE 10 |
104 |
#define BLOCK_SIZE 8 |
105 |
#define TEMP_STRIDE 8 |
106 |
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
|
107 |
|
108 |
#if defined(ARCH_X86)
|
109 |
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL; |
110 |
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL; |
111 |
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL; |
112 |
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL; |
113 |
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL; |
114 |
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL; |
115 |
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL; |
116 |
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL; |
117 |
#endif
|
118 |
|
119 |
static uint8_t clip_table[3*256]; |
120 |
static uint8_t * const clip_tab= clip_table + 256; |
121 |
|
122 |
static const int attribute_used deringThreshold= 20; |
123 |
|
124 |
|
125 |
static struct PPFilter filters[]= |
126 |
{ |
127 |
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, |
128 |
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, |
129 |
/* {"hr", "rkhdeblock", 1, 1, 3, H_RK1_FILTER},
|
130 |
{"vr", "rkvdeblock", 1, 2, 4, V_RK1_FILTER},*/
|
131 |
{"h1", "x1hdeblock", 1, 1, 3, H_X1_FILTER}, |
132 |
{"v1", "x1vdeblock", 1, 2, 4, V_X1_FILTER}, |
133 |
{"ha", "ahdeblock", 1, 1, 3, H_A_DEBLOCK}, |
134 |
{"va", "avdeblock", 1, 2, 4, V_A_DEBLOCK}, |
135 |
{"dr", "dering", 1, 5, 6, DERING}, |
136 |
{"al", "autolevels", 0, 1, 2, LEVEL_FIX}, |
137 |
{"lb", "linblenddeint", 1, 1, 4, LINEAR_BLEND_DEINT_FILTER}, |
138 |
{"li", "linipoldeint", 1, 1, 4, LINEAR_IPOL_DEINT_FILTER}, |
139 |
{"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER}, |
140 |
{"md", "mediandeint", 1, 1, 4, MEDIAN_DEINT_FILTER}, |
141 |
{"fd", "ffmpegdeint", 1, 1, 4, FFMPEG_DEINT_FILTER}, |
142 |
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, |
143 |
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, |
144 |
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, |
145 |
{NULL, NULL,0,0,0,0} //End Marker |
146 |
}; |
147 |
|
148 |
/**
 * Alias table used when parsing a mode string: entries come in pairs,
 * { alias, expansion }, and the list ends with a single NULL.
 * Neither the strings nor the pointer array are ever modified, so both
 * levels are const.
 */
static const char * const replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
157 |
|
158 |
|
159 |
#if defined(ARCH_X86)
|
160 |
/**
 * Emit a PREFETCHNTA instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
        __asm__ volatile(
                "prefetchnta (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
166 |
|
167 |
/**
 * Emit a PREFETCHT0 instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
        __asm__ volatile(
                "prefetcht0 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
173 |
|
174 |
/**
 * Emit a PREFETCHT1 instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
        __asm__ volatile(
                "prefetcht1 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
180 |
|
181 |
/**
 * Emit a PREFETCHT2 instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
        __asm__ volatile(
                "prefetcht2 (%0)\n\t"
                : /* no outputs */
                : "r" (p)
        );
}
187 |
#endif
|
188 |
|
189 |
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
|
190 |
|
191 |
/**
|
192 |
* Check if the given 8x8 Block is mostly "flat"
|
193 |
*/
|
194 |
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) |
195 |
{ |
196 |
int numEq= 0; |
197 |
int y;
|
198 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
199 |
const int dcThreshold= dcOffset*2 + 1; |
200 |
|
201 |
for(y=0; y<BLOCK_SIZE; y++) |
202 |
{ |
203 |
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; |
204 |
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; |
205 |
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; |
206 |
if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++; |
207 |
if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++; |
208 |
if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++; |
209 |
if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++; |
210 |
src+= stride; |
211 |
} |
212 |
return numEq > c->ppMode.flatnessThreshold;
|
213 |
} |
214 |
|
215 |
/**
|
216 |
* Check if the middle 8x8 Block in the given 8x16 block is flat
|
217 |
*/
|
218 |
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ |
219 |
int numEq= 0; |
220 |
int y;
|
221 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
222 |
const int dcThreshold= dcOffset*2 + 1; |
223 |
|
224 |
src+= stride*4; // src points to begin of the 8x8 Block |
225 |
for(y=0; y<BLOCK_SIZE-1; y++) |
226 |
{ |
227 |
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; |
228 |
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; |
229 |
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; |
230 |
if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++; |
231 |
if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++; |
232 |
if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++; |
233 |
if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++; |
234 |
if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++; |
235 |
src+= stride; |
236 |
} |
237 |
return numEq > c->ppMode.flatnessThreshold;
|
238 |
} |
239 |
|
240 |
/**
 * Cheap horizontal range check over 8 rows.
 *
 * One pixel pair is sampled per row; the sampled columns repeat every 4 rows:
 * (0,5), (2,7), (4,1), (6,3). The check fails as soon as one sampled
 * difference a-b lies outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the tolerance
 * @return 1 if all sampled differences are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const uint8_t firstCol[4]=  {0, 2, 4, 6};
        static const uint8_t secondCol[4]= {5, 7, 1, 3};
        const int bias= 2*QP;
        const int limit= 4*QP;
        int row;

        for(row=0; row<8; row++, src+= stride)
        {
                const int k= row & 3;

                // unsigned compare folds the lower and the upper bound into one test
                if((unsigned)(src[firstCol[k]] - src[secondCol[k]] + bias) > limit)
                        return 0;
        }
        return 1;
}
262 |
|
263 |
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) |
264 |
{ |
265 |
#if 1 |
266 |
#if 1 |
267 |
int x;
|
268 |
src+= stride*4;
|
269 |
for(x=0; x<BLOCK_SIZE; x+=4) |
270 |
{ |
271 |
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; |
272 |
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; |
273 |
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; |
274 |
if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0; |
275 |
} |
276 |
#else
|
277 |
int x;
|
278 |
src+= stride*3;
|
279 |
for(x=0; x<BLOCK_SIZE; x++) |
280 |
{ |
281 |
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; |
282 |
} |
283 |
#endif
|
284 |
return 1; |
285 |
#else
|
286 |
int x;
|
287 |
src+= stride*4;
|
288 |
for(x=0; x<BLOCK_SIZE; x++) |
289 |
{ |
290 |
int min=255; |
291 |
int max=0; |
292 |
int y;
|
293 |
for(y=0; y<8; y++){ |
294 |
int v= src[x + y*stride];
|
295 |
if(v>max) max=v;
|
296 |
if(v<min) min=v;
|
297 |
} |
298 |
if(max-min > 2*QP) return 0; |
299 |
} |
300 |
return 1; |
301 |
#endif
|
302 |
} |
303 |
|
304 |
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it,
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
314 |
|
315 |
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
325 |
|
326 |
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of BLOCK_SIZE rows an "energy" is computed across the edge
 * between dst[3] and dst[4]. If its magnitude is below 8*QP, a correction d
 * is derived from the middle energy and the smaller of the two neighbouring
 * energies, limited to the range between 0 and (dst[3]-dst[4])/2, and then
 * subtracted from dst[3] and added to dst[4].
 *
 * @param dst    first pixel of the first row, modified in place
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int row;

        for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
        {
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int leftEnergy, rightEnergy;
                int q, d;

                if(FFABS(middleEnergy) >= 8*c->QP)
                        continue; // edge too strong, leave this row untouched

                q= (dst[3] - dst[4])/2;
                leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                // never correct by more than half the step, nor in the wrong direction
                if(q>0)
                        d= FFMIN(FFMAX(d, 0), q);
                else
                        d= FFMAX(FFMIN(d, 0), q);

                dst[3]-= d;
                dst[4]+= d;
        }
}
362 |
|
363 |
/**
|
364 |
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
|
365 |
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
|
366 |
*/
|
367 |
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) |
368 |
{ |
369 |
int y;
|
370 |
for(y=0; y<BLOCK_SIZE; y++) |
371 |
{ |
372 |
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; |
373 |
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; |
374 |
|
375 |
int sums[10]; |
376 |
sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4; |
377 |
sums[1] = sums[0] - first + dst[3]; |
378 |
sums[2] = sums[1] - first + dst[4]; |
379 |
sums[3] = sums[2] - first + dst[5]; |
380 |
sums[4] = sums[3] - first + dst[6]; |
381 |
sums[5] = sums[4] - dst[0] + dst[7]; |
382 |
sums[6] = sums[5] - dst[1] + last; |
383 |
sums[7] = sums[6] - dst[2] + last; |
384 |
sums[8] = sums[7] - dst[3] + last; |
385 |
sums[9] = sums[8] - dst[4] + last; |
386 |
|
387 |
dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4; |
388 |
dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4; |
389 |
dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4; |
390 |
dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4; |
391 |
dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4; |
392 |
dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4; |
393 |
dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4; |
394 |
dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4; |
395 |
|
396 |
dst+= stride; |
397 |
} |
398 |
} |
399 |
|
400 |
/**
|
401 |
* Experimental Filter 1 (Horizontal)
|
402 |
* will not damage linear gradients
|
403 |
* Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
|
404 |
* can only smooth blocks at the expected locations (it cant smooth them if they did move)
|
405 |
* MMX2 version does correct clipping C version doesnt
|
406 |
* not identical with the vertical one
|
407 |
*/
|
408 |
static inline void horizX1Filter(uint8_t *src, int stride, int QP) |
409 |
{ |
410 |
int y;
|
411 |
static uint64_t *lut= NULL; |
412 |
if(lut==NULL) |
413 |
{ |
414 |
int i;
|
415 |
lut = av_malloc(256*8); |
416 |
for(i=0; i<256; i++) |
417 |
{ |
418 |
int v= i < 128 ? 2*i : 2*(i-256); |
419 |
/*
|
420 |
//Simulate 112242211 9-Tap filter
|
421 |
uint64_t a= (v/16) & 0xFF;
|
422 |
uint64_t b= (v/8) & 0xFF;
|
423 |
uint64_t c= (v/4) & 0xFF;
|
424 |
uint64_t d= (3*v/8) & 0xFF;
|
425 |
*/
|
426 |
//Simulate piecewise linear interpolation
|
427 |
uint64_t a= (v/16) & 0xFF; |
428 |
uint64_t b= (v*3/16) & 0xFF; |
429 |
uint64_t c= (v*5/16) & 0xFF; |
430 |
uint64_t d= (7*v/16) & 0xFF; |
431 |
uint64_t A= (0x100 - a)&0xFF; |
432 |
uint64_t B= (0x100 - b)&0xFF; |
433 |
uint64_t C= (0x100 - c)&0xFF; |
434 |
uint64_t D= (0x100 - c)&0xFF; |
435 |
|
436 |
lut[i] = (a<<56) | (b<<48) | (c<<40) | (d<<32) | |
437 |
(D<<24) | (C<<16) | (B<<8) | (A); |
438 |
//lut[i] = (v<<32) | (v<<24);
|
439 |
} |
440 |
} |
441 |
|
442 |
for(y=0; y<BLOCK_SIZE; y++) |
443 |
{ |
444 |
int a= src[1] - src[2]; |
445 |
int b= src[3] - src[4]; |
446 |
int c= src[5] - src[6]; |
447 |
|
448 |
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); |
449 |
|
450 |
if(d < QP)
|
451 |
{ |
452 |
int v = d * FFSIGN(-b);
|
453 |
|
454 |
src[1] +=v/8; |
455 |
src[2] +=v/4; |
456 |
src[3] +=3*v/8; |
457 |
src[4] -=3*v/8; |
458 |
src[5] -=v/4; |
459 |
src[6] -=v/8; |
460 |
|
461 |
} |
462 |
src+=stride; |
463 |
} |
464 |
} |
465 |
|
466 |
/**
|
467 |
* accurate deblock filter
|
468 |
*/
|
469 |
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ |
470 |
int y;
|
471 |
const int QP= c->QP; |
472 |
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; |
473 |
const int dcThreshold= dcOffset*2 + 1; |
474 |
//START_TIMER
|
475 |
src+= step*4; // src points to begin of the 8x8 Block |
476 |
for(y=0; y<8; y++){ |
477 |
int numEq= 0; |
478 |
|
479 |
if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++; |
480 |
if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++; |
481 |
if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++; |
482 |
if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++; |
483 |
if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++; |
484 |
if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++; |
485 |
if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++; |
486 |
if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++; |
487 |
if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++; |
488 |
if(numEq > c->ppMode.flatnessThreshold){
|
489 |
int min, max, x;
|
490 |
|
491 |
if(src[0] > src[step]){ |
492 |
max= src[0];
|
493 |
min= src[step]; |
494 |
}else{
|
495 |
max= src[step]; |
496 |
min= src[0];
|
497 |
} |
498 |
for(x=2; x<8; x+=2){ |
499 |
if(src[x*step] > src[(x+1)*step]){ |
500 |
if(src[x *step] > max) max= src[ x *step];
|
501 |
if(src[(x+1)*step] < min) min= src[(x+1)*step]; |
502 |
}else{
|
503 |
if(src[(x+1)*step] > max) max= src[(x+1)*step]; |
504 |
if(src[ x *step] < min) min= src[ x *step];
|
505 |
} |
506 |
} |
507 |
if(max-min < 2*QP){ |
508 |
const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0]; |
509 |
const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step]; |
510 |
|
511 |
int sums[10]; |
512 |
sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4; |
513 |
sums[1] = sums[0] - first + src[3*step]; |
514 |
sums[2] = sums[1] - first + src[4*step]; |
515 |
sums[3] = sums[2] - first + src[5*step]; |
516 |
sums[4] = sums[3] - first + src[6*step]; |
517 |
sums[5] = sums[4] - src[0*step] + src[7*step]; |
518 |
sums[6] = sums[5] - src[1*step] + last; |
519 |
sums[7] = sums[6] - src[2*step] + last; |
520 |
sums[8] = sums[7] - src[3*step] + last; |
521 |
sums[9] = sums[8] - src[4*step] + last; |
522 |
|
523 |
src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4; |
524 |
src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4; |
525 |
src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4; |
526 |
src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4; |
527 |
src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4; |
528 |
src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4; |
529 |
src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4; |
530 |
src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4; |
531 |
} |
532 |
}else{
|
533 |
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); |
534 |
|
535 |
if(FFABS(middleEnergy) < 8*QP) |
536 |
{ |
537 |
const int q=(src[3*step] - src[4*step])/2; |
538 |
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); |
539 |
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); |
540 |
|
541 |
int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
|
542 |
d= FFMAX(d, 0);
|
543 |
|
544 |
d= (5*d + 32) >> 6; |
545 |
d*= FFSIGN(-middleEnergy); |
546 |
|
547 |
if(q>0) |
548 |
{ |
549 |
d= d<0 ? 0 : d; |
550 |
d= d>q ? q : d; |
551 |
} |
552 |
else
|
553 |
{ |
554 |
d= d>0 ? 0 : d; |
555 |
d= d<q ? q : d; |
556 |
} |
557 |
|
558 |
src[3*step]-= d;
|
559 |
src[4*step]+= d;
|
560 |
} |
561 |
} |
562 |
|
563 |
src += stride; |
564 |
} |
565 |
/*if(step==16){
|
566 |
STOP_TIMER("step16")
|
567 |
}else{
|
568 |
STOP_TIMER("stepX")
|
569 |
}*/
|
570 |
} |
571 |
|
572 |
//Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
|
573 |
//Plain C versions
|
574 |
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
|
575 |
#define COMPILE_C
|
576 |
#endif
|
577 |
|
578 |
#ifdef ARCH_POWERPC
|
579 |
#ifdef HAVE_ALTIVEC
|
580 |
#define COMPILE_ALTIVEC
|
581 |
#endif //HAVE_ALTIVEC |
582 |
#endif //ARCH_POWERPC |
583 |
|
584 |
#if defined(ARCH_X86)
|
585 |
|
586 |
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
|
587 |
#define COMPILE_MMX
|
588 |
#endif
|
589 |
|
590 |
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
|
591 |
#define COMPILE_MMX2
|
592 |
#endif
|
593 |
|
594 |
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
|
595 |
#define COMPILE_3DNOW
|
596 |
#endif
|
597 |
#endif /* defined(ARCH_X86) */ |
598 |
|
599 |
#undef HAVE_MMX
|
600 |
#undef HAVE_MMX2
|
601 |
#undef HAVE_3DNOW
|
602 |
#undef HAVE_ALTIVEC
|
603 |
|
604 |
#ifdef COMPILE_C
|
605 |
#undef HAVE_MMX
|
606 |
#undef HAVE_MMX2
|
607 |
#undef HAVE_3DNOW
|
608 |
#define RENAME(a) a ## _C |
609 |
#include "postprocess_template.c" |
610 |
#endif
|
611 |
|
612 |
#ifdef ARCH_POWERPC
|
613 |
#ifdef COMPILE_ALTIVEC
|
614 |
#undef RENAME
|
615 |
#define HAVE_ALTIVEC
|
616 |
#define RENAME(a) a ## _altivec |
617 |
#include "postprocess_altivec_template.c" |
618 |
#include "postprocess_template.c" |
619 |
#endif
|
620 |
#endif //ARCH_POWERPC |
621 |
|
622 |
//MMX versions
|
623 |
#ifdef COMPILE_MMX
|
624 |
#undef RENAME
|
625 |
#define HAVE_MMX
|
626 |
#undef HAVE_MMX2
|
627 |
#undef HAVE_3DNOW
|
628 |
#define RENAME(a) a ## _MMX |
629 |
#include "postprocess_template.c" |
630 |
#endif
|
631 |
|
632 |
//MMX2 versions
|
633 |
#ifdef COMPILE_MMX2
|
634 |
#undef RENAME
|
635 |
#define HAVE_MMX
|
636 |
#define HAVE_MMX2
|
637 |
#undef HAVE_3DNOW
|
638 |
#define RENAME(a) a ## _MMX2 |
639 |
#include "postprocess_template.c" |
640 |
#endif
|
641 |
|
642 |
//3DNOW versions
|
643 |
#ifdef COMPILE_3DNOW
|
644 |
#undef RENAME
|
645 |
#define HAVE_MMX
|
646 |
#undef HAVE_MMX2
|
647 |
#define HAVE_3DNOW
|
648 |
#define RENAME(a) a ## _3DNow |
649 |
#include "postprocess_template.c" |
650 |
#endif
|
651 |
|
652 |
// minor note: the HAVE_xyz macros are messed up after this line, so don't use them
|
653 |
|
654 |
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, |
655 |
QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc) |
656 |
{ |
657 |
PPContext *c= (PPContext *)vc; |
658 |
PPMode *ppMode= (PPMode *)vm; |
659 |
c->ppMode= *ppMode; //FIXME
|
660 |
|
661 |
// useing ifs here as they are faster than function pointers allthough the
|
662 |
// difference wouldnt be messureable here but its much better because
|
663 |
// someone might exchange the cpu whithout restarting mplayer ;)
|
664 |
#ifdef RUNTIME_CPUDETECT
|
665 |
#if defined(ARCH_X86)
|
666 |
// ordered per speed fasterst first
|
667 |
if(c->cpuCaps & PP_CPU_CAPS_MMX2)
|
668 |
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
669 |
else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) |
670 |
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
671 |
else if(c->cpuCaps & PP_CPU_CAPS_MMX) |
672 |
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
673 |
else
|
674 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
675 |
#else
|
676 |
#ifdef ARCH_POWERPC
|
677 |
#ifdef HAVE_ALTIVEC
|
678 |
if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
|
679 |
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
680 |
else
|
681 |
#endif
|
682 |
#endif
|
683 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
684 |
#endif
|
685 |
#else //RUNTIME_CPUDETECT |
686 |
#ifdef HAVE_MMX2
|
687 |
postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
688 |
#elif defined (HAVE_3DNOW)
|
689 |
postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
690 |
#elif defined (HAVE_MMX)
|
691 |
postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
692 |
#elif defined (HAVE_ALTIVEC)
|
693 |
postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
694 |
#else
|
695 |
postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); |
696 |
#endif
|
697 |
#endif //!RUNTIME_CPUDETECT |
698 |
} |
699 |
|
700 |
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
|
701 |
// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
|
702 |
|
703 |
/**
 * -pp command line help.
 * The filter names listed here must match the names the mode parser
 * compares against (case-sensitively): "ahdeblock", "avdeblock" and
 * "forcequant" were previously advertised as "hadeblock", "vadeblock" and
 * "forceQuant", which the parser does not accept.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
"* * a autoq CPU power dependent enabler\n"
" c chrom chrominance filtering enabled\n"
" y nochrom chrominance filtering disabled\n"
" n noluma luma filtering disabled\n"
"hb hdeblock (2 threshold) horizontal deblocking filter\n"
" 1. difference factor: default=32, higher -> more deblocking\n"
" 2. flatness threshold: default=39, lower -> more deblocking\n"
" the h & v deblocking filters share these\n"
" so you can't set different thresholds for h / v\n"
"vb vdeblock (2 threshold) vertical deblocking filter\n"
"ha ahdeblock (2 threshold) horizontal deblocking filter\n"
"va avdeblock (2 threshold) vertical deblocking filter\n"
"h1 x1hdeblock experimental h deblock filter 1\n"
"v1 x1vdeblock experimental v deblock filter 1\n"
"dr dering deringing filter\n"
"al autolevels automatic brightness / contrast\n"
" f fullyrange stretch luminance to (0..255)\n"
"lb linblenddeint linear blend deinterlacer\n"
"li linipoldeint linear interpolating deinterlace\n"
"ci cubicipoldeint cubic interpolating deinterlacer\n"
"md mediandeint median deinterlacer\n"
"fd ffmpegdeint ffmpeg deinterlacer\n"
"l5 lowpass5 FIR lowpass deinterlacer\n"
"de default hb:a,vb:a,dr:a\n"
"fa fast h1:a,v1:a,dr:a\n"
"ac ha:a:128:7,va:a,dr:a\n"
"tn tmpnoise (3 threshold) temporal noise reducer\n"
" 1. <= 2. <= 3. larger -> stronger filtering\n"
"fq forcequant <quantizer> force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
748 |
|
749 |
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality) |
750 |
{ |
751 |
char temp[GET_MODE_BUFFER_SIZE];
|
752 |
char *p= temp;
|
753 |
const char *filterDelimiters= ",/"; |
754 |
const char *optionDelimiters= ":"; |
755 |
struct PPMode *ppMode;
|
756 |
char *filterToken;
|
757 |
|
758 |
ppMode= av_malloc(sizeof(PPMode));
|
759 |
|
760 |
ppMode->lumMode= 0;
|
761 |
ppMode->chromMode= 0;
|
762 |
ppMode->maxTmpNoise[0]= 700; |
763 |
ppMode->maxTmpNoise[1]= 1500; |
764 |
ppMode->maxTmpNoise[2]= 3000; |
765 |
ppMode->maxAllowedY= 234;
|
766 |
ppMode->minAllowedY= 16;
|
767 |
ppMode->baseDcDiff= 256/8; |
768 |
ppMode->flatnessThreshold= 56-16-1; |
769 |
ppMode->maxClippedThreshold= 0.01; |
770 |
ppMode->error=0;
|
771 |
|
772 |
strncpy(temp, name, GET_MODE_BUFFER_SIZE); |
773 |
|
774 |
av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name); |
775 |
|
776 |
for(;;){
|
777 |
char *filterName;
|
778 |
int q= 1000000; //PP_QUALITY_MAX; |
779 |
int chrom=-1; |
780 |
int luma=-1; |
781 |
char *option;
|
782 |
char *options[OPTIONS_ARRAY_SIZE];
|
783 |
int i;
|
784 |
int filterNameOk=0; |
785 |
int numOfUnknownOptions=0; |
786 |
int enable=1; //does the user want us to enabled or disabled the filter |
787 |
|
788 |
filterToken= strtok(p, filterDelimiters); |
789 |
if(filterToken == NULL) break; |
790 |
p+= strlen(filterToken) + 1; // p points to next filterToken |
791 |
filterName= strtok(filterToken, optionDelimiters); |
792 |
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); |
793 |
|
794 |
if(*filterName == '-') |
795 |
{ |
796 |
enable=0;
|
797 |
filterName++; |
798 |
} |
799 |
|
800 |
for(;;){ //for all options |
801 |
option= strtok(NULL, optionDelimiters);
|
802 |
if(option == NULL) break; |
803 |
|
804 |
av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option); |
805 |
if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality; |
806 |
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; |
807 |
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; |
808 |
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; |
809 |
else
|
810 |
{ |
811 |
options[numOfUnknownOptions] = option; |
812 |
numOfUnknownOptions++; |
813 |
} |
814 |
if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break; |
815 |
} |
816 |
options[numOfUnknownOptions] = NULL;
|
817 |
|
818 |
/* replace stuff from the replace Table */
|
819 |
for(i=0; replaceTable[2*i]!=NULL; i++) |
820 |
{ |
821 |
if(!strcmp(replaceTable[2*i], filterName)) |
822 |
{ |
823 |
int newlen= strlen(replaceTable[2*i + 1]); |
824 |
int plen;
|
825 |
int spaceLeft;
|
826 |
|
827 |
if(p==NULL) p= temp, *p=0; //last filter |
828 |
else p--, *p=','; //not last filter |
829 |
|
830 |
plen= strlen(p); |
831 |
spaceLeft= p - temp + plen; |
832 |
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE)
|
833 |
{ |
834 |
ppMode->error++; |
835 |
break;
|
836 |
} |
837 |
memmove(p + newlen, p, plen+1);
|
838 |
memcpy(p, replaceTable[2*i + 1], newlen); |
839 |
filterNameOk=1;
|
840 |
} |
841 |
} |
842 |
|
843 |
for(i=0; filters[i].shortName!=NULL; i++) |
844 |
{ |
845 |
if( !strcmp(filters[i].longName, filterName)
|
846 |
|| !strcmp(filters[i].shortName, filterName)) |
847 |
{ |
848 |
ppMode->lumMode &= ~filters[i].mask; |
849 |
ppMode->chromMode &= ~filters[i].mask; |
850 |
|
851 |
filterNameOk=1;
|
852 |
if(!enable) break; // user wants to disable it |
853 |
|
854 |
if(q >= filters[i].minLumQuality && luma)
|
855 |
ppMode->lumMode|= filters[i].mask; |
856 |
if(chrom==1 || (chrom==-1 && filters[i].chromDefault)) |
857 |
if(q >= filters[i].minChromQuality)
|
858 |
ppMode->chromMode|= filters[i].mask; |
859 |
|
860 |
if(filters[i].mask == LEVEL_FIX)
|
861 |
{ |
862 |
int o;
|
863 |
ppMode->minAllowedY= 16;
|
864 |
ppMode->maxAllowedY= 234;
|
865 |
for(o=0; options[o]!=NULL; o++) |
866 |
{ |
867 |
if( !strcmp(options[o],"fullyrange") |
868 |
||!strcmp(options[o],"f"))
|
869 |
{ |
870 |
ppMode->minAllowedY= 0;
|
871 |
ppMode->maxAllowedY= 255;
|
872 |
numOfUnknownOptions--; |
873 |
} |
874 |
} |
875 |
} |
876 |
else if(filters[i].mask == TEMP_NOISE_FILTER) |
877 |
{ |
878 |
int o;
|
879 |
int numOfNoises=0; |
880 |
|
881 |
for(o=0; options[o]!=NULL; o++) |
882 |
{ |
883 |
char *tail;
|
884 |
ppMode->maxTmpNoise[numOfNoises]= |
885 |
strtol(options[o], &tail, 0);
|
886 |
if(tail!=options[o])
|
887 |
{ |
888 |
numOfNoises++; |
889 |
numOfUnknownOptions--; |
890 |
if(numOfNoises >= 3) break; |
891 |
} |
892 |
} |
893 |
} |
894 |
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK |
895 |
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) |
896 |
{ |
897 |
int o;
|
898 |
|
899 |
for(o=0; options[o]!=NULL && o<2; o++) |
900 |
{ |
901 |
char *tail;
|
902 |
int val= strtol(options[o], &tail, 0); |
903 |
if(tail==options[o]) break; |
904 |
|
905 |
numOfUnknownOptions--; |
906 |
if(o==0) ppMode->baseDcDiff= val; |
907 |
else ppMode->flatnessThreshold= val;
|
908 |
} |
909 |
} |
910 |
else if(filters[i].mask == FORCE_QUANT) |
911 |
{ |
912 |
int o;
|
913 |
ppMode->forcedQuant= 15;
|
914 |
|
915 |
for(o=0; options[o]!=NULL && o<1; o++) |
916 |
{ |
917 |
char *tail;
|
918 |
int val= strtol(options[o], &tail, 0); |
919 |
if(tail==options[o]) break; |
920 |
|
921 |
numOfUnknownOptions--; |
922 |
ppMode->forcedQuant= val; |
923 |
} |
924 |
} |
925 |
} |
926 |
} |
927 |
if(!filterNameOk) ppMode->error++;
|
928 |
ppMode->error += numOfUnknownOptions; |
929 |
} |
930 |
|
931 |
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); |
932 |
if(ppMode->error)
|
933 |
{ |
934 |
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); |
935 |
av_free(ppMode); |
936 |
return NULL; |
937 |
} |
938 |
return ppMode;
|
939 |
} |
940 |
|
941 |
/**
 * Releases a postprocessing mode.
 * The pointer is handed straight to av_free(); no member of the mode is
 * touched before that.
 * @param mode mode to release
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
944 |
|
945 |
/**
 * Replaces the buffer behind *p with a fresh one of the requested size.
 * The old buffer is released with av_free() first, then *p is set to
 * whatever av_mallocz() returns for the new size.
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
949 |
|
950 |
/**
 * (Re)allocates every working buffer of a context and records the strides
 * they were sized for.
 * All buffers are replaced through reallocAlign(), so their previous
 * contents are discarded. The function has no way to report an allocation
 * failure; a failed buffer is simply left NULL.
 * @param c        context whose buffers are replaced
 * @param width    picture width used for sizing
 * @param height   picture height used for sizing
 * @param stride   line size stored in c->stride and used for sizing
 * @param qpStride QP table line size stored in c->qpStride and used for sizing
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    int mbWidth  = (width+15)>>4;
    int mbHeight = (height+15)>>4;
    int i;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    // the histogram allocation may have failed; do not write through NULL
    if (c->yHistogram) {
        // computed in 64 bit so that width*height cannot overflow an int
        const int64_t histInit = (int64_t)width*height/64*15/256;

        for (i=0; i<256; i++)
            c->yHistogram[i] = histInit;
    }

    for (i=0; i<3; i++) {
        //Note: the +17*1024 is just there so I don't have to worry about r/w over the end
        reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
977 |
|
978 |
static void global_init(void){ |
979 |
int i;
|
980 |
memset(clip_table, 0, 256); |
981 |
for(i=256; i<512; i++) |
982 |
clip_table[i]= i; |
983 |
memset(clip_table+512, 0, 256); |
984 |
} |
985 |
|
986 |
/**
 * Name callback referenced by av_codec_context_class.
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    const char *name = "postproc";

    return name;
}
989 |
|
990 |
// class descriptor stored in PPContext.av_class by pp_get_context()
static AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
991 |
|
992 |
pp_context_t *pp_get_context(int width, int height, int cpuCaps){ |
993 |
PPContext *c= av_malloc(sizeof(PPContext));
|
994 |
int stride= (width+15)&(~15); //assumed / will realloc if needed |
995 |
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed |
996 |
|
997 |
global_init(); |
998 |
|
999 |
memset(c, 0, sizeof(PPContext)); |
1000 |
c->av_class = &av_codec_context_class; |
1001 |
c->cpuCaps= cpuCaps; |
1002 |
if(cpuCaps&PP_FORMAT){
|
1003 |
c->hChromaSubSample= cpuCaps&0x3;
|
1004 |
c->vChromaSubSample= (cpuCaps>>4)&0x3; |
1005 |
}else{
|
1006 |
c->hChromaSubSample= 1;
|
1007 |
c->vChromaSubSample= 1;
|
1008 |
} |
1009 |
|
1010 |
reallocBuffers(c, width, height, stride, qpStride); |
1011 |
|
1012 |
c->frameNum=-1;
|
1013 |
|
1014 |
return c;
|
1015 |
} |
1016 |
|
1017 |
void pp_free_context(void *vc){ |
1018 |
PPContext *c = (PPContext*)vc; |
1019 |
int i;
|
1020 |
|
1021 |
for(i=0; i<3; i++) av_free(c->tempBlured[i]); |
1022 |
for(i=0; i<3; i++) av_free(c->tempBluredPast[i]); |
1023 |
|
1024 |
av_free(c->tempBlocks); |
1025 |
av_free(c->yHistogram); |
1026 |
av_free(c->tempDst); |
1027 |
av_free(c->tempSrc); |
1028 |
av_free(c->deintTemp); |
1029 |
av_free(c->stdQPTable); |
1030 |
av_free(c->nonBQPTable); |
1031 |
av_free(c->forcedQPTable); |
1032 |
|
1033 |
memset(c, 0, sizeof(PPContext)); |
1034 |
|
1035 |
av_free(c); |
1036 |
} |
1037 |
|
1038 |
void pp_postprocess(uint8_t * src[3], int srcStride[3], |
1039 |
uint8_t * dst[3], int dstStride[3], |
1040 |
int width, int height, |
1041 |
QP_STORE_T *QP_store, int QPStride,
|
1042 |
pp_mode_t *vm, void *vc, int pict_type) |
1043 |
{ |
1044 |
int mbWidth = (width+15)>>4; |
1045 |
int mbHeight= (height+15)>>4; |
1046 |
PPMode *mode = (PPMode*)vm; |
1047 |
PPContext *c = (PPContext*)vc; |
1048 |
int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0])); |
1049 |
int absQPStride = FFABS(QPStride);
|
1050 |
|
1051 |
// c->stride and c->QPStride are always positive
|
1052 |
if(c->stride < minStride || c->qpStride < absQPStride)
|
1053 |
reallocBuffers(c, width, height, |
1054 |
FFMAX(minStride, c->stride), |
1055 |
FFMAX(c->qpStride, absQPStride)); |
1056 |
|
1057 |
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) |
1058 |
{ |
1059 |
int i;
|
1060 |
QP_store= c->forcedQPTable; |
1061 |
absQPStride = QPStride = 0;
|
1062 |
if(mode->lumMode & FORCE_QUANT)
|
1063 |
for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant; |
1064 |
else
|
1065 |
for(i=0; i<mbWidth; i++) QP_store[i]= 1; |
1066 |
} |
1067 |
|
1068 |
if(pict_type & PP_PICT_TYPE_QP2){
|
1069 |
int i;
|
1070 |
const int count= mbHeight * absQPStride; |
1071 |
for(i=0; i<(count>>2); i++){ |
1072 |
((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F; |
1073 |
} |
1074 |
for(i<<=2; i<count; i++){ |
1075 |
c->stdQPTable[i] = QP_store[i]>>1;
|
1076 |
} |
1077 |
QP_store= c->stdQPTable; |
1078 |
QPStride= absQPStride; |
1079 |
} |
1080 |
|
1081 |
if(0){ |
1082 |
int x,y;
|
1083 |
for(y=0; y<mbHeight; y++){ |
1084 |
for(x=0; x<mbWidth; x++){ |
1085 |
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
|
1086 |
} |
1087 |
av_log(c, AV_LOG_INFO, "\n");
|
1088 |
} |
1089 |
av_log(c, AV_LOG_INFO, "\n");
|
1090 |
} |
1091 |
|
1092 |
if((pict_type&7)!=3) |
1093 |
{ |
1094 |
if (QPStride >= 0) { |
1095 |
int i;
|
1096 |
const int count= mbHeight * QPStride; |
1097 |
for(i=0; i<(count>>2); i++){ |
1098 |
((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
|
1099 |
} |
1100 |
for(i<<=2; i<count; i++){ |
1101 |
c->nonBQPTable[i] = QP_store[i] & 0x3F;
|
1102 |
} |
1103 |
} else {
|
1104 |
int i,j;
|
1105 |
for(i=0; i<mbHeight; i++) { |
1106 |
for(j=0; j<absQPStride; j++) { |
1107 |
c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
|
1108 |
} |
1109 |
} |
1110 |
} |
1111 |
} |
1112 |
|
1113 |
av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
|
1114 |
mode->lumMode, mode->chromMode); |
1115 |
|
1116 |
postProcess(src[0], srcStride[0], dst[0], dstStride[0], |
1117 |
width, height, QP_store, QPStride, 0, mode, c);
|
1118 |
|
1119 |
width = (width )>>c->hChromaSubSample; |
1120 |
height = (height)>>c->vChromaSubSample; |
1121 |
|
1122 |
if(mode->chromMode)
|
1123 |
{ |
1124 |
postProcess(src[1], srcStride[1], dst[1], dstStride[1], |
1125 |
width, height, QP_store, QPStride, 1, mode, c);
|
1126 |
postProcess(src[2], srcStride[2], dst[2], dstStride[2], |
1127 |
width, height, QP_store, QPStride, 2, mode, c);
|
1128 |
} |
1129 |
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) |
1130 |
{ |
1131 |
linecpy(dst[1], src[1], height, srcStride[1]); |
1132 |
linecpy(dst[2], src[2], height, srcStride[2]); |
1133 |
} |
1134 |
else
|
1135 |
{ |
1136 |
int y;
|
1137 |
for(y=0; y<height; y++) |
1138 |
{ |
1139 |
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); |
1140 |
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); |
1141 |
} |
1142 |
} |
1143 |
} |
1144 |
|