Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 6ab6c7c3

History | View | Annotate | Download (44.4 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3

4
    AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5

6
    This program is free software; you can redistribute it and/or modify
7
    it under the terms of the GNU General Public License as published by
8
    the Free Software Foundation; either version 2 of the License, or
9
    (at your option) any later version.
10

11
    This program is distributed in the hope that it will be useful,
12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
    GNU General Public License for more details.
15

16
    You should have received a copy of the GNU General Public License
17
    along with this program; if not, write to the Free Software
18
    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
*/
20

    
21
/**
22
 * @file postprocess.c
23
 * postprocessing.
24
 */
25

    
26
/*
27
                        C       MMX     MMX2    3DNow   AltiVec
28
isVertDC                Ec      Ec                      Ec
29
isVertMinMaxOk          Ec      Ec                      Ec
30
doVertLowPass           E               e       e       Ec
31
doVertDefFilter         Ec      Ec      e       e       Ec
32
isHorizDC               Ec      Ec                      Ec
33
isHorizMinMaxOk         a       E                       Ec
34
doHorizLowPass          E               e       e       Ec
35
doHorizDefFilter        Ec      Ec      e       e       Ec
36
do_a_deblock            Ec      E       Ec      E
37
deRing                  E               e       e*      Ecp
38
Vertical RKAlgo1        E               a       a
39
Horizontal RKAlgo1                      a       a
40
Vertical X1#            a               E       E
41
Horizontal X1#          a               E       E
42
LinIpolDeinterlace      e               E       E*
43
CubicIpolDeinterlace    a               e       e*
44
LinBlendDeinterlace     e               E       E*
45
MedianDeinterlace#      E       Ec      Ec
46
TempDeNoiser#           E               e       e       Ec
47

48
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
49
# more or less self-invented filters, so the exactness isn't too meaningful
50
E = Exact implementation
51
e = almost exact implementation (slightly different rounding, ...)
52
a = alternative / approximate impl
53
c = checked against the other implementations (-vo md5)
54
p = partially optimized, still some work to do
55
*/
56

    
57
/*
58
TODO:
59
reduce the time wasted on the mem transfer
60
unroll stuff if instructions depend too much on the prior one
61
move YScale thing to the end instead of fixing QP
62
write a faster and higher quality deblocking filter :)
63
make the mainloop more flexible (variable number of blocks at once
64
        (the if/else stuff per block is slowing things down)
65
compare the quality & speed of all filters
66
split this huge file
67
optimize c versions
68
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
69
...
70
*/
71

    
72
//Changelog: use the Subversion log
73

    
74
#include "config.h"
75
#include "avutil.h"
76
#include <inttypes.h>
77
#include <stdio.h>
78
#include <stdlib.h>
79
#include <string.h>
80
#ifdef HAVE_MALLOC_H
81
#include <malloc.h>
82
#endif
83
//#undef HAVE_MMX2
84
//#define HAVE_3DNOW
85
//#undef HAVE_MMX
86
//#undef ARCH_X86
87
//#define DEBUG_BRIGHTNESS
88
#ifdef USE_FASTMEMCPY
89
#include "libvo/fastmemcpy.h"
90
#endif
91
#include "postprocess.h"
92
#include "postprocess_internal.h"
93

    
94
#include "mangle.h" //FIXME should be supressed
95

    
96
#ifdef HAVE_ALTIVEC_H
97
#include <altivec.h>
98
#endif
99

    
100
/*
 * Small arithmetic helpers. These are plain macros, so an argument may be
 * evaluated more than once: never pass an expression with side effects.
 */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
/* Note: there is no zero case, SIGN(0) evaluates to -1. */
#define SIGN(a) ((a) > 0 ? 1 : -1)
104

    
105
/* Compile-time size constants. */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet

/*
 * Attribute wrappers: GCC 3.1 and newer get the real attributes, every other
 * compiler gets an empty attribute_used and a plain inline.
 */
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
#    define attribute_used __attribute__((used))
#    define always_inline __attribute__((always_inline)) inline
#else
#    define attribute_used
#    define always_inline inline
#endif
118

    
119
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * 8-byte aligned 64-bit constants, only built on x86 / x86-64.
 * wNN repeats the 16-bit value 0x00NN in all four words, bNN repeats the byte
 * 0xNN in all eight bytes. attribute_used keeps them in the object file even
 * though no C code in this file references them.
 * NOTE(review): presumably consumed by the inline-assembly templates -- confirm.
 */
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005ULL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004ULL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808ULL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080ULL;
#endif
129

    
130
static uint8_t clip_table[3*256];
131
static uint8_t * const clip_tab= clip_table + 256;
132

    
133
static const int verbose= 0;
134

    
135
static const int attribute_used deringThreshold= 20;
136

    
137

    
138
static struct PPFilter filters[]=
139
{
140
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
141
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
142
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
143
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
144
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
145
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
146
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
147
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
148
        {"dr", "dering",                1, 5, 6, DERING},
149
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
150
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
151
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
152
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
153
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
154
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
155
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
156
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
157
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
158
        {NULL, NULL,0,0,0,0} //End Marker
159
};
160

    
161
/*
 * Flat list of (alias, expansion) string pairs: the entry at an even index is
 * an alias, the entry right after it is the filter string it stands for.
 * A single NULL terminates the list.
 */
static const char *replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
170

    
171

    
172
#if defined(ARCH_X86) || defined(ARCH_X86_64)
/*
 * Thin wrappers that emit one x86 prefetch instruction for the address p.
 * Each function issues the instruction it is named after. __asm__ is used
 * instead of asm so the code also builds in strict ISO C modes.
 */
static inline void prefetchnta(void *p)
{
        __asm__ volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht0(void *p)
{
        __asm__ volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht1(void *p)
{
        __asm__ volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht2(void *p)
{
        __asm__ volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
#endif
201

    
202
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
203

    
204
/**
205
 * Check if the given 8x8 Block is mostly "flat"
206
 */
207
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
208
{
209
        int numEq= 0;
210
        int y;
211
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
212
        const int dcThreshold= dcOffset*2 + 1;
213

    
214
        for(y=0; y<BLOCK_SIZE; y++)
215
        {
216
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
217
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
218
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
219
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
220
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
221
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
222
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
223
                src+= stride;
224
        }
225
        return numEq > c->ppMode.flatnessThreshold;
226
}
227

    
228
/**
229
 * Check if the middle 8x8 Block in the given 8x16 block is flat
230
 */
231
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
232
        int numEq= 0;
233
        int y;
234
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
235
        const int dcThreshold= dcOffset*2 + 1;
236

    
237
        src+= stride*4; // src points to begin of the 8x8 Block
238
        for(y=0; y<BLOCK_SIZE-1; y++)
239
        {
240
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
241
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
242
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
243
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
244
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
245
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
246
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
247
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
248
                src+= stride;
249
        }
250
        return numEq > c->ppMode.flatnessThreshold;
251
}
252

    
253
/**
 * Cheap horizontal range check on 8 rows of src (rows are stride bytes apart).
 *
 * Instead of a real min/max search, one pixel pair is probed per row:
 * columns (0,5), (2,7), (4,1), (6,3) on rows 0..3 and the same again on
 * rows 4..7. (A disabled alternative compared columns 0 and 7 on every row.)
 * @return 0 as soon as a probed pair differs by more than 2*QP, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const uint8_t probe[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        int row;

        for(row=0; row<8; row++)
        {
                const uint8_t *p= probe[row & 3];

                // the unsigned cast folds "-2*QP <= diff <= 2*QP" into one compare
                if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
        return 1;
}
275

    
276
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
277
{
278
#if 1
279
#if 1
280
        int x;
281
        src+= stride*4;
282
        for(x=0; x<BLOCK_SIZE; x+=4)
283
        {
284
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
285
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
286
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
287
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
288
        }
289
#else
290
        int x;
291
        src+= stride*3;
292
        for(x=0; x<BLOCK_SIZE; x++)
293
        {
294
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
295
        }
296
#endif
297
        return 1;
298
#else
299
        int x;
300
        src+= stride*4;
301
        for(x=0; x<BLOCK_SIZE; x++)
302
        {
303
                int min=255;
304
                int max=0;
305
                int y;
306
                for(y=0; y<8; y++){
307
                        int v= src[x + y*stride];
308
                        if(v>max) max=v;
309
                        if(v<min) min=v;
310
                }
311
                if(max-min > 2*QP) return 0;
312
        }
313
        return 1;
314
#endif
315
}
316

    
317
/**
 * Classify a block for horizontal filtering.
 * @return 2 if isHorizDC_C() says the block is not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() passes for c->QP,
 *         0 if it is flat but the min/max check fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
327

    
328
/**
 * Classify a block for vertical filtering.
 * @return 2 if isVertDC_C() says the block is not flat,
 *         1 if it is flat and isVertMinMaxOk_C() passes for c->QP,
 *         0 if it is flat but the min/max check fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
338

    
339
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of BLOCK_SIZE rows (stride bytes apart) only the two pixels at the
 * block edge, dst[3] and dst[4], are modified, and only when the "middle
 * energy" across the edge is below 8*c->QP. The correction d is derived from
 * how much the middle energy exceeds the smaller of the left/right energies
 * and is clamped to the range between 0 and half the edge step.
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int y;
        for(y=0; y<BLOCK_SIZE; y++, dst+= stride)
        {
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int q, leftEnergy, rightEnergy, d;

                if(ABS(middleEnergy) >= 8*c->QP)
                        continue; // edge too strong, leave this row alone

                q= (dst[3] - dst[4])/2;
                leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
                d= MAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= SIGN(-middleEnergy);

                // clamp d to [0, q] (or [q, 0] when q is not positive)
                if(q>0)
                {
                        if(d<0) d= 0;
                        if(d>q) d= q;
                }
                else
                {
                        if(d>0) d= 0;
                        if(d<q) d= q;
                }

                dst[3]-= d;
                dst[4]+= d;
        }
}
375

    
376
/**
377
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
378
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
379
 */
380
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
381
{
382
        int y;
383
        for(y=0; y<BLOCK_SIZE; y++)
384
        {
385
                const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
386
                const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
387

    
388
                int sums[10];
389
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
390
                sums[1] = sums[0] - first  + dst[3];
391
                sums[2] = sums[1] - first  + dst[4];
392
                sums[3] = sums[2] - first  + dst[5];
393
                sums[4] = sums[3] - first  + dst[6];
394
                sums[5] = sums[4] - dst[0] + dst[7];
395
                sums[6] = sums[5] - dst[1] + last;
396
                sums[7] = sums[6] - dst[2] + last;
397
                sums[8] = sums[7] - dst[3] + last;
398
                sums[9] = sums[8] - dst[4] + last;
399

    
400
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
401
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
402
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
403
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
404
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
405
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
406
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
407
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
408

    
409
                dst+= stride;
410
        }
411
}
412

    
413
/**
414
 * Experimental Filter 1 (Horizontal)
415
 * will not damage linear gradients
416
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
417
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
418
 * MMX2 version does correct clipping C version doesnt
419
 * not identical with the vertical one
420
 */
421
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
422
{
423
        int y;
424
        static uint64_t *lut= NULL;
425
        if(lut==NULL)
426
        {
427
                int i;
428
                lut = av_malloc(256*8);
429
                for(i=0; i<256; i++)
430
                {
431
                        int v= i < 128 ? 2*i : 2*(i-256);
432
/*
433
//Simulate 112242211 9-Tap filter
434
                        uint64_t a= (v/16) & 0xFF;
435
                        uint64_t b= (v/8) & 0xFF;
436
                        uint64_t c= (v/4) & 0xFF;
437
                        uint64_t d= (3*v/8) & 0xFF;
438
*/
439
//Simulate piecewise linear interpolation
440
                        uint64_t a= (v/16) & 0xFF;
441
                        uint64_t b= (v*3/16) & 0xFF;
442
                        uint64_t c= (v*5/16) & 0xFF;
443
                        uint64_t d= (7*v/16) & 0xFF;
444
                        uint64_t A= (0x100 - a)&0xFF;
445
                        uint64_t B= (0x100 - b)&0xFF;
446
                        uint64_t C= (0x100 - c)&0xFF;
447
                        uint64_t D= (0x100 - c)&0xFF;
448

    
449
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
450
                                (D<<24) | (C<<16) | (B<<8) | (A);
451
                        //lut[i] = (v<<32) | (v<<24);
452
                }
453
        }
454

    
455
        for(y=0; y<BLOCK_SIZE; y++)
456
        {
457
                int a= src[1] - src[2];
458
                int b= src[3] - src[4];
459
                int c= src[5] - src[6];
460

    
461
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
462

    
463
                if(d < QP)
464
                {
465
                        int v = d * SIGN(-b);
466

    
467
                        src[1] +=v/8;
468
                        src[2] +=v/4;
469
                        src[3] +=3*v/8;
470
                        src[4] -=3*v/8;
471
                        src[5] -=v/4;
472
                        src[6] -=v/8;
473

    
474
                }
475
                src+=stride;
476
        }
477
}
478

    
479
/**
480
 * accurate deblock filter
481
 */
482
static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
483
        int y;
484
        const int QP= c->QP;
485
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
486
        const int dcThreshold= dcOffset*2 + 1;
487
//START_TIMER
488
        src+= step*4; // src points to begin of the 8x8 Block
489
        for(y=0; y<8; y++){
490
                int numEq= 0;
491

    
492
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
493
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
494
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
495
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
496
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
497
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
498
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
499
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
500
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
501
                if(numEq > c->ppMode.flatnessThreshold){
502
                        int min, max, x;
503

    
504
                        if(src[0] > src[step]){
505
                            max= src[0];
506
                            min= src[step];
507
                        }else{
508
                            max= src[step];
509
                            min= src[0];
510
                        }
511
                        for(x=2; x<8; x+=2){
512
                                if(src[x*step] > src[(x+1)*step]){
513
                                        if(src[x    *step] > max) max= src[ x   *step];
514
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
515
                                }else{
516
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
517
                                        if(src[ x   *step] < min) min= src[ x   *step];
518
                                }
519
                        }
520
                        if(max-min < 2*QP){
521
                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
522
                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
523

    
524
                                int sums[10];
525
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
526
                                sums[1] = sums[0] - first       + src[3*step];
527
                                sums[2] = sums[1] - first       + src[4*step];
528
                                sums[3] = sums[2] - first       + src[5*step];
529
                                sums[4] = sums[3] - first       + src[6*step];
530
                                sums[5] = sums[4] - src[0*step] + src[7*step];
531
                                sums[6] = sums[5] - src[1*step] + last;
532
                                sums[7] = sums[6] - src[2*step] + last;
533
                                sums[8] = sums[7] - src[3*step] + last;
534
                                sums[9] = sums[8] - src[4*step] + last;
535

    
536
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
537
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
538
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
539
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
540
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
541
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
542
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
543
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
544
                        }
545
                }else{
546
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
547

    
548
                        if(ABS(middleEnergy) < 8*QP)
549
                        {
550
                                const int q=(src[3*step] - src[4*step])/2;
551
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
552
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
553

    
554
                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
555
                                d= MAX(d, 0);
556

    
557
                                d= (5*d + 32) >> 6;
558
                                d*= SIGN(-middleEnergy);
559

    
560
                                if(q>0)
561
                                {
562
                                        d= d<0 ? 0 : d;
563
                                        d= d>q ? q : d;
564
                                }
565
                                else
566
                                {
567
                                        d= d>0 ? 0 : d;
568
                                        d= d<q ? q : d;
569
                                }
570

    
571
                                src[3*step]-= d;
572
                                src[4*step]+= d;
573
                        }
574
                }
575

    
576
                src += stride;
577
        }
578
/*if(step==16){
579
    STOP_TIMER("step16")
580
}else{
581
    STOP_TIMER("stepX")
582
}*/
583
}
584

    
585
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/*
 * Step 1: decide which variants of postprocess_template.c get compiled.
 * With RUNTIME_CPUDETECT every variant available for the architecture is
 * built; otherwise the choice follows the HAVE_xyz macros.
 */
//Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86) || defined(ARCH_X86_64)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

/*
 * Step 2: include the template once per selected variant. Before each
 * inclusion the HAVE_xyz macros are redefined to describe that variant and
 * RENAME() appends the matching suffix to the generated function names.
 */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
664

    
665
// minor note: the HAVE_xyz macros are messed up after this point, so don't use them
666

    
667
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
668
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
669
{
670
        PPContext *c= (PPContext *)vc;
671
        PPMode *ppMode= (PPMode *)vm;
672
        c->ppMode= *ppMode; //FIXME
673

    
674
        // useing ifs here as they are faster than function pointers allthough the
675
        // difference wouldnt be messureable here but its much better because
676
        // someone might exchange the cpu whithout restarting mplayer ;)
677
#ifdef RUNTIME_CPUDETECT
678
#if defined(ARCH_X86) || defined(ARCH_X86_64)
679
        // ordered per speed fasterst first
680
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
681
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
683
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
685
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686
        else
687
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#else
689
#ifdef ARCH_POWERPC
690
#ifdef HAVE_ALTIVEC
691
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
692
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
693
        else
694
#endif
695
#endif
696
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
697
#endif
698
#else //RUNTIME_CPUDETECT
699
#ifdef HAVE_MMX2
700
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
701
#elif defined (HAVE_3DNOW)
702
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
703
#elif defined (HAVE_MMX)
704
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
705
#elif defined (HAVE_ALTIVEC)
706
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
707
#else
708
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
709
#endif
710
#endif //!RUNTIME_CPUDETECT
711
}
712

    
713
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
714
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
715

    
716
/**
 * Help text for the -pp command line option.
 *
 * Lists the filters with their short and long names, the options each one
 * accepts, and the syntax of a filter string, followed by a few examples.
 */
char *pp_help =
        "Available postprocessing filters:\n"
        "Filters                        Options\n"
        "short  long name       short   long option     Description\n"
        "*      *               a       autoq           CPU power dependent enabler\n"
        "                       c       chrom           chrominance filtering enabled\n"
        "                       y       nochrom         chrominance filtering disabled\n"
        "                       n       noluma          luma filtering disabled\n"
        "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
        "       1. difference factor: default=32, higher -> more deblocking\n"
        "       2. flatness threshold: default=39, lower -> more deblocking\n"
        "                       the h & v deblocking filters share these\n"
        "                       so you can't set different thresholds for h / v\n"
        "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
        "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
        "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
        "h1     x1hdeblock                              experimental h deblock filter 1\n"
        "v1     x1vdeblock                              experimental v deblock filter 1\n"
        "dr     dering                                  deringing filter\n"
        "al     autolevels                              automatic brightness / contrast\n"
        "                       f        fullyrange     stretch luminance to (0..255)\n"
        "lb     linblenddeint                           linear blend deinterlacer\n"
        "li     linipoldeint                            linear interpolating deinterlace\n"
        "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
        "md     mediandeint                             median deinterlacer\n"
        "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
        "l5     lowpass5                                FIR lowpass deinterlacer\n"
        "de     default                                 hb:a,vb:a,dr:a\n"
        "fa     fast                                    h1:a,v1:a,dr:a\n"
        "ac                                             ha:a:128:7,va:a,dr:a\n"
        "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
        "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
        "fq     forceQuant      <quantizer>             force quantizer\n"
        "Usage:\n"
        "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
        "long form example:\n"
        "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
        "short form example:\n"
        "vb:a/hb:a/lb                                   de,-vb\n"
        "more examples:\n"
        "tn:64:128:256\n"
        "\n";
761

    
762
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
763
{
764
        char temp[GET_MODE_BUFFER_SIZE];
765
        char *p= temp;
766
        const char *filterDelimiters= ",/";
767
        const char *optionDelimiters= ":";
768
        struct PPMode *ppMode;
769
        char *filterToken;
770

    
771
        ppMode= av_malloc(sizeof(PPMode));
772

    
773
        ppMode->lumMode= 0;
774
        ppMode->chromMode= 0;
775
        ppMode->maxTmpNoise[0]= 700;
776
        ppMode->maxTmpNoise[1]= 1500;
777
        ppMode->maxTmpNoise[2]= 3000;
778
        ppMode->maxAllowedY= 234;
779
        ppMode->minAllowedY= 16;
780
        ppMode->baseDcDiff= 256/8;
781
        ppMode->flatnessThreshold= 56-16-1;
782
        ppMode->maxClippedThreshold= 0.01;
783
        ppMode->error=0;
784

    
785
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
786

    
787
        if(verbose>1) printf("pp: %s\n", name);
788

    
789
        for(;;){
790
                char *filterName;
791
                int q= 1000000; //PP_QUALITY_MAX;
792
                int chrom=-1;
793
                int luma=-1;
794
                char *option;
795
                char *options[OPTIONS_ARRAY_SIZE];
796
                int i;
797
                int filterNameOk=0;
798
                int numOfUnknownOptions=0;
799
                int enable=1; //does the user want us to enabled or disabled the filter
800

    
801
                filterToken= strtok(p, filterDelimiters);
802
                if(filterToken == NULL) break;
803
                p+= strlen(filterToken) + 1; // p points to next filterToken
804
                filterName= strtok(filterToken, optionDelimiters);
805
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
806

    
807
                if(*filterName == '-')
808
                {
809
                        enable=0;
810
                        filterName++;
811
                }
812

    
813
                for(;;){ //for all options
814
                        option= strtok(NULL, optionDelimiters);
815
                        if(option == NULL) break;
816

    
817
                        if(verbose>1) printf("pp: option: %s\n", option);
818
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
819
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
820
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
821
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
822
                        else
823
                        {
824
                                options[numOfUnknownOptions] = option;
825
                                numOfUnknownOptions++;
826
                        }
827
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
828
                }
829
                options[numOfUnknownOptions] = NULL;
830

    
831
                /* replace stuff from the replace Table */
832
                for(i=0; replaceTable[2*i]!=NULL; i++)
833
                {
834
                        if(!strcmp(replaceTable[2*i], filterName))
835
                        {
836
                                int newlen= strlen(replaceTable[2*i + 1]);
837
                                int plen;
838
                                int spaceLeft;
839

    
840
                                if(p==NULL) p= temp, *p=0;      //last filter
841
                                else p--, *p=',';               //not last filter
842

    
843
                                plen= strlen(p);
844
                                spaceLeft= p - temp + plen;
845
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
846
                                {
847
                                        ppMode->error++;
848
                                        break;
849
                                }
850
                                memmove(p + newlen, p, plen+1);
851
                                memcpy(p, replaceTable[2*i + 1], newlen);
852
                                filterNameOk=1;
853
                        }
854
                }
855

    
856
                for(i=0; filters[i].shortName!=NULL; i++)
857
                {
858
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
859
                        if(   !strcmp(filters[i].longName, filterName)
860
                           || !strcmp(filters[i].shortName, filterName))
861
                        {
862
                                ppMode->lumMode &= ~filters[i].mask;
863
                                ppMode->chromMode &= ~filters[i].mask;
864

    
865
                                filterNameOk=1;
866
                                if(!enable) break; // user wants to disable it
867

    
868
                                if(q >= filters[i].minLumQuality && luma)
869
                                        ppMode->lumMode|= filters[i].mask;
870
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
871
                                        if(q >= filters[i].minChromQuality)
872
                                                ppMode->chromMode|= filters[i].mask;
873

    
874
                                if(filters[i].mask == LEVEL_FIX)
875
                                {
876
                                        int o;
877
                                        ppMode->minAllowedY= 16;
878
                                        ppMode->maxAllowedY= 234;
879
                                        for(o=0; options[o]!=NULL; o++)
880
                                        {
881
                                                if(  !strcmp(options[o],"fullyrange")
882
                                                   ||!strcmp(options[o],"f"))
883
                                                {
884
                                                        ppMode->minAllowedY= 0;
885
                                                        ppMode->maxAllowedY= 255;
886
                                                        numOfUnknownOptions--;
887
                                                }
888
                                        }
889
                                }
890
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
891
                                {
892
                                        int o;
893
                                        int numOfNoises=0;
894

    
895
                                        for(o=0; options[o]!=NULL; o++)
896
                                        {
897
                                                char *tail;
898
                                                ppMode->maxTmpNoise[numOfNoises]=
899
                                                        strtol(options[o], &tail, 0);
900
                                                if(tail!=options[o])
901
                                                {
902
                                                        numOfNoises++;
903
                                                        numOfUnknownOptions--;
904
                                                        if(numOfNoises >= 3) break;
905
                                                }
906
                                        }
907
                                }
908
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
909
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
910
                                {
911
                                        int o;
912

    
913
                                        for(o=0; options[o]!=NULL && o<2; o++)
914
                                        {
915
                                                char *tail;
916
                                                int val= strtol(options[o], &tail, 0);
917
                                                if(tail==options[o]) break;
918

    
919
                                                numOfUnknownOptions--;
920
                                                if(o==0) ppMode->baseDcDiff= val;
921
                                                else ppMode->flatnessThreshold= val;
922
                                        }
923
                                }
924
                                else if(filters[i].mask == FORCE_QUANT)
925
                                {
926
                                        int o;
927
                                        ppMode->forcedQuant= 15;
928

    
929
                                        for(o=0; options[o]!=NULL && o<1; o++)
930
                                        {
931
                                                char *tail;
932
                                                int val= strtol(options[o], &tail, 0);
933
                                                if(tail==options[o]) break;
934

    
935
                                                numOfUnknownOptions--;
936
                                                ppMode->forcedQuant= val;
937
                                        }
938
                                }
939
                        }
940
                }
941
                if(!filterNameOk) ppMode->error++;
942
                ppMode->error += numOfUnknownOptions;
943
        }
944

    
945
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
946
        if(ppMode->error)
947
        {
948
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
949
                av_free(ppMode);
950
                return NULL;
951
        }
952
        return ppMode;
953
}
954

    
955
/**
 * Releases a mode; the pointer is handed straight to av_free().
 */
void pp_free_mode(pp_mode_t *mode)
{
        av_free(mode);
}
958

    
959
/**
 * Replaces the buffer stored in *p with a new zero-filled one.
 *
 * @param p         address of the pointer holding the buffer; the buffer it
 *                  currently points to is freed and the slot is overwritten
 *                  with the new allocation
 * @param alignment unused; the allocation is delegated to av_mallocz()
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size){
        /* free the old buffer (*p), not the slot that stores its address */
        av_free(*p);
        *p= av_mallocz(size);
}
963

    
964
/**
 * (Re)allocates every work buffer of a context for the given geometry and
 * records the stride and QP stride the buffers were sized for.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width, used for the macroblock count, the
 *                 histogram seed and the deinterlacing scratch line
 * @param height   picture height
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size of the quantizer tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        const int mbWidth = (width+15)>>4;
        const int mbHeight= (height+15)>>4;
        const int histogramSeed= width*height/64*15/256;
        const int qpTableSize= qpStride*mbHeight*sizeof(QP_STORE_T);
        int bin;
        int plane;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        /* every histogram bin starts from the same value */
        for(bin=0; bin<256; bin++)
        {
                c->yHistogram[bin]= histogramSeed;
        }

        for(plane=0; plane<3; plane++)
        {
                /* the extra 17*1024 bytes are slack so that reads and writes
                   slightly past the end need no special handling */
                reallocAlign((void **)&c->tempBlured[plane], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpTableSize);
        reallocAlign((void **)&c->stdQPTable, 8, qpTableSize);
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
991

    
992
static void global_init(void){
993
        int i;
994
        memset(clip_table, 0, 256);
995
        for(i=256; i<512; i++)
996
                clip_table[i]= i;
997
        memset(clip_table+512, 0, 256);
998
}
999

    
1000
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1001
        PPContext *c= av_malloc(sizeof(PPContext));
1002
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
1003
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
1004

    
1005
        global_init();
1006

    
1007
        memset(c, 0, sizeof(PPContext));
1008
        c->cpuCaps= cpuCaps;
1009
        if(cpuCaps&PP_FORMAT){
1010
                c->hChromaSubSample= cpuCaps&0x3;
1011
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1012
        }else{
1013
                c->hChromaSubSample= 1;
1014
                c->vChromaSubSample= 1;
1015
        }
1016

    
1017
        reallocBuffers(c, width, height, stride, qpStride);
1018

    
1019
        c->frameNum=-1;
1020

    
1021
        return c;
1022
}
1023

    
1024
void pp_free_context(void *vc){
1025
        PPContext *c = (PPContext*)vc;
1026
        int i;
1027

    
1028
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1029
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1030

    
1031
        av_free(c->tempBlocks);
1032
        av_free(c->yHistogram);
1033
        av_free(c->tempDst);
1034
        av_free(c->tempSrc);
1035
        av_free(c->deintTemp);
1036
        av_free(c->stdQPTable);
1037
        av_free(c->nonBQPTable);
1038
        av_free(c->forcedQPTable);
1039

    
1040
        memset(c, 0, sizeof(PPContext));
1041

    
1042
        av_free(c);
1043
}
1044

    
1045
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1046
                 uint8_t * dst[3], int dstStride[3],
1047
                 int width, int height,
1048
                 QP_STORE_T *QP_store,  int QPStride,
1049
                 pp_mode_t *vm,  void *vc, int pict_type)
1050
{
1051
        int mbWidth = (width+15)>>4;
1052
        int mbHeight= (height+15)>>4;
1053
        PPMode *mode = (PPMode*)vm;
1054
        PPContext *c = (PPContext*)vc;
1055
        int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1056
        int absQPStride = ABS(QPStride);
1057

    
1058
        // c->stride and c->QPStride are always positive
1059
        if(c->stride < minStride || c->qpStride < absQPStride)
1060
                reallocBuffers(c, width, height,
1061
                                MAX(minStride, c->stride),
1062
                                MAX(c->qpStride, absQPStride));
1063

    
1064
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1065
        {
1066
                int i;
1067
                QP_store= c->forcedQPTable;
1068
                absQPStride = QPStride = 0;
1069
                if(mode->lumMode & FORCE_QUANT)
1070
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1071
                else
1072
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1073
        }
1074
//printf("pict_type:%d\n", pict_type);
1075

    
1076
        if(pict_type & PP_PICT_TYPE_QP2){
1077
                int i;
1078
                const int count= mbHeight * absQPStride;
1079
                for(i=0; i<(count>>2); i++){
1080
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1081
                }
1082
                for(i<<=2; i<count; i++){
1083
                        c->stdQPTable[i] = QP_store[i]>>1;
1084
                }
1085
                QP_store= c->stdQPTable;
1086
                QPStride= absQPStride;
1087
        }
1088

    
1089
if(0){
1090
int x,y;
1091
for(y=0; y<mbHeight; y++){
1092
        for(x=0; x<mbWidth; x++){
1093
                printf("%2d ", QP_store[x + y*QPStride]);
1094
        }
1095
        printf("\n");
1096
}
1097
        printf("\n");
1098
}
1099

    
1100
        if((pict_type&7)!=3)
1101
        {
1102
                if (QPStride >= 0) {
1103
                        int i;
1104
                        const int count= mbHeight * QPStride;
1105
                        for(i=0; i<(count>>2); i++){
1106
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1107
                        }
1108
                        for(i<<=2; i<count; i++){
1109
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1110
                        }
1111
                } else {
1112
                        int i,j;
1113
                        for(i=0; i<mbHeight; i++) {
1114
                                    for(j=0; j<absQPStride; j++) {
1115
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1116
                                }
1117
                        }
1118
                }
1119
        }
1120

    
1121
        if(verbose>2)
1122
        {
1123
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1124
        }
1125

    
1126
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1127
                width, height, QP_store, QPStride, 0, mode, c);
1128

    
1129
        width  = (width )>>c->hChromaSubSample;
1130
        height = (height)>>c->vChromaSubSample;
1131

    
1132
        if(mode->chromMode)
1133
        {
1134
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1135
                        width, height, QP_store, QPStride, 1, mode, c);
1136
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1137
                        width, height, QP_store, QPStride, 2, mode, c);
1138
        }
1139
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1140
        {
1141
                linecpy(dst[1], src[1], height, srcStride[1]);
1142
                linecpy(dst[2], src[2], height, srcStride[2]);
1143
        }
1144
        else
1145
        {
1146
                int y;
1147
                for(y=0; y<height; y++)
1148
                {
1149
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1150
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1151
                }
1152
        }
1153
}
1154